Skip to content

Commit

Permalink
fixed XML-entity decoding error
Browse files Browse the repository at this point in the history
  • Loading branch information
BartJongejan committed Oct 19, 2016
1 parent f78dc1a commit bead852
Show file tree
Hide file tree
Showing 3 changed files with 20 additions and 6 deletions.
6 changes: 6 additions & 0 deletions Changelog
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
-------------------------------
Version 7.15 (19 October 2016)
-------------------------------
Fixed error in the decoding of XML-entities if the decoded string comprised
more than one byte.

-------------------------------
Version 7.14 (13 October 2016)
-------------------------------
Expand Down
4 changes: 2 additions & 2 deletions src/cstlemma.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,8 @@ You should have received a copy of the GNU General Public License
along with CSTLEMMA; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#define CSTLEMMAVERSION "7.14"
#define CSTLEMMADATE "2016.10.13"
#define CSTLEMMAVERSION "7.15"
#define CSTLEMMADATE "2016.10.19"
#define CSTLEMMACOPYRIGHT "2002-2016 Center for Sprogteknologi"

#include "lemmatiser.h"
Expand Down
16 changes: 12 additions & 4 deletions src/wordReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -562,11 +562,19 @@ int wordReader::rawput(bool (wordReader::*fnc)(int kar),int kar)
// Feeds the characters of the NUL-terminated string c, one at a time, to
// rawput() together with the member-function pointer fnc.
// NOTE(review): this text comes from a diff view whose +/- markers were lost.
// The `if(!rawput(...)) return false;` pair looks like the removed version,
// and the commented-out `if` followed by the unconditional rawput() call looks
// like the added version (i.e. the new loop no longer stops when rawput()
// returns 0). Confirm against the repository before reading the lines below
// as a single function body.
int wordReader::nrawput(bool (wordReader::*fnc)(int kar),char * c)
{
while(*c)
if(!rawput(fnc,*c++))
return false;
//if(!rawput(fnc,*c++))
rawput(fnc,*c++);
return true;
}

// Converts the code point w to UTF-8 via UnicodeToUtf8() and appends a
// terminating '\0', so that the result can be consumed as a C string
// (the callers pass it on to nrawput(), which loops until it sees '\0').
//
//   w    code point to encode
//   s    destination buffer
//   len  total size of s in bytes, including room for the terminator
//
// Returns whatever UnicodeToUtf8() returns (the callers treat a non-zero
// value as success), or 0 if len is 0 and nothing can be stored at all.
int myUnicodeToUtf8(int w,char * s,size_t len)
{
    if(len == 0)
        return 0; // not even room for the terminator

    // Offer the encoder one byte less than the buffer holds, so the
    // terminator written below always has a slot of its own.
    // NOTE(review): assumes UnicodeToUtf8() never writes more than the
    // number of bytes it is given - confirm against its definition.
    int writtenlength = UnicodeToUtf8(w,s,len - 1);

    // Guard the write anyway: never store the terminator outside s.
    if(writtenlength >= 0 && static_cast<size_t>(writtenlength) < len)
        s[writtenlength] = '\0';
    return writtenlength;
}

int wordReader::charref(bool (wordReader::*fnc)(int kar),int kar)
{
if(kar == ';')
Expand Down Expand Up @@ -594,7 +602,7 @@ int wordReader::charref(bool (wordReader::*fnc)(int kar),int kar)
N = (buf[1] == 'x') ? strtoul(buf+2,NULL,16) : strtoul(buf+1,NULL,10);
p = buf;
xput = &wordReader::Put;
if(UnicodeToUtf8(N,tmp,sizeof(tmp)))
if(myUnicodeToUtf8(N,tmp,sizeof(tmp)))
{
return nrawput(fnc,tmp);
}
Expand All @@ -619,7 +627,7 @@ int wordReader::charref(bool (wordReader::*fnc)(int kar),int kar)
char tmp[22];
p = buf;
xput = &wordReader::Put;
if(UnicodeToUtf8(pItem->code,tmp,sizeof(tmp)))
if(myUnicodeToUtf8(pItem->code,tmp,sizeof(tmp)))
{
return nrawput(fnc,tmp);
}
Expand Down

0 comments on commit bead852

Please sign in to comment.