version 1.44, 2000/07/05 14:03:39
|
version 1.45, 2000/07/10 23:00:21
|
Line 48 xmlCharEncodingHandlerPtr xmlUTF16LEHand
|
Line 48 xmlCharEncodingHandlerPtr xmlUTF16LEHand
|
xmlCharEncodingHandlerPtr xmlUTF16BEHandler = NULL; |
xmlCharEncodingHandlerPtr xmlUTF16BEHandler = NULL; |
|
|
#ifdef LIBXML_ICONV_ENABLED |
#ifdef LIBXML_ICONV_ENABLED |
#if 0 |
#if 1 |
#define DEBUG_ENCODING /* Define this to get encoding traces */ |
#define DEBUG_ENCODING /* Define this to get encoding traces */ |
#endif |
#endif |
#endif |
#endif |
Line 195 int
|
Line 195 int
|
isolat1ToUTF8(unsigned char* out, int *outlen, |
isolat1ToUTF8(unsigned char* out, int *outlen, |
const unsigned char* in, int *inlen) { |
const unsigned char* in, int *inlen) { |
unsigned char* outstart = out; |
unsigned char* outstart = out; |
|
const unsigned char* base = in; |
const unsigned char* processed = in; |
const unsigned char* processed = in; |
unsigned char* outend = out + *outlen; |
unsigned char* outend = out + *outlen; |
const unsigned char* inend = in + *inlen; |
const unsigned char* inend; |
unsigned char c; |
unsigned int c; |
|
int bits; |
|
|
while (in < inend) { |
inend = in + (*inlen); |
c= *in++; |
while ((in < inend) && (out - outstart + 5 < *outlen)) { |
if (c < 0x80) { |
c= *in++; |
|
|
|
/* assertion: c is a single UCS-4 value */ |
|
if (out >= outend) |
|
break; |
|
if (c < 0x80) { *out++= c; bits= -6; } |
|
else { *out++= ((c >> 6) & 0x1F) | 0xC0; bits= 0; } |
|
|
|
for ( ; bits >= 0; bits-= 6) { |
if (out >= outend) |
if (out >= outend) |
break; |
break; |
*out++ = c; |
*out++= ((c >> bits) & 0x3F) | 0x80; |
} |
|
else { |
|
if (out + 1 >= outend) break; |
|
*out++ = 0xC0 | (c >> 6); |
|
*out++ = 0x80 | (0x3F & c); |
|
} |
} |
processed = in; |
processed = (const unsigned char*) in; |
} |
} |
*outlen = out - outstart; |
*outlen = out - outstart; |
*inlen = processed - in; |
*inlen = processed - base; |
|
|
return(0); |
return(0); |
} |
} |
|
|
Line 239 isolat1ToUTF8(unsigned char* out, int *o
|
Line 243 isolat1ToUTF8(unsigned char* out, int *o
|
int |
int |
UTF8Toisolat1(unsigned char* out, int *outlen, |
UTF8Toisolat1(unsigned char* out, int *outlen, |
const unsigned char* in, int *inlen) { |
const unsigned char* in, int *inlen) { |
unsigned char* outstart = out; |
|
const unsigned char* processed = in; |
const unsigned char* processed = in; |
unsigned char* outend = out + *outlen; |
const unsigned char* outend; |
const unsigned char* inend = in + *inlen; |
const unsigned char* outstart = out; |
unsigned char c; |
const unsigned char* instart = in; |
|
const unsigned char* inend; |
|
unsigned int c, d; |
|
int trailing; |
|
|
|
if (in == NULL) { |
|
/* |
|
* initialization: nothing to do |
|
*/ |
|
*outlen = 0; |
|
*inlen = 0; |
|
return(0); |
|
} |
|
inend = in + (*inlen); |
|
outend = out + (*outlen); |
while (in < inend) { |
while (in < inend) { |
c= *in++; |
d = *in++; |
if (c < 0x80) { |
if (d < 0x80) { c= d; trailing= 0; } |
if (out >= outend) return(-1); |
else if (d < 0xC0) { |
*out++= c; |
/* trailing byte in leading position */ |
} |
*outlen = out - outstart; |
else if (in == inend) { |
*inlen = processed - instart; |
break; |
return(-2); |
|
} else if (d < 0xE0) { c= d & 0x1F; trailing= 1; } |
|
else if (d < 0xF0) { c= d & 0x0F; trailing= 2; } |
|
else if (d < 0xF8) { c= d & 0x07; trailing= 3; } |
|
else { |
|
/* no chance for this in IsoLat1 */ |
|
*outlen = out - outstart; |
|
*inlen = processed - instart; |
|
return(-2); |
} |
} |
else if (((c & 0xFC) == 0xC0) && ((*in & 0xC0) == 0x80)) { |
|
/* a two-byte UTF-8 sequence that can be encoded as ISO Latin-1 */ |
if (inend - in < trailing) { |
*out++= ((c & 0x03) << 6) | (*in++ & 0x3F); |
break; |
|
} |
|
|
|
for ( ; trailing; trailing--) { |
|
if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80)) |
|
break; |
|
c <<= 6; |
|
c |= d & 0x3F; |
} |
} |
else { |
|
|
/* assertion: c is a single UCS-4 value */ |
|
if (c <= 0xFF) { |
|
if (out >= outend) |
|
break; |
|
*out++ = c; |
|
} else { |
|
/* no chance for this in IsoLat1 */ |
*outlen = out - outstart; |
*outlen = out - outstart; |
*inlen = processed - in; |
*inlen = processed - instart; |
return(-2); |
return(-2); |
} |
} |
processed = in; |
processed = in; |
} |
} |
*outlen = out - outstart; |
*outlen = out - outstart; |
*inlen = processed - in; |
*inlen = processed - instart; |
return(0); |
return(0); |
} |
} |
|
|
Line 410 UTF8ToUTF16LE(unsigned char* outb, int *
|
Line 448 UTF8ToUTF16LE(unsigned char* outb, int *
|
if (d < 0x80) { c= d; trailing= 0; } |
if (d < 0x80) { c= d; trailing= 0; } |
else if (d < 0xC0) { |
else if (d < 0xC0) { |
/* trailing byte in leading position */ |
/* trailing byte in leading position */ |
*outlen = out - outstart; |
*outlen = (out - outstart) * 2; |
*inlen = processed - in; |
*inlen = processed - in; |
return(-2); |
return(-2); |
} else if (d < 0xE0) { c= d & 0x1F; trailing= 1; } |
} else if (d < 0xE0) { c= d & 0x1F; trailing= 1; } |
Line 418 UTF8ToUTF16LE(unsigned char* outb, int *
|
Line 456 UTF8ToUTF16LE(unsigned char* outb, int *
|
else if (d < 0xF8) { c= d & 0x07; trailing= 3; } |
else if (d < 0xF8) { c= d & 0x07; trailing= 3; } |
else { |
else { |
/* no chance for this in UTF-16 */ |
/* no chance for this in UTF-16 */ |
*outlen = out - outstart; |
*outlen = (out - outstart) * 2; |
*inlen = processed - in; |
*inlen = processed - in; |
return(-2); |
return(-2); |
} |
} |
Line 973 xmlInitCharEncodingHandlers(void) {
|
Line 1011 xmlInitCharEncodingHandlers(void) {
|
xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8, UTF8ToUTF16LE); |
xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8, UTF8ToUTF16LE); |
xmlUTF16BEHandler = |
xmlUTF16BEHandler = |
xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8, UTF8ToUTF16BE); |
xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8, UTF8ToUTF16BE); |
#if 0 |
#if 1 |
xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, UTF8Toisolat1); |
xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, UTF8Toisolat1); |
#endif |
#endif |
} |
} |