version 1.46, 2000/07/11 00:28:58
|
version 1.47, 2000/07/11 09:06:07
|
Line 178 xmlCheckUTF8(const unsigned char *utf)
|
Line 178 xmlCheckUTF8(const unsigned char *utf)
|
} |
} |
|
|
/** |
/** |
|
* asciiToUTF8: |
|
* @out: a pointer to an array of bytes to store the result |
|
* @outlen: the length of @out |
|
* @in: a pointer to an array of ASCII chars |
|
* @inlen: the length of @in |
|
* |
|
* Take a block of ASCII chars in and try to convert it to an UTF-8 |
|
* block of chars out. |
|
* Returns 0 if success, or -1 otherwise |
|
* The value of @inlen after return is the number of octets consumed |
|
* if the return value is not negative, else unpredictable. |
|
* The value of @outlen after return is the number of octets produced. |
|
*/ |
|
int
asciiToUTF8(unsigned char* out, int *outlen,
            const unsigned char* in, int *inlen) {
    /*
     * Copy 7-bit ASCII octets from @in to @out; every ASCII octet is
     * already its own UTF-8 encoding, so one input byte yields exactly
     * one output byte.
     *
     * On return *outlen holds the number of octets written and *inlen
     * the number of octets consumed.  Returns 0 on success (including
     * when the output buffer filled up before all input was consumed),
     * or -1 if an argument is NULL or a non-ASCII octet (>= 0x80) was
     * met; in the latter case the lengths cover the valid prefix.
     */
    unsigned char* outstart = out;
    const unsigned char* base = in;
    unsigned char* outend;
    const unsigned char* inend;

    if ((outlen == NULL) || (inlen == NULL))
        return(-1);
    if (in == NULL) {
        /* nothing to convert: same convention as UTF8Toascii */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    if (out == NULL)
        return(-1);

    outend = out + *outlen;
    inend = in + *inlen;

    /*
     * Only one free output byte is needed per input byte, so the loop
     * runs until either buffer is exhausted; no extra head-room is
     * required in @out.
     */
    while ((in < inend) && (out < outend)) {
        if (*in >= 0x80) {
            /* not ASCII: report what was converted so far and fail */
            *outlen = (int) (out - outstart);
            *inlen = (int) (in - base);
            return(-1);
        }
        *out++ = *in++;
    }

    *outlen = (int) (out - outstart);
    *inlen = (int) (in - base);
    return(0);
}
|
|
|
/** |
|
* UTF8Toascii: |
|
* @out: a pointer to an array of bytes to store the result |
|
* @outlen: the length of @out |
|
* @in: a pointer to an array of UTF-8 chars |
|
* @inlen: the length of @in |
|
* |
|
* Take a block of UTF-8 chars in and try to convert it to an ASCII |
|
* block of chars out. |
|
* |
|
* Returns 0 if success, -2 if the transcoding fails, or -1 otherwise |
|
* The value of @inlen after return is the number of octets consumed |
|
* if the return value is not negative, else unpredictable. |
|
* The value of @outlen after return is the number of octets produced. |
|
*/ |
|
int
UTF8Toascii(unsigned char* out, int *outlen,
            const unsigned char* in, int *inlen) {
    /*
     * Copy the ASCII characters of the UTF-8 block @in to @out.
     *
     * On return *outlen holds the number of octets written and *inlen
     * the number of input octets consumed (always a whole number of
     * characters).  Returns 0 on success, -1 if a required argument is
     * NULL, and -2 if the input contains anything that cannot be
     * represented in ASCII: a multi-byte character, a stray
     * continuation byte, or an invalid lead byte.
     *
     * A multi-byte sequence can never legitimately decode to a value
     * below 0x80 (that would be an overlong encoding), so once all of
     * its bytes are available it is rejected without being decoded.
     * A sequence cut off by the end of @in is left unconsumed and 0 is
     * returned, so the caller can retry with more input.
     */
    const unsigned char* outstart = out;
    const unsigned char* instart = in;
    const unsigned char* outend;
    const unsigned char* inend;
    unsigned int d;
    int trailing;
    int ret = 0;

    if ((outlen == NULL) || (inlen == NULL))
        return(-1);
    if (in == NULL) {
        /*
         * initialization nothing to do
         */
        *outlen = 0;
        *inlen = 0;
        return(0);
    }
    if (out == NULL)
        return(-1);

    inend = in + (*inlen);
    outend = out + (*outlen);

    /* @in is advanced only past characters that were fully converted */
    while (in < inend) {
        d = *in;

        if (d < 0x80) {
            if (out >= outend)
                break;          /* output full: stop, not an error */
            *out++ = (unsigned char) d;
            in++;
            continue;
        }

        if ((d < 0xC0) || (d >= 0xF8)) {
            /* continuation byte in lead position, or invalid lead */
            ret = -2;
            break;
        }

        if (d < 0xE0)
            trailing = 1;
        else if (d < 0xF0)
            trailing = 2;
        else
            trailing = 3;

        if (inend - (in + 1) < trailing)
            break;              /* truncated sequence: wait for more */

        /* complete multi-byte sequence: no chance for this in Ascii */
        ret = -2;
        break;
    }

    *outlen = (int) (out - outstart);
    *inlen = (int) (in - instart);
    return(ret);
}
|
|
|
/** |
* isolat1ToUTF8: |
* isolat1ToUTF8: |
* @out: a pointer to an array of bytes to store the result |
* @out: a pointer to an array of bytes to store the result |
* @outlen: the length of @out |
* @outlen: the length of @out |
Line 233 isolat1ToUTF8(unsigned char* out, int *o
|
Line 367 isolat1ToUTF8(unsigned char* out, int *o
|
* |
* |
* Take a block of UTF-8 chars in and try to convert it to an ISO Latin 1 |
* Take a block of UTF-8 chars in and try to convert it to an ISO Latin 1 |
* block of chars out. |
* block of chars out. |
* TODO: UTF8Toisolat1 need a fallback mechanism ... |
|
* |
* |
* Returns 0 if success, -2 if the transcoding fails, or -1 otherwise |
* Returns 0 if success, -2 if the transcoding fails, or -1 otherwise |
* The value of @inlen after return is the number of octets consumed |
* The value of @inlen after return is the number of octets consumed |
Line 405 UTF16LEToUTF8(unsigned char* out, int *o
|
Line 538 UTF16LEToUTF8(unsigned char* out, int *o
|
* |
* |
* Take a block of UTF-8 chars in and try to convert it to an UTF-16LE |
* Take a block of UTF-8 chars in and try to convert it to an UTF-16LE |
* block of chars out. |
* block of chars out. |
* TODO: UTF8ToUTF16LE need a fallback mechanism ... |
|
* |
* |
* Returns the number of byte written, or -1 by lack of space, or -2 |
* Returns the number of byte written, or -1 by lack of space, or -2 |
* if the transcoding failed. |
* if the transcoding failed. |
Line 616 UTF16BEToUTF8(unsigned char* out, int *o
|
Line 748 UTF16BEToUTF8(unsigned char* out, int *o
|
* |
* |
* Take a block of UTF-8 chars in and try to convert it to an UTF-16BE |
* Take a block of UTF-8 chars in and try to convert it to an UTF-16BE |
* block of chars out. |
* block of chars out. |
* TODO: UTF8ToUTF16BE need a fallback mechanism ... |
|
* |
* |
* Returns the number of byte written, or -1 by lack of space, or -2 |
* Returns the number of byte written, or -1 by lack of space, or -2 |
* if the transcoding failed. |
* if the transcoding failed. |
Line 1011 xmlInitCharEncodingHandlers(void) {
|
Line 1142 xmlInitCharEncodingHandlers(void) {
|
xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8, UTF8ToUTF16LE); |
xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8, UTF8ToUTF16LE); |
xmlUTF16BEHandler = |
xmlUTF16BEHandler = |
xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8, UTF8ToUTF16BE); |
xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8, UTF8ToUTF16BE); |
#if 1 |
|
xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, UTF8Toisolat1); |
xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, UTF8Toisolat1); |
#endif |
xmlNewCharEncodingHandler("ASCII", asciiToUTF8, UTF8Toascii); |
} |
} |
|
|
/** |
/** |