version 1.29, 2000/04/03 18:45:48
|
version 1.30, 2000/04/30 09:10:18
|
Line 34
|
Line 34
|
#ifdef HAVE_STDLIB_H |
#ifdef HAVE_STDLIB_H |
#include <stdlib.h> |
#include <stdlib.h> |
#endif |
#endif |
|
#include <libxml/xmlversion.h> |
|
#ifdef LIBXML_ICONV_ENABLED |
|
#ifdef HAVE_ERRNO_H |
|
#include <errno.h> |
|
#endif |
|
#endif |
#include <libxml/encoding.h> |
#include <libxml/encoding.h> |
#include <libxml/xmlmemory.h> |
#include <libxml/xmlmemory.h> |
|
|
xmlCharEncodingHandlerPtr xmlUTF16LEHandler = NULL; |
xmlCharEncodingHandlerPtr xmlUTF16LEHandler = NULL; |
xmlCharEncodingHandlerPtr xmlUTF16BEHandler = NULL; |
xmlCharEncodingHandlerPtr xmlUTF16BEHandler = NULL; |
|
|
|
#ifdef LIBXML_ICONV_ENABLED |
|
#define DEBUG_ENCODING /* Define this to get encoding traces */ |
|
#endif |
|
|
/* |
/* |
* From rfc2044: encoding of the Unicode values on UTF-8: |
* From rfc2044: encoding of the Unicode values on UTF-8: |
* |
* |
Line 636 xmlParseCharEncoding(const char* name)
|
Line 646 xmlParseCharEncoding(const char* name)
|
if (!strcmp(upper, "ISO-8859-9")) return(XML_CHAR_ENCODING_8859_9); |
if (!strcmp(upper, "ISO-8859-9")) return(XML_CHAR_ENCODING_8859_9); |
|
|
if (!strcmp(upper, "ISO-2022-JP")) return(XML_CHAR_ENCODING_2022_JP); |
if (!strcmp(upper, "ISO-2022-JP")) return(XML_CHAR_ENCODING_2022_JP); |
if (!strcmp(upper, "Shift_JIS")) return(XML_CHAR_ENCODING_SHIFT_JIS); |
if (!strcmp(upper, "SHIFT_JIS")) return(XML_CHAR_ENCODING_SHIFT_JIS); |
if (!strcmp(upper, "EUC-JP")) return(XML_CHAR_ENCODING_EUC_JP); |
if (!strcmp(upper, "EUC-JP")) return(XML_CHAR_ENCODING_EUC_JP); |
|
|
|
#ifdef DEBUG_ENCODING |
|
fprintf(stderr, "Unknown encoding %s\n", name); |
|
#endif |
return(XML_CHAR_ENCODING_ERROR); |
return(XML_CHAR_ENCODING_ERROR); |
} |
} |
|
|
Line 712 xmlNewCharEncodingHandler(const char *na
|
Line 726 xmlNewCharEncodingHandler(const char *na
|
* registers and returns the handler. |
* registers and returns the handler. |
*/ |
*/ |
xmlRegisterCharEncodingHandler(handler); |
xmlRegisterCharEncodingHandler(handler); |
|
#ifdef DEBUG_ENCODING |
|
fprintf(stderr, "Registered encoding handler for %s\n", name); |
|
#endif |
return(handler); |
return(handler); |
} |
} |
|
|
Line 798 xmlRegisterCharEncodingHandler(xmlCharEn
|
Line 815 xmlRegisterCharEncodingHandler(xmlCharEn
|
*/ |
*/ |
xmlCharEncodingHandlerPtr |
xmlCharEncodingHandlerPtr |
xmlGetCharEncodingHandler(xmlCharEncoding enc) { |
xmlGetCharEncodingHandler(xmlCharEncoding enc) { |
|
xmlCharEncodingHandlerPtr handler; |
|
|
if (handlers == NULL) xmlInitCharEncodingHandlers(); |
if (handlers == NULL) xmlInitCharEncodingHandlers(); |
switch (enc) { |
switch (enc) { |
case XML_CHAR_ENCODING_ERROR: |
case XML_CHAR_ENCODING_ERROR: |
Line 811 xmlGetCharEncodingHandler(xmlCharEncodin
|
Line 830 xmlGetCharEncodingHandler(xmlCharEncodin
|
case XML_CHAR_ENCODING_UTF16BE: |
case XML_CHAR_ENCODING_UTF16BE: |
return(xmlUTF16BEHandler); |
return(xmlUTF16BEHandler); |
case XML_CHAR_ENCODING_EBCDIC: |
case XML_CHAR_ENCODING_EBCDIC: |
return(NULL); |
handler = xmlFindCharEncodingHandler("EBCDIC"); |
|
if (handler != NULL) return(handler); |
|
handler = xmlFindCharEncodingHandler("ebcdic"); |
|
if (handler != NULL) return(handler); |
|
break; |
case XML_CHAR_ENCODING_UCS4LE: |
case XML_CHAR_ENCODING_UCS4LE: |
return(NULL); |
handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4"); |
|
if (handler != NULL) return(handler); |
|
handler = xmlFindCharEncodingHandler("UCS-4"); |
|
if (handler != NULL) return(handler); |
|
handler = xmlFindCharEncodingHandler("UCS4"); |
|
if (handler != NULL) return(handler); |
|
break; |
case XML_CHAR_ENCODING_UCS4BE: |
case XML_CHAR_ENCODING_UCS4BE: |
return(NULL); |
handler = xmlFindCharEncodingHandler("UCS4BE"); |
|
if (handler != NULL) return(handler); |
|
break; |
case XML_CHAR_ENCODING_UCS4_2143: |
case XML_CHAR_ENCODING_UCS4_2143: |
return(NULL); |
break; |
case XML_CHAR_ENCODING_UCS4_3412: |
case XML_CHAR_ENCODING_UCS4_3412: |
return(NULL); |
break; |
case XML_CHAR_ENCODING_UCS2: |
case XML_CHAR_ENCODING_UCS2: |
return(NULL); |
handler = xmlFindCharEncodingHandler("ISO-10646-UCS-2"); |
|
if (handler != NULL) return(handler); |
|
handler = xmlFindCharEncodingHandler("UCS-2"); |
|
if (handler != NULL) return(handler); |
|
handler = xmlFindCharEncodingHandler("UCS2"); |
|
if (handler != NULL) return(handler); |
|
break; |
case XML_CHAR_ENCODING_8859_1: |
case XML_CHAR_ENCODING_8859_1: |
return(NULL); |
|
case XML_CHAR_ENCODING_8859_2: |
case XML_CHAR_ENCODING_8859_2: |
return(NULL); |
|
case XML_CHAR_ENCODING_8859_3: |
case XML_CHAR_ENCODING_8859_3: |
return(NULL); |
|
case XML_CHAR_ENCODING_8859_4: |
case XML_CHAR_ENCODING_8859_4: |
return(NULL); |
|
case XML_CHAR_ENCODING_8859_5: |
case XML_CHAR_ENCODING_8859_5: |
return(NULL); |
|
case XML_CHAR_ENCODING_8859_6: |
case XML_CHAR_ENCODING_8859_6: |
return(NULL); |
|
case XML_CHAR_ENCODING_8859_7: |
case XML_CHAR_ENCODING_8859_7: |
return(NULL); |
|
case XML_CHAR_ENCODING_8859_8: |
case XML_CHAR_ENCODING_8859_8: |
return(NULL); |
|
case XML_CHAR_ENCODING_8859_9: |
case XML_CHAR_ENCODING_8859_9: |
return(NULL); |
return(NULL); |
case XML_CHAR_ENCODING_2022_JP: |
case XML_CHAR_ENCODING_2022_JP: |
|
handler = xmlFindCharEncodingHandler("ISO-2022-JP"); |
|
if (handler != NULL) return(handler); |
|
break; |
case XML_CHAR_ENCODING_SHIFT_JIS: |
case XML_CHAR_ENCODING_SHIFT_JIS: |
|
handler = xmlFindCharEncodingHandler("SHIFT-JIS"); |
|
if (handler != NULL) return(handler); |
|
handler = xmlFindCharEncodingHandler("SHIFT_JIS"); |
|
if (handler != NULL) return(handler); |
|
handler = xmlFindCharEncodingHandler("Shift_JIS"); |
|
if (handler != NULL) return(handler); |
|
break; |
case XML_CHAR_ENCODING_EUC_JP: |
case XML_CHAR_ENCODING_EUC_JP: |
return(NULL); |
handler = xmlFindCharEncodingHandler("EUC-JP"); |
|
if (handler != NULL) return(handler); |
|
break; |
|
default: |
|
break; |
} |
} |
|
|
|
#ifdef DEBUG_ENCODING |
|
fprintf(stderr, "No handler found for encoding %d\n", enc); |
|
#endif |
return(NULL); |
return(NULL); |
} |
} |
|
|
Line 858 xmlGetCharEncodingHandler(xmlCharEncodin
|
Line 905 xmlGetCharEncodingHandler(xmlCharEncodin
|
*/ |
*/ |
xmlCharEncodingHandlerPtr |
xmlCharEncodingHandlerPtr |
xmlFindCharEncodingHandler(const char *name) { |
xmlFindCharEncodingHandler(const char *name) { |
char upper[500]; |
#ifdef LIBXML_ICONV_ENABLED |
|
char pseudoname[150]; |
|
iconv_t icv_in, icv_out; |
|
xmlCharEncodingHandlerPtr enc; |
|
#endif /* LIBXML_ICONV_ENABLED */ |
|
char upper[100]; |
int i; |
int i; |
|
|
if (handlers == NULL) xmlInitCharEncodingHandlers(); |
if (handlers == NULL) xmlInitCharEncodingHandlers(); |
if (name == NULL) return(xmlDefaultCharEncodingHandler); |
if (name == NULL) return(xmlDefaultCharEncodingHandler); |
if (name[0] == 0) return(xmlDefaultCharEncodingHandler); |
if (name[0] == 0) return(xmlDefaultCharEncodingHandler); |
|
|
for (i = 0;i < 499;i++) { |
for (i = 0;i < 99;i++) { |
upper[i] = toupper(name[i]); |
upper[i] = toupper(name[i]); |
if (upper[i] == 0) break; |
if (upper[i] == 0) break; |
} |
} |
upper[i] = 0; |
upper[i] = 0; |
|
|
for (i = 0;i < nbCharEncodingHandler; i++) |
for (i = 0;i < nbCharEncodingHandler; i++) |
if (!strcmp(name, handlers[i]->name)) |
if (!strcmp(upper, handlers[i]->name)) { |
|
#ifdef DEBUG_ENCODING |
|
fprintf(stderr, "Found registered handler for encoding %s\n", name); |
|
#endif |
return(handlers[i]); |
return(handlers[i]); |
|
} |
|
|
|
#ifdef LIBXML_ICONV_ENABLED |
|
/* check whether iconv can handle this */ |
|
icv_out = iconv_open("UTF-8", name); |
|
icv_in = iconv_open(name, "UTF-8"); |
|
if ((icv_in != (iconv_t) -1) && (icv_out != (iconv_t) -1)) { |
|
enc = malloc(sizeof(xmlCharEncodingHandler)); |
|
enc->name = xmlMemStrdup(pseudoname); |
|
enc->input = NULL; |
|
enc->output = NULL; |
|
enc->iconv_in = icv_in; |
|
enc->iconv_out = icv_out; |
|
#ifdef DEBUG_ENCODING |
|
fprintf(stderr, "Found iconv handler for encoding %s\n", name); |
|
#endif |
|
return enc; |
|
} else if ((icv_in != (iconv_t) -1) || icv_out != (iconv_t) -1) { |
|
fprintf(stderr, "iconv : problems with filters for '%s'\n", name); |
|
} |
|
#endif /* LIBXML_ICONV_ENABLED */ |
|
#ifdef DEBUG_ENCODING |
|
fprintf(stderr, "No handler found for encoding %s\n", name); |
|
#endif |
return(NULL); |
return(NULL); |
} |
} |
|
|
|
#ifdef LIBXML_ICONV_ENABLED |
|
/**
 * xmlIconvWrapper:
 * @cd:  iconv conversion descriptor
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
 * @in:  a pointer to an array of bytes to convert
 * @inlen:  the length of @in
 *
 * Runs one iconv() call over the given buffers and maps its outcome
 * onto the return codes used by the encoding handlers.
 *
 * Returns 0 if success, or
 *     -1 by lack of space in @out (E2BIG), or
 *     -2 if the transcoding fails because the input holds a sequence
 *        that cannot be converted (EILSEQ), or
 *     -3 if the input ends with an incomplete sequence (EINVAL) or
 *        iconv() failed for any other reason.
 *
 * The value of @inlen after return is the number of octets consumed.
 * The value of @outlen after return is the number of octets produced.
 */
static int
xmlIconvWrapper(iconv_t cd,
                unsigned char *out, int *outlen,
                const unsigned char *in, int *inlen) {
    size_t icv_inlen = *inlen, icv_outlen = *outlen;
    /* POSIX iconv() takes char **, although it does not write to the input */
    char *icv_in = (char *) in;
    char *icv_out = (char *) out;
    size_t ret;
    int err;

    ret = iconv(cd, &icv_in, &icv_inlen, &icv_out, &icv_outlen);
    /* save errno before anything else gets a chance to overwrite it */
    err = errno;
    *inlen -= (int) icv_inlen;
    *outlen -= (int) icv_outlen;
    if ((icv_inlen != 0) || (ret == (size_t) -1)) {
#ifdef EILSEQ
        if (err == EILSEQ)
            return -2;
#endif
#ifdef E2BIG
        if (err == E2BIG)
            return -1;
#endif
        /* EINVAL (truncated input) and every other failure */
        return -3;
    }
    return 0;
}
|
#endif /* LIBXML_ICONV_ENABLED */ |
|
|
|
/** |
|
* xmlCharEncInFunc: |
|
* @handler: char enconding transformation data structure |
|
* @out: a pointer to an array of bytes to store the result |
|
* @outlen: the length of @out |
|
* @in: a pointer to an array of ISO Latin 1 chars |
|
* @inlen: the length of @in |
|
* |
|
* Generic front-end for the encoding handler input function |
|
* |
|
* The value of @inlen after return is the number of octets consumed |
|
* as the return value is positive, else unpredictiable. |
|
* The value of @outlen after return is the number of ocetes consumed. |
|
* |
|
* Returns 0 if success, or |
|
* -1 by lack of space, or |
|
* -2 if the transcoding fails (for *in is not valid utf8 string or |
|
* the result of transformation can't fit into the encoding we want), or |
|
* -3 if there the last byte can't form a single output char. |
|
*/ |
|
int |
|
xmlCharEncInFunc(xmlCharEncodingHandler *handler, |
|
unsigned char *out, int *outlen, |
|
const unsigned char *in, int *inlen) { |
|
int ret = -2; |
|
|
|
if (handler->input != NULL) { |
|
ret = handler->input(out, *outlen, in, inlen); |
|
if (ret >= 0) { |
|
*outlen = ret; |
|
ret = 0; |
|
} |
|
} |
|
#ifdef LIBXML_ICONV_ENABLED |
|
else if (handler->iconv_out != NULL) { |
|
ret = xmlIconvWrapper(handler->iconv_out, out, outlen, in, inlen); |
|
} |
|
#endif /* LIBXML_ICONV_ENABLED */ |
|
#ifdef DEBUG_ENCODING |
|
switch (ret) { |
|
case 0: |
|
fprintf(stderr, "converted %d bytes to %d bytes of input\n", |
|
*inlen, *outlen); |
|
break; |
|
case -1: |
|
fprintf(stderr, "input conversion failed by lack of space\n"); |
|
break; |
|
case -2: |
|
fprintf(stderr, "input conversion failed due to input error\n"); |
|
break; |
|
case -3: |
|
fprintf(stderr,"input conversion failed can't form input char\n"); |
|
break; |
|
default: |
|
fprintf(stderr,"Unknown input conversion failed %d\n", ret); |
|
} |
|
#endif |
|
return(ret); |
|
} |
|
|
|
/** |
|
* xmlCharEncOutFunc: |
|
* @handler: char enconding transformation data structure |
|
* @out: a pointer to an array of bytes to store the result |
|
* @outlen: the length of @out |
|
* @in: a pointer to an array of ISO Latin 1 chars |
|
* @inlen: the length of @in |
|
* |
|
* Generic front-end for hencoding handler output function |
|
* |
|
* The value of @inlen after return is the number of octets consumed |
|
* as the return value is positive, else unpredictiable. |
|
* The value of @outlen after return is the number of ocetes consumed. |
|
* |
|
* Returns 0 if success, or |
|
* -1 by lack of space, or |
|
* -2 if the transcoding fails (for *in is not valid utf8 string or |
|
* the result of transformation can't fit into the encoding we want), or |
|
* -3 if there the last byte can't form a single output char. |
|
*/ |
|
int |
|
xmlCharEncOutFunc(xmlCharEncodingHandler *handler, |
|
unsigned char* out, int *outlen, |
|
const unsigned char* in, int *inlen) { |
|
int ret = -2; |
|
|
|
if (handler->output != NULL) { |
|
ret = handler->output(out, *outlen, in, inlen); |
|
if (ret >= 0) { |
|
*outlen = ret; |
|
ret = 0; |
|
} |
|
} |
|
#ifdef LIBXML_ICONV_ENABLED |
|
else if (handler->iconv_out != NULL) { |
|
ret = xmlIconvWrapper(handler->iconv_out, out, outlen, in, inlen); |
|
} |
|
#endif /* LIBXML_ICONV_ENABLED */ |
|
#ifdef DEBUG_ENCODING |
|
switch (ret) { |
|
case 0: |
|
fprintf(stderr, "converted %d bytes to %d bytes of output\n", |
|
*inlen, *outlen); |
|
break; |
|
case -1: |
|
fprintf(stderr, "output conversion failed by lack of space\n"); |
|
break; |
|
case -2: |
|
fprintf(stderr, "output conversion failed due to output error\n"); |
|
break; |
|
case -3: |
|
fprintf(stderr,"output conversion failed can't form output char\n"); |
|
break; |
|
default: |
|
fprintf(stderr,"Unknown output conversion failed %d\n", ret); |
|
} |
|
#endif |
|
return(ret); |
|
} |
|
|
|
/** |
|
* xmlCharEncCloseFunc: |
|
* @handler: char enconding transformation data structure |
|
* |
|
* Generic front-end for hencoding handler close function |
|
* |
|
* Returns 0 if success, or -1 in case of error |
|
*/ |
|
int |
|
xmlCharEncCloseFunc(xmlCharEncodingHandler *handler) { |
|
int ret = 0; |
|
#ifdef LIBXML_ICONV_ENABLED |
|
if (handler->iconv_out != NULL) { |
|
if (iconv_close(handler->iconv_out)) |
|
ret = -1; |
|
} |
|
if (handler->iconv_in != NULL) { |
|
if (iconv_close(handler->iconv_in)) |
|
ret = -1; |
|
} |
|
#endif /* LIBXML_ICONV_ENABLED */ |
|
#ifdef DEBUG_ENCODING |
|
if (ret) |
|
fprintf(stderr, "failed to close the encoding handler\n"); |
|
else |
|
fprintf(stderr, "closed the encoding handler\n"); |
|
|
|
#endif |
|
return(ret); |
|
} |
|
|