Annotation of XML/encoding.c, revision 1.27
1.1 daniel 1: /*
2: * encoding.c : implements the encoding conversion functions needed for XML
3: *
4: * Related specs:
5: * rfc2044 (UTF-8 and UTF-16) F. Yergeau Alis Technologies
6: * [ISO-10646] UTF-8 and UTF-16 in Annexes
7: * [ISO-8859-1] ISO Latin-1 characters codes.
8: * [UNICODE] The Unicode Consortium, "The Unicode Standard --
9: * Worldwide Character Encoding -- Version 1.0", Addison-
10: * Wesley, Volume 1, 1991, Volume 2, 1992. UTF-8 is
11: * described in Unicode Technical Report #4.
12: * [US-ASCII] Coded Character Set--7-bit American Standard Code for
13: * Information Interchange, ANSI X3.4-1986.
14: *
1.9 daniel 15: * Original code for IsoLatin1 and UTF-16 by "Martin J. Duerst" <duerst@w3.org>
1.1 daniel 16: *
17: * See Copyright for the status of this software.
18: *
19: * Daniel.Veillard@w3.org
20: */
21:
1.21 daniel 22: #ifdef WIN32
23: #include "win32config.h"
24: #else
1.14 daniel 25: #include "config.h"
1.17 daniel 26: #endif
27:
28: #include <stdio.h>
29: #include <string.h>
30:
31: #ifdef HAVE_CTYPE_H
1.7 daniel 32: #include <ctype.h>
1.17 daniel 33: #endif
1.20 daniel 34: #ifdef HAVE_STDLIB_H
35: #include <stdlib.h>
36: #endif
1.1 daniel 37: #include "encoding.h"
1.16 daniel 38: #include "xmlmemory.h"
1.3 daniel 39:
1.25 daniel 40: xmlCharEncodingHandlerPtr xmlUTF16LEHandler = NULL;
41: xmlCharEncodingHandlerPtr xmlUTF16BEHandler = NULL;
42:
1.3 daniel 43: /*
44: * From rfc2044: encoding of the Unicode values on UTF-8:
45: *
46: * UCS-4 range (hex.) UTF-8 octet sequence (binary)
47: * 0000 0000-0000 007F 0xxxxxxx
48: * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
49: * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
50: *
51: * I hope we won't use values > 0xFFFF anytime soon !
52: */
1.1 daniel 53:
/**
 * xmlCheckUTF8:
 * @utf: pointer to a putative UTF-8 encoded, NUL-terminated string
 *
 * Checks the byte structure of @utf: every lead byte must announce a
 * 1- to 4-byte sequence and be followed by the right number of
 * continuation bytes (10xxxxxx).
 *
 * The check is deliberately not super-strict: over-long encodings are
 * accepted (Java can produce them), and values above 0x10FFFF are not
 * rejected. Sequences longer than 4 bytes are rejected, and so is a
 * continuation byte appearing where a lead byte is expected.
 *
 * Returns 1 if @utf is valid, 0 otherwise (including when @utf is NULL).
 **/
int
xmlCheckUTF8(const unsigned char *utf)
{
    int ix;
    unsigned char c;

    if (utf == NULL)
        return(0);

    /*
     * The && / || chains below short-circuit on the first byte that is
     * not a continuation byte, so the terminating NUL is never stepped
     * over.
     */
    for (ix = 0; (c = utf[ix]) != 0;) {
        if ((c & 0x80) == 0x00) {
            /* 1-byte code: 0xxxxxxx */
            ix++;
        } else if ((c & 0xe0) == 0xc0) {
            /* 2-byte code: 110xxxxx 10xxxxxx */
            if ((utf[ix + 1] & 0xc0) != 0x80)
                return(0);
            ix += 2;
        } else if ((c & 0xf0) == 0xe0) {
            /* 3-byte code: 1110xxxx 10xxxxxx 10xxxxxx */
            if (((utf[ix + 1] & 0xc0) != 0x80) ||
                ((utf[ix + 2] & 0xc0) != 0x80))
                return(0);
            ix += 3;
        } else if ((c & 0xf8) == 0xf0) {
            /* 4-byte code: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
            if (((utf[ix + 1] & 0xc0) != 0x80) ||
                ((utf[ix + 2] & 0xc0) != 0x80) ||
                ((utf[ix + 3] & 0xc0) != 0x80))
                return(0);
            ix += 4;
        } else {
            /* stray continuation byte (10xxxxxx) or lead byte >= 0xF8 */
            return(0);
        }
    }
    return(1);
}
97:
/**
 * isolat1ToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out in bytes
 * @in:  a pointer to an array of ISO Latin 1 chars
 * @inlen:  pointer to the length of @in in bytes (read, not modified)
 *
 * Take a block of ISO Latin 1 chars in and try to convert it to an UTF-8
 * block of chars out. Bytes below 0x80 are copied as-is, the others are
 * expanded to a two byte UTF-8 sequence.
 *
 * A character is only written when it fits completely, so @out never
 * ends with half of a two byte sequence.
 *
 * Returns the number of bytes written, or -1 by lack of space.
 */
int
isolat1ToUTF8(unsigned char* out, int outlen,
              const unsigned char* in, int *inlen) {
    unsigned char* outstart = out;
    unsigned char* outend = out + outlen;
    const unsigned char* inend = in + *inlen;
    unsigned char c;

    while (in < inend) {
        c = *in++;
        if (c < 0x80) {
            if (out >= outend)
                return -1;
            *out++ = c;
        } else {
            /* make sure both bytes fit before emitting the first one */
            if ((outend - out) < 2)
                return -1;
            *out++ = 0xC0 | (c >> 6);
            *out++ = 0x80 | (0x3F & c);
        }
    }
    return (int) (out - outstart);
}
132:
/**
 * UTF8Toisolat1:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out in bytes
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  pointer to the length of @in in bytes
 *
 * Take a block of UTF-8 chars in and try to convert it to an ISO Latin 1
 * block of chars out. Only ASCII bytes and two byte sequences encoding
 * values up to 0xFF can be represented.
 *
 * If the input ends right after a non-ASCII byte, conversion stops
 * there and *@inlen is decremented by one.
 *
 * TODO: UTF8Toisolat1 need a fallback mechanism, some characters should
 *       be represented as "&#x____;"
 *
 * Returns the number of bytes written, or -1 by lack of space, or -2
 *     if the transcoding failed (@in is not a valid UTF-8 string or
 *     the character can't be represented in ISO Latin 1).
 */
int
UTF8Toisolat1(unsigned char* out, int outlen,
              const unsigned char* in, int *inlen) {
    unsigned char* outstart = out;
    unsigned char* outend = out + outlen;
    const unsigned char* inend = in + *inlen;
    unsigned char c;

    while (in < inend) {
        c = *in++;
        if (c < 0x80) {
            if (out >= outend)
                return -1;
            *out++ = c;
        }
        else if (in == inend) {
            /* non-ASCII byte with nothing after it: stop here */
            *inlen -= 1;
            break;
        }
        else if (((c & 0xFC) == 0xC0) && ((*in & 0xC0) == 0x80)) {
            /* a two byte UTF-8 sequence which fits in ISO Latin 1 */
            if (out >= outend)
                return -1;
            *out++ = ((c & 0x03) << 6) | (*in++ & 0x3F);
        }
        else
            return -2;
    }
    return (int) (out - outstart);
}
175:
/**
 * UTF16ToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out in bytes
 * @inb:  a pointer to an array of UTF-16 code units passed as a byte array
 * @inlenb:  pointer to the length of @inb in bytes; an odd trailing byte
 *           is ignored
 *
 * Take a block of UTF-16 code units in and try to convert it to an UTF-8
 * block of chars out. Code units are read in the byte order of the host.
 * They are fetched with memcpy(), so @inb needs no particular alignment.
 *
 * NOTE(review): this routine is registered under the name "UTF-16LE" in
 * xmlInitCharEncodingHandlers(); on a big endian host the units would
 * presumably need swapping first -- to be confirmed.
 *
 * Returns the number of bytes written, or -1 by lack of space or when a
 *     high surrogate is not followed by a low surrogate.
 */
int
UTF16ToUTF8(unsigned char* out, int outlen,
            const unsigned char* inb, int *inlenb)
{
    unsigned char* outstart = out;
    unsigned char* outend = out + outlen;
    const unsigned char* in = inb;
    const unsigned char* inend;
    unsigned short unit;
    unsigned int c, d, inlen;
    int bits;

    inlen = *inlenb / 2;
    inend = in + inlen * sizeof(unit);
    while (in < inend) {
        memcpy(&unit, in, sizeof(unit));
        in += sizeof(unit);
        c = unit;
        if ((c & 0xFC00) == 0xD800) {    /* high surrogate */
            if (in >= inend)
                return -1;
            memcpy(&unit, in, sizeof(unit));
            in += sizeof(unit);
            d = unit;
            if ((d & 0xFC00) != 0xDC00)
                return -1;
            /* combine the two 10 bit halves into one code point */
            c = (((c & 0x03FF) << 10) | (d & 0x03FF)) + 0x10000;
        }

        /*
         * assertion: c is a single UCS-4 value.
         * Emit the lead byte; bits is the shift of the first
         * continuation byte, or negative when there is none.
         */
        if (out >= outend)
            return -1;
        if      (c <    0x80) {  *out++=  c;                        bits= -6; }
        else if (c <   0x800) {  *out++= ((c >>  6) & 0x1F) | 0xC0; bits=  0; }
        else if (c < 0x10000) {  *out++= ((c >> 12) & 0x0F) | 0xE0; bits=  6; }
        else                  {  *out++= ((c >> 18) & 0x07) | 0xF0; bits= 12; }

        for ( ; bits >= 0; bits -= 6) {
            if (out >= outend)
                return -1;
            *out++ = ((c >> bits) & 0x3F) | 0x80;
        }
    }
    return (int) (out - outstart);
}
230:
/**
 * UTF8ToUTF16:
 * @outb:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @outb in bytes
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  pointer to the length of @in in bytes (read, not modified)
 *
 * Take a block of UTF-8 chars in and try to convert it to an UTF-16
 * block of chars out. Code units are stored in the byte order of the
 * host. They are stored with memcpy(), so @outb needs no particular
 * alignment. Only whole 16 bit units are written, an odd trailing byte
 * of @outb is never touched.
 *
 * TODO: UTF8ToUTF16 need a fallback mechanism ...
 *
 * Returns the number of bytes written, or -1 by lack of space, or -2
 *     if the transcoding failed (malformed or truncated UTF-8, or a
 *     value which cannot be represented in UTF-16).
 */
int
UTF8ToUTF16(unsigned char* outb, int outlen,
            const unsigned char* in, int *inlen)
{
    unsigned char* out = outb;
    unsigned char* outend = outb;
    const unsigned char* inend = in + *inlen;
    unsigned int c, d, trailing;
    unsigned short unit;

    /* @outlen counts bytes: the usable area is outlen/2 whole units */
    if (outlen > 0)
        outend += (outlen / 2) * sizeof(unit);

    while (in < inend) {
        d = *in++;
        if      (d < 0x80)  { c= d; trailing= 0; }
        else if (d < 0xC0)  return -2;  /* trailing byte in leading position */
        else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; }
        else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; }
        else if (d < 0xF8)  { c= d & 0x07; trailing= 3; }
        else return -2;    /* no chance for this in UTF-16 */

        for ( ; trailing; trailing--) {
            /* a missing or malformed continuation byte is an input error */
            if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80))
                return -2;
            c <<= 6;
            c |= d & 0x3F;
        }

        /* assertion: c is a single UCS-4 value */
        if (c < 0x10000) {
            if (out >= outend)
                return -1;
            unit = (unsigned short) c;
            memcpy(out, &unit, sizeof(unit));
            out += sizeof(unit);
        }
        else if (c < 0x110000) {
            /* a surrogate pair needs room for two units */
            if ((outend - out) < (int) (2 * sizeof(unit)))
                return -1;
            c -= 0x10000;
            unit = (unsigned short) (0xD800 | (c >> 10));
            memcpy(out, &unit, sizeof(unit));
            out += sizeof(unit);
            unit = (unsigned short) (0xDC00 | (c & 0x03FF));
            memcpy(out, &unit, sizeof(unit));
            out += sizeof(unit);
        }
        else
            return -2;     /* beyond the UTF-16 range */
    }
    return (int) (out - outb);
}
286:
1.7 daniel 287: /**
288: * xmlDetectCharEncoding:
289: * @in: a pointer to the first bytes of the XML entity, must be at least
290: * 4 bytes long.
1.25 daniel 291: * @len: pointer to the length of the buffer
1.7 daniel 292: *
293: * Guess the encoding of the entity using the first bytes of the entity content
294: * accordingly of the non-normative appendix F of the XML-1.0 recommendation.
295: *
296: * Returns one of the XML_CHAR_ENCODING_... values.
297: */
298: xmlCharEncoding
1.25 daniel 299: xmlDetectCharEncoding(const unsigned char* in, int len)
1.7 daniel 300: {
1.25 daniel 301: if (len >= 4) {
302: if ((in[0] == 0x00) && (in[1] == 0x00) &&
303: (in[2] == 0x00) && (in[3] == 0x3C))
304: return(XML_CHAR_ENCODING_UCS4BE);
305: if ((in[0] == 0x3C) && (in[1] == 0x00) &&
306: (in[2] == 0x00) && (in[3] == 0x00))
307: return(XML_CHAR_ENCODING_UCS4LE);
308: if ((in[0] == 0x00) && (in[1] == 0x00) &&
309: (in[2] == 0x3C) && (in[3] == 0x00))
310: return(XML_CHAR_ENCODING_UCS4_2143);
311: if ((in[0] == 0x00) && (in[1] == 0x3C) &&
312: (in[2] == 0x00) && (in[3] == 0x00))
313: return(XML_CHAR_ENCODING_UCS4_3412);
314: if ((in[0] == 0x4C) && (in[1] == 0x6F) &&
315: (in[2] == 0xA7) && (in[3] == 0x94))
316: return(XML_CHAR_ENCODING_EBCDIC);
317: if ((in[0] == 0x3C) && (in[1] == 0x3F) &&
318: (in[2] == 0x78) && (in[3] == 0x6D))
319: return(XML_CHAR_ENCODING_UTF8);
320: }
321: if (len >= 2) {
322: if ((in[0] == 0xFE) && (in[1] == 0xFF))
323: return(XML_CHAR_ENCODING_UTF16BE);
324: if ((in[0] == 0xFF) && (in[1] == 0xFE))
325: return(XML_CHAR_ENCODING_UTF16LE);
326: }
1.7 daniel 327: return(XML_CHAR_ENCODING_NONE);
328: }
329:
330: /**
331: * xmlParseCharEncoding:
1.18 daniel 332: * @name: the encoding name as parsed, in UTF-8 format (ASCII actually)
1.7 daniel 333: *
334: * Conpare the string to the known encoding schemes already known. Note
335: * that the comparison is case insensitive accordingly to the section
336: * [XML] 4.3.3 Character Encoding in Entities.
337: *
338: * Returns one of the XML_CHAR_ENCODING_... values or XML_CHAR_ENCODING_NONE
339: * if not recognized.
340: */
341: xmlCharEncoding
1.8 daniel 342: xmlParseCharEncoding(const char* name)
1.7 daniel 343: {
344: char upper[500];
345: int i;
346:
347: for (i = 0;i < 499;i++) {
348: upper[i] = toupper(name[i]);
349: if (upper[i] == 0) break;
350: }
351: upper[i] = 0;
352:
353: if (!strcmp(upper, "")) return(XML_CHAR_ENCODING_NONE);
354: if (!strcmp(upper, "UTF-8")) return(XML_CHAR_ENCODING_UTF8);
355: if (!strcmp(upper, "UTF8")) return(XML_CHAR_ENCODING_UTF8);
356:
357: /*
358: * NOTE: if we were able to parse this, the endianness of UTF16 is
359: * already found and in use
360: */
361: if (!strcmp(upper, "UTF-16")) return(XML_CHAR_ENCODING_UTF16LE);
362: if (!strcmp(upper, "UTF16")) return(XML_CHAR_ENCODING_UTF16LE);
363:
364: if (!strcmp(upper, "ISO-10646-UCS-2")) return(XML_CHAR_ENCODING_UCS2);
365: if (!strcmp(upper, "UCS-2")) return(XML_CHAR_ENCODING_UCS2);
366: if (!strcmp(upper, "UCS2")) return(XML_CHAR_ENCODING_UCS2);
367:
368: /*
369: * NOTE: if we were able to parse this, the endianness of UCS4 is
370: * already found and in use
371: */
372: if (!strcmp(upper, "ISO-10646-UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
373: if (!strcmp(upper, "UCS-4")) return(XML_CHAR_ENCODING_UCS4LE);
374: if (!strcmp(upper, "UCS4")) return(XML_CHAR_ENCODING_UCS4LE);
375:
376:
377: if (!strcmp(upper, "ISO-8859-1")) return(XML_CHAR_ENCODING_8859_1);
378: if (!strcmp(upper, "ISO-LATIN-1")) return(XML_CHAR_ENCODING_8859_1);
379: if (!strcmp(upper, "ISO LATIN 1")) return(XML_CHAR_ENCODING_8859_1);
380:
381: if (!strcmp(upper, "ISO-8859-2")) return(XML_CHAR_ENCODING_8859_2);
382: if (!strcmp(upper, "ISO-LATIN-2")) return(XML_CHAR_ENCODING_8859_2);
383: if (!strcmp(upper, "ISO LATIN 2")) return(XML_CHAR_ENCODING_8859_2);
384:
385: if (!strcmp(upper, "ISO-8859-3")) return(XML_CHAR_ENCODING_8859_3);
386: if (!strcmp(upper, "ISO-8859-4")) return(XML_CHAR_ENCODING_8859_4);
387: if (!strcmp(upper, "ISO-8859-5")) return(XML_CHAR_ENCODING_8859_5);
388: if (!strcmp(upper, "ISO-8859-6")) return(XML_CHAR_ENCODING_8859_6);
389: if (!strcmp(upper, "ISO-8859-7")) return(XML_CHAR_ENCODING_8859_7);
390: if (!strcmp(upper, "ISO-8859-8")) return(XML_CHAR_ENCODING_8859_8);
391: if (!strcmp(upper, "ISO-8859-9")) return(XML_CHAR_ENCODING_8859_9);
392:
393: if (!strcmp(upper, "ISO-2022-JP")) return(XML_CHAR_ENCODING_2022_JP);
394: if (!strcmp(upper, "Shift_JIS")) return(XML_CHAR_ENCODING_SHIFT_JIS);
395: if (!strcmp(upper, "EUC-JP")) return(XML_CHAR_ENCODING_EUC_JP);
396: return(XML_CHAR_ENCODING_ERROR);
397: }
1.9 daniel 398:
399: /****************************************************************
400: * *
401: * Char encoding handlers *
402: * *
403: ****************************************************************/
404:
405: /* the size should be growable, but it's not a big deal ... */
406: #define MAX_ENCODING_HANDLERS 50
407: static xmlCharEncodingHandlerPtr *handlers = NULL;
408: static int nbCharEncodingHandler = 0;
409:
410: /*
411: * The default is UTF-8 for XML, that's also the default used for the
412: * parser internals, so the default encoding handler is NULL
413: */
414:
415: static xmlCharEncodingHandlerPtr xmlDefaultCharEncodingHandler = NULL;
416:
417: /**
418: * xmlNewCharEncodingHandler:
1.18 daniel 419: * @name: the encoding name, in UTF-8 format (ASCII actually)
1.9 daniel 420: * @input: the xmlCharEncodingInputFunc to read that encoding
421: * @output: the xmlCharEncodingOutputFunc to write that encoding
422: *
423: * Create and registers an xmlCharEncodingHandler.
424: * Returns the xmlCharEncodingHandlerPtr created (or NULL in case of error).
425: */
426: xmlCharEncodingHandlerPtr
1.25 daniel 427: xmlNewCharEncodingHandler(const char *name,
428: xmlCharEncodingInputFunc input,
1.9 daniel 429: xmlCharEncodingOutputFunc output) {
430: xmlCharEncodingHandlerPtr handler;
431: char upper[500];
432: int i;
433: char *up = 0;
434:
435: /*
436: * Keep only the uppercase version of the encoding.
437: */
438: if (name == NULL) {
439: fprintf(stderr, "xmlNewCharEncodingHandler : no name !\n");
440: return(NULL);
441: }
442: for (i = 0;i < 499;i++) {
443: upper[i] = toupper(name[i]);
444: if (upper[i] == 0) break;
445: }
446: upper[i] = 0;
1.16 daniel 447: up = xmlMemStrdup(upper);
1.9 daniel 448: if (up == NULL) {
449: fprintf(stderr, "xmlNewCharEncodingHandler : out of memory !\n");
450: return(NULL);
451: }
452:
453: /*
454: * allocate and fill-up an handler block.
455: */
456: handler = (xmlCharEncodingHandlerPtr)
1.16 daniel 457: xmlMalloc(sizeof(xmlCharEncodingHandler));
1.9 daniel 458: if (handler == NULL) {
459: fprintf(stderr, "xmlNewCharEncodingHandler : out of memory !\n");
460: return(NULL);
461: }
462: handler->input = input;
463: handler->output = output;
464: handler->name = up;
465:
466: /*
467: * registers and returns the handler.
468: */
469: xmlRegisterCharEncodingHandler(handler);
470: return(handler);
471: }
472:
473: /**
474: * xmlInitCharEncodingHandlers:
475: *
476: * Initialize the char encoding support, it registers the default
477: * encoding supported.
1.18 daniel 478: * NOTE: while public, this function usually doesn't need to be called
1.9 daniel 479: * in normal processing.
480: */
481: void
482: xmlInitCharEncodingHandlers(void) {
483: if (handlers != NULL) return;
484:
485: handlers = (xmlCharEncodingHandlerPtr *)
1.16 daniel 486: xmlMalloc(MAX_ENCODING_HANDLERS * sizeof(xmlCharEncodingHandlerPtr));
1.9 daniel 487:
488: if (handlers == NULL) {
489: fprintf(stderr, "xmlInitCharEncodingHandlers : out of memory !\n");
490: return;
491: }
1.10 daniel 492: xmlNewCharEncodingHandler("UTF-8", NULL, NULL);
1.25 daniel 493: xmlUTF16LEHandler =
494: xmlNewCharEncodingHandler("UTF-16LE", UTF16ToUTF8, UTF8ToUTF16);
1.10 daniel 495: xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, UTF8Toisolat1);
1.9 daniel 496: }
497:
498: /**
1.19 daniel 499: * xmlCleanupCharEncodingHandlers:
500: *
501: * Cleanup the memory allocated for the char encoding support, it
502: * unregisters all the encoding handlers.
503: */
504: void
505: xmlCleanupCharEncodingHandlers(void) {
506: if (handlers == NULL) return;
507:
508: for (;nbCharEncodingHandler > 0;) {
509: nbCharEncodingHandler--;
510: if (handlers[nbCharEncodingHandler] != NULL) {
511: xmlFree(handlers[nbCharEncodingHandler]->name);
512: xmlFree(handlers[nbCharEncodingHandler]);
513: }
514: }
515: xmlFree(handlers);
516: handlers = NULL;
517: nbCharEncodingHandler = 0;
518: xmlDefaultCharEncodingHandler = NULL;
519: }
520:
521: /**
1.9 daniel 522: * xmlRegisterCharEncodingHandler:
523: * @handler: the xmlCharEncodingHandlerPtr handler block
524: *
525: * Register the char encoding handler, surprizing, isn't it ?
526: */
527: void
528: xmlRegisterCharEncodingHandler(xmlCharEncodingHandlerPtr handler) {
529: if (handlers == NULL) xmlInitCharEncodingHandlers();
530: if (handler == NULL) {
531: fprintf(stderr, "xmlRegisterCharEncodingHandler: NULL handler !\n");
532: return;
533: }
534:
535: if (nbCharEncodingHandler >= MAX_ENCODING_HANDLERS) {
536: fprintf(stderr,
537: "xmlRegisterCharEncodingHandler: Too many handler registered\n");
538: fprintf(stderr, "\tincrease MAX_ENCODING_HANDLERS : %s\n", __FILE__);
539: return;
540: }
541: handlers[nbCharEncodingHandler++] = handler;
542: }
543:
544: /**
545: * xmlGetCharEncodingHandler:
546: * @enc: an xmlCharEncoding value.
547: *
548: * Search in the registrered set the handler able to read/write that encoding.
549: *
550: * Returns the handler or NULL if not found
551: */
552: xmlCharEncodingHandlerPtr
553: xmlGetCharEncodingHandler(xmlCharEncoding enc) {
554: if (handlers == NULL) xmlInitCharEncodingHandlers();
1.25 daniel 555: switch (enc) {
556: case XML_CHAR_ENCODING_ERROR:
557: return(NULL);
558: case XML_CHAR_ENCODING_NONE:
559: return(NULL);
560: case XML_CHAR_ENCODING_UTF8:
561: return(NULL);
562: case XML_CHAR_ENCODING_UTF16LE:
563: return(xmlUTF16LEHandler);
564: case XML_CHAR_ENCODING_UTF16BE:
565: return(xmlUTF16BEHandler);
566: case XML_CHAR_ENCODING_EBCDIC:
567: return(NULL);
568: case XML_CHAR_ENCODING_UCS4LE:
569: return(NULL);
570: case XML_CHAR_ENCODING_UCS4BE:
571: return(NULL);
572: case XML_CHAR_ENCODING_UCS4_2143:
573: return(NULL);
574: case XML_CHAR_ENCODING_UCS4_3412:
575: return(NULL);
576: case XML_CHAR_ENCODING_UCS2:
577: return(NULL);
578: case XML_CHAR_ENCODING_8859_1:
579: return(NULL);
580: case XML_CHAR_ENCODING_8859_2:
581: return(NULL);
582: case XML_CHAR_ENCODING_8859_3:
583: return(NULL);
584: case XML_CHAR_ENCODING_8859_4:
585: return(NULL);
586: case XML_CHAR_ENCODING_8859_5:
587: return(NULL);
588: case XML_CHAR_ENCODING_8859_6:
589: return(NULL);
590: case XML_CHAR_ENCODING_8859_7:
591: return(NULL);
592: case XML_CHAR_ENCODING_8859_8:
593: return(NULL);
594: case XML_CHAR_ENCODING_8859_9:
595: return(NULL);
596: case XML_CHAR_ENCODING_2022_JP:
597: case XML_CHAR_ENCODING_SHIFT_JIS:
598: case XML_CHAR_ENCODING_EUC_JP:
599: return(NULL);
600: }
1.9 daniel 601: return(NULL);
602: }
603:
604: /**
605: * xmlGetCharEncodingHandler:
606: * @enc: a string describing the char encoding.
607: *
608: * Search in the registrered set the handler able to read/write that encoding.
609: *
610: * Returns the handler or NULL if not found
611: */
612: xmlCharEncodingHandlerPtr
613: xmlFindCharEncodingHandler(const char *name) {
614: char upper[500];
615: int i;
616:
617: if (handlers == NULL) xmlInitCharEncodingHandlers();
618: if (name == NULL) return(xmlDefaultCharEncodingHandler);
619: if (name[0] == 0) return(xmlDefaultCharEncodingHandler);
620:
621: for (i = 0;i < 499;i++) {
622: upper[i] = toupper(name[i]);
623: if (upper[i] == 0) break;
624: }
625: upper[i] = 0;
626:
627: for (i = 0;i < nbCharEncodingHandler; i++)
628: if (!strcmp(name, handlers[i]->name))
629: return(handlers[i]);
630:
631: return(NULL);
632: }
633:
Webmaster