XML/encoding.c - diff

Return to encoding.c CVS log

Up to [Public] / XML

Diff for /XML/encoding.c between versions 1.46 and 1.47

-version 1.46, 2000/07/11 00:28:58
+version 1.47, 2000/07/11 09:06:07
  Line 178  xmlCheckUTF8(const unsigned char *utf)
  }
  /**
+  * asciiToUTF8:
+  * @out:  a pointer to an array of bytes to store the result
+  * @outlen:  in: the size of @out in bytes; out: the number of bytes written
+  * @in:  a pointer to an array of ASCII chars
+  * @inlen:  in: the length of @in in bytes; out: the number of bytes consumed
+  *
+  * Take a block of ASCII chars in and try to convert it to a UTF-8
+  * block of chars out. Each input byte below 0x80 is copied unchanged.
+  * The copy loop only runs while more than 5 bytes of @out remain free,
+  * so the last 5 bytes of @out are never written and conversion may stop
+  * (still returning 0) before all of @in is consumed; callers should
+  * check @inlen.
+  *
+  * Returns 0 if success, or -1 if a byte >= 0x80 is found in @in.
+  * In both cases @inlen is set to the number of octets consumed and
+  * @outlen to the number of octets written to @out.
+  *
+  * NOTE(review): there is no @in == NULL case here, unlike UTF8Toascii,
+  * and @inlen/@outlen are dereferenced unconditionally -- confirm that
+  * callers never pass NULL.
+  */
+ int
+ asciiToUTF8(unsigned char* out, int *outlen,
+               const unsigned char* in, int *inlen) {
+     unsigned char* outstart = out;
+     const unsigned char* base = in;
+     const unsigned char* processed = in; /* end of the input converted so far */
+     unsigned char* outend = out + *outlen;
+     const unsigned char* inend;
+     unsigned int c;
+     int bits;
+     inend = in + (*inlen);
+     while ((in < inend) && (out - outstart + 5 < *outlen)) { /* keeps 5 bytes of slack in out */
+         c= *in++;
+         /* assertion: c is a single UCS-4 value */
+         if (out >= outend) /* redundant: the loop condition is already stricter */
+             break;
+         if      (c <    0x80) {  *out++=  c;                bits= -6; }
+         else { /* not ASCII: report the progress made so far and fail */
+             *outlen = out - outstart;
+             *inlen = processed - base;
+             return(-1);
+         }
+         for ( ; bits >= 0; bits-= 6) { /* never entered: bits is always -6 here */
+             if (out >= outend)
+                 break;
+             *out++= ((c >> bits) & 0x3F) | 0x80;
+         }
+         processed = (const unsigned char*) in;
+     }
+     *outlen = out - outstart; /* bytes written */
+     *inlen = processed - base; /* bytes consumed */
+     return(0);
+ }
+ /**
+  * UTF8Toascii:
+  * @out:  a pointer to an array of bytes to store the result
+  * @outlen:  in: the size of @out in bytes; out: the number of bytes written
+  * @in:  a pointer to an array of UTF-8 chars, or NULL
+  * @inlen:  in: the length of @in in bytes; out: the number of bytes consumed
+  *
+  * Take a block of UTF-8 chars in and try to convert it to an ASCII
+  * block of chars out. Only decoded values below 0x80 can be converted.
+  * If @in is NULL nothing is converted: @outlen and @inlen are set to 0
+  * and 0 is returned.
+  *
+  * Returns 0 if success, or -2 if the transcoding fails (a continuation
+  * byte in leading position, a lead byte >= 0xF8, or a decoded value
+  * >= 0x80). Conversion also stops, still returning 0, when @out is
+  * full or when @in ends in the middle of a multi-byte sequence; the
+  * code below never returns -1.
+  * On every return with @in != NULL, @inlen is the number of octets
+  * consumed (whole characters only) and @outlen is the number of octets
+  * written to @out.
+  *
+  * NOTE(review): a bad continuation byte only breaks out of the decoding
+  * loop, so the partial value is still emitted when it is < 0x80 (e.g.
+  * 0xC1 0x41 yields 0x01), and overlong forms such as 0xC1 0x81 ('A')
+  * are accepted. Confirm whether these should return -2 instead.
+  */
+ int
+ UTF8Toascii(unsigned char* out, int *outlen,
+               const unsigned char* in, int *inlen) {
+     const unsigned char* processed = in; /* end of the last fully converted character */
+     const unsigned char* outend;
+     const unsigned char* outstart = out;
+     const unsigned char* instart = in;
+     const unsigned char* inend;
+     unsigned int c, d;
+     int trailing; /* continuation bytes still expected */
+     if (in == NULL) {
+         /*
+          * initialization nothing to do
+          */
+         *outlen = 0;
+         *inlen = 0;
+         return(0);
+     }
+     inend = in + (*inlen);
+     outend = out + (*outlen);
+     while (in < inend) {
+         d = *in++;
+         if      (d < 0x80)  { c= d; trailing= 0; }
+         else if (d < 0xC0) {
+             /* trailing byte in leading position */
+             *outlen = out - outstart;
+             *inlen = processed - instart;
+             return(-2);
+         } else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; }
+         else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; }
+         else if (d < 0xF8)  { c= d & 0x07; trailing= 3; }
+         else {
+             /* no chance for this in Ascii */
+             *outlen = out - outstart;
+             *inlen = processed - instart;
+             return(-2);
+         }
+         if (inend - in < trailing) { /* sequence is cut off by the end of the input */
+             break;
+         }
+         for ( ; trailing; trailing--) {
+             if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80)) /* not a continuation byte */
+                 break;
+             c <<= 6;
+             c |= d & 0x3F;
+         }
+         /* assertion: c is a single UCS-4 value */
+         if (c < 0x80) {
+             if (out >= outend) /* output full: stop, current character stays unconsumed */
+                 break;
+             *out++ = c;
+         } else {
+             /* no chance for this in Ascii */
+             *outlen = out - outstart;
+             *inlen = processed - instart;
+             return(-2);
+         }
+         processed = in;
+     }
+     *outlen = out - outstart; /* bytes written */
+     *inlen = processed - instart; /* bytes consumed */
+     return(0);
+ }
+ /**
   * isolat1ToUTF8:
   * @out:  a pointer to an array of bytes to store the result
   * @outlen:  the length of @out
- Line 233  isolat1ToUTF8(unsigned char* out, int *o
+ Line 367  isolat1ToUTF8(unsigned char* out, int *o
   *
   * Take a block of UTF-8 chars in and try to convert it to an ISO Latin 1
   * block of chars out.
-  * TODO: UTF8Toisolat1 need a fallback mechanism ...
   *
   * Returns 0 if success, -2 if the transcoding fails, or -1 otherwise
   * The value of @inlen after return is the number of octets consumed
- Line 405  UTF16LEToUTF8(unsigned char* out, int *o
+ Line 538  UTF16LEToUTF8(unsigned char* out, int *o
   *
   * Take a block of UTF-8 chars in and try to convert it to an UTF-16LE
   * block of chars out.
-  * TODO: UTF8ToUTF16LE need a fallback mechanism ...
   *
   * Returns the number of byte written, or -1 by lack of space, or -2
   *     if the transcoding failed.
- Line 616  UTF16BEToUTF8(unsigned char* out, int *o
+ Line 748  UTF16BEToUTF8(unsigned char* out, int *o
   *
   * Take a block of UTF-8 chars in and try to convert it to an UTF-16BE
   * block of chars out.
-  * TODO: UTF8ToUTF16BE need a fallback mechanism ...
   *
   * Returns the number of byte written, or -1 by lack of space, or -2
   *     if the transcoding failed.
- Line 1011  xmlInitCharEncodingHandlers(void) {
+ Line 1142  xmlInitCharEncodingHandlers(void) {
            xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8, UTF8ToUTF16LE);
      xmlUTF16BEHandler =
            xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8, UTF8ToUTF16BE);
- #if 1
      xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, UTF8Toisolat1);
- #endif
+     xmlNewCharEncodingHandler("ASCII", asciiToUTF8, UTF8Toascii);
  }
  /**

Webmaster

Removed from v.1.46
changed lines
	Added in v.1.47