Diff for /XML/encoding.c between versions 1.46 and 1.47

version 1.46, 2000/07/11 00:28:58 version 1.47, 2000/07/11 09:06:07
Line 178  xmlCheckUTF8(const unsigned char *utf) Line 178  xmlCheckUTF8(const unsigned char *utf)
 }  }
   
 /**  /**
    * asciiToUTF8:
    * @out:  a pointer to an array of bytes to store the result
    * @outlen:  the length of @out
    * @in:  a pointer to an array of ASCII chars
    * @inlen:  the length of @in
    *
    * Take a block of ASCII chars in and try to convert it to an UTF-8
    * block of chars out.
    * Returns 0 if success, or -1 otherwise
    * The value of @inlen after return is the number of octets consumed
    *     when the return value is not negative, else unpredictable.
    * The value of @outlen after return is the number of octets produced.
    */
   int
   asciiToUTF8(unsigned char* out, int *outlen,
                 const unsigned char* in, int *inlen) {
       unsigned char* outstart = out;
       const unsigned char* base = in;
       const unsigned char* processed = in;
       unsigned char* outend = out + *outlen;
       const unsigned char* inend;
       unsigned int c;
       int bits;
   
       inend = in + (*inlen);
       while ((in < inend) && (out - outstart + 5 < *outlen)) {
           c= *in++;
   
           /* assertion: c is a single UTF-4 value */
           if (out >= outend)
               break;
           if      (c <    0x80) {  *out++=  c;                bits= -6; }
           else { 
               *outlen = out - outstart;
               *inlen = processed - base;
               return(-1);
           }
    
           for ( ; bits >= 0; bits-= 6) {
               if (out >= outend)
                   break;
               *out++= ((c >> bits) & 0x3F) | 0x80;
           }
           processed = (const unsigned char*) in;
       }
       *outlen = out - outstart;
       *inlen = processed - base;
       return(0);
   }
   
   /**
    * UTF8Toascii:
    * @out:  a pointer to an array of bytes to store the result
    * @outlen:  the length of @out
    * @in:  a pointer to an array of UTF-8 chars
    * @inlen:  the length of @in
    *
    * Take a block of UTF-8 chars in and try to convert it to an ASCII
    * block of chars out.
    *
    * Returns 0 if success, -2 if the transcoding fails, or -1 otherwise
    * The value of @inlen after return is the number of octets consumed
    *     when the return value is not negative, else unpredictable.
    * The value of @outlen after return is the number of octets produced.
    */
   int
   UTF8Toascii(unsigned char* out, int *outlen,
                 const unsigned char* in, int *inlen) {
       const unsigned char* processed = in;
       const unsigned char* outend;
       const unsigned char* outstart = out;
       const unsigned char* instart = in;
       const unsigned char* inend;
       unsigned int c, d;
       int trailing;
   
       if (in == NULL) {
           /*
            * initialization nothing to do
            */
           *outlen = 0;
           *inlen = 0;
           return(0);
       }
       inend = in + (*inlen);
       outend = out + (*outlen);
       while (in < inend) {
           d = *in++;
           if      (d < 0x80)  { c= d; trailing= 0; }
           else if (d < 0xC0) {
               /* trailing byte in leading position */
               *outlen = out - outstart;
               *inlen = processed - instart;
               return(-2);
           } else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; }
           else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; }
           else if (d < 0xF8)  { c= d & 0x07; trailing= 3; }
           else {
               /* no chance for this in Ascii */
               *outlen = out - outstart;
               *inlen = processed - instart;
               return(-2);
           }
   
           if (inend - in < trailing) {
               break;
           } 
   
           for ( ; trailing; trailing--) {
               if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80))
                   break;
               c <<= 6;
               c |= d & 0x3F;
           }
   
           /* assertion: c is a single UTF-4 value */
           if (c < 0x80) {
               if (out >= outend)
                   break;
               *out++ = c;
           } else {
               /* no chance for this in Ascii */
               *outlen = out - outstart;
               *inlen = processed - instart;
               return(-2);
           }
           processed = in;
       }
       *outlen = out - outstart;
       *inlen = processed - instart;
       return(0);
   }
   
   /**
  * isolat1ToUTF8:   * isolat1ToUTF8:
  * @out:  a pointer to an array of bytes to store the result   * @out:  a pointer to an array of bytes to store the result
  * @outlen:  the length of @out   * @outlen:  the length of @out
Line 233  isolat1ToUTF8(unsigned char* out, int *o Line 367  isolat1ToUTF8(unsigned char* out, int *o
  *   *
  * Take a block of UTF-8 chars in and try to convert it to an ISO Latin 1   * Take a block of UTF-8 chars in and try to convert it to an ISO Latin 1
  * block of chars out.   * block of chars out.
  * TODO: UTF8Toisolat1 need a fallback mechanism ...  
  *   *
  * Returns 0 if success, -2 if the transcoding fails, or -1 otherwise   * Returns 0 if success, -2 if the transcoding fails, or -1 otherwise
  * The value of @inlen after return is the number of octets consumed   * The value of @inlen after return is the number of octets consumed
Line 405  UTF16LEToUTF8(unsigned char* out, int *o Line 538  UTF16LEToUTF8(unsigned char* out, int *o
  *   *
  * Take a block of UTF-8 chars in and try to convert it to an UTF-16LE   * Take a block of UTF-8 chars in and try to convert it to an UTF-16LE
  * block of chars out.   * block of chars out.
  * TODO: UTF8ToUTF16LE need a fallback mechanism ...  
  *   *
  * Returns the number of byte written, or -1 by lack of space, or -2   * Returns the number of byte written, or -1 by lack of space, or -2
  *     if the transcoding failed.    *     if the transcoding failed. 
Line 616  UTF16BEToUTF8(unsigned char* out, int *o Line 748  UTF16BEToUTF8(unsigned char* out, int *o
  *   *
  * Take a block of UTF-8 chars in and try to convert it to an UTF-16BE   * Take a block of UTF-8 chars in and try to convert it to an UTF-16BE
  * block of chars out.   * block of chars out.
  * TODO: UTF8ToUTF16BE need a fallback mechanism ...  
  *   *
  * Returns the number of byte written, or -1 by lack of space, or -2   * Returns the number of byte written, or -1 by lack of space, or -2
  *     if the transcoding failed.    *     if the transcoding failed. 
Line 1011  xmlInitCharEncodingHandlers(void) { Line 1142  xmlInitCharEncodingHandlers(void) {
           xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8, UTF8ToUTF16LE);            xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8, UTF8ToUTF16LE);
     xmlUTF16BEHandler =       xmlUTF16BEHandler = 
           xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8, UTF8ToUTF16BE);            xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8, UTF8ToUTF16BE);
 #if 1  
     xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, UTF8Toisolat1);      xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, UTF8Toisolat1);
 #endif      xmlNewCharEncodingHandler("ASCII", asciiToUTF8, UTF8Toascii);
 }  }
   
 /**  /**

Removed from v.1.46  
changed lines
  Added in v.1.47


Webmaster