Diff for /XML/encoding.c between versions 1.29 and 1.30

version 1.29, 2000/04/03 18:45:48 version 1.30, 2000/04/30 09:10:18
Line 34 Line 34
 #ifdef HAVE_STDLIB_H  #ifdef HAVE_STDLIB_H
 #include <stdlib.h>  #include <stdlib.h>
 #endif  #endif
   #include <libxml/xmlversion.h>
   #ifdef LIBXML_ICONV_ENABLED
   #ifdef HAVE_ERRNO_H
   #include <errno.h>
   #endif
   #endif
 #include <libxml/encoding.h>  #include <libxml/encoding.h>
 #include <libxml/xmlmemory.h>  #include <libxml/xmlmemory.h>
   
 xmlCharEncodingHandlerPtr xmlUTF16LEHandler = NULL;  xmlCharEncodingHandlerPtr xmlUTF16LEHandler = NULL;
 xmlCharEncodingHandlerPtr xmlUTF16BEHandler = NULL;  xmlCharEncodingHandlerPtr xmlUTF16BEHandler = NULL;
   
   #ifdef LIBXML_ICONV_ENABLED
   #define DEBUG_ENCODING  /* Define this to get encoding traces */
   #endif
   
 /*  /*
  * From rfc2044: encoding of the Unicode values on UTF-8:   * From rfc2044: encoding of the Unicode values on UTF-8:
  *   *
Line 636  xmlParseCharEncoding(const char* name) Line 646  xmlParseCharEncoding(const char* name)
     if (!strcmp(upper,  "ISO-8859-9")) return(XML_CHAR_ENCODING_8859_9);      if (!strcmp(upper,  "ISO-8859-9")) return(XML_CHAR_ENCODING_8859_9);
   
     if (!strcmp(upper, "ISO-2022-JP")) return(XML_CHAR_ENCODING_2022_JP);      if (!strcmp(upper, "ISO-2022-JP")) return(XML_CHAR_ENCODING_2022_JP);
     if (!strcmp(upper, "Shift_JIS")) return(XML_CHAR_ENCODING_SHIFT_JIS);      if (!strcmp(upper, "SHIFT_JIS")) return(XML_CHAR_ENCODING_SHIFT_JIS);
     if (!strcmp(upper, "EUC-JP")) return(XML_CHAR_ENCODING_EUC_JP);      if (!strcmp(upper, "EUC-JP")) return(XML_CHAR_ENCODING_EUC_JP);
   
   #ifdef DEBUG_ENCODING
       fprintf(stderr, "Unknown encoding %s\n", name);
   #endif
     return(XML_CHAR_ENCODING_ERROR);      return(XML_CHAR_ENCODING_ERROR);
 }  }
   
Line 712  xmlNewCharEncodingHandler(const char *na Line 726  xmlNewCharEncodingHandler(const char *na
      * registers and returns the handler.       * registers and returns the handler.
      */       */
     xmlRegisterCharEncodingHandler(handler);      xmlRegisterCharEncodingHandler(handler);
   #ifdef DEBUG_ENCODING
       fprintf(stderr, "Registered encoding handler for %s\n", name);
   #endif
     return(handler);      return(handler);
 }  }
   
Line 798  xmlRegisterCharEncodingHandler(xmlCharEn Line 815  xmlRegisterCharEncodingHandler(xmlCharEn
  */   */
 xmlCharEncodingHandlerPtr  xmlCharEncodingHandlerPtr
 xmlGetCharEncodingHandler(xmlCharEncoding enc) {  xmlGetCharEncodingHandler(xmlCharEncoding enc) {
       xmlCharEncodingHandlerPtr handler;
   
     if (handlers == NULL) xmlInitCharEncodingHandlers();      if (handlers == NULL) xmlInitCharEncodingHandlers();
     switch (enc) {      switch (enc) {
         case XML_CHAR_ENCODING_ERROR:          case XML_CHAR_ENCODING_ERROR:
Line 811  xmlGetCharEncodingHandler(xmlCharEncodin Line 830  xmlGetCharEncodingHandler(xmlCharEncodin
         case XML_CHAR_ENCODING_UTF16BE:          case XML_CHAR_ENCODING_UTF16BE:
             return(xmlUTF16BEHandler);              return(xmlUTF16BEHandler);
         case XML_CHAR_ENCODING_EBCDIC:          case XML_CHAR_ENCODING_EBCDIC:
             return(NULL);              handler = xmlFindCharEncodingHandler("EBCDIC");
               if (handler != NULL) return(handler);
               handler = xmlFindCharEncodingHandler("ebcdic");
               if (handler != NULL) return(handler);
               break;
         case XML_CHAR_ENCODING_UCS4LE:          case XML_CHAR_ENCODING_UCS4LE:
             return(NULL);              handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
               if (handler != NULL) return(handler);
               handler = xmlFindCharEncodingHandler("UCS-4");
               if (handler != NULL) return(handler);
               handler = xmlFindCharEncodingHandler("UCS4");
               if (handler != NULL) return(handler);
               break;
         case XML_CHAR_ENCODING_UCS4BE:          case XML_CHAR_ENCODING_UCS4BE:
             return(NULL);              handler = xmlFindCharEncodingHandler("UCS4BE");
               if (handler != NULL) return(handler);
               break;
         case XML_CHAR_ENCODING_UCS4_2143:          case XML_CHAR_ENCODING_UCS4_2143:
             return(NULL);              break;
         case XML_CHAR_ENCODING_UCS4_3412:          case XML_CHAR_ENCODING_UCS4_3412:
             return(NULL);              break;
         case XML_CHAR_ENCODING_UCS2:          case XML_CHAR_ENCODING_UCS2:
             return(NULL);              handler = xmlFindCharEncodingHandler("ISO-10646-UCS-2");
               if (handler != NULL) return(handler);
               handler = xmlFindCharEncodingHandler("UCS-2");
               if (handler != NULL) return(handler);
               handler = xmlFindCharEncodingHandler("UCS2");
               if (handler != NULL) return(handler);
               break;
         case XML_CHAR_ENCODING_8859_1:          case XML_CHAR_ENCODING_8859_1:
             return(NULL);  
         case XML_CHAR_ENCODING_8859_2:          case XML_CHAR_ENCODING_8859_2:
             return(NULL);  
         case XML_CHAR_ENCODING_8859_3:          case XML_CHAR_ENCODING_8859_3:
             return(NULL);  
         case XML_CHAR_ENCODING_8859_4:          case XML_CHAR_ENCODING_8859_4:
             return(NULL);  
         case XML_CHAR_ENCODING_8859_5:          case XML_CHAR_ENCODING_8859_5:
             return(NULL);  
         case XML_CHAR_ENCODING_8859_6:          case XML_CHAR_ENCODING_8859_6:
             return(NULL);  
         case XML_CHAR_ENCODING_8859_7:          case XML_CHAR_ENCODING_8859_7:
             return(NULL);  
         case XML_CHAR_ENCODING_8859_8:          case XML_CHAR_ENCODING_8859_8:
             return(NULL);  
         case XML_CHAR_ENCODING_8859_9:          case XML_CHAR_ENCODING_8859_9:
             return(NULL);              return(NULL);
         case XML_CHAR_ENCODING_2022_JP:          case XML_CHAR_ENCODING_2022_JP:
               handler = xmlFindCharEncodingHandler("ISO-2022-JP");
               if (handler != NULL) return(handler);
               break;
         case XML_CHAR_ENCODING_SHIFT_JIS:          case XML_CHAR_ENCODING_SHIFT_JIS:
               handler = xmlFindCharEncodingHandler("SHIFT-JIS");
               if (handler != NULL) return(handler);
               handler = xmlFindCharEncodingHandler("SHIFT_JIS");
               if (handler != NULL) return(handler);
               handler = xmlFindCharEncodingHandler("Shift_JIS");
               if (handler != NULL) return(handler);
               break;
         case XML_CHAR_ENCODING_EUC_JP:          case XML_CHAR_ENCODING_EUC_JP:
             return(NULL);              handler = xmlFindCharEncodingHandler("EUC-JP");
               if (handler != NULL) return(handler);
               break;
           default: 
               break;
     }      }
       
   #ifdef DEBUG_ENCODING
       fprintf(stderr, "No handler found for encoding %d\n", enc);
   #endif
     return(NULL);      return(NULL);
 }  }
   
Line 858  xmlGetCharEncodingHandler(xmlCharEncodin Line 905  xmlGetCharEncodingHandler(xmlCharEncodin
  */   */
 xmlCharEncodingHandlerPtr  xmlCharEncodingHandlerPtr
 xmlFindCharEncodingHandler(const char *name) {  xmlFindCharEncodingHandler(const char *name) {
     char upper[500];  #ifdef LIBXML_ICONV_ENABLED
       char pseudoname[150];
       iconv_t icv_in, icv_out;
       xmlCharEncodingHandlerPtr enc;
   #endif /* LIBXML_ICONV_ENABLED */
       char upper[100];
     int i;      int i;
   
     if (handlers == NULL) xmlInitCharEncodingHandlers();      if (handlers == NULL) xmlInitCharEncodingHandlers();
     if (name == NULL) return(xmlDefaultCharEncodingHandler);      if (name == NULL) return(xmlDefaultCharEncodingHandler);
     if (name[0] == 0) return(xmlDefaultCharEncodingHandler);      if (name[0] == 0) return(xmlDefaultCharEncodingHandler);
   
     for (i = 0;i < 499;i++) {      for (i = 0;i < 99;i++) {
         upper[i] = toupper(name[i]);          upper[i] = toupper(name[i]);
         if (upper[i] == 0) break;          if (upper[i] == 0) break;
     }      }
     upper[i] = 0;      upper[i] = 0;
   
     for (i = 0;i < nbCharEncodingHandler; i++)      for (i = 0;i < nbCharEncodingHandler; i++)
         if (!strcmp(name, handlers[i]->name))          if (!strcmp(upper, handlers[i]->name)) {
   #ifdef DEBUG_ENCODING
               fprintf(stderr, "Found registered handler for encoding %s\n", name);
   #endif
             return(handlers[i]);              return(handlers[i]);
           }
   
   #ifdef LIBXML_ICONV_ENABLED
       /* check whether iconv can handle this */
       icv_out = iconv_open("UTF-8", name);
       icv_in = iconv_open(name, "UTF-8");
       if ((icv_in != (iconv_t) -1) && (icv_out != (iconv_t) -1)) {
               enc = malloc(sizeof(xmlCharEncodingHandler));
               enc->name = xmlMemStrdup(pseudoname);
               enc->input = NULL;
               enc->output = NULL;
               enc->iconv_in = icv_in;
               enc->iconv_out = icv_out;
   #ifdef DEBUG_ENCODING
               fprintf(stderr, "Found iconv handler for encoding %s\n", name);
   #endif
               return enc;
       } else if ((icv_in != (iconv_t) -1) || icv_out != (iconv_t) -1) {
               fprintf(stderr, "iconv : problems with filters for '%s'\n", name);
       }
   #endif /* LIBXML_ICONV_ENABLED */
   #ifdef DEBUG_ENCODING
       fprintf(stderr, "No handler found for encoding %s\n", name);
   #endif
     return(NULL);      return(NULL);
 }  }
   
   #ifdef LIBXML_ICONV_ENABLED
   /**
    * xmlIconvWrapper:
    * @cd:         iconv converter data structure
    * @out:  a pointer to an array of bytes to store the result
    * @outlen:  the length of @out
    * @in:  a pointer to an array of ISO Latin 1 chars
    * @inlen:  the length of @in
    *
    * Returns 0 if success, or 
    *     -1 by lack of space, or
    *     -2 if the transcoding fails (for *in is not valid utf8 string or
    *        the result of transformation can't fit into the encoding we want), or
    *     -3 if there the last byte can't form a single output char.
    *     
    * The value of @inlen after return is the number of octets consumed
    *     as the return value is positive, else unpredictiable.
    * The value of @outlen after return is the number of ocetes consumed.
    */
   static int
   xmlIconvWrapper(iconv_t cd,
           unsigned char *out, int *outlen,
           const unsigned char *in, int *inlen) {
   
           size_t icv_inlen = *inlen, icv_outlen = *outlen;
           const char *icv_in = (const char *) in;
           char *icv_out = (char *) out;
           int ret;
   
           ret = iconv(cd,
                   &icv_in, &icv_inlen,
                   &icv_out, &icv_outlen);
           *inlen -= icv_inlen;
           *outlen -= icv_outlen;
           if (icv_inlen != 0 || ret == (size_t) -1) {
   #ifdef EILSEQ
                   if (errno == EILSEQ) {
                           return -3;
                   } else
   #endif
   #ifdef E2BIG
                   if (errno == E2BIG) {
                           return -1;
                   } else
   #endif
   #ifdef EINVAL
                   if (errno == EINVAL) {
                           return -2;
                   }
   #endif
                   else {
                           return -3;
                   }
           }
           return 0;
   }
   #endif /* LIBXML_ICONV_ENABLED */
   
   /**
    * xmlCharEncInFunc:
    * @handler:    char enconding transformation data structure
    * @out:  a pointer to an array of bytes to store the result
    * @outlen:  the length of @out
    * @in:  a pointer to an array of ISO Latin 1 chars
    * @inlen:  the length of @in
    *     
    * Generic front-end for the encoding handler input function
    *     
    * The value of @inlen after return is the number of octets consumed
    *     as the return value is positive, else unpredictiable.
    * The value of @outlen after return is the number of ocetes consumed.
    *
    * Returns 0 if success, or 
    *     -1 by lack of space, or
    *     -2 if the transcoding fails (for *in is not valid utf8 string or
    *        the result of transformation can't fit into the encoding we want), or
    *     -3 if there the last byte can't form a single output char.
    */
   int
   xmlCharEncInFunc(xmlCharEncodingHandler *handler,
       unsigned char *out, int *outlen,
       const unsigned char *in, int *inlen) {
       int ret = -2;
   
       if (handler->input != NULL) {
           ret = handler->input(out, *outlen, in, inlen);
           if (ret >= 0) {
               *outlen = ret;
               ret = 0;
           }
       }
   #ifdef LIBXML_ICONV_ENABLED
       else if (handler->iconv_out != NULL) {
           ret = xmlIconvWrapper(handler->iconv_out, out, outlen, in, inlen);
       }
   #endif /* LIBXML_ICONV_ENABLED */
   #ifdef DEBUG_ENCODING
       switch (ret) {
           case 0:
               fprintf(stderr, "converted %d bytes to %d bytes of input\n",
                       *inlen, *outlen);
               break;
           case -1:
               fprintf(stderr, "input conversion failed by lack of space\n");
               break;
           case -2:
               fprintf(stderr, "input conversion failed due to input error\n");
               break;
           case -3:
               fprintf(stderr,"input conversion failed can't form input char\n");
               break;
           default:
               fprintf(stderr,"Unknown input conversion failed %d\n", ret);
       }
   #endif
       return(ret);
   }
   
   /**
    * xmlCharEncOutFunc:
    * @handler:    char enconding transformation data structure
    * @out:  a pointer to an array of bytes to store the result
    * @outlen:  the length of @out
    * @in:  a pointer to an array of ISO Latin 1 chars
    * @inlen:  the length of @in
    *     
    * Generic front-end for hencoding handler output function
    *
    * The value of @inlen after return is the number of octets consumed
    *     as the return value is positive, else unpredictiable.
    * The value of @outlen after return is the number of ocetes consumed.
    *
    * Returns 0 if success, or 
    *     -1 by lack of space, or
    *     -2 if the transcoding fails (for *in is not valid utf8 string or
    *        the result of transformation can't fit into the encoding we want), or
    *     -3 if there the last byte can't form a single output char.
    */
   int
   xmlCharEncOutFunc(xmlCharEncodingHandler *handler,
       unsigned char* out, int *outlen,
       const unsigned char* in, int *inlen) {
       int ret = -2;
   
       if (handler->output != NULL) {
           ret = handler->output(out, *outlen, in, inlen);
           if (ret >= 0) {
               *outlen = ret;
               ret = 0;
           }
       }
   #ifdef LIBXML_ICONV_ENABLED
       else if (handler->iconv_out != NULL) {
           ret = xmlIconvWrapper(handler->iconv_out, out, outlen, in, inlen);
       }
   #endif /* LIBXML_ICONV_ENABLED */
   #ifdef DEBUG_ENCODING
       switch (ret) {
           case 0:
               fprintf(stderr, "converted %d bytes to %d bytes of output\n",
                       *inlen, *outlen);
               break;
           case -1:
               fprintf(stderr, "output conversion failed by lack of space\n");
               break;
           case -2:
               fprintf(stderr, "output conversion failed due to output error\n");
               break;
           case -3:
               fprintf(stderr,"output conversion failed can't form output char\n");
               break;
           default:
               fprintf(stderr,"Unknown output conversion failed %d\n", ret);
       }
   #endif
       return(ret);
   }
   
   /**
    * xmlCharEncCloseFunc:
    * @handler:    char enconding transformation data structure
    *     
    * Generic front-end for hencoding handler close function
    *
    * Returns 0 if success, or -1 in case of error
    */
   int
   xmlCharEncCloseFunc(xmlCharEncodingHandler *handler) {
       int ret = 0;
   #ifdef LIBXML_ICONV_ENABLED
       if (handler->iconv_out != NULL) {
           if (iconv_close(handler->iconv_out))
               ret = -1;
       }
       if (handler->iconv_in != NULL) {
           if (iconv_close(handler->iconv_in))
               ret = -1;
       }
   #endif /* LIBXML_ICONV_ENABLED */
   #ifdef DEBUG_ENCODING
       if (ret)
           fprintf(stderr, "failed to close the encoding handler\n");
       else
           fprintf(stderr, "closed the encoding handler\n");
   
   #endif
       return(ret);
   }
   

Removed from v.1.29  
changed lines
  Added in v.1.30


Webmaster