Annotation of libwww/Library/src/HTEscape.c, revision 2.2

2.2     ! frystyk     1: /*                                                                  HTEscape.c
        !             2: **     ESCAPE AND UNESCAPE ILLEGAL CHARACTERS IN A URI
        !             3: **
        !             4: **     (c) COPYRIGHT CERN 1994.
        !             5: **     Please first read the full copyright statement in the file COPYRIGH.
2.1       frystyk     6: **
                      7: ** history:
                      8: **     Nov 13 94       Spawned from HTParse, as it then can be used in utility
                      9: **                     programs without loading the whole library
                     10: */
                     11: 
                     12: /* Platform dependent stuff */
                     13: #include "HTUtils.h"
                     14: #include "tcp.h"
                     15: 
                     16: /* Library Includes */
                     17: #include "HTEscape.h"                                   /* Implemented here */
                     18: 
                     19: #define HEX_ESCAPE '%'
                     20: 
                     21: /* ------------------------------------------------------------------------- */
                     22: 
                     23: /*             Escape undesirable characters using %           HTEscape()
                     24: **             -------------------------------------
                     25: **
                     26: **     This function takes a pointer to a string in which
                     27: **     some characters may be unacceptable unescaped.
                     28: **     It returns a string which has these characters
                     29: **     represented by a '%' character followed by two hex digits.
                     30: **
                     31: **     In the tradition of being conservative in what you do and liberal
                     32: **     in what you accept, we encode some characters which in fact are
                     33: **     allowed in URLs unencoded -- so DON'T use the table below for
                     34: **     parsing! 
                     35: **
                     36: **     Unlike HTUnEscape(), this routine returns a malloced string.
                     37: **
                     38: */
                     39: 
                     40: /* Not BOTH static AND const at the same time in gcc :-(, Henrik 18/03-94 
                     41: **  code gen error in gcc when making random access to
                     42: **  static const table(!!)  */
                     43: /* PRIVATE CONST unsigned char isAcceptable[96] = */
                     44: PRIVATE unsigned char isAcceptable[96] =
                     45: 
                     46: /* Overencodes */
                     47: /*     Bit 0           xalpha          -- see HTFile.h
                     48: **     Bit 1           xpalpha         -- as xalpha but with plus.
                     49: **     Bit 2 ...       path            -- as xpalpha but with /
                     50: */
                     51:     /*   0 1 2 3 4 5 6 7 8 9 A B C D E F */
                     52:     {    0,0,0,0,0,0,0,0,0,0,7,6,0,7,7,4,      /* 2x   !"#$%&'()*+,-./  */
                     53:          7,7,7,7,7,7,7,7,7,7,0,0,0,0,0,0,      /* 3x  0123456789:;<=>?  */
                     54:         7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,       /* 4x  @ABCDEFGHIJKLMNO  */
                     55:         7,7,7,7,7,7,7,7,7,7,7,0,0,0,0,7,       /* 5X  PQRSTUVWXYZ[\]^_  */
                     56:         0,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,       /* 6x  `abcdefghijklmno  */
                     57:         7,7,7,7,7,7,7,7,7,7,7,0,0,0,0,0 };     /* 7X  pqrstuvwxyz{\}~  DEL */
                     58: 
                     59: PRIVATE char *hex = "0123456789ABCDEF";
                     60: 
                     61: PUBLIC char * HTEscape ARGS2 (CONST char *, str,
                     62:        unsigned char, mask)
                     63: {
                     64: #define ACCEPTABLE(a)  ( a>=32 && a<128 && ((isAcceptable[a-32]) & mask))
                     65:     CONST char * p;
                     66:     char * q;
                     67:     char * result;
                     68:     int unacceptable = 0;
                     69:     for(p=str; *p; p++)
                     70:         if (!ACCEPTABLE((unsigned char)TOASCII(*p)))
                     71:                unacceptable++;
                     72:     result = (char *) malloc(p-str + unacceptable+ unacceptable + 1);
                     73:     if (result == NULL) outofmem(__FILE__, "HTEscape");
                     74:     for(q=result, p=str; *p; p++) {
                     75:        unsigned char a = TOASCII(*p);
                     76:        if (!ACCEPTABLE(a)) {
                     77:            *q++ = HEX_ESCAPE;  /* Means hex commming */
                     78:            *q++ = hex[a >> 4];
                     79:            *q++ = hex[a & 15];
                     80:        }
                     81:        else *q++ = *p;
                     82:     }
                     83:     *q++ = 0;                  /* Terminate */
                     84:     return result;
                     85: }
                     86: 
                     87: 
                     88: /*             Decode %xx escaped characters                   HTUnEscape()
                     89: **             -----------------------------
                     90: **
                     91: **     This function takes a pointer to a string in which some
                     92: **     characters may have been encoded in %xy form, where xy is
                     93: **     the ASCII hex code for character 16x+y.
                     94: **     The string is converted in place, as it will never grow.
                     95: */
                     96: 
                     97: PRIVATE char from_hex ARGS1(char, c)
                     98: {
                     99:     return  c >= '0' && c <= '9' ?  c - '0' 
                    100:            : c >= 'A' && c <= 'F'? c - 'A' + 10
                    101:            : c - 'a' + 10;     /* accept small letters just in case */
                    102: }
                    103: 
                    104: PUBLIC char * HTUnEscape ARGS1( char *, str)
                    105: {
                    106:     char * p = str;
                    107:     char * q = str;
                    108: 
                    109:     if (!str) {                                              /* Just for safety ;-) */
                    110:        if (URI_TRACE)
                    111:            fprintf(stderr, "HTUnEscape.. Called with NULL argument.\n");
                    112:        return "";
                    113:     }
                    114:     while(*p) {
                    115:         if (*p == HEX_ESCAPE) {
                    116:            p++;
                    117:            if (*p) *q = from_hex(*p++) * 16;
                    118:            if (*p) *q = FROMASCII(*q + from_hex(*p++));
                    119:            q++;
                    120:        } else {
                    121:            *q++ = *p++; 
                    122:        }
                    123:     }
                    124:     
                    125:     *q++ = 0;
                    126:     return str;
                    127:     
                    128: } /* HTUnEscape */
                    129: 

Webmaster