Annotation of libwww/Library/src/HTEscape.c, revision 2.5

2.2       frystyk     1: /*                                                                  HTEscape.c
                      2: **     ESCAPE AND UNESCAPE ILLEGAL CHARACTERS IN A URI
                      3: **
2.5     ! frystyk     4: **     (c) COPYRIGHT MIT 1995.
2.2       frystyk     5: **     Please first read the full copyright statement in the file COPYRIGH.
2.1       frystyk     6: **
                      7: ** history:
                      8: **     Nov 13 94       Spawned from HTParse, as it then can be used in utility
                      9: **                     programs without loading the whole library
                     10: */
                     11: 
2.4       frystyk    12: /* Library include files */
                     13: #include "tcp.h"
2.1       frystyk    14: #include "HTUtils.h"
                     15: #include "HTEscape.h"                                   /* Implemented here */
                     16: 
                     17: #define HEX_ESCAPE '%'
                     18: 
                     19: /* ------------------------------------------------------------------------- */
                     20: 
                     21: /*             Escape undesirable characters using %           HTEscape()
                     22: **             -------------------------------------
                     23: **
                     24: **     This function takes a pointer to a string in which
                     25: **     some characters may be unacceptable unescaped.
                     26: **     It returns a string which has these characters
                     27: **     represented by a '%' character followed by two hex digits.
                     28: **
                     29: **     In the tradition of being conservative in what you do and liberal
                     30: **     in what you accept, we encode some characters which in fact are
                     31: **     allowed in URLs unencoded -- so DON'T use the table below for
                     32: **     parsing! 
                     33: **
                     34: **     Unlike HTUnEscape(), this routine returns a malloced string.
                     35: **
                     36: */
                     37: 
                     38: /* Not BOTH static AND const at the same time in gcc :-(, Henrik 18/03-94 
                     39: **  code gen error in gcc when making random access to
                     40: **  static const table(!!)  */
                     41: /* PRIVATE CONST unsigned char isAcceptable[96] = */
                     42: PRIVATE unsigned char isAcceptable[96] =
                     43: 
                     44: /* Overencodes */
                     45: /*     Bit 0           xalpha          -- see HTFile.h
                     46: **     Bit 1           xpalpha         -- as xalpha but with plus.
                     47: **     Bit 2 ...       path            -- as xpalpha but with /
                     48: */
                     49:     /*   0 1 2 3 4 5 6 7 8 9 A B C D E F */
                     50:     {    0,0,0,0,0,0,0,0,0,0,7,6,0,7,7,4,      /* 2x   !"#$%&'()*+,-./  */
                     51:          7,7,7,7,7,7,7,7,7,7,0,0,0,0,0,0,      /* 3x  0123456789:;<=>?  */
                     52:         7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,       /* 4x  @ABCDEFGHIJKLMNO  */
                     53:         7,7,7,7,7,7,7,7,7,7,7,0,0,0,0,7,       /* 5X  PQRSTUVWXYZ[\]^_  */
                     54:         0,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,       /* 6x  `abcdefghijklmno  */
                     55:         7,7,7,7,7,7,7,7,7,7,7,0,0,0,0,0 };     /* 7X  pqrstuvwxyz{\}~  DEL */
                     56: 
                     57: PRIVATE char *hex = "0123456789ABCDEF";
                     58: 
                     59: PUBLIC char * HTEscape ARGS2 (CONST char *, str,
                     60:        unsigned char, mask)
                     61: {
                     62: #define ACCEPTABLE(a)  ( a>=32 && a<128 && ((isAcceptable[a-32]) & mask))
                     63:     CONST char * p;
                     64:     char * q;
                     65:     char * result;
                     66:     int unacceptable = 0;
                     67:     for(p=str; *p; p++)
                     68:         if (!ACCEPTABLE((unsigned char)TOASCII(*p)))
                     69:                unacceptable++;
                     70:     result = (char *) malloc(p-str + unacceptable+ unacceptable + 1);
                     71:     if (result == NULL) outofmem(__FILE__, "HTEscape");
                     72:     for(q=result, p=str; *p; p++) {
                     73:        unsigned char a = TOASCII(*p);
                     74:        if (!ACCEPTABLE(a)) {
                     75:            *q++ = HEX_ESCAPE;  /* Means hex commming */
                     76:            *q++ = hex[a >> 4];
                     77:            *q++ = hex[a & 15];
                     78:        }
                     79:        else *q++ = *p;
                     80:     }
                     81:     *q++ = 0;                  /* Terminate */
                     82:     return result;
                     83: }
                     84: 
                     85: 
                     86: /*             Decode %xx escaped characters                   HTUnEscape()
                     87: **             -----------------------------
                     88: **
                     89: **     This function takes a pointer to a string in which some
                     90: **     characters may have been encoded in %xy form, where xy is
                     91: **     the ASCII hex code for character 16x+y.
                     92: **     The string is converted in place, as it will never grow.
                     93: */
                     94: 
                     95: PRIVATE char from_hex ARGS1(char, c)
                     96: {
                     97:     return  c >= '0' && c <= '9' ?  c - '0' 
                     98:            : c >= 'A' && c <= 'F'? c - 'A' + 10
                     99:            : c - 'a' + 10;     /* accept small letters just in case */
                    100: }
                    101: 
                    102: PUBLIC char * HTUnEscape ARGS1( char *, str)
                    103: {
                    104:     char * p = str;
                    105:     char * q = str;
                    106: 
                    107:     if (!str) {                                              /* Just for safety ;-) */
                    108:        if (URI_TRACE)
2.4       frystyk   109:            fprintf(TDEST, "HTUnEscape.. Called with NULL argument.\n");
2.1       frystyk   110:        return "";
                    111:     }
                    112:     while(*p) {
                    113:         if (*p == HEX_ESCAPE) {
                    114:            p++;
                    115:            if (*p) *q = from_hex(*p++) * 16;
                    116:            if (*p) *q = FROMASCII(*q + from_hex(*p++));
                    117:            q++;
                    118:        } else {
                    119:            *q++ = *p++; 
                    120:        }
                    121:     }
                    122:     
                    123:     *q++ = 0;
                    124:     return str;
                    125:     
                    126: } /* HTUnEscape */
                    127: 

Webmaster