Annotation of libwww/Library/src/HTEscape.c, revision 2.20

2.2       frystyk     1: /*                                                                  HTEscape.c
                      2: **     ESCAPE AND UNESCAPE ILLEGAL CHARACTERS IN A URI
                      3: **
2.5       frystyk     4: **     (c) COPYRIGHT MIT 1995.
2.2       frystyk     5: **     Please first read the full copyright statement in the file COPYRIGH.
2.20    ! frystyk     6: **     @(#) $Id: HTEscape.c,v 2.19 1998/05/24 23:03:13 frystyk Exp $
2.1       frystyk     7: **
                      8: ** history:
                      9: **     Nov 13 94       Spawned from HTParse, as it then can be used in utility
                     10: **                     programs without loading the whole library
                     11: */
                     12: 
2.4       frystyk    13: /* Library include files */
2.17      frystyk    14: #include "wwwsys.h"
2.1       frystyk    15: #include "HTUtils.h"
                     16: #include "HTEscape.h"                                   /* Implemented here */
                     17: 
                     18: #define HEX_ESCAPE '%'
2.6       frystyk    19: #define ACCEPTABLE(a)  ( a>=32 && a<128 && ((isAcceptable[a-32]) & mask))
                     20: 
                     21: /*
2.13      frystyk    22: **  Not BOTH static AND const at the same time in gcc :-(, Henrik 18/03-94 
                     23: **  code gen error in gcc when making random access to static const table(!!)
2.6       frystyk    24: */
                     25: 
                     26: /*
                     27: **     Bit 0           xalpha          -- see HTFile.h
                     28: **     Bit 1           xpalpha         -- as xalpha but with plus.
                     29: **     Bit 2 ...       path            -- as xpalpha but with /
                     30: */
                     31: PRIVATE unsigned char isAcceptable[96] =
2.18      frystyk    32: {/* 0x0 0x1 0x2 0x3 0x4 0x5 0x6 0x7 0x8 0x9 0xA 0xB 0xC 0xD 0xE 0xF */
2.19      frystyk    33:     0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0xF,0xE,0x0,0xF,0xF,0xC, /* 2x  !"#$%&'()*+,-./   */
                     34:     0xF,0xF,0xF,0xF,0xF,0xF,0xF,0xF,0xF,0xF,0x8,0x0,0x0,0x0,0x0,0x0, /* 3x 0123456789:;<=>?   */
2.18      frystyk    35:     0xF,0xF,0xF,0xF,0xF,0xF,0xF,0xF,0xF,0xF,0xF,0xF,0xF,0xF,0xF,0xF, /* 4x @ABCDEFGHIJKLMNO   */
                     36:     0xF,0xF,0xF,0xF,0xF,0xF,0xF,0xF,0xF,0xF,0xF,0x0,0x0,0x0,0x0,0xF, /* 5X PQRSTUVWXYZ[\]^_   */
                     37:     0x0,0xF,0xF,0xF,0xF,0xF,0xF,0xF,0xF,0xF,0xF,0xF,0xF,0xF,0xF,0xF, /* 6x `abcdefghijklmno   */
                     38:     0xF,0xF,0xF,0xF,0xF,0xF,0xF,0xF,0xF,0xF,0xF,0x0,0x0,0x0,0x0,0x0  /* 7X pqrstuvwxyz{\}~DEL */
2.6       frystyk    39: };
                     40: PRIVATE char *hex = "0123456789ABCDEF";
2.1       frystyk    41: 
                     42: /* ------------------------------------------------------------------------- */
                     43: 
                     44: /*             Escape undesirable characters using %           HTEscape()
                     45: **             -------------------------------------
                     46: **
                     47: **     This function takes a pointer to a string in which
                     48: **     some characters may be unacceptable unescaped.
                     49: **     It returns a string which has these characters
                     50: **     represented by a '%' character followed by two hex digits.
                     51: **
                     52: **     In the tradition of being conservative in what you do and liberal
                     53: **     in what you accept, we encode some characters which in fact are
                     54: **     allowed in URLs unencoded -- so DON'T use the table below for
                     55: **     parsing! 
                     56: **
2.13      frystyk    57: **     Unlike HTUnEscape(), this routine returns a HT_MALLOCed string.
2.1       frystyk    58: **
                     59: */
2.13      frystyk    60: PUBLIC char * HTEscape (const char * str, HTURIEncoding mask)
2.1       frystyk    61: {
2.13      frystyk    62:     const char * p;
2.1       frystyk    63:     char * q;
                     64:     char * result;
                     65:     int unacceptable = 0;
2.20    ! frystyk    66:     if (!str) return NULL;
2.1       frystyk    67:     for(p=str; *p; p++)
                     68:         if (!ACCEPTABLE((unsigned char)TOASCII(*p)))
                     69:                unacceptable++;
2.11      frystyk    70:     if ((result = (char  *) HT_MALLOC(p-str + unacceptable+ unacceptable + 1)) == NULL)
                     71:         HT_OUTOFMEM("HTEscape");
2.1       frystyk    72:     for(q=result, p=str; *p; p++) {
                     73:        unsigned char a = TOASCII(*p);
                     74:        if (!ACCEPTABLE(a)) {
                     75:            *q++ = HEX_ESCAPE;  /* Means hex commming */
                     76:            *q++ = hex[a >> 4];
                     77:            *q++ = hex[a & 15];
                     78:        }
                     79:        else *q++ = *p;
                     80:     }
                     81:     *q++ = 0;                  /* Terminate */
                     82:     return result;
                     83: }
                     84: 
                     85: 
2.16      frystyk    86: PUBLIC char HTAsciiHexToChar (char c)
                     87: {
                     88:     return  c >= '0' && c <= '9' ?  c - '0' 
                     89:            : c >= 'A' && c <= 'F'? c - 'A' + 10
                     90:            : c - 'a' + 10;     /* accept small letters just in case */
                     91: }
                     92: 
2.1       frystyk    93: /*             Decode %xx escaped characters                   HTUnEscape()
                     94: **             -----------------------------
                     95: **
                     96: **     This function takes a pointer to a string in which some
                     97: **     characters may have been encoded in %xy form, where xy is
                     98: **     the acsii hex code for character 16x+y.
                     99: **     The string is converted in place, as it will never grow.
                    100: */
2.9       frystyk   101: PUBLIC char * HTUnEscape (char * str)
2.1       frystyk   102: {
                    103:     char * p = str;
                    104:     char * q = str;
                    105: 
                    106:     if (!str) {                                              /* Just for safety ;-) */
                    107:        if (URI_TRACE)
2.12      eric      108:            HTTrace("HTUnEscape.. Called with NULL argument.\n");
2.20    ! frystyk   109:        return NULL;
2.1       frystyk   110:     }
                    111:     while(*p) {
                    112:         if (*p == HEX_ESCAPE) {
                    113:            p++;
2.16      frystyk   114:            if (*p) *q = HTAsciiHexToChar(*p++) * 16;
                    115: #if 1
                    116:            /* Suggestion from Markku Savela */
                    117:            if (*p) *q = FROMASCII(*q + HTAsciiHexToChar(*p)), ++p;
2.15      eric      118:            q++;
2.16      frystyk   119: #else 
                    120:            if (*p) *q = FROMASCII(*q + HTAsciiHexToChar(*p));
2.7       frystyk   121:            p++, q++;
2.16      frystyk   122: #endif
2.1       frystyk   123:        } else {
                    124:            *q++ = *p++; 
                    125:        }
                    126:     }
                    127:     
                    128:     *q++ = 0;
                    129:     return str;
                    130:     
                    131: } /* HTUnEscape */
                    132: 

Webmaster