Annotation of libwww/Library/src/HTParse.c, revision 2.27
1.1 timbl 1: /* Parse HyperText Document Address HTParse.c
2: ** ================================
2.26 frystyk 3: **
4: ** history:
5: ** May 12 94 TAB added as legal char in HTCleanTelnetString
6: **
1.1 timbl 7: */
2.27 ! frystyk 8: #include "tcp.h"
1.1 timbl 9: #include "HTUtils.h"
10: #include "HTParse.h"
11:
2.6 timbl 12: #define HEX_ESCAPE '%'
13:
/*	Components of an address, as split up by scan()
**
**	Every member is either zero or points at a zero terminated
**	piece of the string that was handed to scan().
*/
struct struct_parts {
    char * access;      /* Scheme before the first ':' (old name: "access") */
    char * host;        /* What follows "//", up to the next '/' */
    char * absolute;    /* Path after its leading '/' */
    char * relative;    /* Path that does not start with '/' */
    /* No member for the search part: it is treated as part of the path */
    char * anchor;      /* What follows '#' */
};
22:
23: /* Strip white space off a string
24: ** ------------------------------
25: **
26: ** On exit,
27: ** Return value points to first non-white character, or to 0 if none.
28: ** All trailing white space is OVERWRITTEN with zero.
29: */
30:
2.13 luotonen 31: PUBLIC char * HTStrip ARGS1(char *, s)
1.1 timbl 32: {
33: #define SPACE(c) ((c==' ')||(c=='\t')||(c=='\n'))
34: char * p=s;
2.13 luotonen 35: if (!s) return NULL; /* Doesn't dump core if NULL */
36: for(p=s;*p;p++); /* Find end of string */
1.1 timbl 37: for(p--;p>=s;p--) {
38: if(SPACE(*p)) *p=0; /* Zap trailing blanks */
39: else break;
40: }
41: while(SPACE(*s))s++; /* Strip leading blanks */
42: return s;
43: }
44:
45:
46: /* Scan a filename for its consituents
47: ** -----------------------------------
48: **
49: ** On entry,
50: ** name points to a document name which may be incomplete.
51: ** On exit,
52: ** absolute or relative may be nonzero (but not both).
53: ** host, anchor and access may be nonzero if they were specified.
54: ** Any which are nonzero point to zero terminated strings.
55: */
56: #ifdef __STDC__
57: PRIVATE void scan(char * name, struct struct_parts *parts)
58: #else
59: PRIVATE void scan(name, parts)
60: char * name;
61: struct struct_parts *parts;
62: #endif
63: {
64: char * after_access;
65: char * p;
66: int length = strlen(name);
67:
68: parts->access = 0;
69: parts->host = 0;
70: parts->absolute = 0;
71: parts->relative = 0;
72: parts->anchor = 0;
73:
74: after_access = name;
75: for(p=name; *p; p++) {
76: if (*p==':') {
77: *p = 0;
2.20 timbl 78: parts->access = after_access; /* Scheme has been specified */
1.1 timbl 79: after_access = p+1;
2.22 luotonen 80: if (0==strcasecomp("URL", parts->access)) {
2.20 timbl 81: parts->access = NULL; /* Ignore IETF's URL: pre-prefix */
82: } else break;
1.1 timbl 83: }
2.20 timbl 84: if (*p=='/') break; /* Access has not been specified */
1.1 timbl 85: if (*p=='#') break;
86: }
87:
88: for(p=name+length-1; p>=name; p--) {
89: if (*p =='#') {
90: parts->anchor=p+1;
91: *p=0; /* terminate the rest */
92: }
93: }
94: p = after_access;
95: if (*p=='/'){
96: if (p[1]=='/') {
97: parts->host = p+2; /* host has been specified */
98: *p=0; /* Terminate access */
99: p=strchr(parts->host,'/'); /* look for end of host name if any */
100: if(p) {
101: *p=0; /* Terminate host */
102: parts->absolute = p+1; /* Root has been found */
103: }
104: } else {
105: parts->absolute = p+1; /* Root found but no host */
106: }
107: } else {
108: parts->relative = (*after_access) ? after_access : 0; /* zero for "" */
109: }
110:
2.16 timbl 111: #ifdef OLD_CODE
1.1 timbl 112: /* Access specified but no host: the anchor was not really one
2.16 timbl 113: e.g. news:j462#36487@foo.bar -- JFG 10/jul/92, from bug report */
114: /* This kludge doesn't work for example when coming across
115: file:/usr/local/www/fred#123
116: which loses its anchor. Correct approach in news is to
117: escape weird characters not allowed in URL. TBL 21/dec/93
118: */
1.1 timbl 119: if (parts->access && ! parts->host && parts->anchor) {
120: *(parts->anchor - 1) = '#'; /* Restore the '#' in the address */
121: parts->anchor = 0;
122: }
2.16 timbl 123: #endif
1.1 timbl 124:
125: #ifdef NOT_DEFINED /* search is just treated as part of path */
126: {
127: char *p = relative ? relative : absolute;
128: if (p) {
129: char * q = strchr(p, '?'); /* Any search string? */
130: if (q) {
131: *q = 0; /* If so, chop that off. */
132: parts->search = q+1;
133: }
134: }
135: }
136: #endif
137: } /*scan */
138:
139:
140: /* Parse a Name relative to another name
141: ** -------------------------------------
142: **
143: ** This returns those parts of a name which are given (and requested)
144: ** substituting bits from the related name where necessary.
145: **
146: ** On entry,
147: ** aName A filename given
148: ** relatedName A name relative to which aName is to be parsed
149: ** wanted A mask for the bits which are wanted.
150: **
151: ** On exit,
152: ** returns A pointer to a malloc'd string which MUST BE FREED
153: */
154: #ifdef __STDC__
155: char * HTParse(const char * aName, const char * relatedName, int wanted)
156: #else
157: char * HTParse(aName, relatedName, wanted)
158: char * aName;
159: char * relatedName;
160: int wanted;
161: #endif
162:
163: {
164: char * result = 0;
165: char * return_value = 0;
166: int len;
167: char * name = 0;
168: char * rel = 0;
169: char * p;
2.12 timbl 170: char * access;
1.1 timbl 171: struct struct_parts given, related;
172:
173: /* Make working copies of input strings to cut up:
174: */
175: len = strlen(aName)+strlen(relatedName)+10;
176: result=(char *)malloc(len); /* Lots of space: more than enough */
177: if (result == NULL) outofmem(__FILE__, "HTParse");
178:
179: StrAllocCopy(name, aName);
180: StrAllocCopy(rel, relatedName);
181:
182: scan(name, &given);
183: scan(rel, &related);
184: result[0]=0; /* Clear string */
2.12 timbl 185: access = given.access ? given.access : related.access;
1.1 timbl 186: if (wanted & PARSE_ACCESS)
2.12 timbl 187: if (access) {
188: strcat(result, access);
1.1 timbl 189: if(wanted & PARSE_PUNCTUATION) strcat(result, ":");
190: }
191:
192: if (given.access && related.access) /* If different, inherit nothing. */
193: if (strcmp(given.access, related.access)!=0) {
194: related.host=0;
195: related.absolute=0;
196: related.relative=0;
197: related.anchor=0;
198: }
199:
200: if (wanted & PARSE_HOST)
201: if(given.host || related.host) {
2.12 timbl 202: char * tail = result + strlen(result);
1.1 timbl 203: if(wanted & PARSE_PUNCTUATION) strcat(result, "//");
204: strcat(result, given.host ? given.host : related.host);
2.12 timbl 205: #define CLEAN_URLS
206: #ifdef CLEAN_URLS
207: /* Ignore default port numbers, and trailing dots on FQDNs
208: which will only cause identical adreesses to look different */
209: {
210: char * p;
211: p = strchr(tail, ':');
212: if (p && access) { /* Port specified */
213: if ( ( strcmp(access, "http") == 0
214: && strcmp(p, ":80") == 0 )
215: ||
216: ( strcmp(access, "gopher") == 0
217: && strcmp(p, ":70") == 0 )
218: )
219: *p = (char)0; /* It is the default: ignore it */
220: }
221: if (!p) p = tail + strlen(tail); /* After hostname */
2.21 frystyk 222: if (*p) { /* Henrik 17/04-94 */
223: p--; /* End of hostname */
224: if (*p == '.') *p = (char)0; /* chop final . */
225: }
2.12 timbl 226: }
227: #endif
1.1 timbl 228: }
229:
230: if (given.host && related.host) /* If different hosts, inherit no path. */
231: if (strcmp(given.host, related.host)!=0) {
232: related.absolute=0;
233: related.relative=0;
234: related.anchor=0;
235: }
236:
237: if (wanted & PARSE_PATH) {
238: if(given.absolute) { /* All is given */
239: if(wanted & PARSE_PUNCTUATION) strcat(result, "/");
240: strcat(result, given.absolute);
241: } else if(related.absolute) { /* Adopt path not name */
242: strcat(result, "/");
243: strcat(result, related.absolute);
244: if (given.relative) {
245: p = strchr(result, '?'); /* Search part? */
246: if (!p) p=result+strlen(result)-1;
247: for (; *p!='/'; p--); /* last / */
248: p[1]=0; /* Remove filename */
249: strcat(result, given.relative); /* Add given one */
250: HTSimplify (result);
251: }
252: } else if(given.relative) {
253: strcat(result, given.relative); /* what we've got */
254: } else if(related.relative) {
255: strcat(result, related.relative);
256: } else { /* No inheritance */
257: strcat(result, "/");
258: }
259: }
260:
261: if (wanted & PARSE_ANCHOR)
262: if(given.anchor || related.anchor) {
263: if(wanted & PARSE_PUNCTUATION) strcat(result, "#");
264: strcat(result, given.anchor ? given.anchor : related.anchor);
265: }
266: free(rel);
267: free(name);
268:
269: StrAllocCopy(return_value, result);
270: free(result);
271: return return_value; /* exactly the right length */
272: }
273:
2.11 timbl 274:
#if 0  /* NOT USED FOR THE MOMENT */
/*
**	As strcpy() but guaranteed to work correctly
**	with overlapping parameters.	AL 7 Feb 1994
**
**	The source is first duplicated into a temporary malloc'd buffer
**	and then copied from there.  Does nothing when either pointer
**	is NULL.
*/
PRIVATE void ari_strcpy ARGS2(char *, to,
                              char *, from)
{
    char * tmp;

    if (!to || !from) return;

    tmp = (char*)malloc(strlen(from)+1);
    if (!tmp) outofmem(__FILE__, "ari_strcpy");

    strcpy(tmp, from);
    strcpy(to, tmp);
    free(tmp);
}
#endif
295:
2.20 timbl 296:
297: /* Simplify a URI
298: // --------------
299: // A URI is allowed to contain the seqeunce xxx/../ which may be
1.1 timbl 300: // replaced by "" , and the seqeunce "/./" which may be replaced by "/".
2.20 timbl 301: // Simplification helps us recognize duplicate URIs.
1.1 timbl 302: //
303: // Thus, /etc/junk/../fred becomes /etc/fred
304: // /etc/junk/./fred becomes /etc/junk/fred
2.11 timbl 305: //
306: // but we should NOT change
307: // http://fred.xxx.edu/../..
308: //
309: // or ../../albert.html
2.26 frystyk 310: //
311: // In the same manner, the following prefixed are preserved:
312: //
313: // ./<etc>
314: // //<etc>
315: //
316: // In order to avoid empty URLs the following URLs become:
317: //
318: // /fred/.. becomes /fred/..
319: // /fred/././.. becomes /fred/..
2.27 ! frystyk 320: // /fred/.././junk/.././ becomes /fred/..
2.26 frystyk 321: //
1.1 timbl 322: */
2.14 luotonen 323: PUBLIC void HTSimplify ARGS1(char *, filename)
1.1 timbl 324: {
2.19 frystyk 325: int tokcnt = 0;
326: char *strptr;
327: char *urlptr;
2.27 ! frystyk 328: BOOL prefix = NO; /* If prefix == YES then we can delete all segments */
2.19 frystyk 329: if (!filename || !*filename) /* Just to be sure! */
330: return;
331:
2.27 ! frystyk 332: if (TRACE)
! 333: fprintf(stderr, "HTSimplify.. `%s\' ", filename);
! 334:
2.19 frystyk 335: /* Skip prefix, starting ./ and starting ///<etc> */
2.27 ! frystyk 336: if ((urlptr = strstr(filename, "://")) != NULL) { /* Find prefix */
! 337: urlptr += 3;
! 338: prefix = YES;
! 339: } else if ((urlptr = strstr(filename, ":/")) != NULL) {
! 340: urlptr += 2;
! 341: prefix = YES;
! 342: } else
! 343: urlptr = filename;
! 344: if (*urlptr == '.' && *(urlptr+1) == '/') { /* Starting ./<etc> */
2.19 frystyk 345: urlptr += 2;
2.27 ! frystyk 346: prefix = YES;
! 347: } else if (*urlptr == '/') { /* Some URLs start //<file> */
2.19 frystyk 348: while (*++urlptr == '/');
2.27 ! frystyk 349: prefix = YES;
2.19 frystyk 350: }
2.27 ! frystyk 351: if (!*urlptr) { /* If nothing left */
! 352: if (TRACE)
! 353: fprintf(stderr, "No simplification possible\n");
2.19 frystyk 354: return;
2.27 ! frystyk 355: }
2.19 frystyk 356:
357: /* Now we have the string we want to work with */
358: strptr = urlptr;
359: while (*strptr++) { /* Count number of delimiters */
360: if (*strptr == '/')
361: tokcnt++;
362: }
363: {
364: BOOL slashtail = NO;
2.27 ! frystyk 365: int segcnt = 0; /* Number of 'real segments' (not '.' and '..') */
2.19 frystyk 366: char *empty = "";
367: char *url = NULL;
368: char **tokptr;
369: char **tokstart;
370: StrAllocCopy(url, urlptr);
371:
372: /* Does the URL end with a slash? */
373: if(*(filename+strlen(filename)-1) == '/')
374: slashtail = YES;
375:
376: /* I allocate cnt+2 as I don't know if the url is terminated by '/' */
377: if ((tokstart = (char **) calloc(tokcnt+2, sizeof(char *))) == NULL)
378: outofmem(__FILE__, "HTSimplify");
379:
2.27 ! frystyk 380: /* Read the tokens forwards and count `real' segments */
2.19 frystyk 381: tokptr = tokstart;
2.27 ! frystyk 382: *tokptr = strtok(url, "/");
! 383: if (strcmp(*tokptr, ".") && strcmp(*tokptr, ".."))
! 384: segcnt++;
! 385: tokptr++;
! 386: while ((strptr = strtok(NULL, "/")) != NULL) {
! 387: if (strcmp(strptr, ".") && strcmp(strptr, ".."))
! 388: segcnt++;
! 389: else if (!strcmp(strptr, "..") && !segcnt)
! 390: prefix = YES;
2.19 frystyk 391: *tokptr++ = strptr;
2.27 ! frystyk 392: }
! 393:
! 394: #if 0
! 395: {
! 396: char **test = tokstart;
! 397: fprintf(stderr, "--- start ---\n");
! 398: fprintf(stderr, "Filename:\t`%s\'\n", filename);
! 399: while (*test)
! 400: fprintf(stderr, "Token:\t\t`%s\'\n", *test++);
! 401: fprintf(stderr, "Segments:\t%d\n", segcnt);
! 402: }
! 403: #endif
! 404:
2.19 frystyk 405: /* Scan backwards for '.' and '..' */
406: tokptr--;
407: while(tokptr >= tokstart) {
408: if (!strcmp(*tokptr, ".")) {
409: *tokptr = empty;
410: } else if (!strcmp(*tokptr, "..")) {
411: char **pptr = tokptr-1;
412: while (pptr >= tokstart) {
2.26 frystyk 413: if (**pptr && strcmp(*pptr, "..") && strcmp(*pptr, ".") &&
2.27 ! frystyk 414: (segcnt > 1 || prefix)) {
2.19 frystyk 415: *pptr = empty;
416: *tokptr = empty;
2.27 ! frystyk 417: segcnt--;
2.19 frystyk 418: break;
419: }
420: pptr--;
421: }
422: }
423: tokptr--;
424: }
425:
426: /* Write the rest out forwards */
427: *urlptr = '\0';
428: while (*++tokptr) {
429: if (**tokptr) {
2.27 ! frystyk 430: if (*urlptr) /* Don't want two in the beginning */
! 431: strcat(urlptr, "/");
2.19 frystyk 432: strcat(urlptr, *tokptr);
433: }
434: }
2.27 ! frystyk 435:
! 436: if (slashtail == YES && *(urlptr+(int)strlen(urlptr)-1) != '/')
2.19 frystyk 437: strcat(urlptr, "/");
2.27 ! frystyk 438: #if 0
! 439: {
! 440: char **test = tokstart;
! 441: while (*test)
! 442: fprintf(stderr, "Token:\t\t`%s\'\n", *test++);
! 443: fprintf(stderr, "Segments:\t%d\n", segcnt);
! 444: fprintf(stderr, "Filename:\t`%s\'\n", filename);
! 445: fprintf(stderr, "--- end ---\n\n");
! 446: }
! 447: #endif
2.19 frystyk 448: free(url);
449: free(tokstart);
450: }
451: if (TRACE)
2.27 ! frystyk 452: fprintf(stderr, "into\n............ `%s'\n", filename);
2.19 frystyk 453: }
454: #ifdef OLD_CODE
2.17 frystyk 455: char * p = filename;
1.1 timbl 456: char * q;
2.17 frystyk 457:
458: if (p) {
459: while (*p && (*p == '/' || *p == '.')) /* Pass starting / or .'s */
460: p++;
461: while(*p) {
462: if (*p=='/') {
1.1 timbl 463: if ((p[1]=='.') && (p[2]=='.') && (p[3]=='/' || !p[3] )) {
2.11 timbl 464: for (q=p-1; (q>=filename) && (*q!='/'); q--); /* prev slash */
465: if (q[0]=='/' && 0!=strncmp(q, "/../", 4)
466: &&!(q-1>filename && q[-1]=='/')) {
2.15 luotonen 467: ari_strcpy(q, p+3); /* Remove /xxx/.. */
1.1 timbl 468: if (!*filename) strcpy(filename, "/");
469: p = q-1; /* Start again with prev slash */
2.11 timbl 470: } else { /* xxx/.. leave it! */
2.9 timbl 471: #ifdef BUG_CODE
2.15 luotonen 472: ari_strcpy(filename, p[3] ? p+4 : p+3); /* rm xxx/../ */
1.1 timbl 473: p = filename; /* Start again */
2.9 timbl 474: #endif
1.1 timbl 475: }
476: } else if ((p[1]=='.') && (p[2]=='/' || !p[2])) {
2.15 luotonen 477: ari_strcpy(p, p+2); /* Remove a slash and a dot */
2.13 luotonen 478: } else if (p[-1] != ':') {
479: while (p[1] == '/') {
2.15 luotonen 480: ari_strcpy(p, p+1); /* Remove multiple slashes */
2.13 luotonen 481: }
1.1 timbl 482: }
2.17 frystyk 483: }
484: p++;
485: } /* end while (*p) */
486: } /* end if (p) */
1.1 timbl 487: }
2.19 frystyk 488: #endif /* OLD_CODE */
1.1 timbl 489:
490:
491: /* Make Relative Name
492: ** ------------------
493: **
494: ** This function creates and returns a string which gives an expression of
495: ** one address as related to another. Where there is no relation, an absolute
496: ** address is retured.
497: **
498: ** On entry,
499: ** Both names must be absolute, fully qualified names of nodes
500: ** (no anchor bits)
501: **
502: ** On exit,
503: ** The return result points to a newly allocated name which, if
504: ** parsed by HTParse relative to relatedName, will yield aName.
505: ** The caller is responsible for freeing the resulting name later.
506: **
507: */
508: #ifdef __STDC__
509: char * HTRelative(const char * aName, const char *relatedName)
510: #else
511: char * HTRelative(aName, relatedName)
512: char * aName;
513: char * relatedName;
514: #endif
515: {
516: char * result = 0;
517: CONST char *p = aName;
518: CONST char *q = relatedName;
519: CONST char * after_access = 0;
520: CONST char * path = 0;
521: CONST char * last_slash = 0;
522: int slashes = 0;
523:
524: for(;*p; p++, q++) { /* Find extent of match */
525: if (*p!=*q) break;
526: if (*p==':') after_access = p+1;
527: if (*p=='/') {
528: last_slash = p;
529: slashes++;
530: if (slashes==3) path=p;
531: }
532: }
533:
534: /* q, p point to the first non-matching character or zero */
535:
536: if (!after_access) { /* Different access */
537: StrAllocCopy(result, aName);
538: } else if (slashes<3){ /* Different nodes */
539: StrAllocCopy(result, after_access);
2.21 frystyk 540: #if 0
1.1 timbl 541: } else if (slashes==3){ /* Same node, different path */
542: StrAllocCopy(result, path);
2.21 frystyk 543: #endif
1.1 timbl 544: } else { /* Some path in common */
545: int levels= 0;
546: for(; *q && (*q!='#'); q++) if (*q=='/') levels++;
547: result = (char *)malloc(3*levels + strlen(last_slash) + 1);
548: if (result == NULL) outofmem(__FILE__, "HTRelative");
549: result[0]=0;
550: for(;levels; levels--)strcat(result, "../");
551: strcat(result, last_slash+1);
552: }
2.21 frystyk 553: if (TRACE) fprintf(stderr,
554: "HTRelative.. `%s' expressed relative to `%s' is `%s'\n",
555: aName, relatedName, result);
1.1 timbl 556: return result;
557: }
2.1 timbl 558:
559:
2.6 timbl 560: /* Escape undesirable characters using % HTEscape()
561: ** -------------------------------------
562: **
563: ** This function takes a pointer to a string in which
564: ** some characters may be unacceptable unescaped.
565: ** It returns a string which has these characters
566: ** represented by a '%' character followed by two hex digits.
567: **
2.20 timbl 568: ** In the tradition of being conservative in what you do and liberal
569: ** in what you accept, we encode some characters which in fact are
570: ** allowed in URLs unencoded -- so DON'T use the table below for
571: ** parsing!
572: **
2.6 timbl 573: ** Unlike HTUnEscape(), this routine returns a malloced string.
2.20 timbl 574: **
2.6 timbl 575: */
576:
2.20 timbl 577: /* Not BOTH static AND const at the same time in gcc :-(, Henrik 18/03-94
578: ** code gen error in gcc when making random access to
579: ** static const table(!!) */
2.19 frystyk 580: /* PRIVATE CONST unsigned char isAcceptable[96] = */
581: PRIVATE unsigned char isAcceptable[96] =
2.6 timbl 582:
2.20 timbl 583: /* Overencodes */
2.6 timbl 584: /* Bit 0 xalpha -- see HTFile.h
585: ** Bit 1 xpalpha -- as xalpha but with plus.
2.20 timbl 586: ** Bit 2 ... path -- as xpalpha but with /
2.6 timbl 587: */
588: /* 0 1 2 3 4 5 6 7 8 9 A B C D E F */
589: { 0,0,0,0,0,0,0,0,0,0,7,6,0,7,7,4, /* 2x !"#$%&'()*+,-./ */
590: 7,7,7,7,7,7,7,7,7,7,0,0,0,0,0,0, /* 3x 0123456789:;<=>? */
591: 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, /* 4x @ABCDEFGHIJKLMNO */
592: 7,7,7,7,7,7,7,7,7,7,7,0,0,0,0,7, /* 5X PQRSTUVWXYZ[\]^_ */
593: 0,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, /* 6x `abcdefghijklmno */
594: 7,7,7,7,7,7,7,7,7,7,7,0,0,0,0,0 }; /* 7X pqrstuvwxyz{\}~ DEL */
595:
596: PRIVATE char *hex = "0123456789ABCDEF";
597:
2.8 timbl 598: PUBLIC char * HTEscape ARGS2 (CONST char *, str,
2.6 timbl 599: unsigned char, mask)
600: {
601: #define ACCEPTABLE(a) ( a>=32 && a<128 && ((isAcceptable[a-32]) & mask))
602: CONST char * p;
603: char * q;
604: char * result;
605: int unacceptable = 0;
606: for(p=str; *p; p++)
607: if (!ACCEPTABLE((unsigned char)TOASCII(*p)))
608: unacceptable++;
609: result = (char *) malloc(p-str + unacceptable+ unacceptable + 1);
610: if (result == NULL) outofmem(__FILE__, "HTEscape");
611: for(q=result, p=str; *p; p++) {
612: unsigned char a = TOASCII(*p);
613: if (!ACCEPTABLE(a)) {
614: *q++ = HEX_ESCAPE; /* Means hex commming */
615: *q++ = hex[a >> 4];
616: *q++ = hex[a & 15];
617: }
618: else *q++ = *p;
619: }
620: *q++ = 0; /* Terminate */
621: return result;
622: }
623:
624:
2.1 timbl 625: /* Decode %xx escaped characters HTUnEscape()
626: ** -----------------------------
627: **
628: ** This function takes a pointer to a string in which some
629: ** characters may have been encoded in %xy form, where xy is
630: ** the ASCII hex code for character 16x+y.
631: ** The string is converted in place, as it will never grow.
632: */
633:
634: PRIVATE char from_hex ARGS1(char, c)
635: {
2.6 timbl 636: return c >= '0' && c <= '9' ? c - '0'
637: : c >= 'A' && c <= 'F'? c - 'A' + 10
638: : c - 'a' + 10; /* accept small letters just in case */
2.1 timbl 639: }
640:
641: PUBLIC char * HTUnEscape ARGS1( char *, str)
642: {
643: char * p = str;
644: char * q = str;
2.25 frystyk 645:
646: if (!str) { /* Just for safety ;-) */
647: if (TRACE)
648: fprintf(stderr, "HTUnEscape.. Called with NULL argument.\n");
649: return "";
650: }
2.1 timbl 651: while(*p) {
2.6 timbl 652: if (*p == HEX_ESCAPE) {
2.1 timbl 653: p++;
654: if (*p) *q = from_hex(*p++) * 16;
655: if (*p) *q = FROMASCII(*q + from_hex(*p++));
656: q++;
657: } else {
658: *q++ = *p++;
659: }
660: }
661:
662: *q++ = 0;
663: return str;
664:
665: } /* HTUnEscape */
666:
667:
2.24 luotonen 668: /* HTCleanTelnetString()
669: * Make sure that the given string doesn't contain characters that
670: * could cause security holes, such as newlines in ftp, gopher,
671: * news or telnet URLs; more specifically: allows everything between
2.26 frystyk 672: * ASCII 20-7E, and also A0-FE, inclusive. Also TAB ('\t') allowed!
2.24 luotonen 673: *
674: * On entry,
675: * str the string that is *modified* if necessary. The
676: * string will be truncated at the first illegal
677: * character that is encountered.
678: * On exit,
679: * returns YES, if the string was modified.
680: * NO, otherwise.
681: */
682: PUBLIC BOOL HTCleanTelnetString ARGS1(char *, str)
683: {
684: char * cur = str;
685:
686: if (!str) return NO;
687:
688: while (*cur) {
689: int a = TOASCII(*cur);
2.26 frystyk 690: if (a != 0x9 && (a < 0x20 || (a > 0x7E && a < 0xA0) || a > 0xFE)) {
2.24 luotonen 691: CTRACE(stderr, "Illegal..... character in URL: \"%s\"\n",str);
692: *cur = 0;
693: CTRACE(stderr, "Truncated... \"%s\"\n",str);
694: return YES;
695: }
696: cur++;
697: }
698: return NO;
699: }
700:
Webmaster