Annotation of libwww/Library/src/HTParse.c, revision 2.29
1.1 timbl 1: /* Parse HyperText Document Address HTParse.c
2: ** ================================
2.26 frystyk 3: **
4: ** history:
5: ** May 12 94 TAB added as legal char in HTCleanTelnetString
6: **
1.1 timbl 7: */
2.27 frystyk 8: #include "tcp.h"
1.1 timbl 9: #include "HTUtils.h"
10: #include "HTParse.h"
11:
2.6 timbl 12: #define HEX_ESCAPE '%'
13:
/*
**	The pieces of a document address as located by scan().
**	A member is zero when that piece was not present; otherwise it
**	points at a zero terminated string inside the scanned buffer.
*/
struct struct_parts {
    char *access;       /* Scheme; historically called "access" */
    char *host;         /* Text between "//" and the next '/' */
    char *absolute;     /* Path that followed a leading '/' */
    char *relative;     /* Path that had no leading '/' */
    /* There is no search member: "?..." is treated as part of the path */
    char *anchor;       /* Text that followed '#' */
};
22:
23: /* Strip white space off a string
24: ** ------------------------------
25: **
26: ** On exit,
27: ** Return value points to first non-white character, or to 0 if none.
28: ** All trailing white space is OVERWRITTEN with zero.
29: */
30:
2.13 luotonen 31: PUBLIC char * HTStrip ARGS1(char *, s)
1.1 timbl 32: {
33: #define SPACE(c) ((c==' ')||(c=='\t')||(c=='\n'))
34: char * p=s;
2.13 luotonen 35: if (!s) return NULL; /* Doesn't dump core if NULL */
36: for(p=s;*p;p++); /* Find end of string */
1.1 timbl 37: for(p--;p>=s;p--) {
38: if(SPACE(*p)) *p=0; /* Zap trailing blanks */
39: else break;
40: }
41: while(SPACE(*s))s++; /* Strip leading blanks */
42: return s;
43: }
44:
45:
46: /* Scan a filename for its consituents
47: ** -----------------------------------
48: **
49: ** On entry,
50: ** name points to a document name which may be incomplete.
51: ** On exit,
52: ** absolute or relative may be nonzero (but not both).
53: ** host, anchor and access may be nonzero if they were specified.
54: ** Any which are nonzero point to zero terminated strings.
55: */
56: #ifdef __STDC__
57: PRIVATE void scan(char * name, struct struct_parts *parts)
58: #else
59: PRIVATE void scan(name, parts)
60: char * name;
61: struct struct_parts *parts;
62: #endif
63: {
64: char * after_access;
65: char * p;
66: int length = strlen(name);
67:
68: parts->access = 0;
69: parts->host = 0;
70: parts->absolute = 0;
71: parts->relative = 0;
72: parts->anchor = 0;
73:
74: after_access = name;
75: for(p=name; *p; p++) {
76: if (*p==':') {
77: *p = 0;
2.20 timbl 78: parts->access = after_access; /* Scheme has been specified */
1.1 timbl 79: after_access = p+1;
2.22 luotonen 80: if (0==strcasecomp("URL", parts->access)) {
2.20 timbl 81: parts->access = NULL; /* Ignore IETF's URL: pre-prefix */
82: } else break;
1.1 timbl 83: }
2.20 timbl 84: if (*p=='/') break; /* Access has not been specified */
1.1 timbl 85: if (*p=='#') break;
86: }
87:
88: for(p=name+length-1; p>=name; p--) {
89: if (*p =='#') {
90: parts->anchor=p+1;
91: *p=0; /* terminate the rest */
92: }
93: }
94: p = after_access;
95: if (*p=='/'){
96: if (p[1]=='/') {
97: parts->host = p+2; /* host has been specified */
98: *p=0; /* Terminate access */
99: p=strchr(parts->host,'/'); /* look for end of host name if any */
100: if(p) {
101: *p=0; /* Terminate host */
102: parts->absolute = p+1; /* Root has been found */
103: }
104: } else {
105: parts->absolute = p+1; /* Root found but no host */
106: }
107: } else {
108: parts->relative = (*after_access) ? after_access : 0; /* zero for "" */
109: }
110:
2.16 timbl 111: #ifdef OLD_CODE
1.1 timbl 112: /* Access specified but no host: the anchor was not really one
2.16 timbl 113: e.g. news:j462#36487@foo.bar -- JFG 10/jul/92, from bug report */
114: /* This kludge doesn't work for example when coming across
115: file:/usr/local/www/fred#123
116: which loses its anchor. Correct approach in news is to
117: escape weird characters not allowed in URL. TBL 21/dec/93
118: */
1.1 timbl 119: if (parts->access && ! parts->host && parts->anchor) {
120: *(parts->anchor - 1) = '#'; /* Restore the '#' in the address */
121: parts->anchor = 0;
122: }
2.16 timbl 123: #endif
1.1 timbl 124:
125: #ifdef NOT_DEFINED /* search is just treated as part of path */
126: {
127: char *p = relative ? relative : absolute;
128: if (p) {
129: char * q = strchr(p, '?'); /* Any search string? */
130: if (q) {
131: *q = 0; /* If so, chop that off. */
132: parts->search = q+1;
133: }
134: }
135: }
136: #endif
137: } /*scan */
138:
139:
140: /* Parse a Name relative to another name
141: ** -------------------------------------
142: **
143: ** This returns those parts of a name which are given (and requested)
144: ** substituting bits from the related name where necessary.
145: **
146: ** On entry,
147: ** aName A filename given
148: ** relatedName A name relative to which aName is to be parsed
149: ** wanted A mask for the bits which are wanted.
150: **
151: ** On exit,
152: ** returns A pointer to a malloc'd string which MUST BE FREED
153: */
154: #ifdef __STDC__
155: char * HTParse(const char * aName, const char * relatedName, int wanted)
156: #else
157: char * HTParse(aName, relatedName, wanted)
158: char * aName;
159: char * relatedName;
160: int wanted;
161: #endif
162:
163: {
164: char * result = 0;
165: char * return_value = 0;
166: int len;
167: char * name = 0;
168: char * rel = 0;
169: char * p;
2.12 timbl 170: char * access;
1.1 timbl 171: struct struct_parts given, related;
172:
173: /* Make working copies of input strings to cut up:
174: */
175: len = strlen(aName)+strlen(relatedName)+10;
176: result=(char *)malloc(len); /* Lots of space: more than enough */
177: if (result == NULL) outofmem(__FILE__, "HTParse");
178:
179: StrAllocCopy(name, aName);
180: StrAllocCopy(rel, relatedName);
181:
182: scan(name, &given);
183: scan(rel, &related);
184: result[0]=0; /* Clear string */
2.12 timbl 185: access = given.access ? given.access : related.access;
1.1 timbl 186: if (wanted & PARSE_ACCESS)
2.12 timbl 187: if (access) {
188: strcat(result, access);
1.1 timbl 189: if(wanted & PARSE_PUNCTUATION) strcat(result, ":");
190: }
191:
192: if (given.access && related.access) /* If different, inherit nothing. */
193: if (strcmp(given.access, related.access)!=0) {
194: related.host=0;
195: related.absolute=0;
196: related.relative=0;
197: related.anchor=0;
198: }
199:
200: if (wanted & PARSE_HOST)
201: if(given.host || related.host) {
2.12 timbl 202: char * tail = result + strlen(result);
1.1 timbl 203: if(wanted & PARSE_PUNCTUATION) strcat(result, "//");
204: strcat(result, given.host ? given.host : related.host);
2.12 timbl 205: #define CLEAN_URLS
206: #ifdef CLEAN_URLS
207: /* Ignore default port numbers, and trailing dots on FQDNs
208: which will only cause identical adreesses to look different */
209: {
210: char * p;
211: p = strchr(tail, ':');
212: if (p && access) { /* Port specified */
213: if ( ( strcmp(access, "http") == 0
214: && strcmp(p, ":80") == 0 )
215: ||
216: ( strcmp(access, "gopher") == 0
217: && strcmp(p, ":70") == 0 )
218: )
219: *p = (char)0; /* It is the default: ignore it */
220: }
221: if (!p) p = tail + strlen(tail); /* After hostname */
2.21 frystyk 222: if (*p) { /* Henrik 17/04-94 */
223: p--; /* End of hostname */
224: if (*p == '.') *p = (char)0; /* chop final . */
225: }
2.12 timbl 226: }
227: #endif
1.1 timbl 228: }
229:
230: if (given.host && related.host) /* If different hosts, inherit no path. */
231: if (strcmp(given.host, related.host)!=0) {
232: related.absolute=0;
233: related.relative=0;
234: related.anchor=0;
235: }
236:
237: if (wanted & PARSE_PATH) {
238: if(given.absolute) { /* All is given */
239: if(wanted & PARSE_PUNCTUATION) strcat(result, "/");
240: strcat(result, given.absolute);
241: } else if(related.absolute) { /* Adopt path not name */
242: strcat(result, "/");
243: strcat(result, related.absolute);
244: if (given.relative) {
245: p = strchr(result, '?'); /* Search part? */
246: if (!p) p=result+strlen(result)-1;
247: for (; *p!='/'; p--); /* last / */
248: p[1]=0; /* Remove filename */
249: strcat(result, given.relative); /* Add given one */
250: HTSimplify (result);
251: }
252: } else if(given.relative) {
253: strcat(result, given.relative); /* what we've got */
254: } else if(related.relative) {
255: strcat(result, related.relative);
256: } else { /* No inheritance */
257: strcat(result, "/");
258: }
259: }
260:
261: if (wanted & PARSE_ANCHOR)
262: if(given.anchor || related.anchor) {
263: if(wanted & PARSE_PUNCTUATION) strcat(result, "#");
264: strcat(result, given.anchor ? given.anchor : related.anchor);
265: }
266: free(rel);
267: free(name);
268:
269: StrAllocCopy(return_value, result);
270: free(result);
271: return return_value; /* exactly the right length */
272: }
273:
2.11 timbl 274:
2.21 frystyk 275: #if 0 /* NOT USED FOR THE MOMENT */
2.15 luotonen 276: /*
277: ** As strcpy() but guaranteed to work correctly
278: ** with overlapping parameters. AL 7 Feb 1994
279: */
280: PRIVATE void ari_strcpy ARGS2(char *, to,
281: char *, from)
282: {
283: char * tmp;
284:
285: if (!to || !from) return;
286:
287: tmp = (char*)malloc(strlen(from)+1);
288: if (!tmp) outofmem(__FILE__, "my_strcpy");
289:
290: strcpy(tmp, from);
291: strcpy(to, tmp);
292: free(tmp);
293: }
2.21 frystyk 294: #endif
295:
2.20 timbl 296:
297: /* Simplify a URI
298: // --------------
299: // A URI is allowed to contain the seqeunce xxx/../ which may be
1.1 timbl 300: // replaced by "" , and the seqeunce "/./" which may be replaced by "/".
2.20 timbl 301: // Simplification helps us recognize duplicate URIs.
1.1 timbl 302: //
303: // Thus, /etc/junk/../fred becomes /etc/fred
304: // /etc/junk/./fred becomes /etc/junk/fred
2.11 timbl 305: //
306: // but we should NOT change
307: // http://fred.xxx.edu/../..
308: //
309: // or ../../albert.html
2.26 frystyk 310: //
311: // In the same manner, the following prefixed are preserved:
312: //
313: // ./<etc>
314: // //<etc>
315: //
316: // In order to avoid empty URLs the following URLs become:
317: //
318: // /fred/.. becomes /fred/..
319: // /fred/././.. becomes /fred/..
2.27 frystyk 320: // /fred/.././junk/.././ becomes /fred/..
2.26 frystyk 321: //
1.1 timbl 322: */
2.14 luotonen 323: PUBLIC void HTSimplify ARGS1(char *, filename)
1.1 timbl 324: {
2.19 frystyk 325: int tokcnt = 0;
326: char *strptr;
327: char *urlptr;
2.27 frystyk 328: BOOL prefix = NO; /* If prefix == YES then we can delete all segments */
2.19 frystyk 329: if (!filename || !*filename) /* Just to be sure! */
330: return;
331:
2.27 frystyk 332: if (TRACE)
333: fprintf(stderr, "HTSimplify.. `%s\' ", filename);
334:
2.19 frystyk 335: /* Skip prefix, starting ./ and starting ///<etc> */
2.27 frystyk 336: if ((urlptr = strstr(filename, "://")) != NULL) { /* Find prefix */
337: urlptr += 3;
338: prefix = YES;
339: } else if ((urlptr = strstr(filename, ":/")) != NULL) {
340: urlptr += 2;
341: prefix = YES;
342: } else
343: urlptr = filename;
344: if (*urlptr == '.' && *(urlptr+1) == '/') { /* Starting ./<etc> */
2.19 frystyk 345: urlptr += 2;
2.27 frystyk 346: prefix = YES;
347: } else if (*urlptr == '/') { /* Some URLs start //<file> */
2.19 frystyk 348: while (*++urlptr == '/');
2.27 frystyk 349: prefix = YES;
2.19 frystyk 350: }
2.27 frystyk 351: if (!*urlptr) { /* If nothing left */
352: if (TRACE)
353: fprintf(stderr, "No simplification possible\n");
2.19 frystyk 354: return;
2.27 frystyk 355: }
2.19 frystyk 356:
357: /* Now we have the string we want to work with */
358: strptr = urlptr;
359: while (*strptr++) { /* Count number of delimiters */
360: if (*strptr == '/')
361: tokcnt++;
362: }
363: {
364: BOOL slashtail = NO;
2.27 frystyk 365: int segcnt = 0; /* Number of 'real segments' (not '.' and '..') */
2.19 frystyk 366: char *empty = "";
367: char *url = NULL;
368: char **tokptr;
369: char **tokstart;
370: StrAllocCopy(url, urlptr);
371:
372: /* Does the URL end with a slash? */
373: if(*(filename+strlen(filename)-1) == '/')
374: slashtail = YES;
375:
376: /* I allocate cnt+2 as I don't know if the url is terminated by '/' */
377: if ((tokstart = (char **) calloc(tokcnt+2, sizeof(char *))) == NULL)
378: outofmem(__FILE__, "HTSimplify");
379:
2.27 frystyk 380: /* Read the tokens forwards and count `real' segments */
2.19 frystyk 381: tokptr = tokstart;
2.27 frystyk 382: *tokptr = strtok(url, "/");
383: if (strcmp(*tokptr, ".") && strcmp(*tokptr, ".."))
384: segcnt++;
385: tokptr++;
386: while ((strptr = strtok(NULL, "/")) != NULL) {
387: if (strcmp(strptr, ".") && strcmp(strptr, ".."))
388: segcnt++;
389: else if (!strcmp(strptr, "..") && !segcnt)
390: prefix = YES;
2.19 frystyk 391: *tokptr++ = strptr;
2.27 frystyk 392: }
393:
2.19 frystyk 394: /* Scan backwards for '.' and '..' */
395: tokptr--;
396: while(tokptr >= tokstart) {
397: if (!strcmp(*tokptr, ".")) {
398: *tokptr = empty;
399: } else if (!strcmp(*tokptr, "..")) {
400: char **pptr = tokptr-1;
401: while (pptr >= tokstart) {
2.26 frystyk 402: if (**pptr && strcmp(*pptr, "..") && strcmp(*pptr, ".") &&
2.27 frystyk 403: (segcnt > 1 || prefix)) {
2.19 frystyk 404: *pptr = empty;
405: *tokptr = empty;
2.27 frystyk 406: segcnt--;
2.19 frystyk 407: break;
408: }
409: pptr--;
410: }
411: }
412: tokptr--;
413: }
414:
415: /* Write the rest out forwards */
416: *urlptr = '\0';
417: while (*++tokptr) {
418: if (**tokptr) {
2.27 frystyk 419: if (*urlptr) /* Don't want two in the beginning */
420: strcat(urlptr, "/");
2.19 frystyk 421: strcat(urlptr, *tokptr);
422: }
423: }
2.27 frystyk 424:
425: if (slashtail == YES && *(urlptr+(int)strlen(urlptr)-1) != '/')
2.19 frystyk 426: strcat(urlptr, "/");
427: free(url);
428: free(tokstart);
429: }
430: if (TRACE)
2.27 frystyk 431: fprintf(stderr, "into\n............ `%s'\n", filename);
2.19 frystyk 432: }
433: #ifdef OLD_CODE
2.17 frystyk 434: char * p = filename;
1.1 timbl 435: char * q;
2.17 frystyk 436:
437: if (p) {
438: while (*p && (*p == '/' || *p == '.')) /* Pass starting / or .'s */
439: p++;
440: while(*p) {
441: if (*p=='/') {
1.1 timbl 442: if ((p[1]=='.') && (p[2]=='.') && (p[3]=='/' || !p[3] )) {
2.11 timbl 443: for (q=p-1; (q>=filename) && (*q!='/'); q--); /* prev slash */
444: if (q[0]=='/' && 0!=strncmp(q, "/../", 4)
445: &&!(q-1>filename && q[-1]=='/')) {
2.15 luotonen 446: ari_strcpy(q, p+3); /* Remove /xxx/.. */
1.1 timbl 447: if (!*filename) strcpy(filename, "/");
448: p = q-1; /* Start again with prev slash */
2.11 timbl 449: } else { /* xxx/.. leave it! */
2.9 timbl 450: #ifdef BUG_CODE
2.15 luotonen 451: ari_strcpy(filename, p[3] ? p+4 : p+3); /* rm xxx/../ */
1.1 timbl 452: p = filename; /* Start again */
2.9 timbl 453: #endif
1.1 timbl 454: }
455: } else if ((p[1]=='.') && (p[2]=='/' || !p[2])) {
2.15 luotonen 456: ari_strcpy(p, p+2); /* Remove a slash and a dot */
2.13 luotonen 457: } else if (p[-1] != ':') {
458: while (p[1] == '/') {
2.15 luotonen 459: ari_strcpy(p, p+1); /* Remove multiple slashes */
2.13 luotonen 460: }
1.1 timbl 461: }
2.17 frystyk 462: }
463: p++;
464: } /* end while (*p) */
465: } /* end if (p) */
1.1 timbl 466: }
2.19 frystyk 467: #endif /* OLD_CODE */
1.1 timbl 468:
469:
470: /* Make Relative Name
471: ** ------------------
472: **
473: ** This function creates and returns a string which gives an expression of
474: ** one address as related to another. Where there is no relation, an absolute
475: ** address is retured.
476: **
477: ** On entry,
478: ** Both names must be absolute, fully qualified names of nodes
479: ** (no anchor bits)
480: **
481: ** On exit,
482: ** The return result points to a newly allocated name which, if
483: ** parsed by HTParse relative to relatedName, will yield aName.
484: ** The caller is responsible for freeing the resulting name later.
485: **
486: */
487: #ifdef __STDC__
488: char * HTRelative(const char * aName, const char *relatedName)
489: #else
490: char * HTRelative(aName, relatedName)
491: char * aName;
492: char * relatedName;
493: #endif
494: {
495: char * result = 0;
496: CONST char *p = aName;
497: CONST char *q = relatedName;
498: CONST char * after_access = 0;
499: CONST char * path = 0;
500: CONST char * last_slash = 0;
501: int slashes = 0;
502:
503: for(;*p; p++, q++) { /* Find extent of match */
504: if (*p!=*q) break;
505: if (*p==':') after_access = p+1;
506: if (*p=='/') {
507: last_slash = p;
508: slashes++;
509: if (slashes==3) path=p;
510: }
511: }
512:
513: /* q, p point to the first non-matching character or zero */
514:
515: if (!after_access) { /* Different access */
516: StrAllocCopy(result, aName);
517: } else if (slashes<3){ /* Different nodes */
518: StrAllocCopy(result, after_access);
2.29 ! frystyk 519: #if 0 /* Henrik */
1.1 timbl 520: } else if (slashes==3){ /* Same node, different path */
521: StrAllocCopy(result, path);
2.21 frystyk 522: #endif
1.1 timbl 523: } else { /* Some path in common */
524: int levels= 0;
525: for(; *q && (*q!='#'); q++) if (*q=='/') levels++;
526: result = (char *)malloc(3*levels + strlen(last_slash) + 1);
527: if (result == NULL) outofmem(__FILE__, "HTRelative");
528: result[0]=0;
529: for(;levels; levels--)strcat(result, "../");
530: strcat(result, last_slash+1);
531: }
2.21 frystyk 532: if (TRACE) fprintf(stderr,
533: "HTRelative.. `%s' expressed relative to `%s' is `%s'\n",
534: aName, relatedName, result);
1.1 timbl 535: return result;
536: }
2.1 timbl 537:
538:
2.6 timbl 539: /* Escape undesirable characters using % HTEscape()
540: ** -------------------------------------
541: **
542: ** This function takes a pointer to a string in which
543: ** some characters may be unacceptable unescaped.
544: ** It returns a string which has these characters
545: ** represented by a '%' character followed by two hex digits.
546: **
2.20 timbl 547: ** In the tradition of being conservative in what you do and liberal
548: ** in what you accept, we encode some characters which in fact are
549: ** allowed in URLs unencoded -- so DON'T use the table below for
550: ** parsing!
551: **
2.6 timbl 552: ** Unlike HTUnEscape(), this routine returns a malloced string.
2.20 timbl 553: **
2.6 timbl 554: */
555:
2.20 timbl 556: /* Not BOTH static AND const at the same time in gcc :-(, Henrik 18/03-94
557: ** code gen error in gcc when making random access to
558: ** static const table(!!) */
2.19 frystyk 559: /* PRIVATE CONST unsigned char isAcceptable[96] = */
560: PRIVATE unsigned char isAcceptable[96] =
2.6 timbl 561:
2.20 timbl 562: /* Overencodes */
2.6 timbl 563: /* Bit 0 xalpha -- see HTFile.h
564: ** Bit 1 xpalpha -- as xalpha but with plus.
2.20 timbl 565: ** Bit 2 ... path -- as xpalpha but with /
2.6 timbl 566: */
567: /* 0 1 2 3 4 5 6 7 8 9 A B C D E F */
568: { 0,0,0,0,0,0,0,0,0,0,7,6,0,7,7,4, /* 2x !"#$%&'()*+,-./ */
569: 7,7,7,7,7,7,7,7,7,7,0,0,0,0,0,0, /* 3x 0123456789:;<=>? */
570: 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, /* 4x @ABCDEFGHIJKLMNO */
571: 7,7,7,7,7,7,7,7,7,7,7,0,0,0,0,7, /* 5X PQRSTUVWXYZ[\]^_ */
572: 0,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, /* 6x `abcdefghijklmno */
573: 7,7,7,7,7,7,7,7,7,7,7,0,0,0,0,0 }; /* 7X pqrstuvwxyz{\}~ DEL */
574:
575: PRIVATE char *hex = "0123456789ABCDEF";
576:
2.8 timbl 577: PUBLIC char * HTEscape ARGS2 (CONST char *, str,
2.6 timbl 578: unsigned char, mask)
579: {
580: #define ACCEPTABLE(a) ( a>=32 && a<128 && ((isAcceptable[a-32]) & mask))
581: CONST char * p;
582: char * q;
583: char * result;
584: int unacceptable = 0;
585: for(p=str; *p; p++)
586: if (!ACCEPTABLE((unsigned char)TOASCII(*p)))
587: unacceptable++;
588: result = (char *) malloc(p-str + unacceptable+ unacceptable + 1);
589: if (result == NULL) outofmem(__FILE__, "HTEscape");
590: for(q=result, p=str; *p; p++) {
591: unsigned char a = TOASCII(*p);
592: if (!ACCEPTABLE(a)) {
593: *q++ = HEX_ESCAPE; /* Means hex commming */
594: *q++ = hex[a >> 4];
595: *q++ = hex[a & 15];
596: }
597: else *q++ = *p;
598: }
599: *q++ = 0; /* Terminate */
600: return result;
601: }
602:
603:
2.1 timbl 604: /* Decode %xx escaped characters HTUnEscape()
605: ** -----------------------------
606: **
607: ** This function takes a pointer to a string in which some
608: ** characters may have been encoded in %xy form, where xy is
609: ** the ASCII hex code for character 16x+y.
610: ** The string is converted in place, as it will never grow.
611: */
612:
613: PRIVATE char from_hex ARGS1(char, c)
614: {
2.6 timbl 615: return c >= '0' && c <= '9' ? c - '0'
616: : c >= 'A' && c <= 'F'? c - 'A' + 10
617: : c - 'a' + 10; /* accept small letters just in case */
2.1 timbl 618: }
619:
620: PUBLIC char * HTUnEscape ARGS1( char *, str)
621: {
622: char * p = str;
623: char * q = str;
2.25 frystyk 624:
625: if (!str) { /* Just for safety ;-) */
626: if (TRACE)
627: fprintf(stderr, "HTUnEscape.. Called with NULL argument.\n");
628: return "";
629: }
2.1 timbl 630: while(*p) {
2.6 timbl 631: if (*p == HEX_ESCAPE) {
2.1 timbl 632: p++;
633: if (*p) *q = from_hex(*p++) * 16;
634: if (*p) *q = FROMASCII(*q + from_hex(*p++));
635: q++;
636: } else {
637: *q++ = *p++;
638: }
639: }
640:
641: *q++ = 0;
642: return str;
643:
644: } /* HTUnEscape */
645:
646:
2.24 luotonen 647: /* HTCleanTelnetString()
648: * Make sure that the given string doesn't contain characters that
649: * could cause security holes, such as newlines in ftp, gopher,
650: * news or telnet URLs; more specifically: allows everything between
2.26 frystyk 651: * ASCII 20-7E, and also A0-FE, inclusive. Also TAB ('\t') allowed!
2.24 luotonen 652: *
653: * On entry,
654: * str the string that is *modified* if necessary. The
655: * string will be truncated at the first illegal
656: * character that is encountered.
657: * On exit,
658: * returns YES, if the string was modified.
659: * NO, otherwise.
660: */
661: PUBLIC BOOL HTCleanTelnetString ARGS1(char *, str)
662: {
663: char * cur = str;
664:
665: if (!str) return NO;
666:
667: while (*cur) {
668: int a = TOASCII(*cur);
2.26 frystyk 669: if (a != 0x9 && (a < 0x20 || (a > 0x7E && a < 0xA0) || a > 0xFE)) {
2.24 luotonen 670: CTRACE(stderr, "Illegal..... character in URL: \"%s\"\n",str);
671: *cur = 0;
672: CTRACE(stderr, "Truncated... \"%s\"\n",str);
673: return YES;
674: }
675: cur++;
676: }
677: return NO;
678: }
679:
Webmaster