Annotation of libwww/Library/src/HTParse.c, revision 2.30
1.1 timbl 1: /* Parse HyperText Document Address HTParse.c
2: ** ================================
2.26 frystyk 3: **
4: ** history:
5: ** May 12 94 TAB added as legal char in HTCleanTelnetString
6: **
1.1 timbl 7: */
2.27 frystyk 8: #include "tcp.h"
1.1 timbl 9: #include "HTUtils.h"
10: #include "HTParse.h"
11:
2.6 timbl 12: #define HEX_ESCAPE '%'
13:
/*	Components of a name as located by scan()
**	-----------------------------------------
**	Each member is either zero or points to a zero terminated string
**	inside the buffer that was scanned.
*/
struct struct_parts {
    char *access;		/* Access method, now known as "scheme" */
    char *host;
    char *absolute;
    char *relative;
    /* No `search' member: the search part is treated as part of the path */
    char *anchor;
};
22:
23: /* Strip white space off a string
24: ** ------------------------------
25: **
26: ** On exit,
27: ** Return value points to first non-white character, or to 0 if none.
28: ** All trailing white space is OVERWRITTEN with zero.
29: */
30:
2.13 luotonen 31: PUBLIC char * HTStrip ARGS1(char *, s)
1.1 timbl 32: {
33: #define SPACE(c) ((c==' ')||(c=='\t')||(c=='\n'))
34: char * p=s;
2.13 luotonen 35: if (!s) return NULL; /* Doesn't dump core if NULL */
36: for(p=s;*p;p++); /* Find end of string */
1.1 timbl 37: for(p--;p>=s;p--) {
38: if(SPACE(*p)) *p=0; /* Zap trailing blanks */
39: else break;
40: }
41: while(SPACE(*s))s++; /* Strip leading blanks */
42: return s;
43: }
44:
45:
46: /* Scan a filename for its consituents
47: ** -----------------------------------
48: **
49: ** On entry,
50: ** name points to a document name which may be incomplete.
51: ** On exit,
52: ** absolute or relative may be nonzero (but not both).
53: ** host, anchor and access may be nonzero if they were specified.
54: ** Any which are nonzero point to zero terminated strings.
55: */
56: #ifdef __STDC__
57: PRIVATE void scan(char * name, struct struct_parts *parts)
58: #else
59: PRIVATE void scan(name, parts)
60: char * name;
61: struct struct_parts *parts;
62: #endif
63: {
64: char * after_access;
65: char * p;
66: int length = strlen(name);
67:
68: parts->access = 0;
69: parts->host = 0;
70: parts->absolute = 0;
71: parts->relative = 0;
72: parts->anchor = 0;
73:
74: after_access = name;
75: for(p=name; *p; p++) {
76: if (*p==':') {
77: *p = 0;
2.20 timbl 78: parts->access = after_access; /* Scheme has been specified */
1.1 timbl 79: after_access = p+1;
2.22 luotonen 80: if (0==strcasecomp("URL", parts->access)) {
2.20 timbl 81: parts->access = NULL; /* Ignore IETF's URL: pre-prefix */
82: } else break;
1.1 timbl 83: }
2.20 timbl 84: if (*p=='/') break; /* Access has not been specified */
1.1 timbl 85: if (*p=='#') break;
86: }
87:
88: for(p=name+length-1; p>=name; p--) {
89: if (*p =='#') {
90: parts->anchor=p+1;
91: *p=0; /* terminate the rest */
92: }
93: }
94: p = after_access;
95: if (*p=='/'){
96: if (p[1]=='/') {
97: parts->host = p+2; /* host has been specified */
98: *p=0; /* Terminate access */
99: p=strchr(parts->host,'/'); /* look for end of host name if any */
100: if(p) {
101: *p=0; /* Terminate host */
102: parts->absolute = p+1; /* Root has been found */
103: }
104: } else {
105: parts->absolute = p+1; /* Root found but no host */
106: }
107: } else {
108: parts->relative = (*after_access) ? after_access : 0; /* zero for "" */
109: }
110:
2.16 timbl 111: #ifdef OLD_CODE
1.1 timbl 112: /* Access specified but no host: the anchor was not really one
2.16 timbl 113: e.g. news:j462#36487@foo.bar -- JFG 10/jul/92, from bug report */
114: /* This kludge doesn't work for example when coming across
115: file:/usr/local/www/fred#123
116: which loses its anchor. Correct approach in news is to
117: escape weird characters not allowed in URL. TBL 21/dec/93
118: */
1.1 timbl 119: if (parts->access && ! parts->host && parts->anchor) {
120: *(parts->anchor - 1) = '#'; /* Restore the '#' in the address */
121: parts->anchor = 0;
122: }
2.16 timbl 123: #endif
1.1 timbl 124:
125: #ifdef NOT_DEFINED /* search is just treated as part of path */
126: {
127: char *p = relative ? relative : absolute;
128: if (p) {
129: char * q = strchr(p, '?'); /* Any search string? */
130: if (q) {
131: *q = 0; /* If so, chop that off. */
132: parts->search = q+1;
133: }
134: }
135: }
136: #endif
137: } /*scan */
138:
139:
140: /* Parse a Name relative to another name
141: ** -------------------------------------
142: **
143: ** This returns those parts of a name which are given (and requested)
144: ** substituting bits from the related name where necessary.
145: **
146: ** On entry,
147: ** aName A filename given
148: ** relatedName A name relative to which aName is to be parsed
149: ** wanted A mask for the bits which are wanted.
150: **
151: ** On exit,
152: ** returns A pointer to a malloc'd string which MUST BE FREED
153: */
154: #ifdef __STDC__
155: char * HTParse(const char * aName, const char * relatedName, int wanted)
156: #else
157: char * HTParse(aName, relatedName, wanted)
158: char * aName;
159: char * relatedName;
160: int wanted;
161: #endif
162:
163: {
164: char * result = 0;
165: char * return_value = 0;
166: int len;
167: char * name = 0;
168: char * rel = 0;
169: char * p;
2.12 timbl 170: char * access;
1.1 timbl 171: struct struct_parts given, related;
172:
173: /* Make working copies of input strings to cut up:
174: */
175: len = strlen(aName)+strlen(relatedName)+10;
176: result=(char *)malloc(len); /* Lots of space: more than enough */
177: if (result == NULL) outofmem(__FILE__, "HTParse");
178:
179: StrAllocCopy(name, aName);
180: StrAllocCopy(rel, relatedName);
181:
182: scan(name, &given);
183: scan(rel, &related);
184: result[0]=0; /* Clear string */
2.12 timbl 185: access = given.access ? given.access : related.access;
1.1 timbl 186: if (wanted & PARSE_ACCESS)
2.12 timbl 187: if (access) {
188: strcat(result, access);
1.1 timbl 189: if(wanted & PARSE_PUNCTUATION) strcat(result, ":");
190: }
191:
192: if (given.access && related.access) /* If different, inherit nothing. */
193: if (strcmp(given.access, related.access)!=0) {
194: related.host=0;
195: related.absolute=0;
196: related.relative=0;
197: related.anchor=0;
198: }
199:
200: if (wanted & PARSE_HOST)
201: if(given.host || related.host) {
2.12 timbl 202: char * tail = result + strlen(result);
1.1 timbl 203: if(wanted & PARSE_PUNCTUATION) strcat(result, "//");
204: strcat(result, given.host ? given.host : related.host);
2.12 timbl 205: #define CLEAN_URLS
206: #ifdef CLEAN_URLS
207: /* Ignore default port numbers, and trailing dots on FQDNs
208: which will only cause identical adreesses to look different */
209: {
210: char * p;
211: p = strchr(tail, ':');
212: if (p && access) { /* Port specified */
213: if ( ( strcmp(access, "http") == 0
214: && strcmp(p, ":80") == 0 )
215: ||
216: ( strcmp(access, "gopher") == 0
217: && strcmp(p, ":70") == 0 )
218: )
219: *p = (char)0; /* It is the default: ignore it */
220: }
221: if (!p) p = tail + strlen(tail); /* After hostname */
2.21 frystyk 222: if (*p) { /* Henrik 17/04-94 */
223: p--; /* End of hostname */
224: if (*p == '.') *p = (char)0; /* chop final . */
225: }
2.12 timbl 226: }
227: #endif
1.1 timbl 228: }
229:
230: if (given.host && related.host) /* If different hosts, inherit no path. */
231: if (strcmp(given.host, related.host)!=0) {
232: related.absolute=0;
233: related.relative=0;
234: related.anchor=0;
235: }
236:
237: if (wanted & PARSE_PATH) {
238: if(given.absolute) { /* All is given */
239: if(wanted & PARSE_PUNCTUATION) strcat(result, "/");
240: strcat(result, given.absolute);
241: } else if(related.absolute) { /* Adopt path not name */
242: strcat(result, "/");
243: strcat(result, related.absolute);
244: if (given.relative) {
245: p = strchr(result, '?'); /* Search part? */
246: if (!p) p=result+strlen(result)-1;
247: for (; *p!='/'; p--); /* last / */
248: p[1]=0; /* Remove filename */
249: strcat(result, given.relative); /* Add given one */
250: HTSimplify (result);
251: }
252: } else if(given.relative) {
253: strcat(result, given.relative); /* what we've got */
254: } else if(related.relative) {
255: strcat(result, related.relative);
256: } else { /* No inheritance */
257: strcat(result, "/");
258: }
259: }
260:
261: if (wanted & PARSE_ANCHOR)
262: if(given.anchor || related.anchor) {
263: if(wanted & PARSE_PUNCTUATION) strcat(result, "#");
264: strcat(result, given.anchor ? given.anchor : related.anchor);
265: }
266: free(rel);
267: free(name);
268:
269: StrAllocCopy(return_value, result);
270: free(result);
271: return return_value; /* exactly the right length */
272: }
273:
2.11 timbl 274:
2.21 frystyk 275: #if 0 /* NOT USED FOR THE MOMENT */
2.15 luotonen 276: /*
277: ** As strcpy() but guaranteed to work correctly
278: ** with overlapping parameters. AL 7 Feb 1994
279: */
280: PRIVATE void ari_strcpy ARGS2(char *, to,
281: char *, from)
282: {
283: char * tmp;
284:
285: if (!to || !from) return;
286:
287: tmp = (char*)malloc(strlen(from)+1);
288: if (!tmp) outofmem(__FILE__, "my_strcpy");
289:
290: strcpy(tmp, from);
291: strcpy(to, tmp);
292: free(tmp);
293: }
2.21 frystyk 294: #endif
295:
2.20 timbl 296:
297: /* Simplify a URI
298: // --------------
299: // A URI is allowed to contain the seqeunce xxx/../ which may be
1.1 timbl 300: // replaced by "" , and the seqeunce "/./" which may be replaced by "/".
2.20 timbl 301: // Simplification helps us recognize duplicate URIs.
1.1 timbl 302: //
303: // Thus, /etc/junk/../fred becomes /etc/fred
304: // /etc/junk/./fred becomes /etc/junk/fred
2.11 timbl 305: //
306: // but we should NOT change
307: // http://fred.xxx.edu/../..
308: //
309: // or ../../albert.html
2.26 frystyk 310: //
311: // In the same manner, the following prefixed are preserved:
312: //
313: // ./<etc>
314: // //<etc>
315: //
316: // In order to avoid empty URLs the following URLs become:
317: //
318: // /fred/.. becomes /fred/..
319: // /fred/././.. becomes /fred/..
2.27 frystyk 320: // /fred/.././junk/.././ becomes /fred/..
2.26 frystyk 321: //
2.30 ! frystyk 322: // If more than one set of `://' is found (several proxies in cascade) then
! 323: // only the part after the last `://' is simplified.
1.1 timbl 324: */
2.14 luotonen 325: PUBLIC void HTSimplify ARGS1(char *, filename)
1.1 timbl 326: {
2.19 frystyk 327: int tokcnt = 0;
328: char *strptr;
329: char *urlptr;
2.27 frystyk 330: BOOL prefix = NO; /* If prefix == YES then we can delete all segments */
2.19 frystyk 331: if (!filename || !*filename) /* Just to be sure! */
332: return;
333:
2.27 frystyk 334: if (TRACE)
335: fprintf(stderr, "HTSimplify.. `%s\' ", filename);
336:
2.19 frystyk 337: /* Skip prefix, starting ./ and starting ///<etc> */
2.27 frystyk 338: if ((urlptr = strstr(filename, "://")) != NULL) { /* Find prefix */
2.30 ! frystyk 339: char *newptr;
2.27 frystyk 340: urlptr += 3;
2.30 ! frystyk 341: while ((newptr = strstr(urlptr, "://")) != NULL)
! 342: urlptr = newptr+3;
2.27 frystyk 343: prefix = YES;
344: } else if ((urlptr = strstr(filename, ":/")) != NULL) {
345: urlptr += 2;
346: prefix = YES;
347: } else
348: urlptr = filename;
349: if (*urlptr == '.' && *(urlptr+1) == '/') { /* Starting ./<etc> */
2.19 frystyk 350: urlptr += 2;
2.27 frystyk 351: prefix = YES;
352: } else if (*urlptr == '/') { /* Some URLs start //<file> */
2.19 frystyk 353: while (*++urlptr == '/');
2.27 frystyk 354: prefix = YES;
2.19 frystyk 355: }
2.27 frystyk 356: if (!*urlptr) { /* If nothing left */
357: if (TRACE)
358: fprintf(stderr, "No simplification possible\n");
2.19 frystyk 359: return;
2.27 frystyk 360: }
2.19 frystyk 361:
362: /* Now we have the string we want to work with */
363: strptr = urlptr;
364: while (*strptr++) { /* Count number of delimiters */
365: if (*strptr == '/')
366: tokcnt++;
367: }
368: {
369: BOOL slashtail = NO;
2.27 frystyk 370: int segcnt = 0; /* Number of 'real segments' (not '.' and '..') */
2.19 frystyk 371: char *empty = "";
372: char *url = NULL;
373: char **tokptr;
374: char **tokstart;
375: StrAllocCopy(url, urlptr);
376:
377: /* Does the URL end with a slash? */
378: if(*(filename+strlen(filename)-1) == '/')
379: slashtail = YES;
380:
381: /* I allocate cnt+2 as I don't know if the url is terminated by '/' */
382: if ((tokstart = (char **) calloc(tokcnt+2, sizeof(char *))) == NULL)
383: outofmem(__FILE__, "HTSimplify");
384:
2.27 frystyk 385: /* Read the tokens forwards and count `real' segments */
2.19 frystyk 386: tokptr = tokstart;
2.27 frystyk 387: *tokptr = strtok(url, "/");
388: if (strcmp(*tokptr, ".") && strcmp(*tokptr, ".."))
389: segcnt++;
390: tokptr++;
391: while ((strptr = strtok(NULL, "/")) != NULL) {
392: if (strcmp(strptr, ".") && strcmp(strptr, ".."))
393: segcnt++;
394: else if (!strcmp(strptr, "..") && !segcnt)
395: prefix = YES;
2.19 frystyk 396: *tokptr++ = strptr;
2.27 frystyk 397: }
398:
2.19 frystyk 399: /* Scan backwards for '.' and '..' */
400: tokptr--;
401: while(tokptr >= tokstart) {
402: if (!strcmp(*tokptr, ".")) {
403: *tokptr = empty;
404: } else if (!strcmp(*tokptr, "..")) {
405: char **pptr = tokptr-1;
406: while (pptr >= tokstart) {
2.26 frystyk 407: if (**pptr && strcmp(*pptr, "..") && strcmp(*pptr, ".") &&
2.27 frystyk 408: (segcnt > 1 || prefix)) {
2.19 frystyk 409: *pptr = empty;
410: *tokptr = empty;
2.27 frystyk 411: segcnt--;
2.19 frystyk 412: break;
413: }
414: pptr--;
415: }
416: }
417: tokptr--;
418: }
419:
420: /* Write the rest out forwards */
421: *urlptr = '\0';
422: while (*++tokptr) {
423: if (**tokptr) {
2.27 frystyk 424: if (*urlptr) /* Don't want two in the beginning */
425: strcat(urlptr, "/");
2.19 frystyk 426: strcat(urlptr, *tokptr);
427: }
428: }
2.27 frystyk 429:
430: if (slashtail == YES && *(urlptr+(int)strlen(urlptr)-1) != '/')
2.19 frystyk 431: strcat(urlptr, "/");
432: free(url);
433: free(tokstart);
434: }
435: if (TRACE)
2.27 frystyk 436: fprintf(stderr, "into\n............ `%s'\n", filename);
2.19 frystyk 437: }
438: #ifdef OLD_CODE
2.17 frystyk 439: char * p = filename;
1.1 timbl 440: char * q;
2.17 frystyk 441:
442: if (p) {
443: while (*p && (*p == '/' || *p == '.')) /* Pass starting / or .'s */
444: p++;
445: while(*p) {
446: if (*p=='/') {
1.1 timbl 447: if ((p[1]=='.') && (p[2]=='.') && (p[3]=='/' || !p[3] )) {
2.11 timbl 448: for (q=p-1; (q>=filename) && (*q!='/'); q--); /* prev slash */
449: if (q[0]=='/' && 0!=strncmp(q, "/../", 4)
450: &&!(q-1>filename && q[-1]=='/')) {
2.15 luotonen 451: ari_strcpy(q, p+3); /* Remove /xxx/.. */
1.1 timbl 452: if (!*filename) strcpy(filename, "/");
453: p = q-1; /* Start again with prev slash */
2.11 timbl 454: } else { /* xxx/.. leave it! */
2.9 timbl 455: #ifdef BUG_CODE
2.15 luotonen 456: ari_strcpy(filename, p[3] ? p+4 : p+3); /* rm xxx/../ */
1.1 timbl 457: p = filename; /* Start again */
2.9 timbl 458: #endif
1.1 timbl 459: }
460: } else if ((p[1]=='.') && (p[2]=='/' || !p[2])) {
2.15 luotonen 461: ari_strcpy(p, p+2); /* Remove a slash and a dot */
2.13 luotonen 462: } else if (p[-1] != ':') {
463: while (p[1] == '/') {
2.15 luotonen 464: ari_strcpy(p, p+1); /* Remove multiple slashes */
2.13 luotonen 465: }
1.1 timbl 466: }
2.17 frystyk 467: }
468: p++;
469: } /* end while (*p) */
470: } /* end if (p) */
1.1 timbl 471: }
2.19 frystyk 472: #endif /* OLD_CODE */
1.1 timbl 473:
474:
475: /* Make Relative Name
476: ** ------------------
477: **
478: ** This function creates and returns a string which gives an expression of
479: ** one address as related to another. Where there is no relation, an absolute
480: ** address is retured.
481: **
482: ** On entry,
483: ** Both names must be absolute, fully qualified names of nodes
484: ** (no anchor bits)
485: **
486: ** On exit,
487: ** The return result points to a newly allocated name which, if
488: ** parsed by HTParse relative to relatedName, will yield aName.
489: ** The caller is responsible for freeing the resulting name later.
490: **
491: */
492: #ifdef __STDC__
493: char * HTRelative(const char * aName, const char *relatedName)
494: #else
495: char * HTRelative(aName, relatedName)
496: char * aName;
497: char * relatedName;
498: #endif
499: {
500: char * result = 0;
501: CONST char *p = aName;
502: CONST char *q = relatedName;
503: CONST char * after_access = 0;
504: CONST char * path = 0;
505: CONST char * last_slash = 0;
506: int slashes = 0;
507:
508: for(;*p; p++, q++) { /* Find extent of match */
509: if (*p!=*q) break;
510: if (*p==':') after_access = p+1;
511: if (*p=='/') {
512: last_slash = p;
513: slashes++;
514: if (slashes==3) path=p;
515: }
516: }
517:
518: /* q, p point to the first non-matching character or zero */
519:
520: if (!after_access) { /* Different access */
521: StrAllocCopy(result, aName);
522: } else if (slashes<3){ /* Different nodes */
523: StrAllocCopy(result, after_access);
2.29 frystyk 524: #if 0 /* Henrik */
1.1 timbl 525: } else if (slashes==3){ /* Same node, different path */
526: StrAllocCopy(result, path);
2.21 frystyk 527: #endif
1.1 timbl 528: } else { /* Some path in common */
529: int levels= 0;
530: for(; *q && (*q!='#'); q++) if (*q=='/') levels++;
531: result = (char *)malloc(3*levels + strlen(last_slash) + 1);
532: if (result == NULL) outofmem(__FILE__, "HTRelative");
533: result[0]=0;
534: for(;levels; levels--)strcat(result, "../");
535: strcat(result, last_slash+1);
536: }
2.21 frystyk 537: if (TRACE) fprintf(stderr,
538: "HTRelative.. `%s' expressed relative to `%s' is `%s'\n",
539: aName, relatedName, result);
1.1 timbl 540: return result;
541: }
2.1 timbl 542:
543:
2.6 timbl 544: /* Escape undesirable characters using % HTEscape()
545: ** -------------------------------------
546: **
547: ** This function takes a pointer to a string in which
548: ** some characters may be unacceptable unescaped.
549: ** It returns a string which has these characters
550: ** represented by a '%' character followed by two hex digits.
551: **
2.20 timbl 552: ** In the tradition of being conservative in what you do and liberal
553: ** in what you accept, we encode some characters which in fact are
554: ** allowed in URLs unencoded -- so DON'T use the table below for
555: ** parsing!
556: **
2.6 timbl 557: ** Unlike HTUnEscape(), this routine returns a malloced string.
2.20 timbl 558: **
2.6 timbl 559: */
560:
2.20 timbl 561: /* Not BOTH static AND const at the same time in gcc :-(, Henrik 18/03-94
562: ** code gen error in gcc when making random access to
563: ** static const table(!!) */
2.19 frystyk 564: /* PRIVATE CONST unsigned char isAcceptable[96] = */
565: PRIVATE unsigned char isAcceptable[96] =
2.6 timbl 566:
2.20 timbl 567: /* Overencodes */
2.6 timbl 568: /* Bit 0 xalpha -- see HTFile.h
569: ** Bit 1 xpalpha -- as xalpha but with plus.
2.20 timbl 570: ** Bit 2 ... path -- as xpalpha but with /
2.6 timbl 571: */
572: /* 0 1 2 3 4 5 6 7 8 9 A B C D E F */
573: { 0,0,0,0,0,0,0,0,0,0,7,6,0,7,7,4, /* 2x !"#$%&'()*+,-./ */
574: 7,7,7,7,7,7,7,7,7,7,0,0,0,0,0,0, /* 3x 0123456789:;<=>? */
575: 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, /* 4x @ABCDEFGHIJKLMNO */
576: 7,7,7,7,7,7,7,7,7,7,7,0,0,0,0,7, /* 5X PQRSTUVWXYZ[\]^_ */
577: 0,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, /* 6x `abcdefghijklmno */
578: 7,7,7,7,7,7,7,7,7,7,7,0,0,0,0,0 }; /* 7X pqrstuvwxyz{\}~ DEL */
579:
580: PRIVATE char *hex = "0123456789ABCDEF";
581:
2.8 timbl 582: PUBLIC char * HTEscape ARGS2 (CONST char *, str,
2.6 timbl 583: unsigned char, mask)
584: {
585: #define ACCEPTABLE(a) ( a>=32 && a<128 && ((isAcceptable[a-32]) & mask))
586: CONST char * p;
587: char * q;
588: char * result;
589: int unacceptable = 0;
590: for(p=str; *p; p++)
591: if (!ACCEPTABLE((unsigned char)TOASCII(*p)))
592: unacceptable++;
593: result = (char *) malloc(p-str + unacceptable+ unacceptable + 1);
594: if (result == NULL) outofmem(__FILE__, "HTEscape");
595: for(q=result, p=str; *p; p++) {
596: unsigned char a = TOASCII(*p);
597: if (!ACCEPTABLE(a)) {
598: *q++ = HEX_ESCAPE; /* Means hex commming */
599: *q++ = hex[a >> 4];
600: *q++ = hex[a & 15];
601: }
602: else *q++ = *p;
603: }
604: *q++ = 0; /* Terminate */
605: return result;
606: }
607:
608:
2.1 timbl 609: /* Decode %xx escaped characters HTUnEscape()
610: ** -----------------------------
611: **
612: ** This function takes a pointer to a string in which some
613: ** characters may have been encoded in %xy form, where xy is
614: ** the acsii hex code for character 16x+y.
615: ** The string is converted in place, as it will never grow.
616: */
617:
618: PRIVATE char from_hex ARGS1(char, c)
619: {
2.6 timbl 620: return c >= '0' && c <= '9' ? c - '0'
621: : c >= 'A' && c <= 'F'? c - 'A' + 10
622: : c - 'a' + 10; /* accept small letters just in case */
2.1 timbl 623: }
624:
625: PUBLIC char * HTUnEscape ARGS1( char *, str)
626: {
627: char * p = str;
628: char * q = str;
2.25 frystyk 629:
630: if (!str) { /* Just for safety ;-) */
631: if (TRACE)
632: fprintf(stderr, "HTUnEscape.. Called with NULL argument.\n");
633: return "";
634: }
2.1 timbl 635: while(*p) {
2.6 timbl 636: if (*p == HEX_ESCAPE) {
2.1 timbl 637: p++;
638: if (*p) *q = from_hex(*p++) * 16;
639: if (*p) *q = FROMASCII(*q + from_hex(*p++));
640: q++;
641: } else {
642: *q++ = *p++;
643: }
644: }
645:
646: *q++ = 0;
647: return str;
648:
649: } /* HTUnEscape */
650:
651:
2.24 luotonen 652: /* HTCleanTelnetString()
653: * Make sure that the given string doesn't contain characters that
654: * could cause security holes, such as newlines in ftp, gopher,
655: * news or telnet URLs; more specifically: allows everything between
2.26 frystyk 656: * ASCII 20-7E, and also A0-FE, inclusive. Also TAB ('\t') allowed!
2.24 luotonen 657: *
658: * On entry,
659: * str the string that is *modified* if necessary. The
660: * string will be truncated at the first illegal
661: * character that is encountered.
662: * On exit,
663: * returns YES, if the string was modified.
664: * NO, otherwise.
665: */
666: PUBLIC BOOL HTCleanTelnetString ARGS1(char *, str)
667: {
668: char * cur = str;
669:
670: if (!str) return NO;
671:
672: while (*cur) {
673: int a = TOASCII(*cur);
2.26 frystyk 674: if (a != 0x9 && (a < 0x20 || (a > 0x7E && a < 0xA0) || a > 0xFE)) {
2.24 luotonen 675: CTRACE(stderr, "Illegal..... character in URL: \"%s\"\n",str);
676: *cur = 0;
677: CTRACE(stderr, "Truncated... \"%s\"\n",str);
678: return YES;
679: }
680: cur++;
681: }
682: return NO;
683: }
684:
Webmaster