Annotation of libwww/Library/src/HTParse.c, revision 1.1
1.1 ! timbl 1: /* Parse HyperText Document Address HTParse.c
! 2: ** ================================
! 3: */
! 4:
#include "HTUtils.h"
#include "HTParse.h"
#include "tcp.h"

#include <string.h>	/* memmove, strlen, strcpy, strcat, strcmp, strchr */
! 8:
/* Components of a document address, as split out by scan().
** Each member is either 0 (component absent) or points at a
** zero-terminated substring inside the buffer that was scanned.
*/
struct struct_parts {
    char * access;      /* access scheme: the text before ':' */
    char * host;        /* host name: the text following "//" */
    char * absolute;    /* path that followed a leading '/' (slash not included) */
    char * relative;    /* path that did not start with '/' */
    /* char * search;      no - treated as part of path */
    char * anchor;      /* the text following '#' */
};
! 17:
! 18:
/*	Strip white space off a string
**	------------------------------
**
** White space here means blank, tab and newline only.
**
** On entry,
**	s	points to a writable zero-terminated string, or is 0.
** On exit,
**	Return value points to the first non-white character, or to the
**	terminating zero if there is none.  A 0 argument is returned as is.
**	All trailing white space is OVERWRITTEN with zero.
*/

#define SPACE(c) (((c) == ' ') || ((c) == '\t') || ((c) == '\n'))

#ifdef __STDC__
char * HTStrip(char * s)
#else
char * HTStrip(s)
    char *s;
#endif
{
    char * end;

    if (!s) return s;

    for (end = s; *end; end++)
	;				/* Find end of string */

    /* Zap trailing blanks.  `end' always stays within [s, s+len], so no
    ** pointer before the start of the buffer is ever formed, even when
    ** the string is empty or consists of blanks only. */
    while (end > s && SPACE(end[-1])) {
	*--end = 0;
    }

    while (SPACE(*s)) {			/* Strip leading blanks */
	s++;
    }
    return s;
}
! 44:
! 45:
! 46: /* Scan a filename for its consituents
! 47: ** -----------------------------------
! 48: **
! 49: ** On entry,
! 50: ** name points to a document name which may be incomplete.
! 51: ** On exit,
! 52: ** absolute or relative may be nonzero (but not both).
! 53: ** host, anchor and access may be nonzero if they were specified.
! 54: ** Any which are nonzero point to zero terminated strings.
! 55: */
! 56: #ifdef __STDC__
! 57: PRIVATE void scan(char * name, struct struct_parts *parts)
! 58: #else
! 59: PRIVATE void scan(name, parts)
! 60: char * name;
! 61: struct struct_parts *parts;
! 62: #endif
! 63: {
! 64: char * after_access;
! 65: char * p;
! 66: int length = strlen(name);
! 67:
! 68: parts->access = 0;
! 69: parts->host = 0;
! 70: parts->absolute = 0;
! 71: parts->relative = 0;
! 72: parts->anchor = 0;
! 73:
! 74: after_access = name;
! 75: for(p=name; *p; p++) {
! 76: if (*p==':') {
! 77: *p = 0;
! 78: parts->access = name; /* Access name has been specified */
! 79: after_access = p+1;
! 80: }
! 81: if (*p=='/') break;
! 82: if (*p=='#') break;
! 83: }
! 84:
! 85: for(p=name+length-1; p>=name; p--) {
! 86: if (*p =='#') {
! 87: parts->anchor=p+1;
! 88: *p=0; /* terminate the rest */
! 89: }
! 90: }
! 91: p = after_access;
! 92: if (*p=='/'){
! 93: if (p[1]=='/') {
! 94: parts->host = p+2; /* host has been specified */
! 95: *p=0; /* Terminate access */
! 96: p=strchr(parts->host,'/'); /* look for end of host name if any */
! 97: if(p) {
! 98: *p=0; /* Terminate host */
! 99: parts->absolute = p+1; /* Root has been found */
! 100: }
! 101: } else {
! 102: parts->absolute = p+1; /* Root found but no host */
! 103: }
! 104: } else {
! 105: parts->relative = (*after_access) ? after_access : 0; /* zero for "" */
! 106: }
! 107:
! 108: /* Access specified but no host: the anchor was not really one
! 109: e.g. news:j462#36487@foo.bar -- JFG 10/7/92, from bug report */
! 110: if (parts->access && ! parts->host && parts->anchor) {
! 111: *(parts->anchor - 1) = '#'; /* Restore the '#' in the address */
! 112: parts->anchor = 0;
! 113: }
! 114:
! 115: #ifdef NOT_DEFINED /* search is just treated as part of path */
! 116: {
! 117: char *p = relative ? relative : absolute;
! 118: if (p) {
! 119: char * q = strchr(p, '?'); /* Any search string? */
! 120: if (q) {
! 121: *q = 0; /* If so, chop that off. */
! 122: parts->search = q+1;
! 123: }
! 124: }
! 125: }
! 126: #endif
! 127: } /*scan */
! 128:
! 129:
! 130: /* Parse a Name relative to another name
! 131: ** -------------------------------------
! 132: **
! 133: ** This returns those parts of a name which are given (and requested)
! 134: ** substituting bits from the related name where necessary.
! 135: **
! 136: ** On entry,
! 137: ** aName A filename given
! 138: ** relatedName A name relative to which aName is to be parsed
! 139: ** wanted A mask for the bits which are wanted.
! 140: **
! 141: ** On exit,
! 142: ** returns A pointer to a malloc'd string which MUST BE FREED
! 143: */
! 144: #ifdef __STDC__
! 145: char * HTParse(const char * aName, const char * relatedName, int wanted)
! 146: #else
! 147: char * HTParse(aName, relatedName, wanted)
! 148: char * aName;
! 149: char * relatedName;
! 150: int wanted;
! 151: #endif
! 152:
! 153: {
! 154: char * result = 0;
! 155: char * return_value = 0;
! 156: int len;
! 157: char * name = 0;
! 158: char * rel = 0;
! 159: char * p;
! 160: struct struct_parts given, related;
! 161:
! 162: /* Make working copies of input strings to cut up:
! 163: */
! 164: len = strlen(aName)+strlen(relatedName)+10;
! 165: result=(char *)malloc(len); /* Lots of space: more than enough */
! 166: if (result == NULL) outofmem(__FILE__, "HTParse");
! 167:
! 168: StrAllocCopy(name, aName);
! 169: StrAllocCopy(rel, relatedName);
! 170:
! 171: scan(name, &given);
! 172: scan(rel, &related);
! 173: result[0]=0; /* Clear string */
! 174: if (wanted & PARSE_ACCESS)
! 175: if (given.access|| related.access) {
! 176: strcat(result, given.access ? given.access : related.access);
! 177: if(wanted & PARSE_PUNCTUATION) strcat(result, ":");
! 178: }
! 179:
! 180: if (given.access && related.access) /* If different, inherit nothing. */
! 181: if (strcmp(given.access, related.access)!=0) {
! 182: related.host=0;
! 183: related.absolute=0;
! 184: related.relative=0;
! 185: related.anchor=0;
! 186: }
! 187:
! 188: if (wanted & PARSE_HOST)
! 189: if(given.host || related.host) {
! 190: if(wanted & PARSE_PUNCTUATION) strcat(result, "//");
! 191: strcat(result, given.host ? given.host : related.host);
! 192: }
! 193:
! 194: if (given.host && related.host) /* If different hosts, inherit no path. */
! 195: if (strcmp(given.host, related.host)!=0) {
! 196: related.absolute=0;
! 197: related.relative=0;
! 198: related.anchor=0;
! 199: }
! 200:
! 201: if (wanted & PARSE_PATH) {
! 202: if(given.absolute) { /* All is given */
! 203: if(wanted & PARSE_PUNCTUATION) strcat(result, "/");
! 204: strcat(result, given.absolute);
! 205: } else if(related.absolute) { /* Adopt path not name */
! 206: strcat(result, "/");
! 207: strcat(result, related.absolute);
! 208: if (given.relative) {
! 209: p = strchr(result, '?'); /* Search part? */
! 210: if (!p) p=result+strlen(result)-1;
! 211: for (; *p!='/'; p--); /* last / */
! 212: p[1]=0; /* Remove filename */
! 213: strcat(result, given.relative); /* Add given one */
! 214: HTSimplify (result);
! 215: }
! 216: } else if(given.relative) {
! 217: strcat(result, given.relative); /* what we've got */
! 218: } else if(related.relative) {
! 219: strcat(result, related.relative);
! 220: } else { /* No inheritance */
! 221: strcat(result, "/");
! 222: }
! 223: }
! 224:
! 225: if (wanted & PARSE_ANCHOR)
! 226: if(given.anchor || related.anchor) {
! 227: if(wanted & PARSE_PUNCTUATION) strcat(result, "#");
! 228: strcat(result, given.anchor ? given.anchor : related.anchor);
! 229: }
! 230: free(rel);
! 231: free(name);
! 232:
! 233: StrAllocCopy(return_value, result);
! 234: free(result);
! 235: return return_value; /* exactly the right length */
! 236: }
! 237:
/*	Simplify a filename
**	-------------------
**
** A unix-style file is allowed to contain the sequence xxx/../ which may be
** replaced by "" , and the sequence "/./" which may be replaced by "/".
** Simplification helps us recognize duplicate filenames.
**
**	Thus,	/etc/junk/../fred	becomes	/etc/fred
**		/etc/junk/./fred	becomes	/etc/junk/fred
**
** On entry,
**	filename	points to a writable zero-terminated string, or is 0.
** On exit,
**	The string has been simplified IN PLACE; it never grows.
**
** Scanning begins at the third character, as it always has, so a "/./"
** or "x/../" at the very front of the input is left alone.  Strings of
** fewer than two characters (and 0) are returned untouched.
**
** A "/.." that ends up at the very start of the string, with no segment
** left in front of it, is dropped: "/a/../../x" becomes "/x".
*/
#ifdef __STDC__
void HTSimplify(char * filename)
#else
void HTSimplify(filename)
    char * filename;
#endif

{
    char * p;
    char * q;
    char * rest;

    /* Keeps filename+2 inside the string. */
    if (!filename || !filename[0] || !filename[1]) return;

    /* Every rewrite below strictly shortens the string and every other
    ** step advances p, so the loop always terminates.  All moves use
    ** memmove because source and destination overlap. */
    p = filename + 2;
    while (*p) {
	if (*p != '/') {
	    p++;
	} else if ((p[1] == '.') && (p[2] == '.') && (p[3] == '/' || !p[3])) {
	    if (p == filename) {
		/* Leading "/..": nothing in front of it to remove. */
		memmove(p, p + 3, strlen(p + 3) + 1);
		if (!*filename) strcpy(filename, "/");
	    } else {
		for (q = p - 1; (q > filename) && (*q != '/'); q--)
		    ;				/* prev slash */
		if (*q == '/') {
		    memmove(q, p + 3, strlen(p + 3) + 1); /* Remove /xxx/.. */
		    if (!*filename) strcpy(filename, "/");
		    p = q;			/* Start again with prev slash */
		} else {			/* xxx/.. at the very start */
		    rest = p[3] ? p + 4 : p + 3;
		    memmove(filename, rest, strlen(rest) + 1); /* rm xxx/../ */
		    p = filename;		/* Start again */
		}
	    }
	} else if ((p[1] == '.') && (p[2] == '/' || !p[2])) {
	    /* Remove a slash and a dot, then look at the same position
	    ** again so that "/././" is reduced completely. */
	    memmove(p, p + 2, strlen(p + 2) + 1);
	    if (!*filename) strcpy(filename, "/");
	} else {
	    p++;
	}
    }
}
! 276:
! 277:
! 278: /* Make Relative Name
! 279: ** ------------------
! 280: **
! 281: ** This function creates and returns a string which gives an expression of
! 282: ** one address as related to another. Where there is no relation, an absolute
! 283: ** address is retured.
! 284: **
! 285: ** On entry,
! 286: ** Both names must be absolute, fully qualified names of nodes
! 287: ** (no anchor bits)
! 288: **
! 289: ** On exit,
! 290: ** The return result points to a newly allocated name which, if
! 291: ** parsed by HTParse relative to relatedName, will yield aName.
! 292: ** The caller is responsible for freeing the resulting name later.
! 293: **
! 294: */
! 295: #ifdef __STDC__
! 296: char * HTRelative(const char * aName, const char *relatedName)
! 297: #else
! 298: char * HTRelative(aName, relatedName)
! 299: char * aName;
! 300: char * relatedName;
! 301: #endif
! 302: {
! 303: char * result = 0;
! 304: CONST char *p = aName;
! 305: CONST char *q = relatedName;
! 306: CONST char * after_access = 0;
! 307: CONST char * path = 0;
! 308: CONST char * last_slash = 0;
! 309: int slashes = 0;
! 310:
! 311: for(;*p; p++, q++) { /* Find extent of match */
! 312: if (*p!=*q) break;
! 313: if (*p==':') after_access = p+1;
! 314: if (*p=='/') {
! 315: last_slash = p;
! 316: slashes++;
! 317: if (slashes==3) path=p;
! 318: }
! 319: }
! 320:
! 321: /* q, p point to the first non-matching character or zero */
! 322:
! 323: if (!after_access) { /* Different access */
! 324: StrAllocCopy(result, aName);
! 325: } else if (slashes<3){ /* Different nodes */
! 326: StrAllocCopy(result, after_access);
! 327: } else if (slashes==3){ /* Same node, different path */
! 328: StrAllocCopy(result, path);
! 329: } else { /* Some path in common */
! 330: int levels= 0;
! 331: for(; *q && (*q!='#'); q++) if (*q=='/') levels++;
! 332: result = (char *)malloc(3*levels + strlen(last_slash) + 1);
! 333: if (result == NULL) outofmem(__FILE__, "HTRelative");
! 334: result[0]=0;
! 335: for(;levels; levels--)strcat(result, "../");
! 336: strcat(result, last_slash+1);
! 337: }
! 338: if (TRACE) fprintf(stderr, "HT: `%s' expressed relative to\n `%s' is\n `%s'.",
! 339: aName, relatedName, result);
! 340: return result;
! 341: }
Webmaster