Return to AHTURLTools.c CVS log | Up to [Public] / Amaya / amaya |
1.7 cvs 1: /* 2: * 3: * (c) COPYRIGHT MIT and INRIA, 1996. 4: * Please first read the full copyright statement in file COPYRIGHT. 5: * 6: */ 1.9 cvs 7: 1.10 cvs 8: /* 9: * AHTURLTools.c: contains all the functions for testing, manipulating, 10: * and normalizing URLs. 11: * 12: * Authors: J. Kahan, I. Vatton 13: * 14: */ 1.7 cvs 15: 1.8 cvs 16: /* Amaya includes */ 17: #define EXPORT extern 1.3 cvs 18: #include "amaya.h" 19: 1.8 cvs 20: 21: #include "init_f.h" 22: #include "AHTURLTools_f.h" 23: 24: /*---------------------------------------------------------------------- 1.11 cvs 25: ExplodeURL 1.8 cvs 26: ----------------------------------------------------------------------*/ 27: 28: #ifdef __STDC__ 29: void ExplodeURL (char *url, char **proto, char **host, char **dir, char **file) 30: #else 31: void ExplodeURL (url, proto, host, dir, file) 32: char *url; 33: char **proto; 34: char **host; 35: char **dir; 36: char **file; 37: 38: #endif 39: { 1.9 cvs 40: char *curr, *temp; 1.8 cvs 41: 42: if ((url == NULL) || (proto == NULL) || (host == NULL) || 43: (dir == NULL) || (file == NULL)) 44: return; 45: 46: /* initialize every pointer */ 47: *proto = *host = *dir = *file = NULL; 48: 49: /* skip any leading space */ 50: while ((*url == SPACE) || (*url == TAB)) 51: url++; 1.9 cvs 52: curr = url; 53: if (*curr == 0) 1.8 cvs 54: goto finished; 55: 56: /* go to the end of the URL */ 1.9 cvs 57: while ((*curr != 0) && (*curr != SPACE) && (*curr != '\b') && 58: (*curr != '\r') && (*curr != EOL)) 59: curr++; 1.8 cvs 60: 61: /* mark the end of the chain */ 1.9 cvs 62: *curr = EOS; 63: curr--; 64: if (curr <= url) 1.8 cvs 65: goto finished; 66: 67: /* search the next DIR_SEP indicating the beginning of the file name */ 68: do 1.11 cvs 69: curr--; 1.9 cvs 70: while ((curr >= url) && (*curr != DIR_SEP)); 1.11 cvs 71: 1.9 cvs 72: if (curr < url) 1.8 cvs 73: goto finished; 1.9 cvs 74: *file = curr + 1; 1.8 cvs 75: 76: /* mark the end of the dir */ 1.9 cvs 77: *curr = EOS; 78: curr--; 79: if (curr < url) 1.8 cvs 80: goto finished; 81: 82: /* search for the "/" indicating the host name start */ 1.9 cvs 83: while ((curr > url) && ((*curr != DIR_SEP) || (*(curr + 1) != DIR_SEP))) 84: curr--; 1.8 cvs 85: 86: /* if we found it, separate the host name from the directory */ 1.9 cvs 87: if ((*curr == DIR_SEP) && (*(curr + 1) == DIR_SEP)) 1.8 cvs 88: { 1.9 cvs 89: *host = temp = curr + 2; 1.8 cvs 90: while ((*temp != 0) && (*temp != DIR_SEP)) 91: temp++; 92: if (*temp == DIR_SEP) 93: { 94: *temp = EOS; 95: *dir = temp + 1; 96: } 97: } 98: else 1.11 cvs 99: *dir = curr; 100: 1.9 cvs 101: if (curr <= url) 1.8 cvs 102: goto finished; 103: 104: /* mark the end of the proto */ 1.9 cvs 105: *curr = EOS; 106: curr--; 107: if (curr < url) 1.8 cvs 108: goto finished; 109: 1.9 cvs 110: if (*curr == ':') 1.8 cvs 111: { 1.9 cvs 112: *curr = EOS; 113: curr--; 1.8 cvs 114: } 115: else 116: goto finished; 1.11 cvs 117: 1.9 cvs 118: if (curr < url) 1.8 cvs 119: goto finished; 1.9 cvs 120: while ((curr > url) && (isalpha (*curr))) 121: curr--; 122: *proto = curr; 1.8 cvs 123: 124: finished:; 125: 126: #ifdef AMAYA_DEBUG 127: fprintf (stderr, "ExplodeURL(%s)\n\t", url); 128: if (*proto) 129: fprintf (stderr, "proto : %s, ", *proto); 130: if (*host) 131: fprintf (stderr, "host : %s, ", *host); 132: if (*dir) 133: fprintf (stderr, "dir : %s, ", *dir); 134: if (*file) 135: fprintf (stderr, "file : %s ", *file); 136: fprintf (stderr, "\n"); 137: #endif 138: 139: } 1.3 cvs 140: 1.4 cvs 141: /*---------------------------------------------------------------------- 1.9 cvs 142: IsHTMLName 143: returns TRUE if path points to an HTML resource. 1.4 cvs 144: ----------------------------------------------------------------------*/ 1.3 cvs 145: #ifdef __STDC__ 146: boolean IsHTMLName (char *path) 147: #else /* __STDC__ */ 148: boolean IsHTMLName (path) 149: char *path; 150: #endif /* __STDC__ */ 151: { 1.5 cvs 152: char temppath[MAX_LENGTH]; 153: char suffix[MAX_LENGTH]; 154: char nsuffix[MAX_LENGTH]; 155: int i; 156: 157: if (!path) 158: return FALSE; 159: 160: strcpy (temppath, path); 161: ExtractSuffix (temppath, suffix); 162: 163: /* Normalize the suffix */ 164: i = 0; 165: while (suffix[i] != EOS) 166: nsuffix[i] = TOLOWER (suffix[i++]); 167: nsuffix[i] = EOS; 168: if ((strcmp (nsuffix, "html")) && 169: (strcmp (nsuffix, "htm")) && 170: (strcmp (nsuffix, "shtml"))) 171: return FALSE; 172: return TRUE; 1.3 cvs 173: } 174: 1.4 cvs 175: /*---------------------------------------------------------------------- 1.9 cvs 176: IsImageName 177: returns TRUE if path points to an image resource. 1.4 cvs 178: ----------------------------------------------------------------------*/ 1.3 cvs 179: #ifdef __STDC__ 180: boolean IsImageName (char *path) 181: #else /* __STDC__ */ 182: boolean IsImageName (path) 183: char *path; 184: #endif /* __STDC__ */ 185: { 1.5 cvs 186: char temppath[MAX_LENGTH]; 187: char suffix[MAX_LENGTH]; 188: char nsuffix[MAX_LENGTH]; 189: int i; 190: 191: if (!path) 192: return FALSE; 193: 194: strcpy (temppath, path); 195: ExtractSuffix (temppath, suffix); 196: 197: /* Normalize the suffix */ 198: i = 0; 199: while (suffix[i] != EOS) 200: nsuffix[i] = TOLOWER (suffix[i++]); 201: nsuffix[i] = EOS; 202: if ((strcmp (nsuffix, "gif")) && (strcmp (nsuffix, "xbm")) && 203: (strcmp (nsuffix, "xpm")) && (strcmp (nsuffix, "jpg")) && 204: (strcmp (nsuffix, "png")) && (strcmp (nsuffix, "au"))) 205: return FALSE; 206: return TRUE; 1.3 cvs 207: } 208: 1.4 cvs 209: /*---------------------------------------------------------------------- 1.9 cvs 210: IsTextName 1.4 cvs 211: ----------------------------------------------------------------------*/ 1.3 cvs 212: #ifdef __STDC__ 213: boolean IsTextName (char *path) 214: #else /* __STDC__ */ 215: boolean IsTextName (path) 216: char *path; 217: 218: #endif /* __STDC__ */ 219: { 1.5 cvs 220: char temppath[MAX_LENGTH]; 221: char suffix[MAX_LENGTH]; 222: char nsuffix[MAX_LENGTH]; 223: int i; 224: 225: if (!path) 226: return FALSE; 227: 228: strcpy (temppath, path); 229: ExtractSuffix (temppath, suffix); 230: 231: /* Normalize the suffix */ 232: i = 0; 233: while (suffix[i] != EOS) 234: { 235: nsuffix[i] = TOLOWER (suffix[i]); 236: i++; 237: } 238: nsuffix[i] = EOS; 239: 240: if ((strcmp (nsuffix, "gif")) && (strcmp (nsuffix, "xbm")) && 241: (strcmp (nsuffix, "xpm")) && (strcmp (nsuffix, "jpg")) && 242: (strcmp (nsuffix, "pdf")) && (strcmp (nsuffix, "png")) && 243: (strcmp (nsuffix, "Z")) && (strcmp (nsuffix, "gz")) && 244: (strcmp (nsuffix, "tgz")) && (strcmp (nsuffix, "xpg")) && 245: (strcmp (nsuffix, "xpd")) && (strcmp (nsuffix, "ps")) && 246: (strcmp (nsuffix, "au"))) 247: return TRUE; 248: return FALSE; 1.3 cvs 249: } 250: 1.4 cvs 251: /*---------------------------------------------------------------------- 1.9 cvs 252: IsHTTPPath 253: returns TRUE if path is in fact an http URL. 1.4 cvs 254: ----------------------------------------------------------------------*/ 1.3 cvs 255: #ifdef __STDC__ 256: boolean IsHTTPPath (char *path) 257: #else /* __STDC__ */ 258: boolean IsHTTPPath (path) 259: char *path; 260: #endif /* __STDC__ */ 261: { 1.5 cvs 262: if (!path) 263: return FALSE; 1.3 cvs 264: 1.5 cvs 265: if (strncmp (path, "http:", 5) != 0) 266: return FALSE; 267: return TRUE; 1.3 cvs 268: } 269: 1.4 cvs 270: /*---------------------------------------------------------------------- 1.9 cvs 271: IsWithParameters 272: returns TRUE if url has a concatenated query string. 1.4 cvs 273: ----------------------------------------------------------------------*/ 1.3 cvs 274: #ifdef __STDC__ 1.9 cvs 275: boolean IsWithParameters (char *url) 1.3 cvs 276: #else /* __STDC__ */ 1.9 cvs 277: boolean IsWithParameters (url) 278: char *url; 1.3 cvs 279: #endif /* __STDC__ */ 280: { 1.5 cvs 281: int i; 1.3 cvs 282: 1.9 cvs 283: if ((!url) || (url[0] == EOS)) 1.5 cvs 284: return FALSE; 1.3 cvs 285: 1.9 cvs 286: i = strlen (url) - 1; 287: while (i > 0 && url[i--] != '?') 1.5 cvs 288: if (i < 0) 289: return FALSE; 1.3 cvs 290: 1.5 cvs 291: /* There is a parameter */ 292: return TRUE; 1.3 cvs 293: } 294: 1.4 cvs 295: /*---------------------------------------------------------------------- 1.9 cvs 296: IsW3Path 297: returns TRUE if path is in fact a URL. 1.4 cvs 298: ----------------------------------------------------------------------*/ 1.3 cvs 299: #ifdef __STDC__ 300: boolean IsW3Path (char *path) 301: #else /* __STDC__ */ 302: boolean IsW3Path (path) 303: char *path; 304: #endif /* __STDC__ */ 305: { 1.5 cvs 306: if ((strncmp (path, "http:", 5)) && (strncmp (path, "ftp:", 4)) && 307: (strncmp (path, "telnet:", 7)) && (strncmp (path, "wais:", 5)) && 308: (strncmp (path, "news:", 5)) && (strncmp (path, "gopher:", 7)) && 309: (strncmp (path, "mailto:", 7)) && (strncmp (path, "archie:", 7))) 310: return FALSE; 311: return TRUE; 1.3 cvs 312: } 313: 1.4 cvs 314: /*---------------------------------------------------------------------- 1.9 cvs 315: IsValidProtocol 316: returns true if the url protocol is supported by Amaya. 1.4 cvs 317: ----------------------------------------------------------------------*/ 1.3 cvs 318: #ifdef __STDC__ 1.9 cvs 319: boolean IsValidProtocol (char *url) 1.3 cvs 320: #else /* __STDC__ */ 1.9 cvs 321: boolean IsValidProtocol (url) 322: char *url; 1.3 cvs 323: #endif /* __STDC__ */ 324: { 1.9 cvs 325: if (!strncmp (url, "http:", 5) 1.3 cvs 326: /***|| !strncmp (path, "ftp:", 4) 1.5 cvs 327: || !strncmp (path, "news:", 5)***/ ) 1.8 cvs 328: return (TRUE); 1.5 cvs 329: else 1.8 cvs 330: return (FALSE); 1.3 cvs 331: } 332: 1.4 cvs 333: /*---------------------------------------------------------------------- 1.9 cvs 334: NormalizeURL 335: normalizes orgName according to a base associated with doc, and 336: following the standard URL format rules. 337: The function returns the new complete and normalized URL 1.12 ! cvs 338: or file name path (newName) and the name of the document (docName). 1.9 cvs 339: N.B. If the function can't find out what's the docName, it assigns 340: the name "noname.html". 1.4 cvs 341: ----------------------------------------------------------------------*/ 1.3 cvs 342: #ifdef __STDC__ 343: void NormalizeURL (char *orgName, Document doc, char *newName, char *docName) 344: #else /* __STDC__ */ 345: void NormalizeURL (orgName, doc, newName, docName) 346: char *orgName; 347: Document doc; 348: char *newName; 349: char *docName; 350: #endif /* __STDC__ */ 351: { 1.5 cvs 352: char basename[MAX_LENGTH]; 353: char tempname[MAX_LENGTH]; 354: int i; 355: char *ptr; 356: char *basename_ptr; 357: int basename_flag; 358: Element el; 359: ElementType elType; 360: AttributeType attrType; 361: Attribute attrHREF; 362: int length; 363: 364: /* Fix up orgName, by erasing leading and trailing white space */ 365: if (!newName || !docName) 366: return; 367: ptr = orgName; 368: while (*ptr == ' ' && *ptr++ != EOS) ; 369: strcpy (tempname, ptr); 370: ptr = strchr (tempname, ' '); 371: if (ptr) 372: *ptr = EOS; 373: 374: /* 375: ** the following block to take into account the BASE element. 376: ** This is not very optimized, as this procedure is repeated for 377: ** each element which is retrieved. A better way would be to 378: ** move this higher up in the function call hierarchy. 379: */ 1.12 ! cvs 380: if (!IsW3Path (tempname) && doc) 1.5 cvs 381: { 382: length = MAX_LENGTH; 383: /* get the root element */ 384: el = TtaGetMainRoot (doc); 385: 386: /* search the BASE element */ 387: elType.ElSSchema = TtaGetDocumentSSchema (doc); 388: elType.ElTypeNum = HTML_EL_BASE; 389: el = TtaSearchTypedElement (elType, SearchInTree, el); 390: if (el) 391: { 392: /* 393: ** The document has a BASE element 394: ** Get the HREF attribute of the BASE Element 1.3 cvs 395: */ 1.5 cvs 396: attrType.AttrSSchema = elType.ElSSchema; 397: attrType.AttrTypeNum = HTML_ATTR_HREF_; 398: attrHREF = TtaGetAttribute (el, attrType); 399: if (attrHREF) 400: { 401: /* 402: ** Use the base path of the document 403: ** To do: verify length of the buffer 404: ** length > TtaGetTextAttributeLength (attrHREF) + strlen (orgName) 405: */ 406: TtaGiveTextAttributeValue (attrHREF, basename, &length); 407: 408: /* 409: ** base and orgName have to be separated by a DIR_SEP 410: */ 411: if (basename[strlen (basename) - 1] != DIR_SEP && tempname[0] != DIR_SEP) 412: strcat (basename, DIR_STR); 413: } 1.11 cvs 414: else 415: basename[0] = EOS; 1.5 cvs 416: } 417: else 418: basename[0] = EOS; 419: } 420: else 421: basename[0] = EOS; 422: 423: if (basename[0] == EOS) 424: { 425: /* 426: ** There is no BASE element in that document. 427: ** A temporary fix as TtaExtractName does not tolerate a name 428: ** ending in /. Here, we reinsert the slash, in order to 429: ** parse the name in the following two lines. A bit 430: ** redundant and has to be reviewed. 431: */ 432: if (DocumentURLs[(int) doc]) 433: { 434: basename_ptr = HTParse (DocumentURLs[(int) doc], "", PARSE_ALL); 435: basename_flag = TRUE; 436: } 437: else 438: { 439: basename_ptr = ""; 440: basename_flag = FALSE; 441: } 442: } 443: else 444: { 445: basename_ptr = HTParse (basename, "", PARSE_ALL); 446: basename_flag = TRUE; 447: } /* if-else tempname */ 448: 449: ptr = HTParse (tempname, basename_ptr, PARSE_ALL); 450: if (basename_flag) 451: HT_FREE (basename_ptr); 452: if (ptr) 453: { 454: ptr = HTSimplify (&ptr); 455: strcpy (newName, ptr); 456: HT_FREE (ptr); 457: } 458: else 459: newName[0] = EOS; 460: 461: i = strlen (newName) - 1; 462: if (i > 0) 463: { 464: /* 465: ** A temporary fix for an interfacing problem: 466: ** TtaExtractName does not tolerate url's finished on DIR_SEP 467: */ 468: ptr = strrchr (newName, DIR_SEP); 469: if (ptr) 470: ptr++; 471: if (ptr && *ptr != EOS) 472: strcpy (docName, ptr); 473: else 474: /* 475: ** The docname was not comprised inside the URL, so let's 476: ** assign a "noname.html" name :) 477: */ 478: strcpy (docName, "noname.html"); 479: 480: /* 481: ** A temporary fix for an interfacing problem: 482: ** TtaExtractName does not tolerate url's finished on DIR_SEP 483: */ 484: if (newName[i] == DIR_SEP) 485: newName[i] = EOS; 486: } 1.3 cvs 487: } 488: 1.4 cvs 489: /*---------------------------------------------------------------------- 1.9 cvs 490: IsSameHost 1.4 cvs 491: ----------------------------------------------------------------------*/ 1.3 cvs 492: #ifdef __STDC__ 493: boolean IsSameHost (char *url1, char *url2) 494: #else /* __STDC__ */ 495: boolean IsSameHost (url1, url2) 496: char *path; 497: #endif /* __STDC__ */ 498: { 1.5 cvs 499: char *basename_ptr1, *basename_ptr2; 500: boolean result; 1.3 cvs 501: 1.5 cvs 502: basename_ptr1 = HTParse (url1, "", PARSE_ACCESS | PARSE_HOST | PARSE_PUNCTUATION); 503: basename_ptr2 = HTParse (url2, "", PARSE_ACCESS | PARSE_HOST | PARSE_PUNCTUATION); 1.3 cvs 504: 1.5 cvs 505: if (strcmp (basename_ptr1, basename_ptr2)) 1.8 cvs 506: result = FALSE; 1.5 cvs 507: else 1.8 cvs 508: result = TRUE; 1.3 cvs 509: 1.5 cvs 510: HT_FREE (basename_ptr1); 511: HT_FREE (basename_ptr2); 1.3 cvs 512: 1.5 cvs 513: return (result); 1.3 cvs 514: } 515: 516: 1.4 cvs 517: /*---------------------------------------------------------------------- 1.9 cvs 518: AHTMakeRelativeURL 519: converts url into a relative url to base_url. 520: If succesful, returns the new URL, otherwise, it returns NULL. 521: The caller has to free the new URL. 1.4 cvs 522: ----------------------------------------------------------------------*/ 1.3 cvs 523: #ifdef __STDC__ 1.5 cvs 524: char *AHTMakeRelativeName (char *url, char *base_url) 1.3 cvs 525: #else /* __STDC__ */ 1.5 cvs 526: char *AHTMakeRelativeName (url, base_url) 527: char url; 528: char base_url; 529: 1.3 cvs 530: #endif /* __STDC__ */ 531: { 1.5 cvs 532: char *base_ptr, *url_ptr; 533: char *result; 534: 535: /* verify if we are in the same host */ 1.3 cvs 536: 1.5 cvs 537: base_ptr = HTParse (base_url, "", PARSE_ACCESS | PARSE_HOST | PARSE_PUNCTUATION); 538: url_ptr = HTParse (url, "", PARSE_ACCESS | PARSE_HOST | PARSE_PUNCTUATION); 1.3 cvs 539: 1.5 cvs 540: if (!strcmp (base_ptr, url_ptr)) 541: { 542: HT_FREE (base_ptr); 543: HT_FREE (url_ptr); 1.3 cvs 544: 1.5 cvs 545: /* Normalize the URLs */ 1.3 cvs 546: 1.5 cvs 547: base_ptr = HTParse (base_url, "", PARSE_ALL); 548: url_ptr = HTParse (url, "", PARSE_ALL); 1.3 cvs 549: 1.5 cvs 550: /* Use libwww to make relative name */ 1.3 cvs 551: 1.5 cvs 552: result = HTRelative (url_ptr, base_ptr); 553: HT_FREE (base_ptr); 554: HT_FREE (url_ptr); 555: } 556: else 557: result = (char *) NULL; 1.3 cvs 558: 1.5 cvs 559: return (result); 1.3 cvs 560: } 1.9 cvs 561: 562: 563: