Return to AHTURLTools.c CVS log | Up to [Public] / Amaya / amaya |
1.7 cvs 1: /* 2: * 3: * (c) COPYRIGHT MIT and INRIA, 1996. 4: * Please first read the full copyright statement in file COPYRIGHT. 5: * 6: */ 1.9 cvs 7: 1.10 cvs 8: /* 9: * AHTURLTools.c: contains all the functions for testing, manipulating, 10: * and normalizing URLs. 11: * 12: * Authors: J. Kahan, I. Vatton 13: * 14: */ 1.7 cvs 15: 1.8 cvs 16: /* Amaya includes */ 17: #define EXPORT extern 1.3 cvs 18: #include "amaya.h" 19: 1.8 cvs 20: 21: #include "init_f.h" 22: #include "AHTURLTools_f.h" 23: 24: /*---------------------------------------------------------------------- 1.11 cvs 25: ExplodeURL 1.8 cvs 26: ----------------------------------------------------------------------*/ 27: 28: #ifdef __STDC__ 29: void ExplodeURL (char *url, char **proto, char **host, char **dir, char **file) 30: #else 31: void ExplodeURL (url, proto, host, dir, file) 32: char *url; 33: char **proto; 34: char **host; 35: char **dir; 36: char **file; 37: 38: #endif 39: { 1.9 cvs 40: char *curr, *temp; 1.8 cvs 41: 42: if ((url == NULL) || (proto == NULL) || (host == NULL) || 43: (dir == NULL) || (file == NULL)) 44: return; 45: 46: /* initialize every pointer */ 47: *proto = *host = *dir = *file = NULL; 48: 49: /* skip any leading space */ 50: while ((*url == SPACE) || (*url == TAB)) 51: url++; 1.9 cvs 52: curr = url; 53: if (*curr == 0) 1.8 cvs 54: goto finished; 55: 56: /* go to the end of the URL */ 1.9 cvs 57: while ((*curr != 0) && (*curr != SPACE) && (*curr != '\b') && 58: (*curr != '\r') && (*curr != EOL)) 59: curr++; 1.8 cvs 60: 61: /* mark the end of the chain */ 1.9 cvs 62: *curr = EOS; 63: curr--; 64: if (curr <= url) 1.8 cvs 65: goto finished; 66: 67: /* search the next DIR_SEP indicating the beginning of the file name */ 68: do 1.11 cvs 69: curr--; 1.9 cvs 70: while ((curr >= url) && (*curr != DIR_SEP)); 1.11 cvs 71: 1.9 cvs 72: if (curr < url) 1.8 cvs 73: goto finished; 1.9 cvs 74: *file = curr + 1; 1.8 cvs 75: 76: /* mark the end of the dir */ 1.9 cvs 77: *curr = EOS; 78: curr--; 79: if (curr < url) 1.8 cvs 80: goto finished; 81: 82: /* search for the "/" indicating the host name start */ 1.9 cvs 83: while ((curr > url) && ((*curr != DIR_SEP) || (*(curr + 1) != DIR_SEP))) 84: curr--; 1.8 cvs 85: 86: /* if we found it, separate the host name from the directory */ 1.9 cvs 87: if ((*curr == DIR_SEP) && (*(curr + 1) == DIR_SEP)) 1.8 cvs 88: { 1.9 cvs 89: *host = temp = curr + 2; 1.8 cvs 90: while ((*temp != 0) && (*temp != DIR_SEP)) 91: temp++; 92: if (*temp == DIR_SEP) 93: { 94: *temp = EOS; 95: *dir = temp + 1; 96: } 97: } 98: else 1.11 cvs 99: *dir = curr; 100: 1.9 cvs 101: if (curr <= url) 1.8 cvs 102: goto finished; 103: 104: /* mark the end of the proto */ 1.9 cvs 105: *curr = EOS; 106: curr--; 107: if (curr < url) 1.8 cvs 108: goto finished; 109: 1.9 cvs 110: if (*curr == ':') 1.8 cvs 111: { 1.9 cvs 112: *curr = EOS; 113: curr--; 1.8 cvs 114: } 115: else 116: goto finished; 1.11 cvs 117: 1.9 cvs 118: if (curr < url) 1.8 cvs 119: goto finished; 1.9 cvs 120: while ((curr > url) && (isalpha (*curr))) 121: curr--; 122: *proto = curr; 1.8 cvs 123: 124: finished:; 125: 126: #ifdef AMAYA_DEBUG 127: fprintf (stderr, "ExplodeURL(%s)\n\t", url); 128: if (*proto) 129: fprintf (stderr, "proto : %s, ", *proto); 130: if (*host) 131: fprintf (stderr, "host : %s, ", *host); 132: if (*dir) 133: fprintf (stderr, "dir : %s, ", *dir); 134: if (*file) 135: fprintf (stderr, "file : %s ", *file); 136: fprintf (stderr, "\n"); 137: #endif 138: 139: } 1.3 cvs 140: 1.4 cvs 141: /*---------------------------------------------------------------------- 1.9 cvs 142: IsHTMLName 143: returns TRUE if path points to an HTML resource. 1.4 cvs 144: ----------------------------------------------------------------------*/ 1.3 cvs 145: #ifdef __STDC__ 146: boolean IsHTMLName (char *path) 147: #else /* __STDC__ */ 148: boolean IsHTMLName (path) 149: char *path; 150: #endif /* __STDC__ */ 151: { 1.5 cvs 152: char temppath[MAX_LENGTH]; 153: char suffix[MAX_LENGTH]; 154: char nsuffix[MAX_LENGTH]; 155: int i; 156: 157: if (!path) 1.13 cvs 158: return (FALSE); 1.5 cvs 159: 160: strcpy (temppath, path); 161: ExtractSuffix (temppath, suffix); 162: 163: /* Normalize the suffix */ 164: i = 0; 165: while (suffix[i] != EOS) 1.13 cvs 166: { 167: nsuffix[i] = TOLOWER (suffix[i]); 168: i++; 169: } 1.5 cvs 170: nsuffix[i] = EOS; 171: if ((strcmp (nsuffix, "html")) && 172: (strcmp (nsuffix, "htm")) && 173: (strcmp (nsuffix, "shtml"))) 1.13 cvs 174: return (FALSE); 175: else if ((!strcmp (nsuffix, "gz")) || 176: (!strcmp (nsuffix, "Z"))) 177: { 178: /* take in account compressed files */ 179: ExtractSuffix (temppath, suffix); 180: /* Normalize the suffix */ 181: i = 0; 182: while (suffix[i] != EOS) 183: { 184: nsuffix[i] = TOLOWER (suffix[i]); 185: i++; 186: } 187: nsuffix[i] = EOS; 188: if ((strcmp (nsuffix, "html")) && 189: (strcmp (nsuffix, "htm")) && 190: (strcmp (nsuffix, "shtml"))) 191: return (FALSE); 192: else 193: return (TRUE); 194: } 195: else 196: return (TRUE); 1.3 cvs 197: } 198: 1.4 cvs 199: /*---------------------------------------------------------------------- 1.9 cvs 200: IsImageName 201: returns TRUE if path points to an image resource. 1.4 cvs 202: ----------------------------------------------------------------------*/ 1.3 cvs 203: #ifdef __STDC__ 204: boolean IsImageName (char *path) 205: #else /* __STDC__ */ 206: boolean IsImageName (path) 207: char *path; 208: #endif /* __STDC__ */ 209: { 1.5 cvs 210: char temppath[MAX_LENGTH]; 211: char suffix[MAX_LENGTH]; 212: char nsuffix[MAX_LENGTH]; 213: int i; 214: 215: if (!path) 1.13 cvs 216: return (FALSE); 1.5 cvs 217: 218: strcpy (temppath, path); 219: ExtractSuffix (temppath, suffix); 220: 221: /* Normalize the suffix */ 222: i = 0; 223: while (suffix[i] != EOS) 1.13 cvs 224: { 225: nsuffix[i] = TOLOWER (suffix[i]); 226: i++; 227: } 1.5 cvs 228: nsuffix[i] = EOS; 229: if ((strcmp (nsuffix, "gif")) && (strcmp (nsuffix, "xbm")) && 230: (strcmp (nsuffix, "xpm")) && (strcmp (nsuffix, "jpg")) && 231: (strcmp (nsuffix, "png")) && (strcmp (nsuffix, "au"))) 1.13 cvs 232: return (FALSE); 233: return (TRUE); 1.3 cvs 234: } 235: 1.4 cvs 236: /*---------------------------------------------------------------------- 1.9 cvs 237: IsTextName 1.4 cvs 238: ----------------------------------------------------------------------*/ 1.3 cvs 239: #ifdef __STDC__ 240: boolean IsTextName (char *path) 241: #else /* __STDC__ */ 242: boolean IsTextName (path) 243: char *path; 244: 245: #endif /* __STDC__ */ 246: { 1.5 cvs 247: char temppath[MAX_LENGTH]; 248: char suffix[MAX_LENGTH]; 249: char nsuffix[MAX_LENGTH]; 250: int i; 251: 252: if (!path) 1.13 cvs 253: return (FALSE); 1.5 cvs 254: 255: strcpy (temppath, path); 256: ExtractSuffix (temppath, suffix); 257: 258: /* Normalize the suffix */ 259: i = 0; 260: while (suffix[i] != EOS) 261: { 262: nsuffix[i] = TOLOWER (suffix[i]); 263: i++; 264: } 265: nsuffix[i] = EOS; 266: 267: if ((strcmp (nsuffix, "gif")) && (strcmp (nsuffix, "xbm")) && 268: (strcmp (nsuffix, "xpm")) && (strcmp (nsuffix, "jpg")) && 269: (strcmp (nsuffix, "pdf")) && (strcmp (nsuffix, "png")) && 270: (strcmp (nsuffix, "tgz")) && (strcmp (nsuffix, "xpg")) && 271: (strcmp (nsuffix, "xpd")) && (strcmp (nsuffix, "ps")) && 272: (strcmp (nsuffix, "au"))) 1.13 cvs 273: return (TRUE); 274: else if ((!strcmp (nsuffix, "gz")) || (!strcmp (nsuffix, "Z"))) 275: { 276: /* take in account compressed files */ 277: ExtractSuffix (temppath, suffix); 278: /* Normalize the suffix */ 279: i = 0; 280: while (suffix[i] != EOS) 281: { 282: nsuffix[i] = TOLOWER (suffix[i]); 283: i++; 284: } 285: nsuffix[i] = EOS; 286: if ((!strcmp (nsuffix, "html")) || 287: (!strcmp (nsuffix, "htm")) || 288: (!strcmp (nsuffix, "shtml"))) 289: return (TRUE); 290: else 291: return (FALSE); 292: } 293: else 294: return (FALSE); 1.3 cvs 295: } 296: 1.4 cvs 297: /*---------------------------------------------------------------------- 1.9 cvs 298: IsHTTPPath 299: returns TRUE if path is in fact an http URL. 1.4 cvs 300: ----------------------------------------------------------------------*/ 1.3 cvs 301: #ifdef __STDC__ 302: boolean IsHTTPPath (char *path) 303: #else /* __STDC__ */ 304: boolean IsHTTPPath (path) 305: char *path; 306: #endif /* __STDC__ */ 307: { 1.5 cvs 308: if (!path) 309: return FALSE; 1.3 cvs 310: 1.5 cvs 311: if (strncmp (path, "http:", 5) != 0) 312: return FALSE; 313: return TRUE; 1.3 cvs 314: } 315: 1.4 cvs 316: /*---------------------------------------------------------------------- 1.9 cvs 317: IsWithParameters 318: returns TRUE if url has a concatenated query string. 1.4 cvs 319: ----------------------------------------------------------------------*/ 1.3 cvs 320: #ifdef __STDC__ 1.9 cvs 321: boolean IsWithParameters (char *url) 1.3 cvs 322: #else /* __STDC__ */ 1.9 cvs 323: boolean IsWithParameters (url) 324: char *url; 1.3 cvs 325: #endif /* __STDC__ */ 326: { 1.5 cvs 327: int i; 1.3 cvs 328: 1.9 cvs 329: if ((!url) || (url[0] == EOS)) 1.5 cvs 330: return FALSE; 1.3 cvs 331: 1.9 cvs 332: i = strlen (url) - 1; 333: while (i > 0 && url[i--] != '?') 1.5 cvs 334: if (i < 0) 335: return FALSE; 1.3 cvs 336: 1.5 cvs 337: /* There is a parameter */ 338: return TRUE; 1.3 cvs 339: } 340: 1.4 cvs 341: /*---------------------------------------------------------------------- 1.9 cvs 342: IsW3Path 343: returns TRUE if path is in fact a URL. 1.4 cvs 344: ----------------------------------------------------------------------*/ 1.3 cvs 345: #ifdef __STDC__ 346: boolean IsW3Path (char *path) 347: #else /* __STDC__ */ 348: boolean IsW3Path (path) 349: char *path; 350: #endif /* __STDC__ */ 351: { 1.5 cvs 352: if ((strncmp (path, "http:", 5)) && (strncmp (path, "ftp:", 4)) && 353: (strncmp (path, "telnet:", 7)) && (strncmp (path, "wais:", 5)) && 354: (strncmp (path, "news:", 5)) && (strncmp (path, "gopher:", 7)) && 355: (strncmp (path, "mailto:", 7)) && (strncmp (path, "archie:", 7))) 356: return FALSE; 357: return TRUE; 1.3 cvs 358: } 359: 1.4 cvs 360: /*---------------------------------------------------------------------- 1.9 cvs 361: IsValidProtocol 362: returns true if the url protocol is supported by Amaya. 1.4 cvs 363: ----------------------------------------------------------------------*/ 1.3 cvs 364: #ifdef __STDC__ 1.9 cvs 365: boolean IsValidProtocol (char *url) 1.3 cvs 366: #else /* __STDC__ */ 1.9 cvs 367: boolean IsValidProtocol (url) 368: char *url; 1.3 cvs 369: #endif /* __STDC__ */ 370: { 1.9 cvs 371: if (!strncmp (url, "http:", 5) 1.3 cvs 372: /***|| !strncmp (path, "ftp:", 4) 1.5 cvs 373: || !strncmp (path, "news:", 5)***/ ) 1.8 cvs 374: return (TRUE); 1.5 cvs 375: else 1.8 cvs 376: return (FALSE); 1.3 cvs 377: } 378: 1.4 cvs 379: /*---------------------------------------------------------------------- 1.9 cvs 380: NormalizeURL 381: normalizes orgName according to a base associated with doc, and 382: following the standard URL format rules. 383: The function returns the new complete and normalized URL 1.12 cvs 384: or file name path (newName) and the name of the document (docName). 1.9 cvs 385: N.B. If the function can't find out what's the docName, it assigns 386: the name "noname.html". 1.4 cvs 387: ----------------------------------------------------------------------*/ 1.3 cvs 388: #ifdef __STDC__ 389: void NormalizeURL (char *orgName, Document doc, char *newName, char *docName) 390: #else /* __STDC__ */ 391: void NormalizeURL (orgName, doc, newName, docName) 392: char *orgName; 393: Document doc; 394: char *newName; 395: char *docName; 396: #endif /* __STDC__ */ 397: { 1.5 cvs 398: char basename[MAX_LENGTH]; 399: char tempname[MAX_LENGTH]; 400: int i; 401: char *ptr; 402: char *basename_ptr; 403: int basename_flag; 404: Element el; 405: ElementType elType; 406: AttributeType attrType; 407: Attribute attrHREF; 408: int length; 409: 410: /* Fix up orgName, by erasing leading and trailing white space */ 411: if (!newName || !docName) 412: return; 413: ptr = orgName; 414: while (*ptr == ' ' && *ptr++ != EOS) ; 415: strcpy (tempname, ptr); 416: ptr = strchr (tempname, ' '); 417: if (ptr) 418: *ptr = EOS; 419: 1.14 ! cvs 420: if (IsW3Path (tempname)) ! 421: /* the name is complete */ ! 422: strcpy (newName, tempname); 1.5 cvs 423: else 424: { 1.14 ! cvs 425: if (doc) ! 426: { ! 427: /* take into account the BASE element. */ ! 428: length = MAX_LENGTH; ! 429: /* get the root element */ ! 430: el = TtaGetMainRoot (doc); ! 431: ! 432: /* search the BASE element */ ! 433: elType.ElSSchema = TtaGetDocumentSSchema (doc); ! 434: elType.ElTypeNum = HTML_EL_BASE; ! 435: el = TtaSearchTypedElement (elType, SearchInTree, el); ! 436: if (el) ! 437: { ! 438: /* ! 439: ** The document has a BASE element ! 440: ** Get the HREF attribute of the BASE Element ! 441: */ ! 442: attrType.AttrSSchema = elType.ElSSchema; ! 443: attrType.AttrTypeNum = HTML_ATTR_HREF_; ! 444: attrHREF = TtaGetAttribute (el, attrType); ! 445: if (attrHREF) ! 446: { ! 447: /* Use the base path of the document */ ! 448: TtaGiveTextAttributeValue (attrHREF, basename, &length); ! 449: /* base and orgName have to be separated by a DIR_SEP */ ! 450: if (basename[strlen (basename) - 1] != DIR_SEP) ! 451: { ! 452: if (IsHTMLName (basename)) ! 453: { ! 454: /* remove the document name from basename */ ! 455: length = strlen (basename) - 1; ! 456: while (basename[length] != DIR_SEP) ! 457: basename[length--] = EOS; ! 458: } ! 459: else if (tempname[0] != DIR_SEP) ! 460: strcat (basename, DIR_STR); ! 461: } ! 462: } ! 463: else ! 464: basename[0] = EOS; ! 465: } ! 466: else ! 467: basename[0] = EOS; ! 468: } ! 469: else ! 470: basename[0] = EOS; ! 471: ! 472: if (basename[0] == EOS) ! 473: { ! 474: /* there is no BASE element in that document. */ ! 475: if (DocumentURLs[(int) doc]) ! 476: { ! 477: basename_ptr = HTParse (DocumentURLs[(int) doc], "", PARSE_ALL); ! 478: basename_flag = TRUE; ! 479: } ! 480: else ! 481: { ! 482: basename_ptr = ""; ! 483: basename_flag = FALSE; ! 484: } ! 485: } ! 486: else ! 487: { ! 488: basename_ptr = HTParse (basename, "", PARSE_ALL); ! 489: basename_flag = TRUE; ! 490: } ! 491: ! 492: ptr = HTParse (tempname, basename_ptr, PARSE_ALL); ! 493: if (basename_flag) ! 494: HT_FREE (basename_ptr); ! 495: if (ptr) ! 496: { ! 497: ptr = HTSimplify (&ptr); ! 498: strcpy (newName, ptr); ! 499: HT_FREE (ptr); ! 500: } ! 501: else ! 502: newName[0] = EOS; 1.5 cvs 503: } 504: 505: i = strlen (newName) - 1; 506: if (i > 0) 507: { 1.14 ! cvs 508: /* search now the document name */ ! 509: ptr = strrchr (newName, DIR_SEP); ! 510: if (ptr) ! 511: ptr++; ! 512: if (ptr && *ptr != EOS) ! 513: strcpy (docName, ptr); ! 514: else ! 515: /* the docname was not comprised inside the URL, so let's */ ! 516: /* assign a "noname.html" name :) */ ! 517: strcpy (docName, "noname.html"); ! 518: ! 519: /* remove DIR_SEP at the end of complete path */ 1.5 cvs 520: if (newName[i] == DIR_SEP) 521: newName[i] = EOS; 522: } 1.3 cvs 523: } 524: 1.4 cvs 525: /*---------------------------------------------------------------------- 1.9 cvs 526: IsSameHost 1.4 cvs 527: ----------------------------------------------------------------------*/ 1.3 cvs 528: #ifdef __STDC__ 529: boolean IsSameHost (char *url1, char *url2) 530: #else /* __STDC__ */ 531: boolean IsSameHost (url1, url2) 532: char *path; 533: #endif /* __STDC__ */ 534: { 1.5 cvs 535: char *basename_ptr1, *basename_ptr2; 536: boolean result; 1.3 cvs 537: 1.5 cvs 538: basename_ptr1 = HTParse (url1, "", PARSE_ACCESS | PARSE_HOST | PARSE_PUNCTUATION); 539: basename_ptr2 = HTParse (url2, "", PARSE_ACCESS | PARSE_HOST | PARSE_PUNCTUATION); 1.3 cvs 540: 1.5 cvs 541: if (strcmp (basename_ptr1, basename_ptr2)) 1.8 cvs 542: result = FALSE; 1.5 cvs 543: else 1.8 cvs 544: result = TRUE; 1.3 cvs 545: 1.5 cvs 546: HT_FREE (basename_ptr1); 547: HT_FREE (basename_ptr2); 1.3 cvs 548: 1.5 cvs 549: return (result); 1.3 cvs 550: } 551: 552: 1.4 cvs 553: /*---------------------------------------------------------------------- 1.9 cvs 554: AHTMakeRelativeURL 555: converts url into a relative url to base_url. 556: If succesful, returns the new URL, otherwise, it returns NULL. 557: The caller has to free the new URL. 1.4 cvs 558: ----------------------------------------------------------------------*/ 1.3 cvs 559: #ifdef __STDC__ 1.5 cvs 560: char *AHTMakeRelativeName (char *url, char *base_url) 1.3 cvs 561: #else /* __STDC__ */ 1.5 cvs 562: char *AHTMakeRelativeName (url, base_url) 563: char url; 564: char base_url; 565: 1.3 cvs 566: #endif /* __STDC__ */ 567: { 1.5 cvs 568: char *base_ptr, *url_ptr; 569: char *result; 570: 571: /* verify if we are in the same host */ 1.3 cvs 572: 1.5 cvs 573: base_ptr = HTParse (base_url, "", PARSE_ACCESS | PARSE_HOST | PARSE_PUNCTUATION); 574: url_ptr = HTParse (url, "", PARSE_ACCESS | PARSE_HOST | PARSE_PUNCTUATION); 1.3 cvs 575: 1.5 cvs 576: if (!strcmp (base_ptr, url_ptr)) 577: { 578: HT_FREE (base_ptr); 579: HT_FREE (url_ptr); 1.3 cvs 580: 1.5 cvs 581: /* Normalize the URLs */ 1.3 cvs 582: 1.5 cvs 583: base_ptr = HTParse (base_url, "", PARSE_ALL); 584: url_ptr = HTParse (url, "", PARSE_ALL); 1.3 cvs 585: 1.5 cvs 586: /* Use libwww to make relative name */ 1.3 cvs 587: 1.5 cvs 588: result = HTRelative (url_ptr, base_ptr); 589: HT_FREE (base_ptr); 590: HT_FREE (url_ptr); 591: } 592: else 593: result = (char *) NULL; 1.3 cvs 594: 1.5 cvs 595: return (result); 1.3 cvs 596: } 1.9 cvs 597: 598: 599: