Annotation of Amaya/amaya/AHTURLTools.c, revision 1.17
1.7 cvs 1: /*
2: *
3: * (c) COPYRIGHT MIT and INRIA, 1996.
4: * Please first read the full copyright statement in file COPYRIGHT.
5: *
6: */
1.9 cvs 7:
1.10 cvs 8: /*
9: * AHTURLTools.c: contains all the functions for testing, manipulating,
10: * and normalizing URLs.
11: *
12: * Authors: J. Kahan, I. Vatton
13: *
14: */
1.7 cvs 15:
1.8 cvs 16: /* Amaya includes */
1.15 cvs 17: #define THOT_EXPORT extern
1.3 cvs 18: #include "amaya.h"
19:
1.8 cvs 20:
21: #include "init_f.h"
22: #include "AHTURLTools_f.h"
23:
24: /*----------------------------------------------------------------------
1.11 cvs 25: ExplodeURL
1.8 cvs 26: ----------------------------------------------------------------------*/
27:
28: #ifdef __STDC__
29: void ExplodeURL (char *url, char **proto, char **host, char **dir, char **file)
30: #else
31: void ExplodeURL (url, proto, host, dir, file)
32: char *url;
33: char **proto;
34: char **host;
35: char **dir;
36: char **file;
37:
38: #endif
39: {
1.9 cvs 40: char *curr, *temp;
1.8 cvs 41:
42: if ((url == NULL) || (proto == NULL) || (host == NULL) ||
43: (dir == NULL) || (file == NULL))
44: return;
45:
46: /* initialize every pointer */
47: *proto = *host = *dir = *file = NULL;
48:
49: /* skip any leading space */
50: while ((*url == SPACE) || (*url == TAB))
51: url++;
1.9 cvs 52: curr = url;
53: if (*curr == 0)
1.8 cvs 54: goto finished;
55:
56: /* go to the end of the URL */
1.9 cvs 57: while ((*curr != 0) && (*curr != SPACE) && (*curr != '\b') &&
58: (*curr != '\r') && (*curr != EOL))
59: curr++;
1.8 cvs 60:
61: /* mark the end of the chain */
1.9 cvs 62: *curr = EOS;
63: curr--;
64: if (curr <= url)
1.8 cvs 65: goto finished;
66:
67: /* search the next DIR_SEP indicating the beginning of the file name */
68: do
1.11 cvs 69: curr--;
1.9 cvs 70: while ((curr >= url) && (*curr != DIR_SEP));
1.11 cvs 71:
1.9 cvs 72: if (curr < url)
1.8 cvs 73: goto finished;
1.9 cvs 74: *file = curr + 1;
1.8 cvs 75:
76: /* mark the end of the dir */
1.9 cvs 77: *curr = EOS;
78: curr--;
79: if (curr < url)
1.8 cvs 80: goto finished;
81:
82: /* search for the "/" indicating the host name start */
1.9 cvs 83: while ((curr > url) && ((*curr != DIR_SEP) || (*(curr + 1) != DIR_SEP)))
84: curr--;
1.8 cvs 85:
86: /* if we found it, separate the host name from the directory */
1.9 cvs 87: if ((*curr == DIR_SEP) && (*(curr + 1) == DIR_SEP))
1.8 cvs 88: {
1.9 cvs 89: *host = temp = curr + 2;
1.8 cvs 90: while ((*temp != 0) && (*temp != DIR_SEP))
91: temp++;
92: if (*temp == DIR_SEP)
93: {
94: *temp = EOS;
95: *dir = temp + 1;
96: }
97: }
98: else
1.11 cvs 99: *dir = curr;
100:
1.9 cvs 101: if (curr <= url)
1.8 cvs 102: goto finished;
103:
104: /* mark the end of the proto */
1.9 cvs 105: *curr = EOS;
106: curr--;
107: if (curr < url)
1.8 cvs 108: goto finished;
109:
1.9 cvs 110: if (*curr == ':')
1.8 cvs 111: {
1.9 cvs 112: *curr = EOS;
113: curr--;
1.8 cvs 114: }
115: else
116: goto finished;
1.11 cvs 117:
1.9 cvs 118: if (curr < url)
1.8 cvs 119: goto finished;
1.9 cvs 120: while ((curr > url) && (isalpha (*curr)))
121: curr--;
122: *proto = curr;
1.8 cvs 123:
124: finished:;
125:
126: #ifdef AMAYA_DEBUG
127: fprintf (stderr, "ExplodeURL(%s)\n\t", url);
128: if (*proto)
129: fprintf (stderr, "proto : %s, ", *proto);
130: if (*host)
131: fprintf (stderr, "host : %s, ", *host);
132: if (*dir)
133: fprintf (stderr, "dir : %s, ", *dir);
134: if (*file)
135: fprintf (stderr, "file : %s ", *file);
136: fprintf (stderr, "\n");
137: #endif
138:
139: }
1.3 cvs 140:
1.4 cvs 141: /*----------------------------------------------------------------------
1.9 cvs 142: IsHTMLName
143: returns TRUE if path points to an HTML resource.
1.4 cvs 144: ----------------------------------------------------------------------*/
1.3 cvs 145: #ifdef __STDC__
146: boolean IsHTMLName (char *path)
147: #else /* __STDC__ */
148: boolean IsHTMLName (path)
149: char *path;
150: #endif /* __STDC__ */
151: {
1.5 cvs 152: char temppath[MAX_LENGTH];
153: char suffix[MAX_LENGTH];
154: char nsuffix[MAX_LENGTH];
155: int i;
156:
157: if (!path)
1.13 cvs 158: return (FALSE);
1.5 cvs 159:
160: strcpy (temppath, path);
161: ExtractSuffix (temppath, suffix);
162:
163: /* Normalize the suffix */
164: i = 0;
165: while (suffix[i] != EOS)
1.13 cvs 166: {
167: nsuffix[i] = TOLOWER (suffix[i]);
168: i++;
169: }
1.5 cvs 170: nsuffix[i] = EOS;
171: if ((strcmp (nsuffix, "html")) &&
172: (strcmp (nsuffix, "htm")) &&
173: (strcmp (nsuffix, "shtml")))
1.13 cvs 174: return (FALSE);
175: else if ((!strcmp (nsuffix, "gz")) ||
1.16 cvs 176: (!strcmp (nsuffix, "z")))
1.13 cvs 177: {
178: /* take in account compressed files */
179: ExtractSuffix (temppath, suffix);
180: /* Normalize the suffix */
181: i = 0;
182: while (suffix[i] != EOS)
183: {
184: nsuffix[i] = TOLOWER (suffix[i]);
185: i++;
186: }
187: nsuffix[i] = EOS;
188: if ((strcmp (nsuffix, "html")) &&
189: (strcmp (nsuffix, "htm")) &&
190: (strcmp (nsuffix, "shtml")))
191: return (FALSE);
192: else
193: return (TRUE);
194: }
195: else
196: return (TRUE);
1.3 cvs 197: }
198:
1.4 cvs 199: /*----------------------------------------------------------------------
1.9 cvs 200: IsImageName
201: returns TRUE if path points to an image resource.
1.4 cvs 202: ----------------------------------------------------------------------*/
1.3 cvs 203: #ifdef __STDC__
204: boolean IsImageName (char *path)
205: #else /* __STDC__ */
206: boolean IsImageName (path)
207: char *path;
208: #endif /* __STDC__ */
209: {
1.5 cvs 210: char temppath[MAX_LENGTH];
211: char suffix[MAX_LENGTH];
212: char nsuffix[MAX_LENGTH];
213: int i;
214:
215: if (!path)
1.13 cvs 216: return (FALSE);
1.5 cvs 217:
218: strcpy (temppath, path);
219: ExtractSuffix (temppath, suffix);
220:
221: /* Normalize the suffix */
222: i = 0;
223: while (suffix[i] != EOS)
1.13 cvs 224: {
225: nsuffix[i] = TOLOWER (suffix[i]);
226: i++;
227: }
1.5 cvs 228: nsuffix[i] = EOS;
229: if ((strcmp (nsuffix, "gif")) && (strcmp (nsuffix, "xbm")) &&
230: (strcmp (nsuffix, "xpm")) && (strcmp (nsuffix, "jpg")) &&
231: (strcmp (nsuffix, "png")) && (strcmp (nsuffix, "au")))
1.13 cvs 232: return (FALSE);
233: return (TRUE);
1.3 cvs 234: }
235:
1.4 cvs 236: /*----------------------------------------------------------------------
1.9 cvs 237: IsTextName
1.4 cvs 238: ----------------------------------------------------------------------*/
1.3 cvs 239: #ifdef __STDC__
240: boolean IsTextName (char *path)
241: #else /* __STDC__ */
242: boolean IsTextName (path)
243: char *path;
244:
245: #endif /* __STDC__ */
246: {
1.5 cvs 247: char temppath[MAX_LENGTH];
248: char suffix[MAX_LENGTH];
249: char nsuffix[MAX_LENGTH];
250: int i;
251:
252: if (!path)
1.13 cvs 253: return (FALSE);
1.5 cvs 254:
255: strcpy (temppath, path);
256: ExtractSuffix (temppath, suffix);
257:
258: /* Normalize the suffix */
259: i = 0;
260: while (suffix[i] != EOS)
261: {
262: nsuffix[i] = TOLOWER (suffix[i]);
263: i++;
264: }
265: nsuffix[i] = EOS;
266:
267: if ((strcmp (nsuffix, "gif")) && (strcmp (nsuffix, "xbm")) &&
268: (strcmp (nsuffix, "xpm")) && (strcmp (nsuffix, "jpg")) &&
269: (strcmp (nsuffix, "pdf")) && (strcmp (nsuffix, "png")) &&
270: (strcmp (nsuffix, "tgz")) && (strcmp (nsuffix, "xpg")) &&
271: (strcmp (nsuffix, "xpd")) && (strcmp (nsuffix, "ps")) &&
272: (strcmp (nsuffix, "au")))
1.13 cvs 273: return (TRUE);
1.16 cvs 274: else if ((!strcmp (nsuffix, "gz")) || (!strcmp (nsuffix, "z")))
1.13 cvs 275: {
276: /* take in account compressed files */
277: ExtractSuffix (temppath, suffix);
278: /* Normalize the suffix */
279: i = 0;
280: while (suffix[i] != EOS)
281: {
282: nsuffix[i] = TOLOWER (suffix[i]);
283: i++;
284: }
285: nsuffix[i] = EOS;
286: if ((!strcmp (nsuffix, "html")) ||
287: (!strcmp (nsuffix, "htm")) ||
288: (!strcmp (nsuffix, "shtml")))
289: return (TRUE);
290: else
291: return (FALSE);
292: }
293: else
294: return (FALSE);
1.3 cvs 295: }
296:
1.4 cvs 297: /*----------------------------------------------------------------------
1.9 cvs 298: IsHTTPPath
299: returns TRUE if path is in fact an http URL.
1.4 cvs 300: ----------------------------------------------------------------------*/
1.3 cvs 301: #ifdef __STDC__
302: boolean IsHTTPPath (char *path)
303: #else /* __STDC__ */
304: boolean IsHTTPPath (path)
305: char *path;
306: #endif /* __STDC__ */
307: {
1.5 cvs 308: if (!path)
309: return FALSE;
1.3 cvs 310:
1.5 cvs 311: if (strncmp (path, "http:", 5) != 0)
312: return FALSE;
313: return TRUE;
1.3 cvs 314: }
315:
1.4 cvs 316: /*----------------------------------------------------------------------
1.9 cvs 317: IsWithParameters
318: returns TRUE if url has a concatenated query string.
1.4 cvs 319: ----------------------------------------------------------------------*/
1.3 cvs 320: #ifdef __STDC__
1.9 cvs 321: boolean IsWithParameters (char *url)
1.3 cvs 322: #else /* __STDC__ */
1.9 cvs 323: boolean IsWithParameters (url)
324: char *url;
1.3 cvs 325: #endif /* __STDC__ */
326: {
1.5 cvs 327: int i;
1.3 cvs 328:
1.9 cvs 329: if ((!url) || (url[0] == EOS))
1.5 cvs 330: return FALSE;
1.3 cvs 331:
1.9 cvs 332: i = strlen (url) - 1;
333: while (i > 0 && url[i--] != '?')
1.5 cvs 334: if (i < 0)
335: return FALSE;
1.3 cvs 336:
1.5 cvs 337: /* There is a parameter */
338: return TRUE;
1.3 cvs 339: }
340:
1.4 cvs 341: /*----------------------------------------------------------------------
1.9 cvs 342: IsW3Path
343: returns TRUE if path is in fact a URL.
1.4 cvs 344: ----------------------------------------------------------------------*/
1.3 cvs 345: #ifdef __STDC__
346: boolean IsW3Path (char *path)
347: #else /* __STDC__ */
348: boolean IsW3Path (path)
349: char *path;
350: #endif /* __STDC__ */
351: {
1.5 cvs 352: if ((strncmp (path, "http:", 5)) && (strncmp (path, "ftp:", 4)) &&
353: (strncmp (path, "telnet:", 7)) && (strncmp (path, "wais:", 5)) &&
354: (strncmp (path, "news:", 5)) && (strncmp (path, "gopher:", 7)) &&
355: (strncmp (path, "mailto:", 7)) && (strncmp (path, "archie:", 7)))
356: return FALSE;
357: return TRUE;
1.3 cvs 358: }
359:
1.4 cvs 360: /*----------------------------------------------------------------------
1.9 cvs 361: IsValidProtocol
362: returns true if the url protocol is supported by Amaya.
1.4 cvs 363: ----------------------------------------------------------------------*/
1.3 cvs 364: #ifdef __STDC__
1.9 cvs 365: boolean IsValidProtocol (char *url)
1.3 cvs 366: #else /* __STDC__ */
1.9 cvs 367: boolean IsValidProtocol (url)
368: char *url;
1.3 cvs 369: #endif /* __STDC__ */
370: {
1.9 cvs 371: if (!strncmp (url, "http:", 5)
1.3 cvs 372: /***|| !strncmp (path, "ftp:", 4)
1.5 cvs 373: || !strncmp (path, "news:", 5)***/ )
1.8 cvs 374: return (TRUE);
1.5 cvs 375: else
1.8 cvs 376: return (FALSE);
1.3 cvs 377: }
378:
1.4 cvs 379: /*----------------------------------------------------------------------
1.9 cvs 380: NormalizeURL
381: normalizes orgName according to a base associated with doc, and
382: following the standard URL format rules.
383: The function returns the new complete and normalized URL
1.12 cvs 384: or file name path (newName) and the name of the document (docName).
1.9 cvs 385: N.B. If the function can't find out what's the docName, it assigns
386: the name "noname.html".
1.4 cvs 387: ----------------------------------------------------------------------*/
1.3 cvs 388: #ifdef __STDC__
389: void NormalizeURL (char *orgName, Document doc, char *newName, char *docName)
390: #else /* __STDC__ */
391: void NormalizeURL (orgName, doc, newName, docName)
392: char *orgName;
393: Document doc;
394: char *newName;
395: char *docName;
396: #endif /* __STDC__ */
397: {
1.5 cvs 398: char basename[MAX_LENGTH];
399: char tempname[MAX_LENGTH];
400: int i;
401: char *ptr;
402: char *basename_ptr;
403: int basename_flag;
404: Element el;
405: ElementType elType;
406: AttributeType attrType;
407: Attribute attrHREF;
408: int length;
409:
410: /* Fix up orgName, by erasing leading and trailing white space */
411: if (!newName || !docName)
412: return;
413: ptr = orgName;
414: while (*ptr == ' ' && *ptr++ != EOS) ;
415: strcpy (tempname, ptr);
416: ptr = strchr (tempname, ' ');
417: if (ptr)
418: *ptr = EOS;
419:
1.17 ! cvs 420: if (IsW3Path (tempname))
1.14 cvs 421: /* the name is complete */
422: strcpy (newName, tempname);
1.5 cvs 423: else
424: {
1.17 ! cvs 425: if (doc == 0)
! 426: {
! 427: basename_ptr = "";
! 428: basename_flag = FALSE;
! 429: }
! 430: else
! 431: {
! 432: /* take into account the BASE element. */
! 433: length = MAX_LENGTH;
! 434: /* get the root element */
! 435: el = TtaGetMainRoot (doc);
1.16 cvs 436:
1.17 ! cvs 437: /* search the BASE element */
! 438: elType.ElSSchema = TtaGetDocumentSSchema (doc);
! 439: elType.ElTypeNum = HTML_EL_BASE;
! 440: el = TtaSearchTypedElement (elType, SearchInTree, el);
! 441: if (el)
1.14 cvs 442: {
1.17 ! cvs 443: /*
! 444: ** The document has a BASE element
! 445: ** Get the HREF attribute of the BASE Element
! 446: */
! 447: attrType.AttrSSchema = elType.ElSSchema;
! 448: attrType.AttrTypeNum = HTML_ATTR_HREF_;
! 449: attrHREF = TtaGetAttribute (el, attrType);
! 450: if (attrHREF)
1.14 cvs 451: {
1.17 ! cvs 452: /* Use the base path of the document */
! 453: TtaGiveTextAttributeValue (attrHREF, basename, &length);
! 454: /* base and orgName have to be separated by a DIR_SEP */
! 455: if (basename[strlen (basename) - 1] != DIR_SEP)
1.14 cvs 456: {
1.17 ! cvs 457: if (IsHTMLName (basename))
! 458: {
! 459: /* remove the document name from basename */
! 460: length = strlen (basename) - 1;
! 461: while (basename[length] != DIR_SEP)
! 462: basename[length--] = EOS;
! 463: }
! 464: else if (tempname[0] != DIR_SEP)
! 465: strcat (basename, DIR_STR);
1.14 cvs 466: }
467: }
1.17 ! cvs 468: else
! 469: basename[0] = EOS;
1.14 cvs 470: }
471: else
472: basename[0] = EOS;
473:
1.17 ! cvs 474: if (basename[0] == EOS)
1.14 cvs 475: {
1.17 ! cvs 476: /* there is no BASE element in that document. */
! 477: if (DocumentURLs[(int) doc])
! 478: {
! 479: basename_ptr = HTParse (DocumentURLs[(int) doc], "", PARSE_ALL);
! 480: basename_flag = TRUE;
! 481: }
! 482: else
! 483: {
! 484: basename_ptr = "";
! 485: basename_flag = FALSE;
! 486: }
1.14 cvs 487: }
488: else
489: {
1.17 ! cvs 490: basename_ptr = HTParse (basename, "", PARSE_ALL);
! 491: basename_flag = TRUE;
1.14 cvs 492: }
493: }
1.16 cvs 494:
1.17 ! cvs 495: if (tempname[0] == '/' && doc)
1.16 cvs 496: ptr = HTParse (tempname, basename_ptr, PARSE_ACCESS | PARSE_PUNCTUATION | PARSE_HOST);
497: else
498: ptr = HTParse (tempname, basename_ptr, PARSE_ALL);
1.14 cvs 499: if (basename_flag)
500: HT_FREE (basename_ptr);
501: if (ptr)
502: {
503: ptr = HTSimplify (&ptr);
504: strcpy (newName, ptr);
505: HT_FREE (ptr);
506: }
507: else
508: newName[0] = EOS;
1.5 cvs 509: }
510:
511: i = strlen (newName) - 1;
512: if (i > 0)
513: {
1.14 cvs 514: /* search now the document name */
515: ptr = strrchr (newName, DIR_SEP);
516: if (ptr)
517: ptr++;
518: if (ptr && *ptr != EOS)
519: strcpy (docName, ptr);
520: else
521: /* the docname was not comprised inside the URL, so let's */
522: /* assign a "noname.html" name :) */
523: strcpy (docName, "noname.html");
524:
525: /* remove DIR_SEP at the end of complete path */
1.5 cvs 526: if (newName[i] == DIR_SEP)
527: newName[i] = EOS;
528: }
1.3 cvs 529: }
530:
1.4 cvs 531: /*----------------------------------------------------------------------
1.9 cvs 532: IsSameHost
1.4 cvs 533: ----------------------------------------------------------------------*/
1.3 cvs 534: #ifdef __STDC__
535: boolean IsSameHost (char *url1, char *url2)
536: #else /* __STDC__ */
537: boolean IsSameHost (url1, url2)
538: char *path;
539: #endif /* __STDC__ */
540: {
1.5 cvs 541: char *basename_ptr1, *basename_ptr2;
542: boolean result;
1.3 cvs 543:
1.5 cvs 544: basename_ptr1 = HTParse (url1, "", PARSE_ACCESS | PARSE_HOST | PARSE_PUNCTUATION);
545: basename_ptr2 = HTParse (url2, "", PARSE_ACCESS | PARSE_HOST | PARSE_PUNCTUATION);
1.3 cvs 546:
1.5 cvs 547: if (strcmp (basename_ptr1, basename_ptr2))
1.8 cvs 548: result = FALSE;
1.5 cvs 549: else
1.8 cvs 550: result = TRUE;
1.3 cvs 551:
1.5 cvs 552: HT_FREE (basename_ptr1);
553: HT_FREE (basename_ptr2);
1.3 cvs 554:
1.5 cvs 555: return (result);
1.3 cvs 556: }
557:
558:
1.4 cvs 559: /*----------------------------------------------------------------------
1.9 cvs 560: AHTMakeRelativeURL
561: converts url into a relative url to base_url.
562: If succesful, returns the new URL, otherwise, it returns NULL.
563: The caller has to free the new URL.
1.4 cvs 564: ----------------------------------------------------------------------*/
1.3 cvs 565: #ifdef __STDC__
1.5 cvs 566: char *AHTMakeRelativeName (char *url, char *base_url)
1.3 cvs 567: #else /* __STDC__ */
1.5 cvs 568: char *AHTMakeRelativeName (url, base_url)
569: char url;
570: char base_url;
571:
1.3 cvs 572: #endif /* __STDC__ */
573: {
1.5 cvs 574: char *base_ptr, *url_ptr;
575: char *result;
576:
577: /* verify if we are in the same host */
1.3 cvs 578:
1.5 cvs 579: base_ptr = HTParse (base_url, "", PARSE_ACCESS | PARSE_HOST | PARSE_PUNCTUATION);
580: url_ptr = HTParse (url, "", PARSE_ACCESS | PARSE_HOST | PARSE_PUNCTUATION);
1.3 cvs 581:
1.5 cvs 582: if (!strcmp (base_ptr, url_ptr))
583: {
584: HT_FREE (base_ptr);
585: HT_FREE (url_ptr);
1.3 cvs 586:
1.5 cvs 587: /* Normalize the URLs */
1.3 cvs 588:
1.5 cvs 589: base_ptr = HTParse (base_url, "", PARSE_ALL);
590: url_ptr = HTParse (url, "", PARSE_ALL);
1.3 cvs 591:
1.5 cvs 592: /* Use libwww to make relative name */
1.3 cvs 593:
1.5 cvs 594: result = HTRelative (url_ptr, base_ptr);
595: HT_FREE (base_ptr);
596: HT_FREE (url_ptr);
597: }
598: else
599: result = (char *) NULL;
1.3 cvs 600:
1.5 cvs 601: return (result);
1.3 cvs 602: }
1.9 cvs 603:
604:
605:
Webmaster