Annotation of Amaya/amaya/AHTURLTools.c, revision 1.16
1.7 cvs 1: /*
2: *
3: * (c) COPYRIGHT MIT and INRIA, 1996.
4: * Please first read the full copyright statement in file COPYRIGHT.
5: *
6: */
1.9 cvs 7:
1.10 cvs 8: /*
9: * AHTURLTools.c: contains all the functions for testing, manipulating,
10: * and normalizing URLs.
11: *
12: * Authors: J. Kahan, I. Vatton
13: *
14: */
1.7 cvs 15:
1.8 cvs 16: /* Amaya includes */
1.15 cvs 17: #define THOT_EXPORT extern
1.3 cvs 18: #include "amaya.h"
19:
1.8 cvs 20:
21: #include "init_f.h"
22: #include "AHTURLTools_f.h"
23:
24: /*----------------------------------------------------------------------
1.11 cvs 25: ExplodeURL
1.8 cvs 26: ----------------------------------------------------------------------*/
27:
28: #ifdef __STDC__
29: void ExplodeURL (char *url, char **proto, char **host, char **dir, char **file)
30: #else
31: void ExplodeURL (url, proto, host, dir, file)
32: char *url;
33: char **proto;
34: char **host;
35: char **dir;
36: char **file;
37:
38: #endif
39: {
1.9 cvs 40: char *curr, *temp;
1.8 cvs 41:
42: if ((url == NULL) || (proto == NULL) || (host == NULL) ||
43: (dir == NULL) || (file == NULL))
44: return;
45:
46: /* initialize every pointer */
47: *proto = *host = *dir = *file = NULL;
48:
49: /* skip any leading space */
50: while ((*url == SPACE) || (*url == TAB))
51: url++;
1.9 cvs 52: curr = url;
53: if (*curr == 0)
1.8 cvs 54: goto finished;
55:
56: /* go to the end of the URL */
1.9 cvs 57: while ((*curr != 0) && (*curr != SPACE) && (*curr != '\b') &&
58: (*curr != '\r') && (*curr != EOL))
59: curr++;
1.8 cvs 60:
61: /* mark the end of the chain */
1.9 cvs 62: *curr = EOS;
63: curr--;
64: if (curr <= url)
1.8 cvs 65: goto finished;
66:
67: /* search the next DIR_SEP indicating the beginning of the file name */
68: do
1.11 cvs 69: curr--;
1.9 cvs 70: while ((curr >= url) && (*curr != DIR_SEP));
1.11 cvs 71:
1.9 cvs 72: if (curr < url)
1.8 cvs 73: goto finished;
1.9 cvs 74: *file = curr + 1;
1.8 cvs 75:
76: /* mark the end of the dir */
1.9 cvs 77: *curr = EOS;
78: curr--;
79: if (curr < url)
1.8 cvs 80: goto finished;
81:
82: /* search for the "/" indicating the host name start */
1.9 cvs 83: while ((curr > url) && ((*curr != DIR_SEP) || (*(curr + 1) != DIR_SEP)))
84: curr--;
1.8 cvs 85:
86: /* if we found it, separate the host name from the directory */
1.9 cvs 87: if ((*curr == DIR_SEP) && (*(curr + 1) == DIR_SEP))
1.8 cvs 88: {
1.9 cvs 89: *host = temp = curr + 2;
1.8 cvs 90: while ((*temp != 0) && (*temp != DIR_SEP))
91: temp++;
92: if (*temp == DIR_SEP)
93: {
94: *temp = EOS;
95: *dir = temp + 1;
96: }
97: }
98: else
1.11 cvs 99: *dir = curr;
100:
1.9 cvs 101: if (curr <= url)
1.8 cvs 102: goto finished;
103:
104: /* mark the end of the proto */
1.9 cvs 105: *curr = EOS;
106: curr--;
107: if (curr < url)
1.8 cvs 108: goto finished;
109:
1.9 cvs 110: if (*curr == ':')
1.8 cvs 111: {
1.9 cvs 112: *curr = EOS;
113: curr--;
1.8 cvs 114: }
115: else
116: goto finished;
1.11 cvs 117:
1.9 cvs 118: if (curr < url)
1.8 cvs 119: goto finished;
1.9 cvs 120: while ((curr > url) && (isalpha (*curr)))
121: curr--;
122: *proto = curr;
1.8 cvs 123:
124: finished:;
125:
126: #ifdef AMAYA_DEBUG
127: fprintf (stderr, "ExplodeURL(%s)\n\t", url);
128: if (*proto)
129: fprintf (stderr, "proto : %s, ", *proto);
130: if (*host)
131: fprintf (stderr, "host : %s, ", *host);
132: if (*dir)
133: fprintf (stderr, "dir : %s, ", *dir);
134: if (*file)
135: fprintf (stderr, "file : %s ", *file);
136: fprintf (stderr, "\n");
137: #endif
138:
139: }
1.3 cvs 140:
1.4 cvs 141: /*----------------------------------------------------------------------
1.9 cvs 142: IsHTMLName
143: returns TRUE if path points to an HTML resource.
1.4 cvs 144: ----------------------------------------------------------------------*/
1.3 cvs 145: #ifdef __STDC__
146: boolean IsHTMLName (char *path)
147: #else /* __STDC__ */
148: boolean IsHTMLName (path)
149: char *path;
150: #endif /* __STDC__ */
151: {
1.5 cvs 152: char temppath[MAX_LENGTH];
153: char suffix[MAX_LENGTH];
154: char nsuffix[MAX_LENGTH];
155: int i;
156:
157: if (!path)
1.13 cvs 158: return (FALSE);
1.5 cvs 159:
160: strcpy (temppath, path);
161: ExtractSuffix (temppath, suffix);
162:
163: /* Normalize the suffix */
164: i = 0;
165: while (suffix[i] != EOS)
1.13 cvs 166: {
167: nsuffix[i] = TOLOWER (suffix[i]);
168: i++;
169: }
1.5 cvs 170: nsuffix[i] = EOS;
171: if ((strcmp (nsuffix, "html")) &&
172: (strcmp (nsuffix, "htm")) &&
173: (strcmp (nsuffix, "shtml")))
1.13 cvs 174: return (FALSE);
175: else if ((!strcmp (nsuffix, "gz")) ||
1.16 ! cvs 176: (!strcmp (nsuffix, "z")))
1.13 cvs 177: {
178: /* take in account compressed files */
179: ExtractSuffix (temppath, suffix);
180: /* Normalize the suffix */
181: i = 0;
182: while (suffix[i] != EOS)
183: {
184: nsuffix[i] = TOLOWER (suffix[i]);
185: i++;
186: }
187: nsuffix[i] = EOS;
188: if ((strcmp (nsuffix, "html")) &&
189: (strcmp (nsuffix, "htm")) &&
190: (strcmp (nsuffix, "shtml")))
191: return (FALSE);
192: else
193: return (TRUE);
194: }
195: else
196: return (TRUE);
1.3 cvs 197: }
198:
1.4 cvs 199: /*----------------------------------------------------------------------
1.9 cvs 200: IsImageName
201: returns TRUE if path points to an image resource.
1.4 cvs 202: ----------------------------------------------------------------------*/
1.3 cvs 203: #ifdef __STDC__
204: boolean IsImageName (char *path)
205: #else /* __STDC__ */
206: boolean IsImageName (path)
207: char *path;
208: #endif /* __STDC__ */
209: {
1.5 cvs 210: char temppath[MAX_LENGTH];
211: char suffix[MAX_LENGTH];
212: char nsuffix[MAX_LENGTH];
213: int i;
214:
215: if (!path)
1.13 cvs 216: return (FALSE);
1.5 cvs 217:
218: strcpy (temppath, path);
219: ExtractSuffix (temppath, suffix);
220:
221: /* Normalize the suffix */
222: i = 0;
223: while (suffix[i] != EOS)
1.13 cvs 224: {
225: nsuffix[i] = TOLOWER (suffix[i]);
226: i++;
227: }
1.5 cvs 228: nsuffix[i] = EOS;
229: if ((strcmp (nsuffix, "gif")) && (strcmp (nsuffix, "xbm")) &&
230: (strcmp (nsuffix, "xpm")) && (strcmp (nsuffix, "jpg")) &&
231: (strcmp (nsuffix, "png")) && (strcmp (nsuffix, "au")))
1.13 cvs 232: return (FALSE);
233: return (TRUE);
1.3 cvs 234: }
235:
1.4 cvs 236: /*----------------------------------------------------------------------
1.9 cvs 237: IsTextName
1.4 cvs 238: ----------------------------------------------------------------------*/
1.3 cvs 239: #ifdef __STDC__
240: boolean IsTextName (char *path)
241: #else /* __STDC__ */
242: boolean IsTextName (path)
243: char *path;
244:
245: #endif /* __STDC__ */
246: {
1.5 cvs 247: char temppath[MAX_LENGTH];
248: char suffix[MAX_LENGTH];
249: char nsuffix[MAX_LENGTH];
250: int i;
251:
252: if (!path)
1.13 cvs 253: return (FALSE);
1.5 cvs 254:
255: strcpy (temppath, path);
256: ExtractSuffix (temppath, suffix);
257:
258: /* Normalize the suffix */
259: i = 0;
260: while (suffix[i] != EOS)
261: {
262: nsuffix[i] = TOLOWER (suffix[i]);
263: i++;
264: }
265: nsuffix[i] = EOS;
266:
267: if ((strcmp (nsuffix, "gif")) && (strcmp (nsuffix, "xbm")) &&
268: (strcmp (nsuffix, "xpm")) && (strcmp (nsuffix, "jpg")) &&
269: (strcmp (nsuffix, "pdf")) && (strcmp (nsuffix, "png")) &&
270: (strcmp (nsuffix, "tgz")) && (strcmp (nsuffix, "xpg")) &&
271: (strcmp (nsuffix, "xpd")) && (strcmp (nsuffix, "ps")) &&
272: (strcmp (nsuffix, "au")))
1.13 cvs 273: return (TRUE);
1.16 ! cvs 274: else if ((!strcmp (nsuffix, "gz")) || (!strcmp (nsuffix, "z")))
1.13 cvs 275: {
276: /* take in account compressed files */
277: ExtractSuffix (temppath, suffix);
278: /* Normalize the suffix */
279: i = 0;
280: while (suffix[i] != EOS)
281: {
282: nsuffix[i] = TOLOWER (suffix[i]);
283: i++;
284: }
285: nsuffix[i] = EOS;
286: if ((!strcmp (nsuffix, "html")) ||
287: (!strcmp (nsuffix, "htm")) ||
288: (!strcmp (nsuffix, "shtml")))
289: return (TRUE);
290: else
291: return (FALSE);
292: }
293: else
294: return (FALSE);
1.3 cvs 295: }
296:
1.4 cvs 297: /*----------------------------------------------------------------------
1.9 cvs 298: IsHTTPPath
299: returns TRUE if path is in fact an http URL.
1.4 cvs 300: ----------------------------------------------------------------------*/
1.3 cvs 301: #ifdef __STDC__
302: boolean IsHTTPPath (char *path)
303: #else /* __STDC__ */
304: boolean IsHTTPPath (path)
305: char *path;
306: #endif /* __STDC__ */
307: {
1.5 cvs 308: if (!path)
309: return FALSE;
1.3 cvs 310:
1.5 cvs 311: if (strncmp (path, "http:", 5) != 0)
312: return FALSE;
313: return TRUE;
1.3 cvs 314: }
315:
1.4 cvs 316: /*----------------------------------------------------------------------
1.9 cvs 317: IsWithParameters
318: returns TRUE if url has a concatenated query string.
1.4 cvs 319: ----------------------------------------------------------------------*/
1.3 cvs 320: #ifdef __STDC__
1.9 cvs 321: boolean IsWithParameters (char *url)
1.3 cvs 322: #else /* __STDC__ */
1.9 cvs 323: boolean IsWithParameters (url)
324: char *url;
1.3 cvs 325: #endif /* __STDC__ */
326: {
1.5 cvs 327: int i;
1.3 cvs 328:
1.9 cvs 329: if ((!url) || (url[0] == EOS))
1.5 cvs 330: return FALSE;
1.3 cvs 331:
1.9 cvs 332: i = strlen (url) - 1;
333: while (i > 0 && url[i--] != '?')
1.5 cvs 334: if (i < 0)
335: return FALSE;
1.3 cvs 336:
1.5 cvs 337: /* There is a parameter */
338: return TRUE;
1.3 cvs 339: }
340:
1.4 cvs 341: /*----------------------------------------------------------------------
1.9 cvs 342: IsW3Path
343: returns TRUE if path is in fact a URL.
1.4 cvs 344: ----------------------------------------------------------------------*/
1.3 cvs 345: #ifdef __STDC__
346: boolean IsW3Path (char *path)
347: #else /* __STDC__ */
348: boolean IsW3Path (path)
349: char *path;
350: #endif /* __STDC__ */
351: {
1.5 cvs 352: if ((strncmp (path, "http:", 5)) && (strncmp (path, "ftp:", 4)) &&
353: (strncmp (path, "telnet:", 7)) && (strncmp (path, "wais:", 5)) &&
354: (strncmp (path, "news:", 5)) && (strncmp (path, "gopher:", 7)) &&
355: (strncmp (path, "mailto:", 7)) && (strncmp (path, "archie:", 7)))
356: return FALSE;
357: return TRUE;
1.3 cvs 358: }
359:
1.4 cvs 360: /*----------------------------------------------------------------------
1.9 cvs 361: IsValidProtocol
362: returns true if the url protocol is supported by Amaya.
1.4 cvs 363: ----------------------------------------------------------------------*/
1.3 cvs 364: #ifdef __STDC__
1.9 cvs 365: boolean IsValidProtocol (char *url)
1.3 cvs 366: #else /* __STDC__ */
1.9 cvs 367: boolean IsValidProtocol (url)
368: char *url;
1.3 cvs 369: #endif /* __STDC__ */
370: {
1.9 cvs 371: if (!strncmp (url, "http:", 5)
1.3 cvs 372: /***|| !strncmp (path, "ftp:", 4)
1.5 cvs 373: || !strncmp (path, "news:", 5)***/ )
1.8 cvs 374: return (TRUE);
1.5 cvs 375: else
1.8 cvs 376: return (FALSE);
1.3 cvs 377: }
378:
1.4 cvs 379: /*----------------------------------------------------------------------
1.9 cvs 380: NormalizeURL
381: normalizes orgName according to a base associated with doc, and
382: following the standard URL format rules.
383: The function returns the new complete and normalized URL
1.12 cvs 384: or file name path (newName) and the name of the document (docName).
1.9 cvs 385: N.B. If the function can't find out what's the docName, it assigns
386: the name "noname.html".
1.4 cvs 387: ----------------------------------------------------------------------*/
1.3 cvs 388: #ifdef __STDC__
389: void NormalizeURL (char *orgName, Document doc, char *newName, char *docName)
390: #else /* __STDC__ */
391: void NormalizeURL (orgName, doc, newName, docName)
392: char *orgName;
393: Document doc;
394: char *newName;
395: char *docName;
396: #endif /* __STDC__ */
397: {
1.5 cvs 398: char basename[MAX_LENGTH];
399: char tempname[MAX_LENGTH];
400: int i;
401: char *ptr;
402: char *basename_ptr;
403: int basename_flag;
404: Element el;
405: ElementType elType;
406: AttributeType attrType;
407: Attribute attrHREF;
408: int length;
409:
410: /* Fix up orgName, by erasing leading and trailing white space */
411: if (!newName || !docName)
412: return;
413: ptr = orgName;
414: while (*ptr == ' ' && *ptr++ != EOS) ;
415: strcpy (tempname, ptr);
416: ptr = strchr (tempname, ' ');
417: if (ptr)
418: *ptr = EOS;
419:
1.16 ! cvs 420: if (IsW3Path (tempname) || doc == 0)
1.14 cvs 421: /* the name is complete */
422: strcpy (newName, tempname);
1.5 cvs 423: else
424: {
1.16 ! cvs 425: /* take into account the BASE element. */
! 426: length = MAX_LENGTH;
! 427: /* get the root element */
! 428: el = TtaGetMainRoot (doc);
! 429:
! 430: /* search the BASE element */
! 431: elType.ElSSchema = TtaGetDocumentSSchema (doc);
! 432: elType.ElTypeNum = HTML_EL_BASE;
! 433: el = TtaSearchTypedElement (elType, SearchInTree, el);
! 434: if (el)
1.14 cvs 435: {
1.16 ! cvs 436: /*
! 437: ** The document has a BASE element
! 438: ** Get the HREF attribute of the BASE Element
! 439: */
! 440: attrType.AttrSSchema = elType.ElSSchema;
! 441: attrType.AttrTypeNum = HTML_ATTR_HREF_;
! 442: attrHREF = TtaGetAttribute (el, attrType);
! 443: if (attrHREF)
1.14 cvs 444: {
1.16 ! cvs 445: /* Use the base path of the document */
! 446: TtaGiveTextAttributeValue (attrHREF, basename, &length);
! 447: /* base and orgName have to be separated by a DIR_SEP */
! 448: if (basename[strlen (basename) - 1] != DIR_SEP)
1.14 cvs 449: {
1.16 ! cvs 450: if (IsHTMLName (basename))
1.14 cvs 451: {
1.16 ! cvs 452: /* remove the document name from basename */
! 453: length = strlen (basename) - 1;
! 454: while (basename[length] != DIR_SEP)
! 455: basename[length--] = EOS;
1.14 cvs 456: }
1.16 ! cvs 457: else if (tempname[0] != DIR_SEP)
! 458: strcat (basename, DIR_STR);
1.14 cvs 459: }
460: }
461: else
462: basename[0] = EOS;
463: }
464: else
465: basename[0] = EOS;
466:
467: if (basename[0] == EOS)
468: {
469: /* there is no BASE element in that document. */
470: if (DocumentURLs[(int) doc])
471: {
472: basename_ptr = HTParse (DocumentURLs[(int) doc], "", PARSE_ALL);
473: basename_flag = TRUE;
474: }
475: else
476: {
477: basename_ptr = "";
478: basename_flag = FALSE;
479: }
480: }
481: else
482: {
483: basename_ptr = HTParse (basename, "", PARSE_ALL);
484: basename_flag = TRUE;
485: }
1.16 ! cvs 486:
! 487: if (tempname[0] == '/')
! 488: ptr = HTParse (tempname, basename_ptr, PARSE_ACCESS | PARSE_PUNCTUATION | PARSE_HOST);
! 489: else
! 490: ptr = HTParse (tempname, basename_ptr, PARSE_ALL);
1.14 cvs 491: if (basename_flag)
492: HT_FREE (basename_ptr);
493: if (ptr)
494: {
495: ptr = HTSimplify (&ptr);
496: strcpy (newName, ptr);
497: HT_FREE (ptr);
498: }
499: else
500: newName[0] = EOS;
1.5 cvs 501: }
502:
503: i = strlen (newName) - 1;
504: if (i > 0)
505: {
1.14 cvs 506: /* search now the document name */
507: ptr = strrchr (newName, DIR_SEP);
508: if (ptr)
509: ptr++;
510: if (ptr && *ptr != EOS)
511: strcpy (docName, ptr);
512: else
513: /* the docname was not comprised inside the URL, so let's */
514: /* assign a "noname.html" name :) */
515: strcpy (docName, "noname.html");
516:
517: /* remove DIR_SEP at the end of complete path */
1.5 cvs 518: if (newName[i] == DIR_SEP)
519: newName[i] = EOS;
520: }
1.3 cvs 521: }
522:
1.4 cvs 523: /*----------------------------------------------------------------------
1.9 cvs 524: IsSameHost
1.4 cvs 525: ----------------------------------------------------------------------*/
1.3 cvs 526: #ifdef __STDC__
527: boolean IsSameHost (char *url1, char *url2)
528: #else /* __STDC__ */
529: boolean IsSameHost (url1, url2)
530: char *path;
531: #endif /* __STDC__ */
532: {
1.5 cvs 533: char *basename_ptr1, *basename_ptr2;
534: boolean result;
1.3 cvs 535:
1.5 cvs 536: basename_ptr1 = HTParse (url1, "", PARSE_ACCESS | PARSE_HOST | PARSE_PUNCTUATION);
537: basename_ptr2 = HTParse (url2, "", PARSE_ACCESS | PARSE_HOST | PARSE_PUNCTUATION);
1.3 cvs 538:
1.5 cvs 539: if (strcmp (basename_ptr1, basename_ptr2))
1.8 cvs 540: result = FALSE;
1.5 cvs 541: else
1.8 cvs 542: result = TRUE;
1.3 cvs 543:
1.5 cvs 544: HT_FREE (basename_ptr1);
545: HT_FREE (basename_ptr2);
1.3 cvs 546:
1.5 cvs 547: return (result);
1.3 cvs 548: }
549:
550:
1.4 cvs 551: /*----------------------------------------------------------------------
1.9 cvs 552: AHTMakeRelativeURL
553: converts url into a relative url to base_url.
554: If succesful, returns the new URL, otherwise, it returns NULL.
555: The caller has to free the new URL.
1.4 cvs 556: ----------------------------------------------------------------------*/
1.3 cvs 557: #ifdef __STDC__
1.5 cvs 558: char *AHTMakeRelativeName (char *url, char *base_url)
1.3 cvs 559: #else /* __STDC__ */
1.5 cvs 560: char *AHTMakeRelativeName (url, base_url)
561: char url;
562: char base_url;
563:
1.3 cvs 564: #endif /* __STDC__ */
565: {
1.5 cvs 566: char *base_ptr, *url_ptr;
567: char *result;
568:
569: /* verify if we are in the same host */
1.3 cvs 570:
1.5 cvs 571: base_ptr = HTParse (base_url, "", PARSE_ACCESS | PARSE_HOST | PARSE_PUNCTUATION);
572: url_ptr = HTParse (url, "", PARSE_ACCESS | PARSE_HOST | PARSE_PUNCTUATION);
1.3 cvs 573:
1.5 cvs 574: if (!strcmp (base_ptr, url_ptr))
575: {
576: HT_FREE (base_ptr);
577: HT_FREE (url_ptr);
1.3 cvs 578:
1.5 cvs 579: /* Normalize the URLs */
1.3 cvs 580:
1.5 cvs 581: base_ptr = HTParse (base_url, "", PARSE_ALL);
582: url_ptr = HTParse (url, "", PARSE_ALL);
1.3 cvs 583:
1.5 cvs 584: /* Use libwww to make relative name */
1.3 cvs 585:
1.5 cvs 586: result = HTRelative (url_ptr, base_ptr);
587: HT_FREE (base_ptr);
588: HT_FREE (url_ptr);
589: }
590: else
591: result = (char *) NULL;
1.3 cvs 592:
1.5 cvs 593: return (result);
1.3 cvs 594: }
1.9 cvs 595:
596:
597:
Webmaster