Annotation of Amaya/amaya/AHTURLTools.c, revision 1.32
1.7 cvs 1: /*
2: *
3: * (c) COPYRIGHT MIT and INRIA, 1996.
4: * Please first read the full copyright statement in file COPYRIGHT.
5: *
6: */
1.9 cvs 7:
1.10 cvs 8: /*
9: * AHTURLTools.c: contains all the functions for testing, manipulating,
1.25 cvs 10: * and normalizing URLs. It also contains a local copy of the libWWW
11: * URL parsing functions.
1.10 cvs 12: *
13: * Authors: J. Kahan, I. Vatton
14: *
15: */
1.7 cvs 16:
1.15 cvs 17: #define THOT_EXPORT extern
1.3 cvs 18: #include "amaya.h"
19:
1.8 cvs 20: #include "init_f.h"
21: #include "AHTURLTools_f.h"
22:
1.24 cvs 23: #define MAX_PRINT_URL_LENGTH 50
/* Components of a URI as filled in by scan().  Each field is either NULL
   or points to a zero-terminated piece inside the buffer that was scanned;
   nothing is allocated for them. */
1.29 cvs 24: typedef struct _HTURI {
25: char * access; /* Now known as "scheme": the text before the first ':' */
26: char * host; /* text following a doubled directory separator, up to the next separator */
27: char * absolute; /* path following the root separator (the separator itself is not included) */
28: char * relative; /* remaining name when it does not start with a separator; NULL if empty */
29: char * fragment; /* text following the last '#' */
30: } HTURI;
1.24 cvs 31:
1.28 cvs 32:
/*----------------------------------------------------------------------
  ConvertToLowerCase
  Converts a zero-terminated string to lowercase, in place.
  A NULL string is silently ignored.
  ----------------------------------------------------------------------*/
#ifdef __STDC__
static void         ConvertToLowerCase (char *string)
#else  /* __STDC__ */
static void         ConvertToLowerCase (string)
char               *string;

#endif /* __STDC__ */
{
  char               *p;

  if (!string)
    return;

  /* tolower() is only defined for values representable as unsigned char
     (and EOF).  Plain char may be signed, so bytes above 0x7F must be
     converted first, otherwise the call is undefined behaviour. */
  for (p = string; *p != '\0'; p++)
    *p = (char) tolower ((unsigned char) *p);
}
1.22 cvs 53:
1.8 cvs 54: /*----------------------------------------------------------------------
1.11 cvs 55: ExplodeURL
1.8 cvs 56: ----------------------------------------------------------------------*/
57: #ifdef __STDC__
58: void ExplodeURL (char *url, char **proto, char **host, char **dir, char **file)
59: #else
60: void ExplodeURL (url, proto, host, dir, file)
61: char *url;
62: char **proto;
63: char **host;
64: char **dir;
65: char **file;
66:
67: #endif
68: {
1.9 cvs 69: char *curr, *temp;
1.32 ! cvs 70: char my_dir_sep;
! 71:
! 72: if (url && strchr (url, '/'))
! 73: {
! 74: my_dir_sep = '/';
! 75: } else {
! 76: my_dir_sep = DIR_SEP;
! 77: }
! 78:
1.8 cvs 79:
80: if ((url == NULL) || (proto == NULL) || (host == NULL) ||
81: (dir == NULL) || (file == NULL))
82: return;
83:
84: /* initialize every pointer */
85: *proto = *host = *dir = *file = NULL;
86:
87: /* skip any leading space */
88: while ((*url == SPACE) || (*url == TAB))
89: url++;
1.9 cvs 90: curr = url;
91: if (*curr == 0)
1.8 cvs 92: goto finished;
93:
94: /* go to the end of the URL */
1.9 cvs 95: while ((*curr != 0) && (*curr != SPACE) && (*curr != '\b') &&
96: (*curr != '\r') && (*curr != EOL))
97: curr++;
1.8 cvs 98:
99: /* mark the end of the chain */
1.9 cvs 100: *curr = EOS;
101: curr--;
102: if (curr <= url)
1.8 cvs 103: goto finished;
104:
105: /* search the next DIR_SEP indicating the beginning of the file name */
106: do
1.11 cvs 107: curr--;
1.32 ! cvs 108: while ((curr >= url) && (*curr != my_dir_sep));
1.11 cvs 109:
1.9 cvs 110: if (curr < url)
1.8 cvs 111: goto finished;
1.9 cvs 112: *file = curr + 1;
1.8 cvs 113:
114: /* mark the end of the dir */
1.9 cvs 115: *curr = EOS;
116: curr--;
117: if (curr < url)
1.8 cvs 118: goto finished;
119:
1.29 cvs 120: /* search for the DIR_STR indicating the host name start */
1.32 ! cvs 121: while ((curr > url) && ((*curr != my_dir_sep) || (*(curr + 1) != my_dir_sep)))
1.9 cvs 122: curr--;
1.8 cvs 123:
124: /* if we found it, separate the host name from the directory */
1.32 ! cvs 125: if ((*curr == DIR_SEP) && (*(curr + 1) == my_dir_sep))
1.8 cvs 126: {
1.9 cvs 127: *host = temp = curr + 2;
1.32 ! cvs 128: while ((*temp != 0) && (*temp != my_dir_sep))
1.8 cvs 129: temp++;
1.32 ! cvs 130: if (*temp == my_dir_sep)
1.8 cvs 131: {
132: *temp = EOS;
133: *dir = temp + 1;
134: }
135: }
136: else
1.11 cvs 137: *dir = curr;
138:
1.9 cvs 139: if (curr <= url)
1.8 cvs 140: goto finished;
141:
142: /* mark the end of the proto */
1.9 cvs 143: *curr = EOS;
144: curr--;
145: if (curr < url)
1.8 cvs 146: goto finished;
147:
1.32 ! cvs 148: if (*curr == ':')
1.8 cvs 149: {
1.9 cvs 150: *curr = EOS;
151: curr--;
1.8 cvs 152: }
153: else
154: goto finished;
1.11 cvs 155:
1.9 cvs 156: if (curr < url)
1.8 cvs 157: goto finished;
1.9 cvs 158: while ((curr > url) && (isalpha (*curr)))
159: curr--;
160: *proto = curr;
1.8 cvs 161:
162: finished:;
163:
164: #ifdef AMAYA_DEBUG
165: fprintf (stderr, "ExplodeURL(%s)\n\t", url);
166: if (*proto)
167: fprintf (stderr, "proto : %s, ", *proto);
168: if (*host)
169: fprintf (stderr, "host : %s, ", *host);
170: if (*dir)
171: fprintf (stderr, "dir : %s, ", *dir);
172: if (*file)
173: fprintf (stderr, "file : %s ", *file);
174: fprintf (stderr, "\n");
175: #endif
176:
177: }
1.3 cvs 178:
1.4 cvs 179: /*----------------------------------------------------------------------
1.9 cvs 180: IsHTMLName
181: returns TRUE if path points to an HTML resource.
1.4 cvs 182: ----------------------------------------------------------------------*/
1.3 cvs 183: #ifdef __STDC__
184: boolean IsHTMLName (char *path)
185: #else /* __STDC__ */
186: boolean IsHTMLName (path)
187: char *path;
188: #endif /* __STDC__ */
189: {
1.5 cvs 190: char temppath[MAX_LENGTH];
191: char suffix[MAX_LENGTH];
192: char nsuffix[MAX_LENGTH];
193: int i;
194:
195: if (!path)
1.13 cvs 196: return (FALSE);
1.5 cvs 197:
198: strcpy (temppath, path);
199: ExtractSuffix (temppath, suffix);
200:
201: /* Normalize the suffix */
202: i = 0;
203: while (suffix[i] != EOS)
1.13 cvs 204: {
1.25 cvs 205: nsuffix[i] = tolower (suffix[i]);
1.13 cvs 206: i++;
207: }
1.5 cvs 208: nsuffix[i] = EOS;
209: if ((strcmp (nsuffix, "html")) &&
210: (strcmp (nsuffix, "htm")) &&
211: (strcmp (nsuffix, "shtml")))
1.13 cvs 212: return (FALSE);
1.22 cvs 213: else if (!strcmp (nsuffix, "gz"))
1.13 cvs 214: {
215: /* take in account compressed files */
216: ExtractSuffix (temppath, suffix);
217: /* Normalize the suffix */
218: i = 0;
219: while (suffix[i] != EOS)
220: {
1.25 cvs 221: nsuffix[i] = tolower (suffix[i]);
1.13 cvs 222: i++;
223: }
224: nsuffix[i] = EOS;
225: if ((strcmp (nsuffix, "html")) &&
226: (strcmp (nsuffix, "htm")) &&
227: (strcmp (nsuffix, "shtml")))
228: return (FALSE);
229: else
230: return (TRUE);
231: }
232: else
233: return (TRUE);
1.3 cvs 234: }
235:
1.4 cvs 236: /*----------------------------------------------------------------------
1.9 cvs 237: IsImageName
238: returns TRUE if path points to an image resource.
1.4 cvs 239: ----------------------------------------------------------------------*/
1.3 cvs 240: #ifdef __STDC__
241: boolean IsImageName (char *path)
242: #else /* __STDC__ */
243: boolean IsImageName (path)
244: char *path;
245: #endif /* __STDC__ */
246: {
1.5 cvs 247: char temppath[MAX_LENGTH];
248: char suffix[MAX_LENGTH];
249: char nsuffix[MAX_LENGTH];
250: int i;
251:
252: if (!path)
1.13 cvs 253: return (FALSE);
1.5 cvs 254:
255: strcpy (temppath, path);
256: ExtractSuffix (temppath, suffix);
257:
258: /* Normalize the suffix */
259: i = 0;
260: while (suffix[i] != EOS)
1.13 cvs 261: {
1.25 cvs 262: nsuffix[i] = tolower (suffix[i]);
1.13 cvs 263: i++;
264: }
1.5 cvs 265: nsuffix[i] = EOS;
266: if ((strcmp (nsuffix, "gif")) && (strcmp (nsuffix, "xbm")) &&
267: (strcmp (nsuffix, "xpm")) && (strcmp (nsuffix, "jpg")) &&
268: (strcmp (nsuffix, "png")) && (strcmp (nsuffix, "au")))
1.13 cvs 269: return (FALSE);
270: return (TRUE);
1.3 cvs 271: }
272:
1.4 cvs 273: /*----------------------------------------------------------------------
1.9 cvs 274: IsTextName
1.4 cvs 275: ----------------------------------------------------------------------*/
1.3 cvs 276: #ifdef __STDC__
277: boolean IsTextName (char *path)
278: #else /* __STDC__ */
279: boolean IsTextName (path)
280: char *path;
281:
282: #endif /* __STDC__ */
283: {
1.5 cvs 284: char temppath[MAX_LENGTH];
285: char suffix[MAX_LENGTH];
286: char nsuffix[MAX_LENGTH];
287: int i;
288:
289: if (!path)
1.13 cvs 290: return (FALSE);
1.5 cvs 291:
292: strcpy (temppath, path);
293: ExtractSuffix (temppath, suffix);
294:
295: /* Normalize the suffix */
296: i = 0;
297: while (suffix[i] != EOS)
298: {
1.25 cvs 299: nsuffix[i] = tolower (suffix[i]);
1.5 cvs 300: i++;
301: }
302: nsuffix[i] = EOS;
303:
304: if ((strcmp (nsuffix, "gif")) && (strcmp (nsuffix, "xbm")) &&
305: (strcmp (nsuffix, "xpm")) && (strcmp (nsuffix, "jpg")) &&
306: (strcmp (nsuffix, "pdf")) && (strcmp (nsuffix, "png")) &&
1.22 cvs 307: (strcmp (nsuffix, "tgz")) && (strcmp (nsuffix, "tar")) &&
308: (strcmp (nsuffix, "xpg")) && (strcmp (nsuffix, "xpd")) &&
309: (strcmp (nsuffix, "ps")) && (strcmp (nsuffix, "au")))
1.13 cvs 310: return (TRUE);
1.22 cvs 311: else if (!strcmp (nsuffix, "gz"))
1.13 cvs 312: {
313: /* take in account compressed files */
314: ExtractSuffix (temppath, suffix);
315: /* Normalize the suffix */
316: i = 0;
317: while (suffix[i] != EOS)
318: {
1.25 cvs 319: nsuffix[i] = tolower (suffix[i]);
1.13 cvs 320: i++;
321: }
322: nsuffix[i] = EOS;
323: if ((!strcmp (nsuffix, "html")) ||
324: (!strcmp (nsuffix, "htm")) ||
325: (!strcmp (nsuffix, "shtml")))
326: return (TRUE);
327: else
328: return (FALSE);
329: }
330: else
331: return (FALSE);
1.3 cvs 332: }
333:
1.4 cvs 334: /*----------------------------------------------------------------------
1.9 cvs 335: IsHTTPPath
336: returns TRUE if path is in fact an http URL.
1.4 cvs 337: ----------------------------------------------------------------------*/
1.3 cvs 338: #ifdef __STDC__
339: boolean IsHTTPPath (char *path)
340: #else /* __STDC__ */
341: boolean IsHTTPPath (path)
342: char *path;
343: #endif /* __STDC__ */
344: {
1.5 cvs 345: if (!path)
346: return FALSE;
1.3 cvs 347:
1.5 cvs 348: if (strncmp (path, "http:", 5) != 0)
349: return FALSE;
350: return TRUE;
1.3 cvs 351: }
352:
1.4 cvs 353: /*----------------------------------------------------------------------
1.9 cvs 354: IsWithParameters
355: returns TRUE if url has a concatenated query string.
1.4 cvs 356: ----------------------------------------------------------------------*/
1.3 cvs 357: #ifdef __STDC__
1.9 cvs 358: boolean IsWithParameters (char *url)
1.3 cvs 359: #else /* __STDC__ */
1.9 cvs 360: boolean IsWithParameters (url)
361: char *url;
1.3 cvs 362: #endif /* __STDC__ */
363: {
1.5 cvs 364: int i;
1.3 cvs 365:
1.9 cvs 366: if ((!url) || (url[0] == EOS))
1.5 cvs 367: return FALSE;
1.3 cvs 368:
1.9 cvs 369: i = strlen (url) - 1;
370: while (i > 0 && url[i--] != '?')
1.5 cvs 371: if (i < 0)
372: return FALSE;
1.3 cvs 373:
1.5 cvs 374: /* There is a parameter */
375: return TRUE;
1.3 cvs 376: }
377:
1.4 cvs 378: /*----------------------------------------------------------------------
1.9 cvs 379: IsW3Path
380: returns TRUE if path is in fact a URL.
1.4 cvs 381: ----------------------------------------------------------------------*/
1.3 cvs 382: #ifdef __STDC__
383: boolean IsW3Path (char *path)
384: #else /* __STDC__ */
385: boolean IsW3Path (path)
386: char *path;
387: #endif /* __STDC__ */
388: {
1.5 cvs 389: if ((strncmp (path, "http:", 5)) && (strncmp (path, "ftp:", 4)) &&
390: (strncmp (path, "telnet:", 7)) && (strncmp (path, "wais:", 5)) &&
391: (strncmp (path, "news:", 5)) && (strncmp (path, "gopher:", 7)) &&
392: (strncmp (path, "mailto:", 7)) && (strncmp (path, "archie:", 7)))
393: return FALSE;
394: return TRUE;
1.3 cvs 395: }
396:
1.4 cvs 397: /*----------------------------------------------------------------------
1.9 cvs 398: IsValidProtocol
399: returns true if the url protocol is supported by Amaya.
1.4 cvs 400: ----------------------------------------------------------------------*/
1.3 cvs 401: #ifdef __STDC__
1.9 cvs 402: boolean IsValidProtocol (char *url)
1.3 cvs 403: #else /* __STDC__ */
1.9 cvs 404: boolean IsValidProtocol (url)
405: char *url;
1.3 cvs 406: #endif /* __STDC__ */
407: {
1.26 cvs 408: if (!strncmp (url, "http:", 5))
1.22 cvs 409: /* experimental */
1.26 cvs 410: /*** || !strncmp (url, "ftp:", 4)) ***/
1.24 cvs 411: /*** || !strncmp (path, "news:", 5)***/
1.8 cvs 412: return (TRUE);
1.5 cvs 413: else
1.8 cvs 414: return (FALSE);
1.3 cvs 415: }
416:
1.31 cvs 417:
418: /*----------------------------------------------------------------------
419: GetBaseURL
420: normalizes orgName according to a base associated with doc, and
421: following the standard URL format rules.
422: The function returns the base used to solve relative URL and SRC:
423: - the base of the document,
424: - or the document path (without document name).
425: ----------------------------------------------------------------------*/
426: #ifdef __STDC__
427: char *GetBaseURL (Document doc)
428: #else /* __STDC__ */
429: char *GetBaseURL (doc)
430: Document doc;
431: #endif /* __STDC__ */
432: {
433: Element el;
434: ElementType elType;
435: AttributeType attrType;
436: Attribute attr;
1.32 ! cvs 437: char my_dir_sep;
! 438: char *my_dir_str;
1.31 cvs 439: char *ptr, *basename;
440: int length;
441:
442: basename = TtaGetMemory (MAX_LENGTH);
443: strcpy (basename, DocumentURLs[doc]);
444: length = MAX_LENGTH -1;
445: /* get the root element */
446: el = TtaGetMainRoot (doc);
447: /* search the BASE element */
448: elType.ElSSchema = TtaGetDocumentSSchema (doc);
449: elType.ElTypeNum = HTML_EL_BASE;
450: el = TtaSearchTypedElement (elType, SearchInTree, el);
451: if (el)
452: {
453: /* The document has a BASE element -> Get the HREF attribute */
454: attrType.AttrSSchema = elType.ElSSchema;
455: attrType.AttrTypeNum = HTML_ATTR_HREF_;
456: attr = TtaGetAttribute (el, attrType);
457: if (attr)
458: {
459: /* Use the base path of the document */
460: TtaGiveTextAttributeValue (attr, basename, &length);
461: /* base and orgName have to be separated by a DIR_SEP */
462: length--;
1.32 ! cvs 463:
! 464: if (basename [0] != EOS && strchr (basename, '/'))
! 465: {
! 466: my_dir_str = "/";
! 467: my_dir_sep = '/';
! 468: }
! 469: else
! 470: {
! 471: my_dir_str = DIR_STR;
! 472: my_dir_sep = DIR_SEP;
! 473: }
! 474:
! 475:
! 476: if (basename[0] != EOS && basename[length] != my_dir_sep)
1.31 cvs 477: /* verify if the base has the form "protocol://server:port" */
478: {
479: ptr = AmayaParseUrl (basename, "", AMAYA_PARSE_ACCESS | AMAYA_PARSE_HOST |
480: AMAYA_PARSE_PUNCTUATION);
481: if (ptr && !strcmp (ptr, basename))
482: {
483: /* it has this form, we complete it by adding a DIR_STR */
1.32 ! cvs 484: strcat (basename, my_dir_str);
1.31 cvs 485: length++;
486: }
487: if (ptr)
488: TtaFreeMemory (ptr);
489: }
490: }
1.32 ! cvs 491: }
! 492: else
! 493: {
! 494: if (basename [0] != EOS && strchr (basename, '/'))
! 495: {
! 496: my_dir_str = "/";
! 497: my_dir_sep = '/';
! 498: }
! 499: else
! 500: {
! 501: my_dir_str = DIR_STR;
! 502: my_dir_sep = DIR_SEP;
! 503: }
! 504:
! 505: }
1.31 cvs 506:
507: /* Remove anything after the last DIR_SEP char. If no such char is found,
508: * then search for the first ":" char, hoping that what's before that is a
509: * protocol. If found, end the string there. If neither char is found,
510: * then discard the whole base element.
511: */
512: length = strlen (basename) - 1;
513: /* search for the last DIR_SEP char */
1.32 ! cvs 514: while (length >= 0 && basename[length] != my_dir_sep)
1.31 cvs 515: length--;
516: if (length >= 0)
517: /* found the last DIR_SEP char, end the string there */
518: basename[length + 1] = EOS;
519: else
520: /* search for the first PATH_STR char */
521: {
1.32 ! cvs 522: for (length = 0; basename[length] != ':' &&
1.31 cvs 523: basename[length] != EOS; length ++);
1.32 ! cvs 524: if (basename[length] == ':')
1.31 cvs 525: /* found, so end the string there */
526: basename[length + 1] = EOS;
527: else
528: /* not found, discard the base */
529: basename[0] = EOS;
530: }
531: return (basename);
532: }
533:
534:
1.4 cvs 535: /*----------------------------------------------------------------------
1.9 cvs 536: NormalizeURL
537: normalizes orgName according to a base associated with doc, and
538: following the standard URL format rules.
539: The function returns the new complete and normalized URL
1.12 cvs 540: or file name path (newName) and the name of the document (docName).
1.9 cvs 541: N.B. If the function can't find out what's the docName, it assigns
542: the name "noname.html".
1.4 cvs 543: ----------------------------------------------------------------------*/
1.3 cvs 544: #ifdef __STDC__
545: void NormalizeURL (char *orgName, Document doc, char *newName, char *docName)
546: #else /* __STDC__ */
547: void NormalizeURL (orgName, doc, newName, docName)
548: char *orgName;
549: Document doc;
550: char *newName;
551: char *docName;
552: #endif /* __STDC__ */
553: {
1.31 cvs 554: char *basename;
1.18 cvs 555: char tempOrgName[MAX_LENGTH];
1.5 cvs 556: char *ptr;
557: int length;
1.32 ! cvs 558: char my_dir_sep;
! 559: char *my_dir_str;
1.5 cvs 560:
561: if (!newName || !docName)
562: return;
1.18 cvs 563:
1.32 ! cvs 564: if (doc != 0)
! 565: basename = GetBaseURL (doc);
! 566: else
! 567: basename = (char *) NULL;
! 568:
! 569: if (strchr (orgName, '/') || (basename && strchr (basename, '/')))
! 570: {
! 571: my_dir_str = "/";
! 572: my_dir_sep = '/';
! 573: } else {
! 574: my_dir_str = DIR_STR;
! 575: my_dir_sep = DIR_SEP;
! 576: }
! 577:
! 578:
1.18 cvs 579: /*
1.31 cvs 580: * Clean orgName
581: * Make sure we have a complete orgName, without any leading or trailing
582: * white spaces, or trailinbg new lines
583: */
1.18 cvs 584:
1.5 cvs 585: ptr = orgName;
1.18 cvs 586: /* skip leading white space and new line characters */
1.19 cvs 587: while ((*ptr == ' ' || *ptr == EOL) && *ptr++ != EOS);
1.18 cvs 588: strcpy (tempOrgName, ptr);
589: /* clean trailing white space */
590: ptr = strchr (tempOrgName, ' ');
591: if (ptr)
592: *ptr = EOS;
593: /* clean trailing new lines */
1.19 cvs 594: ptr = strchr (tempOrgName, EOL);
1.5 cvs 595: if (ptr)
596: *ptr = EOS;
597:
1.18 cvs 598: /*
1.31 cvs 599: * Make orgName a complete URL
600: * If the URL does not include a protocol, then try to calculate
601: * one using the doc's base element (if it exists),
602: */
1.21 cvs 603: if (tempOrgName[0] == EOS)
604: {
605: newName[0] = EOS;
606: return;
607: }
608: else if (IsW3Path (tempOrgName))
609: {
610: /* the name is complete, go to the Sixth Step */
611: strcpy (newName, tempOrgName);
612: /* verify if the URL has the form "protocol://server:port" */
1.25 cvs 613: ptr = AmayaParseUrl (newName, "", AMAYA_PARSE_ACCESS | AMAYA_PARSE_HOST |
614: AMAYA_PARSE_PUNCTUATION);
1.21 cvs 615: if (ptr && !strcmp (ptr, newName))
616: {
1.29 cvs 617: /* it has this form, we complete it by adding a DIR_STR */
1.32 ! cvs 618: strcat (newName, my_dir_str);
1.21 cvs 619: }
620: if (ptr)
1.25 cvs 621: TtaFreeMemory (ptr);
1.21 cvs 622: }
623: else if ( doc == 0)
1.19 cvs 624: /* the name is complete, go to the Sixth Step */
1.18 cvs 625: strcpy (newName, tempOrgName);
1.5 cvs 626: else
627: {
1.22 cvs 628:
1.31 cvs 629: /* Calculate the absolute URL, using the base or document URL */
1.32 ! cvs 630:
1.25 cvs 631: ptr = AmayaParseUrl (tempOrgName, basename, AMAYA_PARSE_ALL);
1.31 cvs 632: TtaFreeMemory (basename);
1.14 cvs 633: if (ptr)
634: {
1.29 cvs 635: SimplifyUrl (&ptr);
1.14 cvs 636: strcpy (newName, ptr);
1.25 cvs 637: TtaFreeMemory (ptr);
1.14 cvs 638: }
639: else
1.18 cvs 640: newName[0] = EOS;
1.5 cvs 641: }
642:
1.18 cvs 643: /*
1.31 cvs 644: * Prepare the docname that will refer to this ressource in the
645: * .amaya directory. If the new URL finishes on DIR_SEP, then use
646: * noname.html as a default ressource name
1.18 cvs 647: */
1.19 cvs 648: if (newName[0] != EOS)
1.5 cvs 649: {
1.19 cvs 650: length = strlen (newName) - 1;
1.32 ! cvs 651: if (newName[length] == my_dir_sep)
1.18 cvs 652: {
653: /* docname was not comprised inside the URL, so let's */
654: /* assign the default ressource name */
655: strcpy (docName, "noname.html");
656: /* remove DIR_SEP at the end of complete path */
1.23 cvs 657: /* newName[length] = EOS; */
1.18 cvs 658: }
1.14 cvs 659: else
1.18 cvs 660: {
661: /* docname is comprised inside the URL */
1.32 ! cvs 662: while (length >= 0 && newName[length] != my_dir_sep)
1.18 cvs 663: length--;
664: if (length < 0)
665: strcpy (docName, newName);
666: else
667: strcpy (docName, &newName[length+1]);
668: }
1.19 cvs 669:
1.5 cvs 670: }
1.18 cvs 671: else
672: docName[0] = EOS;
673: }
1.3 cvs 674:
1.4 cvs 675: /*----------------------------------------------------------------------
1.9 cvs 676: IsSameHost
1.4 cvs 677: ----------------------------------------------------------------------*/
1.3 cvs 678: #ifdef __STDC__
679: boolean IsSameHost (char *url1, char *url2)
680: #else /* __STDC__ */
681: boolean IsSameHost (url1, url2)
682: char *path;
683: #endif /* __STDC__ */
684: {
1.5 cvs 685: char *basename_ptr1, *basename_ptr2;
686: boolean result;
1.3 cvs 687:
1.25 cvs 688: basename_ptr1 = AmayaParseUrl (url1, "", AMAYA_PARSE_ACCESS | AMAYA_PARSE_HOST | AMAYA_PARSE_PUNCTUATION);
689: basename_ptr2 = AmayaParseUrl (url2, "", AMAYA_PARSE_ACCESS | AMAYA_PARSE_HOST | AMAYA_PARSE_PUNCTUATION);
1.3 cvs 690:
1.5 cvs 691: if (strcmp (basename_ptr1, basename_ptr2))
1.8 cvs 692: result = FALSE;
1.5 cvs 693: else
1.8 cvs 694: result = TRUE;
1.3 cvs 695:
1.25 cvs 696: TtaFreeMemory (basename_ptr1);
697: TtaFreeMemory (basename_ptr2);
1.5 cvs 698: return (result);
1.3 cvs 699: }
700:
701:
1.4 cvs 702: /*----------------------------------------------------------------------
1.22 cvs 703: HasKnownFileSuffix
704: returns TRUE if path points to a file ending with a suffix.
705: ----------------------------------------------------------------------*/
706: #ifdef __STDC__
707: boolean HasKnownFileSuffix (char *path)
708: #else /* __STDC__ */
709: boolean HasKnownFileSuffix (path)
710: char *path;
711: #endif /* __STDC__ */
712: {
1.29 cvs 713: char *root;
714: char temppath[MAX_LENGTH];
715: char suffix[MAX_LENGTH];
1.22 cvs 716:
1.24 cvs 717: if (!path || path[0] == EOS || path[strlen(path)] == DIR_SEP)
1.22 cvs 718: return (FALSE);
719:
1.29 cvs 720: root = AmayaParseUrl(path, "", AMAYA_PARSE_PATH | AMAYA_PARSE_PUNCTUATION);
1.22 cvs 721:
722: if (root)
723: {
724: strcpy (temppath, root);
1.25 cvs 725: TtaFreeMemory (root);
1.22 cvs 726: /* Get the suffix */
727: ExtractSuffix (temppath, suffix);
728:
729: if( suffix[0] == EOS)
730: /* no suffix */
731: return (FALSE);
732:
733: /* Normalize the suffix */
734: ConvertToLowerCase (suffix);
735:
1.23 cvs 736: if (!strcmp (suffix, "gz"))
1.22 cvs 737: /* skip the compressed suffix */
738: {
739: ExtractSuffix (temppath, suffix);
740: if(suffix[0] == EOS)
741: /* no suffix */
742: return (FALSE);
743: /* Normalize the suffix */
744: ConvertToLowerCase (suffix);
745: }
746:
747: if ((strcmp (suffix, "gif")) && (strcmp (suffix, "xbm")) &&
748: (strcmp (suffix, "xpm")) && (strcmp (suffix, "jpg")) &&
749: (strcmp (suffix, "pdf")) && (strcmp (suffix, "png")) &&
750: (strcmp (suffix, "tgz")) && (strcmp (suffix, "xpg")) &&
751: (strcmp (suffix, "xpd")) && (strcmp (suffix, "ps")) &&
752: (strcmp (suffix, "au")) && (strcmp (suffix, "html")) &&
753: (strcmp (suffix, "htm")) && (strcmp (suffix, "shtml")) &&
754: (strcmp (suffix, "txt")) && (strcmp (suffix, "css")) &&
755: (strcmp (suffix, "eps")))
756: return (FALSE);
757: else
758: return (TRUE);
759: }
760: else
761: return (FALSE);
762: }
763:
764:
765: /*----------------------------------------------------------------------
1.24 cvs 766: ChopURL
767: Gives back a URL no longer than MAX_PRINT_URL_LENGTH chars (outputURL).
768: If inputURL is bigger than that size, outputURL receives
769: MAX_PRINT_URL_LENGTH / 2 chars from the beginning of inputURL, "...",
770: and MAX_PRINT_URL_LENGTH / 2 chars from the end of inputURL.
771: If inputURL is not longer than MAX_PRINT_URL_LENGTH chars, it gets
772: copied into outputURL.
773: N.B.: outputURL must point to a memory block of MAX_PRINT_URL_LENGTH
774: chars.
775: ----------------------------------------------------------------------*/
776: #ifdef __STDC__
777: void ChopURL (char *outputURL, char *inputURL)
778: #else
779: void ChopURL (outputURL, inputURL)
780: char *outputURL;
781: char *inputURL;
782: #endif
1.22 cvs 783:
1.24 cvs 784: {
785: int len;
1.9 cvs 786:
1.24 cvs 787: len = strlen (inputURL);
788: if (len <= MAX_PRINT_URL_LENGTH)
1.29 cvs 789: strcpy (outputURL, inputURL);
1.24 cvs 790: else
791: /* make a truncated urlName on the status window */
792: {
793: strncpy (outputURL, inputURL, MAX_PRINT_URL_LENGTH / 2);
794: outputURL [MAX_PRINT_URL_LENGTH / 2] = EOS;
795: strcat (outputURL, "...");
796: strcat (outputURL, &(inputURL[len - MAX_PRINT_URL_LENGTH / 2 ]));
797: }
1.25 cvs 798: }
799:
800:
801: /*----------------------------------------------------------------------
802: scan
803: Scan a filename for its consituents
804: -----------------------------------
805:
806: On entry,
807: name points to a document name which may be incomplete.
808: On exit,
809: absolute or relative may be nonzero (but not both).
810: host, fragment and access may be nonzero if they were specified.
811: Any which are nonzero point to zero terminated strings.
812: ----------------------------------------------------------------------*/
813: #ifdef __STDC__
814: static void scan (char * name, HTURI * parts)
815: #else /* __STDC__ */
816: static void scan (name, parts)
817: char *name;
818: HTURI *parts;
819:
820: #endif /* __STDC__ */
821: {
1.28 cvs 822: char * p;
823: char * after_access = name;
1.25 cvs 824:
1.32 ! cvs 825: char my_dir_sep;
! 826: char my_path_sep;
! 827:
! 828: if (name && strchr (name, '/')) {
! 829: my_dir_sep = '/';
! 830: my_path_sep = ':';
! 831: } else {
! 832: my_dir_sep = DIR_SEP;
! 833: my_path_sep = ':';
! 834: }
! 835:
! 836:
1.28 cvs 837: memset(parts, '\0', sizeof(HTURI));
838: /* Look for fragment identifier */
839: if ((p = strrchr(name, '#')) != NULL)
840: {
841: *p++ = '\0';
842: parts->fragment = p;
1.25 cvs 843: }
844:
1.28 cvs 845: for (p=name; *p; p++)
846: {
1.32 ! cvs 847: if (*p==my_dir_sep || *p=='#' || *p=='?')
1.28 cvs 848: break;
1.32 ! cvs 849: if (*p==':')
1.28 cvs 850: {
851: *p = 0;
852: parts->access = after_access; /* Scheme has been specified */
853:
854: /* The combination of gcc, the "-O" flag and the HP platform is
855: unhealthy. The following three lines is a quick & dirty fix, but is
856: not recommended. Rather, turn off "-O". */
857:
858: /* after_access = p;*/
859: /* while (*after_access == 0)*/
860: /* after_access++;*/
861: after_access = p+1;
862: if (0==strcasecmp("URL", parts->access))
863: /* Ignore IETF's URL: pre-prefix */
864: parts->access = NULL;
865: else
1.25 cvs 866: break;
867: }
868: }
869:
870: p = after_access;
1.32 ! cvs 871: if (*p==my_dir_sep)
1.28 cvs 872: {
1.32 ! cvs 873: if (p[1]==my_dir_sep)
1.28 cvs 874: {
1.25 cvs 875: parts->host = p+2; /* host has been specified */
1.28 cvs 876: *p = 0; /* Terminate access */
877: /* look for end of host name if any */
1.32 ! cvs 878: p = strchr(parts->host,my_dir_sep);
1.28 cvs 879: if (p)
880: {
1.25 cvs 881: *p=0; /* Terminate host */
882: parts->absolute = p+1; /* Root has been found */
1.28 cvs 883: }
884: }
885: else
886: /* Root found but no host */
887: parts->absolute = p+1;
888: }
889: else
890: {
1.25 cvs 891: parts->relative = (*after_access) ? after_access : 0; /* zero for "" */
1.28 cvs 892: }
1.25 cvs 893: }
894:
895:
896: /*----------------------------------------------------------------------
1.28 cvs 897: AmayaParseUrl: parse a Name relative to another name
898:
899: This returns those parts of a name which are given (and requested)
900: substituting bits from the related name where necessary.
1.25 cvs 901:
1.28 cvs 902: On entry,
1.25 cvs 903: aName A filename given
904: relatedName A name relative to which aName is to be parsed. Give
905: it an empty string if aName is absolute.
906: wanted A mask for the bits which are wanted.
907:
1.28 cvs 908: On exit,
1.25 cvs 909: returns A pointer to a malloc'd string which MUST BE FREED
910: ----------------------------------------------------------------------*/
911: #ifdef __STDC__
1.28 cvs 912: char *AmayaParseUrl (char *aName, char *relatedName, int wanted)
1.25 cvs 913: #else /* __STDC__ */
1.28 cvs 914: char *AmayaParseUrl (aName, relatedName, wanted)
915: char *aName;
916: char *relatedName;
917: int wanted;
1.25 cvs 918:
919: #endif /* __STDC__ */
920: {
1.29 cvs 921: char *return_value;
922: char result[MAX_LENGTH];
923: char name[MAX_LENGTH];
924: char rel[MAX_LENGTH];
925: char *p, *access;
926: HTURI given, related;
927: int len;
1.32 ! cvs 928: char my_dir_sep;
! 929: char *my_dir_str;
! 930:
! 931: if (strchr (aName, '/') || strchr (relatedName, '/') )
! 932: {
! 933: my_dir_str = "/";
! 934: my_dir_sep = '/';
! 935: }
! 936: else
! 937: {
! 938: my_dir_str = DIR_STR;
! 939: my_dir_sep = DIR_SEP;
! 940: }
! 941:
! 942:
1.29 cvs 943: /* Make working copies of input strings to cut up: */
944: return_value = NULL;
945: result[0] = 0; /* Clear string */
946: strcpy (name, aName);
947: if (relatedName != NULL)
948: strcpy (rel, relatedName);
949: else
950: relatedName[0] = EOS;
951:
952: scan (name, &given);
953: scan (rel, &related);
954: access = given.access ? given.access : related.access;
955: if (wanted & AMAYA_PARSE_ACCESS)
956: if (access)
957: {
958: strcat (result, access);
959: if(wanted & AMAYA_PARSE_PUNCTUATION)
1.32 ! cvs 960: strcat (result, ":");
1.29 cvs 961: }
962:
963: if (given.access && related.access)
964: /* If different, inherit nothing. */
965: if (strcmp (given.access, related.access) != 0)
966: {
967: related.host = 0;
968: related.absolute = 0;
969: related.relative = 0;
970: related.fragment = 0;
971: }
972:
973: if (wanted & AMAYA_PARSE_HOST)
974: if(given.host || related.host)
975: {
976: if(wanted & AMAYA_PARSE_PUNCTUATION)
977: strcat (result, "//");
978: strcat (result, given.host ? given.host : related.host);
979: }
980:
981: if (given.host && related.host)
982: /* If different hosts, inherit no path. */
983: if (strcmp(given.host, related.host) != 0)
984: {
985: related.absolute = 0;
986: related.relative = 0;
987: related.fragment = 0;
988: }
989:
990: if (wanted & AMAYA_PARSE_PATH)
991: {
992: if (given.absolute)
993: {
994: /* All is given */
995: if (wanted & AMAYA_PARSE_PUNCTUATION)
1.32 ! cvs 996: strcat (result, my_dir_str);
1.29 cvs 997: strcat (result, given.absolute);
1.25 cvs 998: }
1.29 cvs 999: else if (related.absolute)
1000: {
1001: /* Adopt path not name */
1.32 ! cvs 1002: strcat (result, my_dir_str);
1.29 cvs 1003: strcat (result, related.absolute);
1004: if (given.relative)
1005: {
1006: /* Search part? */
1007: p = strchr (result, '?');
1008: if (!p)
1009: p=result+strlen(result)-1;
1.32 ! cvs 1010: for (; *p!=my_dir_sep; p--); /* last / */
1.29 cvs 1011: /* Remove filename */
1012: p[1]=0;
1013: /* Add given one */
1014: strcat (result, given.relative);
1015: /*SimplifyUrl (&result);*/
1.25 cvs 1016: }
1017: }
1.29 cvs 1018: else if (given.relative)
1019: /* what we've got */
1020: strcat (result, given.relative);
1021: else if (related.relative)
1022: strcat (result, related.relative);
1023: else
1024: /* No inheritance */
1.32 ! cvs 1025: strcat (result, my_dir_str);
1.25 cvs 1026: }
1.29 cvs 1027:
1028: if (wanted & AMAYA_PARSE_ANCHOR)
1029: if (given.fragment || related.fragment)
1030: {
1031: if (given.absolute && given.fragment)
1032: {
1033: /*Fixes for relURLs...*/
1034: if (wanted & AMAYA_PARSE_PUNCTUATION)
1035: strcat (result, "#");
1036: strcat (result, given.fragment);
1037: }
1038: else if (!(given.absolute) && !(given.fragment))
1039: strcat (result, "");
1040: else
1041: {
1042: if (wanted & AMAYA_PARSE_PUNCTUATION)
1043: strcat (result, "#");
1044: strcat (result, given.fragment ? given.fragment : related.fragment);
1045: }
1046: }
1047: len = strlen (result);
1048: if ((return_value = (char *) TtaGetMemory (len + 1)) != NULL)
1049: strcpy (return_value, result);
1050: return (return_value); /* exactly the right length */
1.25 cvs 1051: }
1052:
1053: /*----------------------------------------------------------------------
1054: HTCanon
1055: Canonicalizes the URL in the following manner starting from the host
1056: pointer:
1057:
1058: 1) The host name is converted to lowercase
1059: 2) Chop off port if `:80' (http), `:70' (gopher), or `:21' (ftp)
1060:
1061: Return: OK The position of the current path part of the URL
1062: which might be the old one or a new one.
1063:
1064: ----------------------------------------------------------------------*/
1065: #ifdef __STDC__
1.28 cvs 1066: static char *HTCanon (char ** filename, char * host)
1.25 cvs 1067: #else /* __STDC__ */
1.28 cvs 1068: static char *HTCanon (filename, host)
1069: char **filename;
1070: char *host;
1.25 cvs 1071: #endif /* __STDC__ */
1072: {
1073: char *newname = NULL;
1074: char *port;
1075: char *strptr;
1076: char *path;
1077: char *access = host-3;
1078:
1.32 ! cvs 1079: char my_dir_sep;
! 1080: char *my_dir_str;
! 1081:
! 1082:
! 1083: if (*filename && strchr (*filename, '/')) {
! 1084: my_dir_str = "/";
! 1085: my_dir_sep = '/';
! 1086: }
! 1087: else {
! 1088: my_dir_str = DIR_STR;
! 1089: my_dir_sep = DIR_SEP;
! 1090: }
! 1091:
! 1092: while (access>*filename && *(access-1)!= my_dir_sep) /* Find access method */
1.25 cvs 1093: access--;
1.32 ! cvs 1094: if ((path = strchr(host, my_dir_sep)) == NULL) /* Find path */
1.25 cvs 1095: path = host + strlen(host);
1096: if ((strptr = strchr(host, '@')) != NULL && strptr<path) /* UserId */
1097: host = strptr;
1.32 ! cvs 1098: if ((port = strchr(host, ':')) != NULL && port>path) /* Port number */
1.25 cvs 1099: port = NULL;
1100:
1101: strptr = host; /* Convert to lower-case */
1102: while (strptr<path) {
1103: *strptr = tolower(*strptr);
1104: strptr++;
1105: }
1106:
1107: /* Does the URL contain a full domain name? This also works for a
1108: numerical host name. The domain name is already made lower-case
1109: and without a trailing dot. */
1110: {
1111: char *dot = port ? port : path;
1112: if (dot > *filename && *--dot=='.') {
1113: char *orig=dot, *dest=dot+1;
1114: while((*orig++ = *dest++));
1115: if (port) port--;
1116: path--;
1117: }
1118: }
1119: /* Chop off port if `:', `:80' (http), `:70' (gopher), or `:21' (ftp) */
1120: if (port) {
1.32 ! cvs 1121: if (!*(port+1) || *(port+1)==my_dir_sep) {
1.25 cvs 1122: if (!newname) {
1123: char *orig=port, *dest=port+1;
1124: while((*orig++ = *dest++));
1125: }
1126: } else if ((!strncmp(access, "http", 4) &&
1.32 ! cvs 1127: (*(port+1)=='8'&&*(port+2)=='0'&&(*(port+3)==my_dir_sep||!*(port+3)))) ||
1.25 cvs 1128: (!strncmp(access, "gopher", 6) &&
1.32 ! cvs 1129: (*(port+1)=='7'&&*(port+2)=='0'&&(*(port+3)==my_dir_sep||!*(port+3)))) ||
1.25 cvs 1130: (!strncmp(access, "ftp", 3) &&
1.32 ! cvs 1131: (*(port+1)=='2'&&*(port+2)=='1'&&(*(port+3)==my_dir_sep||!*(port+3))))) {
1.25 cvs 1132: if (!newname) {
1133: char *orig=port, *dest=port+3;
1134: while((*orig++ = *dest++));
1.28 cvs 1135: /* Update path position, Henry Minsky */
1136: path -= 3;
1.25 cvs 1137: }
1138: } else if (newname)
1139: strncat(newname, port, (int) (path-port));
1140: }
1141:
1142: if (newname) {
1143: char *newpath = newname+strlen(newname);
1144: strcat(newname, path);
1145: path = newpath;
1.28 cvs 1146: /* Free old copy */
1147: TtaFreeMemory(*filename);
1.25 cvs 1148: *filename = newname;
1149: }
1150: return path;
1151: }
1152:
1153:
1154: /*----------------------------------------------------------------------
1.29 cvs 1155: SimplifyUrl: simplify a URI
1.32 ! cvs 1156: A URI is allowed to contain the sequence xxx/../ which may be
! 1157: replaced by "" , and the sequence "/./" which may be replaced by DIR_STR.
1.28 cvs 1158: Simplification helps us recognize duplicate URIs.
1.25 cvs 1159:
1.28 cvs 1160: Thus, /etc/junk/../fred becomes /etc/fred
1161: /etc/junk/./fred becomes /etc/junk/fred
1.25 cvs 1162:
1.28 cvs 1163: but we should NOT change
1164: http://fred.xxx.edu/../..
1.25 cvs 1165:
1166: or ../../albert.html
1167:
1.28 cvs 1168: In order to avoid empty URLs the following URLs become:
1.25 cvs 1169:
1170: /fred/.. becomes /fred/..
1171: /fred/././.. becomes /fred/..
1172: /fred/.././junk/.././ becomes /fred/..
1173:
1.28 cvs 1174: If more than one set of `://' is found (several proxies in cascade) then
1175: only the part after the last `://' is simplified.
1.25 cvs 1176:
1.28 cvs 1177: Returns: A string which might be the old one or a new one.
1.25 cvs 1178: ----------------------------------------------------------------------*/
1179: #ifdef __STDC__
1.29 cvs 1180: void SimplifyUrl (char ** url)
1.25 cvs 1181: #else /* __STDC__ */
1.29 cvs 1182: void SimplifyUrl (url)
1.28 cvs 1183: char **url;
1.25 cvs 1184: #endif /* __STDC__ */
1185: {
1.28 cvs 1186: char *path, *p;
1187: char *newptr, *access;
1188: char *orig, *dest, *end;
1189:
1.32 ! cvs 1190: char my_dir_sep;
! 1191: char *my_dir_str;
! 1192:
! 1193:
1.28 cvs 1194: if (!url || !*url)
1195: return;
1196:
1.32 ! cvs 1197: if (strchr (*url, '/'))
! 1198: {
! 1199: my_dir_str = "/";
! 1200: my_dir_sep = '/';
! 1201: }
! 1202: else
! 1203: {
! 1204: my_dir_str = DIR_STR;
! 1205: my_dir_sep = DIR_SEP;
! 1206: }
! 1207:
! 1208:
1.28 cvs 1209: /* Find any scheme name */
1210: if ((path = strstr(*url, "://")) != NULL)
1211: { /* Find host name */
1212: access = *url;
1213: while (access<path && (*access=tolower(*access)))
1214: access++;
1215: path += 3;
1216: while ((newptr = strstr(path, "://")) != NULL)
1217: /* For proxies */
1218: path = newptr+3;
1219: /* We have a host name */
1220: path = HTCanon(url, path);
1.25 cvs 1221: }
1.28 cvs 1222: else if ((path = strstr(*url, ":/")) != NULL)
1223: path += 2;
1224: else
1225: path = *url;
1.25 cvs 1226:
1.32 ! cvs 1227: if (*path == my_dir_sep && *(path+1)==my_dir_sep)
1.28 cvs 1228: /* Some URLs start //<foo> */
1229: path += 1;
1230: else if (!strncmp(path, "news:", 5))
1231: {
1232: newptr = strchr(path+5, '@');
1233: if (!newptr)
1234: newptr = path + 5;
1235: while (*newptr)
1236: {
1237: /* Make group or host lower case */
1238: *newptr = tolower (*newptr);
1239: newptr++;
1.25 cvs 1240: }
1.28 cvs 1241: /* Doesn't need to do any more */
1242: return;
1.25 cvs 1243: }
1.28 cvs 1244:
1245: if ((p = path))
1246: {
1247: if (!((end = strchr (path, ';')) || (end = strchr (path, '?')) ||
1248: (end = strchr (path, '#'))))
1249: end = path + strlen (path);
1250:
1251: /* Parse string second time to simplify */
1252: p = path;
1253: while (p < end)
1254: {
1.32 ! cvs 1255: if (*p==my_dir_sep)
1.28 cvs 1256: {
1.32 ! cvs 1257: if (p > *url && *(p+1) == '.' && (*(p+2) == my_dir_sep || !*(p+2)))
1.28 cvs 1258: {
1259: orig = p + 1;
1.32 ! cvs 1260: dest = (*(p+2)!=my_dir_sep) ? p+2 : p+3;
1.28 cvs 1261: while ((*orig++ = *dest++)); /* Remove a slash and a dot */
1262: end = orig - 1;
1263: }
1.32 ! cvs 1264: else if (*(p+1)=='.' && *(p+2)=='.' && (*(p+3)==my_dir_sep || !*(p+3)))
1.28 cvs 1265: {
1266: newptr = p;
1.32 ! cvs 1267: while (newptr>path && *--newptr!=my_dir_sep); /* prev slash */
1.28 cvs 1268: if (strncmp(newptr, "/../", 4))
1269: {
1270: orig = newptr + 1;
1.32 ! cvs 1271: dest = (*(p+3)!=my_dir_sep) ? p+3 : p+4;
1.28 cvs 1272: while ((*orig++ = *dest++)); /* Remove /xxx/.. */
1273: end = orig-1;
1274: /* Start again with prev slash */
1275: p = newptr;
1.25 cvs 1276: }
1.28 cvs 1277: else
1.25 cvs 1278: p++;
1.28 cvs 1279: }
1.32 ! cvs 1280: else if (*(p+1) == my_dir_sep)
1.28 cvs 1281: {
1.32 ! cvs 1282: while (*(p+1) == my_dir_sep)
1.28 cvs 1283: {
1284: orig = p;
1285: dest = p + 1;
1286: while ((*orig++ = *dest++)); /* Remove multiple /'s */
1287: end = orig-1;
1288: }
1289: }
1290: else
1.25 cvs 1291: p++;
1.28 cvs 1292: }
1293: else
1294: p++;
1.25 cvs 1295: }
1296: }
1.28 cvs 1297: return;
1298: }
1299:
1300:
1301: /*----------------------------------------------------------------------
1302: NormalizeFile normalizes local names.
1303: Return TRUE if target and src differ.
1304: ----------------------------------------------------------------------*/
1305: #ifdef __STDC__
1306: boolean NormalizeFile (char *src, char *target)
1307: #else
1308: boolean NormalizeFile (src, target)
1309: char *src;
1310: char *target;
1311:
1312: #endif
1313: {
1314: char *s;
1315: boolean change;
1316:
1317: change = FALSE;
1318: if (src[0] == '~')
1319: {
1320: /* replace ~ */
1321: s = (char *) TtaGetEnvString ("HOME");
1322: strcpy (target, s);
1323: strcat (target, &src[1]);
1324: change = TRUE;
1325: }
1326: else if (strncmp (src, "file:", 5) == 0)
1327: {
1328: /* remove the prefix file: */
1329: if (src[5] == EOS)
1330: strcpy (target, DIR_STR);
1331: else if (src[0] == '~')
1332: {
1333: /* replace ~ */
1334: s = (char *) TtaGetEnvString ("HOME");
1335: strcpy (target, s);
1336: strcat (target, &src[5]);
1337: }
1338: else
1339: strcpy (target, &src[5]);
1340: change = TRUE;
1341: }
1342: else
1343: strcpy (target, src);
1344:
1345: /* remove /../ and /./ */
1.29 cvs 1346: SimplifyUrl (&target);
1.28 cvs 1347: return (change);
1.25 cvs 1348: }
1349:
1.28 cvs 1350:
1.25 cvs 1351: /*----------------------------------------------------------------------
1.31 cvs 1352: MakeRelativeURL: make relative name
1.25 cvs 1353:
1.28 cvs 1354: This function creates and returns a string which gives an expression of
1355: one address as related to another. Where there is no relation, an absolute
1356: address is retured.
1.25 cvs 1357:
1.28 cvs 1358: On entry,
1.25 cvs 1359: Both names must be absolute, fully qualified names of nodes
1360: (no fragment bits)
1361:
1.28 cvs 1362: On exit,
1.25 cvs 1363: The return result points to a newly allocated name which, if
1364: parsed by AmayaParseUrl relative to relatedName, will yield aName.
1365: The caller is responsible for freeing the resulting name later.
1366: ----------------------------------------------------------------------*/
1367: #ifdef __STDC__
1.31 cvs 1368: char *MakeRelativeURL (char *aName, char *relatedName)
1.25 cvs 1369: #else /* __STDC__ */
1.31 cvs 1370: char *MakeRelativeURL (aName, relatedName)
1.28 cvs 1371: char *aName;
1372: char *relatedName;
1.25 cvs 1373: #endif /* __STDC__ */
1374: {
1.29 cvs 1375: char *return_value;
1376: char result[MAX_LENGTH];
1377: char *p;
1378: char *q = relatedName;
1379: char *after_access;
1380: char *last_slash = NULL;
1381: int slashes, levels, len;
1382:
1383: if (aName == NULL || relatedName == NULL)
1384: return (NULL);
1385:
1386: slashes = 0;
1387: after_access = NULL;
1388: p = aName;
1389: q = relatedName;
1390: for (; *p && (*p == *q); p++, q++)
1.27 cvs 1391: {
1392: /* Find extent of match */
1.32 ! cvs 1393: if (*p == ':')
1.29 cvs 1394: after_access = p + 1;
1.28 cvs 1395: if (*p == DIR_SEP)
1.27 cvs 1396: {
1.29 cvs 1397: /* memorize the last slash position and count them */
1.27 cvs 1398: last_slash = p;
1399: slashes++;
1.25 cvs 1400: }
1401: }
1402:
1.31 cvs 1403: /* q, p point to the first non-matching character or zero */
1404: if (*q == EOS)
1405: {
1406: /* New name is a subset of the related name */
1407: /* exactly the right length */
1408: len = strlen (p);
1409: if ((return_value = (char *) TtaGetMemory (len + 1)) != NULL)
1410: strcpy (return_value, p);
1411: }
1412: else if ((slashes < 2 && after_access == NULL)
1413: || (slashes < 3 && after_access != NULL))
1414: {
1415: /* Two names whitout common path */
1416: /* exactly the right length */
1417: len = strlen (aName);
1418: if ((return_value = (char *) TtaGetMemory (len + 1)) != NULL)
1419: strcpy (return_value, aName);
1420: }
1421: else
1422: {
1423: /* Some path in common */
1424: if (slashes == 3 && strncmp (aName, "http:", 5) == 0)
1425: /* just the same server */
1426: strcpy (result, last_slash);
1427: else
1428: {
1429: levels= 0;
1430: for (; *q && (*q != '#'); q++)
1431: if (*q == DIR_SEP)
1432: levels++;
1433:
1434: result[0] = 0;
1435: for (;levels; levels--)
1436: strcat (result, "../");
1437: strcat (result, last_slash+1);
1438: }
1439:
1440: /* exactly the right length */
1441: len = strlen (result);
1442: if ((return_value = (char *) TtaGetMemory (len + 1)) != NULL)
1443: strcpy (return_value, result);
1.25 cvs 1444: }
1.29 cvs 1445: return (return_value);
1.24 cvs 1446: }
Webmaster