Annotation of Amaya/amaya/AHTURLTools.c, revision 1.120
1.7 cvs 1: /*
2: *
1.110 cvs 3: * (c) COPYRIGHT MIT and INRIA, 1996-2001
1.7 cvs 4: * Please first read the full copyright statement in file COPYRIGHT.
5: *
6: */
1.9 cvs 7:
1.10 cvs 8: /*
9: * AHTURLTools.c: contains all the functions for testing, manipulating,
1.25 cvs 10: * and normalizing URLs. It also contains a local copy of the libWWW
11: * URL parsing functions.
1.10 cvs 12: *
13: * Authors: J. Kahan, I. Vatton
1.106 cvs 14: * R. Guetari: Windows.
1.10 cvs 15: *
16: */
1.7 cvs 17:
1.15 cvs 18: #define THOT_EXPORT extern
1.3 cvs 19: #include "amaya.h"
20:
1.8 cvs 21: #include "init_f.h"
22: #include "AHTURLTools_f.h"
1.100 kahan 23: #include "query_f.h"
1.8 cvs 24:
1.24 cvs 25: #define MAX_PRINT_URL_LENGTH 50
/* Components of a URI split into its parts.  Field meanings beyond the
   names are not established in this part of the file -- presumably they
   mirror libWWW's HTURI (TODO confirm against the parser that fills it). */
typedef struct _HTURI
{
  char *access;		/* Now known as "scheme" */
  char *host;		/* host part */
  char *absolute;	/* absolute path part */
  char *relative;	/* relative path part */
  char *fragment;	/* fragment part */
} HTURI;
1.24 cvs 34:
1.28 cvs 35:
36: /*----------------------------------------------------------------------
37: ConvertToLowerCase
38: Converts a string to lowercase.
39: ----------------------------------------------------------------------*/
1.106 cvs 40: void ConvertToLowerCase (char *string)
1.28 cvs 41: {
42: int i;
1.93 cvs 43:
1.28 cvs 44: if (!string)
45: return;
46:
1.106 cvs 47: for (i = 0; string[i] != EOS; i++)
1.67 cvs 48: string[i] = utolower (string[i]);
1.28 cvs 49: }
1.22 cvs 50:
/*----------------------------------------------------------------------
  EscapeChar
  Writes into string the two lowercase hexadecimal digits of the byte
  value of c, followed by a terminating NUL.  The caller must provide
  room for at least 3 chars.
  ----------------------------------------------------------------------*/
void EscapeChar (char *string, char c)
{
  /* go through unsigned char so that chars >= 0x80 don't sign-extend */
  unsigned int code = (unsigned char) c;

  sprintf (string, "%02x", code);
}
62:
/*----------------------------------------------------------------------
  UnEscapeChar
  Returns the numeric value (0..15) of the hexadecimal digit c.
  Both upper and lower case letters are accepted.  A char that is not
  an hexadecimal digit is not detected: it goes through the lower case
  letter formula.
  ----------------------------------------------------------------------*/
static char UnEscapeChar (char c)
{
  if (c >= '0' && c <= '9')
    return c - '0';
  if (c >= 'A' && c <= 'F')
    return c - 'A' + 10;
  /* accept small letters just in case */
  return c - 'a' + 10;
}
73:
74: /*----------------------------------------------------------------------
1.75 cvs 75: EscapeURL
76: Takes a URL and escapes all protected chars into
77: %xx sequences. Also, removes any leading white spaces
78: Returns either NULL or a new buffer, which must be freed by the caller
79: ----------------------------------------------------------------------*/
1.106 cvs 80: char *EscapeURL (const char *url)
81: {
82: char *buffer;
83: int buffer_len;
84: int buffer_free_mem;
85: char *ptr;
86: int new_chars;
1.75 cvs 87: void *status;
88:
89: if (url && *url)
90: {
1.106 cvs 91: buffer_free_mem = strlen (url) + 20;
92: buffer = TtaGetMemory (buffer_free_mem + 1);
1.107 kahan 93: ptr = (char *) url;
1.75 cvs 94: buffer_len = 0;
95:
96: while (*ptr)
97: {
98: switch (*ptr)
99: {
100: /* put here below all the chars that need to
101: be escaped into %xx */
1.81 cvs 102: case 0x27: /* & */
103: case 0x20: /* space */
1.75 cvs 104: new_chars = 3;
105: break;
106:
107: default:
108: new_chars = 1;
109: break;
110: }
111:
112: /* see if we need extra room in the buffer */
113: if (new_chars > buffer_free_mem)
114: {
1.76 cvs 115: buffer_free_mem = 20;
1.106 cvs 116: status = TtaRealloc (buffer, sizeof (char)
1.75 cvs 117: * (buffer_len + buffer_free_mem + 1));
118: if (status)
1.114 cvs 119: buffer = (char *) status;
1.106 cvs 120: else
121: {
122: /* @@ maybe we should do some other behavior here, like
123: freeing the buffer and return a void thing */
124: buffer[buffer_len] = EOS;
125: break;
126: }
1.75 cvs 127: }
128: /* escape the char */
129: if (new_chars == 3)
130: {
1.106 cvs 131: buffer[buffer_len] = '%';
1.75 cvs 132: EscapeChar (&buffer[buffer_len+1], *ptr);
133: }
134: else
135: buffer[buffer_len] = *ptr;
136:
137: /* update the status */
138: buffer_len += new_chars;
139: buffer_free_mem -= new_chars;
140: /* examine the next char */
141: ptr++;
142: }
1.106 cvs 143: buffer[buffer_len] = EOS;
1.75 cvs 144: }
1.76 cvs 145: else
146: buffer = NULL;
147:
1.75 cvs 148: return (buffer);
149: }
150:
151:
152: /*----------------------------------------------------------------------
1.11 cvs 153: ExplodeURL
1.8 cvs 154: ----------------------------------------------------------------------*/
1.106 cvs 155: void ExplodeURL (char *url, char **proto, char **host, char **dir,
156: char **file)
1.8 cvs 157: {
1.33 cvs 158: char *curr, *temp;
159: char used_sep;
1.32 cvs 160:
1.33 cvs 161: if (url && strchr (url, URL_SEP))
162: used_sep = URL_SEP;
163: else
164: used_sep = DIR_SEP;
1.8 cvs 165:
166: if ((url == NULL) || (proto == NULL) || (host == NULL) ||
167: (dir == NULL) || (file == NULL))
168: return;
169:
170: /* initialize every pointer */
171: *proto = *host = *dir = *file = NULL;
172:
173: /* skip any leading space */
174: while ((*url == SPACE) || (*url == TAB))
175: url++;
1.9 cvs 176: curr = url;
177: if (*curr == 0)
1.8 cvs 178: goto finished;
179:
180: /* go to the end of the URL */
1.68 cvs 181: while ((*curr != EOS) && (*curr != SPACE) && (*curr != BSPACE) &&
182: (*curr != __CR__) && (*curr != EOL))
1.9 cvs 183: curr++;
1.8 cvs 184:
185: /* mark the end of the chain */
1.9 cvs 186: *curr = EOS;
187: curr--;
188: if (curr <= url)
1.8 cvs 189: goto finished;
190:
191: /* search the next DIR_SEP indicating the beginning of the file name */
192: do
1.11 cvs 193: curr--;
1.33 cvs 194: while ((curr >= url) && (*curr != used_sep));
1.11 cvs 195:
1.9 cvs 196: if (curr < url)
1.8 cvs 197: goto finished;
1.9 cvs 198: *file = curr + 1;
1.8 cvs 199:
200: /* mark the end of the dir */
1.9 cvs 201: *curr = EOS;
202: curr--;
203: if (curr < url)
1.8 cvs 204: goto finished;
205:
1.29 cvs 206: /* search for the DIR_STR indicating the host name start */
1.33 cvs 207: while ((curr > url) && ((*curr != used_sep) || (*(curr + 1) != used_sep)))
1.9 cvs 208: curr--;
1.8 cvs 209:
210: /* if we found it, separate the host name from the directory */
1.102 kahan 211: if ((*curr == used_sep) && (*(curr + 1) == used_sep))
1.8 cvs 212: {
1.9 cvs 213: *host = temp = curr + 2;
1.33 cvs 214: while ((*temp != 0) && (*temp != used_sep))
1.8 cvs 215: temp++;
1.33 cvs 216: if (*temp == used_sep)
1.8 cvs 217: {
218: *temp = EOS;
219: *dir = temp + 1;
220: }
221: }
222: else
1.11 cvs 223: *dir = curr;
224:
1.9 cvs 225: if (curr <= url)
1.8 cvs 226: goto finished;
227:
228: /* mark the end of the proto */
1.9 cvs 229: *curr = EOS;
230: curr--;
231: if (curr < url)
1.8 cvs 232: goto finished;
233:
1.106 cvs 234: if (*curr == ':')
1.8 cvs 235: {
1.9 cvs 236: *curr = EOS;
237: curr--;
1.8 cvs 238: }
239: else
240: goto finished;
1.11 cvs 241:
1.9 cvs 242: if (curr < url)
1.8 cvs 243: goto finished;
1.9 cvs 244: while ((curr > url) && (isalpha (*curr)))
245: curr--;
246: *proto = curr;
1.8 cvs 247:
248: finished:;
249:
250: #ifdef AMAYA_DEBUG
251: fprintf (stderr, "ExplodeURL(%s)\n\t", url);
252: if (*proto)
253: fprintf (stderr, "proto : %s, ", *proto);
254: if (*host)
255: fprintf (stderr, "host : %s, ", *host);
256: if (*dir)
257: fprintf (stderr, "dir : %s, ", *dir);
258: if (*file)
259: fprintf (stderr, "file : %s ", *file);
260: fprintf (stderr, "\n");
261: #endif
262:
263: }
1.3 cvs 264:
1.116 kahan 265: /*----------------------------------------------------------------------
266: PicTypeToMime
267: Converts a Thot PicType into the equivalent MIME type. If no convertion
268: is possible, it returns NULL.
269: ----------------------------------------------------------------------*/
270: char *PicTypeToMIME (PicType contentType)
271: {
272: char *mime_type;
273:
274: switch (contentType)
275: {
276: case xbm_type:
277: mime_type ="image/x-xbitmap";
278: break;
279: case eps_type:
280: mime_type ="application/postscript";
281: break;
282: case xpm_type:
283: mime_type ="image/x-xpicmap";
284: break;
285: case gif_type:
286: mime_type ="image/gif";
287: break;
288: case jpeg_type:
289: mime_type ="image/jpeg";
290: break;
291: case png_type:
292: mime_type ="image/png";
293: break;
294: case svg_type:
1.118 kahan 295: mime_type ="image/svg+xml";
1.116 kahan 296: break;
297: case unknown_type:
298: default:
299: mime_type = NULL;
300: }
301:
302: return mime_type;
303: }
1.61 cvs 304:
305: /*----------------------------------------------------------------------
1.117 kahan 306: ImageElement
307: Returns the element (image parameter) and URL (url parameter) of an
308: image in a docImage document. The user must free the memory associated
1.120 ! kahan 309: with the url parameter if the function is succesful.
! 310: If the url parameter is NULL, we won't initialize it.
1.117 kahan 311: Returns TRUE if succesful, FALSE otherwise.
312: ----------------------------------------------------------------------*/
313: ThotBool ImageElement (Document doc, char **url, Element *image)
314: {
315: Element el, imgEl;
316: Attribute attr, srcAttr;
317: AttributeType attrType;
318: int length;
319: char *value;
320:
321: if (DocumentTypes[doc] != docImage)
322: return FALSE;
323:
324: /* find the value of the src attribute */
325: attrType.AttrSSchema = TtaGetSSchema ("HTML", doc);
326: attrType.AttrTypeNum = HTML_ATTR_SRC;
327: el = TtaGetRootElement (doc);
328: TtaSearchAttribute (attrType, SearchInTree, el, &imgEl, &srcAttr);
329:
330: if (!imgEl)
331: return FALSE;
332: *image = imgEl;
333:
1.120 ! kahan 334: if (url)
! 335: {
! 336: attr = TtaGetAttribute (imgEl, attrType);
! 337: length = TtaGetTextAttributeLength (srcAttr) + 1;
! 338: value = TtaGetMemory (length);
! 339: TtaGiveTextAttributeValue (srcAttr, value, &length);
! 340: *url = value;
! 341: }
1.117 kahan 342: return TRUE;
343: }
344:
345: /*----------------------------------------------------------------------
346: DocImageMimeType
347: Returns the MIME type of a docImage document.
348: ----------------------------------------------------------------------*/
349: char *DocImageMimeType (Document doc)
350: {
351: char *mime_type;
352: LoadedImageDesc *pImage;
353: PicType type;
354: char *url;
355: Element image;
356:
357: if (DocumentTypes[doc] != docImage)
358: return NULL;
359:
360: mime_type = NULL;
361: if (!IsHTTPPath (DocumentURLs[doc]))
362: {
363: /* it is a local image */
1.120 ! kahan 364: if (ImageElement (doc, NULL, &image))
1.117 kahan 365: {
366: type = TtaGetPictureType (image);
367: mime_type = PicTypeToMIME (type);
368: }
369: }
370: else
371: {
372: /* find the value of the src attribute */
373: pImage = ImageURLs;
374: while (pImage != NULL)
375: {
376: if (pImage->document == doc)
377: {
378: if (pImage->content_type)
379: mime_type = pImage->content_type;
380: else if (pImage->elImage && pImage->elImage->currentElement)
381: {
382: type = TtaGetPictureType (pImage->elImage->currentElement);
383: mime_type = PicTypeToMIME (type);
384: }
385: break;
386: }
387: }
388: }
389: return (mime_type);
390: }
391:
392: /*----------------------------------------------------------------------
1.61 cvs 393: ExtractSuffix extract suffix from document nane.
394: ----------------------------------------------------------------------*/
1.106 cvs 395: void ExtractSuffix (char *aName, char *aSuffix)
1.61 cvs 396: {
1.106 cvs 397: int lg, i;
398: char *ptr, *oldptr;
1.61 cvs 399:
400: if (!aSuffix || !aName)
401: /* bad suffix */
402: return;
403:
1.106 cvs 404: aSuffix[0] = EOS;
405: lg = strlen (aName);
1.61 cvs 406: if (lg)
407: {
408: /* the name is not empty */
409: oldptr = ptr = &aName[0];
410: do
411: {
1.106 cvs 412: ptr = strrchr (oldptr, '.');
1.61 cvs 413: if (ptr)
414: oldptr = &ptr[1];
415: }
416: while (ptr);
417:
418: i = (int) (oldptr) - (int) (aName); /* name length */
419: if (i > 1)
420: {
1.106 cvs 421: aName[i - 1] = EOS;
1.61 cvs 422: if (i != lg)
1.106 cvs 423: strcpy (aSuffix, oldptr);
1.61 cvs 424: }
425: }
426: }
427:
1.4 cvs 428: /*----------------------------------------------------------------------
1.9 cvs 429: IsHTMLName
430: returns TRUE if path points to an HTML resource.
1.4 cvs 431: ----------------------------------------------------------------------*/
1.109 cvs 432: ThotBool IsHTMLName (const char *path)
1.106 cvs 433: {
434: char temppath[MAX_LENGTH];
435: char suffix[MAX_LENGTH];
436: char nsuffix[MAX_LENGTH];
1.101 cvs 437: int i;
1.5 cvs 438:
1.101 cvs 439: if (!path)
440: return (FALSE);
1.5 cvs 441:
1.106 cvs 442: strcpy (temppath, path);
1.101 cvs 443: ExtractSuffix (temppath, suffix);
444: i = 0;
1.106 cvs 445: while (suffix[i] != EOS)
1.101 cvs 446: {
447: /* Normalize the suffix */
448: i = 0;
1.106 cvs 449: while (suffix[i] != EOS && i < MAX_LENGTH -1)
1.101 cvs 450: {
451: nsuffix[i] = utolower (suffix[i]);
452: i++;
453: }
1.106 cvs 454: nsuffix[i] = EOS;
455: if (!strcmp (nsuffix, "html") ||
456: !strcmp (nsuffix, "htm") ||
457: !strcmp (nsuffix, "shtml") ||
458: !strcmp (nsuffix, "jsp") ||
459: !strcmp (nsuffix, "xht") ||
460: !strcmp (nsuffix, "xhtm") ||
461: !strcmp (nsuffix, "xhtml"))
1.101 cvs 462: return (TRUE);
1.106 cvs 463: else if (!strcmp (nsuffix, "gz"))
1.101 cvs 464: {
465: /* take into account compressed files */
466: ExtractSuffix (temppath, suffix);
467: /* Normalize the suffix */
468: i = 0;
1.106 cvs 469: while (suffix[i] != EOS && i < MAX_LENGTH -1)
1.101 cvs 470: {
471: nsuffix[i] = utolower (suffix[i]);
472: i++;
473: }
1.106 cvs 474: nsuffix[i] = EOS;
475: if (!strcmp (nsuffix, "html") ||
476: !strcmp (nsuffix, "htm") ||
477: !strcmp (nsuffix, "shtml") ||
478: !strcmp (nsuffix, "jsp") ||
479: !strcmp (nsuffix, "xht") ||
480: !strcmp (nsuffix, "xhtm") ||
481: !strcmp (nsuffix, "xhtml"))
1.101 cvs 482: return (TRUE);
483: else
484: return (FALSE);
485: }
486: else
487: /* check if there is another suffix */
488: ExtractSuffix (temppath, suffix);
489: }
1.88 cvs 490: return (FALSE);
1.3 cvs 491: }
492:
1.4 cvs 493: /*----------------------------------------------------------------------
1.56 cvs 494: IsXMLName
495: returns TRUE if path points to an XML resource.
496: ----------------------------------------------------------------------*/
1.111 cvs 497: ThotBool IsXMLName (const char *path)
1.56 cvs 498: {
1.106 cvs 499: char temppath[MAX_LENGTH];
500: char suffix[MAX_LENGTH];
1.56 cvs 501:
502: if (!path)
503: return (FALSE);
504:
1.106 cvs 505: strcpy (temppath, path);
1.56 cvs 506: ExtractSuffix (temppath, suffix);
507:
1.106 cvs 508: if (!strcasecmp (suffix, "xml") ||
509: !strcasecmp (suffix, "xht") ||
510: !strcmp (suffix, "xhtm") ||
511: !strcmp (suffix, "xhtml"))
1.56 cvs 512: return (TRUE);
1.106 cvs 513: else if (!strcmp (suffix, "gz"))
1.56 cvs 514: {
515: /* take into account compressed files */
516: ExtractSuffix (temppath, suffix);
1.106 cvs 517: if (!strcasecmp (suffix, "xml") ||
518: !strcasecmp (suffix, "xht") ||
519: !strcmp (suffix, "xhtm") ||
520: !strcmp (suffix, "xhtml"))
1.60 cvs 521: return (TRUE);
522: else
523: return (FALSE);
524: }
525: else
526: return (FALSE);
527: }
528:
529: /*----------------------------------------------------------------------
1.103 cvs 530: IsMathMLName
531: returns TRUE if path points to an MathML resource.
532: ----------------------------------------------------------------------*/
1.111 cvs 533: ThotBool IsMathMLName (const char *path)
1.103 cvs 534: {
1.106 cvs 535: char temppath[MAX_LENGTH];
536: char suffix[MAX_LENGTH];
1.103 cvs 537:
538: if (!path)
539: return (FALSE);
540:
1.106 cvs 541: strcpy (temppath, path);
1.103 cvs 542: ExtractSuffix (temppath, suffix);
543:
1.106 cvs 544: if (!strcasecmp (suffix, "mml"))
1.103 cvs 545: return (TRUE);
1.106 cvs 546: else if (!strcmp (suffix, "gz"))
1.103 cvs 547: {
548: /* take into account compressed files */
549: ExtractSuffix (temppath, suffix);
1.106 cvs 550: if (!strcasecmp (suffix, "mml"))
1.103 cvs 551: return (TRUE);
552: else
553: return (FALSE);
554: }
555: else
556: return (FALSE);
557: }
558:
559: /*----------------------------------------------------------------------
560: IsSVGName
1.119 kahan 561: returns TRUE if path points to an SVG resource.
1.103 cvs 562: ----------------------------------------------------------------------*/
1.111 cvs 563: ThotBool IsSVGName (const char *path)
1.103 cvs 564: {
1.106 cvs 565: char temppath[MAX_LENGTH];
566: char suffix[MAX_LENGTH];
1.103 cvs 567:
568: if (!path)
569: return (FALSE);
570:
1.106 cvs 571: strcpy (temppath, path);
1.103 cvs 572: ExtractSuffix (temppath, suffix);
573:
1.106 cvs 574: if (!strcasecmp (suffix, "svg"))
1.103 cvs 575: return (TRUE);
1.106 cvs 576: else if (!strcmp (suffix, "gz"))
1.103 cvs 577: {
578: /* take into account compressed files */
579: ExtractSuffix (temppath, suffix);
1.106 cvs 580: if (!strcasecmp (suffix, "svg"))
1.103 cvs 581: return (TRUE);
582: else
583: return (FALSE);
584: }
585: else
586: return (FALSE);
587: }
588:
589: /*----------------------------------------------------------------------
1.60 cvs 590: IsCSSName
591: returns TRUE if path points to an XML resource.
592: ----------------------------------------------------------------------*/
1.111 cvs 593: ThotBool IsCSSName (const char *path)
1.60 cvs 594: {
1.106 cvs 595: char temppath[MAX_LENGTH];
596: char suffix[MAX_LENGTH];
1.60 cvs 597:
598: if (!path)
599: return (FALSE);
600:
1.106 cvs 601: strcpy (temppath, path);
1.60 cvs 602: ExtractSuffix (temppath, suffix);
603:
1.106 cvs 604: if (!strcasecmp (suffix, "css"))
1.60 cvs 605: return (TRUE);
1.106 cvs 606: else if (!strcmp (suffix, "gz"))
1.60 cvs 607: {
608: /* take into account compressed files */
609: ExtractSuffix (temppath, suffix);
1.106 cvs 610: if (!strcasecmp (suffix, "css"))
1.56 cvs 611: return (TRUE);
612: else
613: return (FALSE);
614: }
615: else
616: return (FALSE);
617: }
618:
619: /*----------------------------------------------------------------------
1.9 cvs 620: IsImageName
621: returns TRUE if path points to an image resource.
1.4 cvs 622: ----------------------------------------------------------------------*/
1.111 cvs 623: ThotBool IsImageName (const char *path)
1.106 cvs 624: {
625: char temppath[MAX_LENGTH];
626: char suffix[MAX_LENGTH];
627: char nsuffix[MAX_LENGTH];
1.5 cvs 628: int i;
629:
630: if (!path)
1.13 cvs 631: return (FALSE);
1.5 cvs 632:
1.106 cvs 633: strcpy (temppath, path);
1.5 cvs 634: ExtractSuffix (temppath, suffix);
635:
636: /* Normalize the suffix */
637: i = 0;
1.106 cvs 638: while (suffix[i] != EOS && i < MAX_LENGTH -1)
1.13 cvs 639: {
1.67 cvs 640: nsuffix[i] = utolower (suffix[i]);
1.13 cvs 641: i++;
642: }
1.106 cvs 643: nsuffix[i] = EOS;
644: if ((!strcmp (nsuffix, "gif")) || (!strcmp (nsuffix, "xbm")) ||
645: (!strcmp (nsuffix, "xpm")) || (!strcmp (nsuffix, "jpg")) ||
646: (!strcmp (nsuffix, "png")) || (!strcmp (nsuffix, "au")))
1.39 cvs 647: return (TRUE);
648: return (FALSE);
1.3 cvs 649: }
650:
1.4 cvs 651: /*----------------------------------------------------------------------
1.58 cvs 652: IsImageType
653: returns TRUE if type points to an image resource.
654: ----------------------------------------------------------------------*/
1.111 cvs 655: ThotBool IsImageType (const char *type)
1.58 cvs 656: {
1.106 cvs 657: char temptype[MAX_LENGTH];
1.58 cvs 658: int i;
659:
660: if (!type)
661: return (FALSE);
662:
1.106 cvs 663: strcpy (temptype, type);
1.58 cvs 664: /* Normalize the type */
665: i = 0;
1.106 cvs 666: while (temptype[i] != EOS)
1.58 cvs 667: {
668: temptype[i] = tolower (temptype[i]);
669: i++;
670: }
1.111 cvs 671: if (!strcmp (temptype, "gif") || !strcmp (temptype, "x-xbitmap") ||
672: !strcmp (temptype, "x-xpixmap") || !strcmp (temptype, "jpeg") ||
673: !strcmp (temptype, "png"))
1.58 cvs 674: return (TRUE);
675: return (FALSE);
676: }
677:
678: /*----------------------------------------------------------------------
1.9 cvs 679: IsTextName
1.4 cvs 680: ----------------------------------------------------------------------*/
1.111 cvs 681: ThotBool IsTextName (const char *path)
1.106 cvs 682: {
683: char temppath[MAX_LENGTH];
684: char suffix[MAX_LENGTH];
685: char nsuffix[MAX_LENGTH];
1.5 cvs 686: int i;
687:
688: if (!path)
1.13 cvs 689: return (FALSE);
1.5 cvs 690:
1.106 cvs 691: strcpy (temppath, path);
1.5 cvs 692: ExtractSuffix (temppath, suffix);
693:
694: /* Normalize the suffix */
695: i = 0;
1.106 cvs 696: while (suffix[i] != EOS && i < MAX_LENGTH -1)
1.5 cvs 697: {
1.25 cvs 698: nsuffix[i] = tolower (suffix[i]);
1.5 cvs 699: i++;
700: }
1.106 cvs 701: nsuffix[i] = EOS;
1.5 cvs 702:
1.111 cvs 703: if (!strcmp (nsuffix, "txt") || !strcmp (nsuffix, "dtd"))
1.13 cvs 704: return (TRUE);
1.106 cvs 705: else if (!strcmp (nsuffix, "gz"))
1.13 cvs 706: {
1.39 cvs 707: /* take into account compressed files */
1.13 cvs 708: ExtractSuffix (temppath, suffix);
709: /* Normalize the suffix */
710: i = 0;
1.106 cvs 711: while (suffix[i] != EOS && i < MAX_LENGTH -1)
1.13 cvs 712: {
1.25 cvs 713: nsuffix[i] = tolower (suffix[i]);
1.13 cvs 714: i++;
715: }
1.106 cvs 716: nsuffix[i] = EOS;
1.111 cvs 717: if (!strcmp (nsuffix, "txt") || !strcmp (nsuffix, "dtd"))
1.13 cvs 718: return (TRUE);
719: else
720: return (FALSE);
721: }
722: else
723: return (FALSE);
1.3 cvs 724: }
725:
1.4 cvs 726: /*----------------------------------------------------------------------
1.9 cvs 727: IsHTTPPath
728: returns TRUE if path is in fact an http URL.
1.4 cvs 729: ----------------------------------------------------------------------*/
1.112 cvs 730: ThotBool IsHTTPPath (const char *path)
1.3 cvs 731: {
1.5 cvs 732: if (!path)
733: return FALSE;
1.3 cvs 734:
1.106 cvs 735: if ((!strncmp (path, "http:", 5) != 0)
736: || (AHTFTPURL_flag () && !strncmp (path, "ftp:", 4))
737: || !strncmp (path, "internal:", 9))
1.58 cvs 738: return TRUE;
739: return FALSE;
1.3 cvs 740: }
741:
1.4 cvs 742: /*----------------------------------------------------------------------
1.9 cvs 743: IsWithParameters
744: returns TRUE if url has a concatenated query string.
1.4 cvs 745: ----------------------------------------------------------------------*/
1.66 cvs 746: ThotBool IsWithParameters (const char *url)
1.3 cvs 747: {
1.5 cvs 748: int i;
1.3 cvs 749:
1.9 cvs 750: if ((!url) || (url[0] == EOS))
1.5 cvs 751: return FALSE;
1.3 cvs 752:
1.9 cvs 753: i = strlen (url) - 1;
754: while (i > 0 && url[i--] != '?')
1.5 cvs 755: if (i < 0)
756: return FALSE;
1.3 cvs 757:
1.5 cvs 758: /* There is a parameter */
759: return TRUE;
1.3 cvs 760: }
761:
1.4 cvs 762: /*----------------------------------------------------------------------
1.9 cvs 763: IsW3Path
764: returns TRUE if path is in fact a URL.
1.4 cvs 765: ----------------------------------------------------------------------*/
1.106 cvs 766: ThotBool IsW3Path (const char *path)
767: {
768: if (strncmp (path, "http:", 5) &&
769: strncmp (path, "ftp:", 4) &&
770: strncmp (path, "telnet:", 7) &&
771: strncmp (path, "wais:", 5) &&
772: strncmp (path, "news:", 5) &&
773: strncmp (path, "gopher:", 7) &&
774: strncmp (path, "mailto:", 7) &&
775: strncmp (path, "archie:", 7))
1.72 cvs 776: return FALSE;
777: return TRUE;
1.3 cvs 778: }
779:
1.4 cvs 780: /*----------------------------------------------------------------------
1.90 cvs 781: IsFilePath
782: returns TRUE if path is in fact a URL.
783: ----------------------------------------------------------------------*/
1.106 cvs 784: ThotBool IsFilePath (const char *path)
1.90 cvs 785: {
1.106 cvs 786: if (strncmp (path, "file:", 5))
1.90 cvs 787: return FALSE;
788: return TRUE;
789: }
790:
791: /*----------------------------------------------------------------------
1.9 cvs 792: IsValidProtocol
793: returns true if the url protocol is supported by Amaya.
1.4 cvs 794: ----------------------------------------------------------------------*/
1.106 cvs 795: ThotBool IsValidProtocol (const char *url)
796: {
797: if (!strncmp (url, "http:", 5)
798: || !strncmp (url, "internal:", 9)
799: || (AHTFTPURL_flag () && !strncmp (url, "ftp:", 4)))
1.22 cvs 800: /* experimental */
1.24 cvs 801: /*** || !strncmp (path, "news:", 5)***/
1.8 cvs 802: return (TRUE);
1.5 cvs 803: else
1.8 cvs 804: return (FALSE);
1.3 cvs 805: }
806:
1.31 cvs 807:
808: /*----------------------------------------------------------------------
809: GetBaseURL
810: normalizes orgName according to a base associated with doc, and
811: following the standard URL format rules.
812: The function returns the base used to solve relative URL and SRC:
813: - the base of the document,
814: - or the document path (without document name).
815: ----------------------------------------------------------------------*/
1.106 cvs 816: char *GetBaseURL (Document doc)
1.31 cvs 817: {
818: Element el;
819: ElementType elType;
820: AttributeType attrType;
821: Attribute attr;
1.106 cvs 822: char *ptr, *basename;
1.31 cvs 823: int length;
824:
1.113 cvs 825: if (doc == 0 || !DocumentURLs[doc])
1.110 cvs 826: return NULL;
1.106 cvs 827: basename = TtaGetMemory (MAX_LENGTH);
828: strncpy (basename, DocumentURLs[doc], MAX_LENGTH-1);
829: basename[MAX_LENGTH-1] = EOS;
1.31 cvs 830: length = MAX_LENGTH -1;
1.113 cvs 831: /* is it a HTML document ? */
1.31 cvs 832: elType.ElSSchema = TtaGetDocumentSSchema (doc);
1.113 cvs 833: if (!strcmp (TtaGetSSchemaName (elType.ElSSchema), "HTML"))
834: /* it's a HTML document */
1.65 cvs 835: {
1.113 cvs 836: /* get the document element */
837: el = TtaGetMainRoot (doc);
838: /* search the BASE element */
839: elType.ElTypeNum = HTML_EL_HEAD;
840: el = TtaSearchTypedElement (elType, SearchForward, el);
841: if (el)
842: /* there is a HEAD element */
843: {
844: /* look for a BASE element within the HEAD */
845: elType.ElTypeNum = HTML_EL_BASE;
846: el = TtaSearchTypedElement (elType, SearchInTree, el);
847: }
848: if (el)
1.31 cvs 849: {
1.113 cvs 850: /* The document has a BASE element. Get the HREF attribute of the
851: BASE element */
852: attrType.AttrSSchema = elType.ElSSchema;
853: attrType.AttrTypeNum = HTML_ATTR_HREF_;
854: attr = TtaGetAttribute (el, attrType);
855: if (attr)
1.31 cvs 856: {
1.113 cvs 857: /* Use the base path of the document */
858: TtaGiveTextAttributeValue (attr, basename, &length);
859: /* base and orgName have to be separated by a DIR_SEP */
860: length--;
861: if (basename[0] != EOS && basename[length] != URL_SEP &&
862: basename[length] != DIR_SEP)
863: /* verify if the base has the form "protocol://server:port" */
1.31 cvs 864: {
1.113 cvs 865: ptr = AmayaParseUrl (basename, "", AMAYA_PARSE_ACCESS |
866: AMAYA_PARSE_HOST |
867: AMAYA_PARSE_PUNCTUATION);
868: if (ptr && !strcmp (ptr, basename))
869: {
870: /* it has this form, complete it by adding a URL_STR */
871: if (strchr (basename, DIR_SEP))
872: strcat (basename, DIR_STR);
873: else
874: strcat (basename, URL_STR);
875: length++;
876: }
877: if (ptr)
878: TtaFreeMemory (ptr);
1.31 cvs 879: }
880: }
881: }
1.113 cvs 882: }
883:
1.31 cvs 884: /* Remove anything after the last DIR_SEP char. If no such char is found,
885: * then search for the first ":" char, hoping that what's before that is a
886: * protocol. If found, end the string there. If neither char is found,
887: * then discard the whole base element.
888: */
1.106 cvs 889: length = strlen (basename) - 1;
1.31 cvs 890: /* search for the last DIR_SEP char */
1.106 cvs 891: while (length >= 0 && basename[length] != URL_SEP && basename[length] != DIR_SEP)
1.31 cvs 892: length--;
893: if (length >= 0)
894: /* found the last DIR_SEP char, end the string there */
1.106 cvs 895: basename[length + 1] = EOS;
1.31 cvs 896: else
897: /* search for the first PATH_STR char */
898: {
1.106 cvs 899: for (length = 0; basename[length] != ':' &&
900: basename[length] != EOS; length ++);
901: if (basename[length] == ':')
1.31 cvs 902: /* found, so end the string there */
1.106 cvs 903: basename[length + 1] = EOS;
1.31 cvs 904: else
905: /* not found, discard the base */
1.106 cvs 906: basename[0] = EOS;
1.31 cvs 907: }
908: return (basename);
909: }
910:
911:
1.4 cvs 912: /*----------------------------------------------------------------------
1.40 cvs 913: GetLocalPath
914: Allocate and return the local document path associated to the url
915: ----------------------------------------------------------------------*/
1.106 cvs 916: char *GetLocalPath (Document doc, char *url)
917: {
918: char *ptr;
919: char *n;
920: char *documentname;
921: char url_sep;
1.83 cvs 922: int len;
1.67 cvs 923: ThotBool noFile;
1.40 cvs 924:
925: if (url != NULL)
926: {
927: /* check whether the file name exists */
1.106 cvs 928: len = strlen (url) - 1;
1.71 cvs 929: if (IsW3Path (url))
1.106 cvs 930: url_sep = '/';
1.41 cvs 931: else
1.106 cvs 932: url_sep = DIR_SEP;
1.41 cvs 933: noFile = (url[len] == url_sep);
1.40 cvs 934: if (noFile)
1.106 cvs 935: url[len] = EOS;
936: ptr = TtaGetMemory (MAX_LENGTH);
937: documentname = TtaGetMemory (MAX_LENGTH);
1.78 cvs 938: TtaExtractName (url, ptr, documentname);
1.106 cvs 939: sprintf (ptr, "%s%s%d%s", TempFileDirectory, DIR_STR, doc, DIR_STR);
1.40 cvs 940: if (!TtaCheckDirectory (ptr))
941: /* directory did not exist */
1.72 cvs 942: TtaMakeDirectory (ptr);
1.47 cvs 943:
944: /* don't include the query string within document name */
1.106 cvs 945: n = strrchr (documentname, '?');
1.47 cvs 946: if (n != NULL)
1.106 cvs 947: *n = EOS;
1.46 cvs 948: /* don't include ':' within document name */
1.106 cvs 949: n = strchr (documentname, ':');
1.46 cvs 950: if (n != NULL)
1.106 cvs 951: *n = EOS;
1.69 cvs 952: /* if after all this operations document name
953: is empty, let's use noname.html instead */
1.106 cvs 954: if (documentname[0] == EOS)
955: strcat (ptr, "noname.html");
1.69 cvs 956: else
1.106 cvs 957: strcat (ptr, documentname);
1.40 cvs 958: TtaFreeMemory (documentname);
959: /* restore the url */
960: if (noFile)
1.41 cvs 961: url[len] = url_sep;
1.40 cvs 962: return (ptr);
963: }
964: else
965: return (NULL);
966: }
967:
1.73 cvs 968: /*----------------------------------------------------------------------
1.79 cvs 969: ExtractTarget extract the target name from document nane.
970: ----------------------------------------------------------------------*/
1.106 cvs 971: void ExtractTarget (char *aName, char *target)
1.79 cvs 972: {
1.106 cvs 973: int lg, i;
974: char *ptr;
975: char *oldptr;
1.79 cvs 976:
977: if (!target || !aName)
978: /* bad target */
979: return;
980:
1.106 cvs 981: target[0] = EOS;
982: lg = strlen (aName);
1.79 cvs 983: if (lg)
984: {
985: /* the name is not empty */
986: oldptr = ptr = &aName[0];
987: do
988: {
1.106 cvs 989: ptr = strrchr (oldptr, '#');
1.79 cvs 990: if (ptr)
991: oldptr = &ptr[1];
992: }
993: while (ptr);
994:
995: i = (int) (oldptr) - (int) (aName); /* name length */
996: if (i > 1)
997: {
1.106 cvs 998: aName[i - 1] = EOS;
1.79 cvs 999: if (i != lg)
1.106 cvs 1000: strcpy (target, oldptr);
1.79 cvs 1001: }
1002: }
1003: }
1004:
1005: /*----------------------------------------------------------------------
1.90 cvs 1006: RemoveNewLines (text)
1007: Removes any '\n' chars that are found in text.
1008: Returns TRUE if it did the operation, FALSE otherwise.
1.73 cvs 1009: ----------------------------------------------------------------------*/
1.106 cvs 1010: ThotBool RemoveNewLines (char *text)
1011: {
1012: ThotBool change = FALSE;
1013: char *src;
1014: char *dest;
1.90 cvs 1015:
1016: src = text;
1017: dest = text;
1.115 kahan 1018:
1019: /* remove any preceding whitespace */
1020: while (*src && *src == ' ')
1021: {
1022: src++;
1023: change = 1;
1024: }
1025:
1.90 cvs 1026: while (*src)
1027: {
1028: switch (*src)
1029: {
1.106 cvs 1030: case '\n':
1.90 cvs 1031: /* don't copy the newline */
1032: change = 1;
1033: break;
1034: default:
1035: *dest = *src;
1036: dest++;
1037: break;
1038: }
1039: src++;
1040: }
1041: /* copy the last EOS char */
1042: *dest = *src;
1043:
1044: return (change);
1045: }
1046:
1047: /*----------------------------------------------------------------------
1048: CleanCopyFileURL
1049: Copies a file url from a src string to destination string.
1.97 cvs 1050: convertion says which type of convertion (none, %xx, URL_SEP into DIR_SEP
1051: we want to do).
1.90 cvs 1052: ----------------------------------------------------------------------*/
1.106 cvs 1053: static void CleanCopyFileURL (char *dest, char *src,
1054: ConvertionType convertion)
1.90 cvs 1055: {
1056: while (*src)
1.89 cvs 1057: {
1.90 cvs 1058: switch (*src)
1.89 cvs 1059: {
1060: #ifdef _WINDOWS
1.106 cvs 1061: case URL_SEP:
1.96 cvs 1062: /* make DIR_SEP transformation */
1.97 cvs 1063: if (convertion & AM_CONV_URL_SEP)
1.106 cvs 1064: *dest = DIR_SEP;
1.96 cvs 1065: else
1066: *dest = *src;
1.90 cvs 1067: dest++;
1.96 cvs 1068: src++;
1.90 cvs 1069: break;
1.89 cvs 1070: #endif /* _WINDOWS */
1.96 cvs 1071:
1.106 cvs 1072: case '%':
1.97 cvs 1073: if (convertion & AM_CONV_PERCENT)
1.96 cvs 1074: {
1.97 cvs 1075: /* (code adapted from libwww's HTUnEscape function */
1.96 cvs 1076: src++;
1.106 cvs 1077: if (*src != EOS)
1.97 cvs 1078: {
1079: *dest = UnEscapeChar (*src) * 16;
1080: src++;
1081: }
1.106 cvs 1082: if (*src != EOS)
1.97 cvs 1083: {
1084: *dest = *dest + UnEscapeChar (*src);
1085: src++;
1086: }
1087: dest++;
1.96 cvs 1088: }
1.97 cvs 1089: else
1.96 cvs 1090: {
1.97 cvs 1091: *dest = *src;
1092: dest++;
1.96 cvs 1093: src++;
1094: }
1095: break;
1096:
1.90 cvs 1097: default:
1098: *dest = *src;
1.89 cvs 1099: dest++;
1.96 cvs 1100: src++;
1.90 cvs 1101: break;
1.89 cvs 1102: }
1103: }
1.90 cvs 1104: /* copy the EOS char */
1105: *dest = *src;
1.73 cvs 1106: }
1.40 cvs 1107:
1108: /*----------------------------------------------------------------------
1.9 cvs 1109: NormalizeURL
1110: normalizes orgName according to a base associated with doc, and
1111: following the standard URL format rules.
1.113 cvs 1112: if doc is < 0, use as a base the URL of the document that contains
1113: (or contained) the elements that are now in the copy/cut buffer.
1.53 cvs 1114: if doc is 0 and otherPath not NULL, normalizes orgName according to this
1115: other path.
1.9 cvs 1116: The function returns the new complete and normalized URL
1.12 cvs 1117: or file name path (newName) and the name of the document (docName).
1.9 cvs 1118: N.B. If the function can't find out what's the docName, it assigns
1119: the name "noname.html".
1.4 cvs 1120: ----------------------------------------------------------------------*/
1.106 cvs 1121: void NormalizeURL (char *orgName, Document doc, char *newName,
1122: char *docName, char *otherPath)
1123: {
1124: char *basename;
1125: char tempOrgName[MAX_LENGTH];
1126: char *ptr;
1127: char used_sep;
1.84 cvs 1128: int length;
1129: ThotBool check;
1.5 cvs 1130:
1.110 cvs 1131: #ifdef _WINDOWS
1.44 cvs 1132: int ndx;
1.110 cvs 1133: #endif /* _WINDOWS */
1.44 cvs 1134:
1.5 cvs 1135: if (!newName || !docName)
1136: return;
1.18 cvs 1137:
1.113 cvs 1138: if (doc < 0)
1139: basename = TtaStrdup (SavedDocumentURL);
1140: else if (doc > 0)
1.53 cvs 1141: basename = GetBaseURL (doc);
1142: else if (otherPath != NULL)
1.108 cvs 1143: basename = TtaStrdup (otherPath);
1.32 cvs 1144: else
1.53 cvs 1145: basename = NULL;
1.32 cvs 1146:
1.18 cvs 1147: /*
1.31 cvs 1148: * Clean orgName
1149: * Make sure we have a complete orgName, without any leading or trailing
1150: * white spaces, or trailinbg new lines
1151: */
1.5 cvs 1152: ptr = orgName;
1.18 cvs 1153: /* skip leading white space and new line characters */
1.106 cvs 1154: while ((*ptr == SPACE || *ptr == EOL) && *ptr++ != EOS);
1155: strncpy (tempOrgName, ptr, MAX_LENGTH -1);
1156: tempOrgName[MAX_LENGTH -1] = EOS;
1.18 cvs 1157: /*
1.31 cvs 1158: * Make orgName a complete URL
1159: * If the URL does not include a protocol, then try to calculate
1160: * one using the doc's base element (if it exists),
1161: */
1.106 cvs 1162: if (tempOrgName[0] == EOS)
1.53 cvs 1163: {
1.106 cvs 1164: newName[0] = EOS;
1165: docName[0] = EOS;
1.53 cvs 1166: TtaFreeMemory (basename);
1167: return;
1168: }
1.49 cvs 1169:
1170: /* clean trailing white space */
1.106 cvs 1171: length = strlen (tempOrgName) - 1;
1172: while (tempOrgName[length] == SPACE && tempOrgName[length] == EOL)
1.53 cvs 1173: {
1.106 cvs 1174: tempOrgName[length] = EOS;
1.53 cvs 1175: length--;
1176: }
1.50 cvs 1177:
1.55 cvs 1178: /* remove extra dot (which dot???) */
1179: /* ugly, but faster than a strcmp */
1.106 cvs 1180: if (tempOrgName[length] == '.'
1181: && (length == 0 || tempOrgName[length-1] != '.'))
1182: tempOrgName[length] = EOS;
1.50 cvs 1183:
1.94 cvs 1184: if (IsW3Path (tempOrgName))
1.53 cvs 1185: {
1186: /* the name is complete, go to the Sixth Step */
1.106 cvs 1187: strcpy (newName, tempOrgName);
1.53 cvs 1188: SimplifyUrl (&newName);
1189: /* verify if the URL has the form "protocol://server:port" */
1.110 cvs 1190: ptr = AmayaParseUrl (newName, "", AMAYA_PARSE_ACCESS |
1191: AMAYA_PARSE_HOST |
1192: AMAYA_PARSE_PUNCTUATION);
1193: if (ptr && !strcmp (ptr, newName))
1194: /* it has this form, we complete it by adding a DIR_STR */
1.106 cvs 1195: strcat (newName, URL_STR);
1.49 cvs 1196:
1.53 cvs 1197: if (ptr)
1.50 cvs 1198: TtaFreeMemory (ptr);
1.53 cvs 1199: }
1.113 cvs 1200: else if (basename == NULL)
1.53 cvs 1201: /* the name is complete, go to the Sixth Step */
1.106 cvs 1202: strcpy (newName, tempOrgName);
1.53 cvs 1203: else
1204: {
1.31 cvs 1205: /* Calculate the absolute URL, using the base or document URL */
1.110 cvs 1206: #ifdef _WINDOWS
1.53 cvs 1207: if (!IsW3Path (basename))
1208: {
1.106 cvs 1209: length = strlen (tempOrgName);
1.53 cvs 1210: for (ndx = 0; ndx < length; ndx++)
1.106 cvs 1211: if (tempOrgName [ndx] == '/')
1212: tempOrgName [ndx] = '\\';
1.53 cvs 1213: }
1.110 cvs 1214: #endif /* _WINDOWS */
1.25 cvs 1215: ptr = AmayaParseUrl (tempOrgName, basename, AMAYA_PARSE_ALL);
1.53 cvs 1216: if (ptr)
1217: {
1218: SimplifyUrl (&ptr);
1.106 cvs 1219: strcpy (newName, ptr);
1.53 cvs 1220: TtaFreeMemory (ptr);
1221: }
1222: else
1.106 cvs 1223: newName[0] = EOS;
1.53 cvs 1224: }
1.36 cvs 1225:
1226: TtaFreeMemory (basename);
1.18 cvs 1227: /*
1.31 cvs 1228: * Prepare the docname that will refer to this ressource in the
1229: * .amaya directory. If the new URL finishes on DIR_SEP, then use
1230: * noname.html as a default ressource name
1.18 cvs 1231: */
1.106 cvs 1232: if (newName[0] != EOS)
1.53 cvs 1233: {
1.106 cvs 1234: length = strlen (newName) - 1;
1235: if (newName[length] == URL_SEP || newName[length] == DIR_SEP)
1.53 cvs 1236: {
1237: used_sep = newName[length];
1238: check = TRUE;
1239: while (check)
1240: {
1.50 cvs 1241: length--;
1242: while (length >= 0 && newName[length] != used_sep)
1.53 cvs 1243: length--;
1.106 cvs 1244: if (!strncmp (&newName[length+1], "..", 2))
1.53 cvs 1245: {
1.106 cvs 1246: newName[length+1] = EOS;
1.53 cvs 1247: /* remove also previous directory */
1248: length--;
1249: while (length >= 0 && newName[length] != used_sep)
1250: length--;
1.106 cvs 1251: if (strncmp (&newName[length+1], "//", 2))
1.53 cvs 1252: /* don't remove server name */
1.106 cvs 1253: newName[length+1] = EOS;
1.53 cvs 1254: }
1.106 cvs 1255: else if (!strncmp (&newName[length+1], ".", 1))
1256: newName[length+1] = EOS;
1.50 cvs 1257: else
1.53 cvs 1258: check = FALSE;
1259: }
1260: /* docname was not comprised inside the URL, so let's */
1261: /* assign the default ressource name */
1.106 cvs 1262: strcpy (docName, "noname.html");
1.53 cvs 1263: }
1264: else
1265: { /* docname is comprised inside the URL */
1.110 cvs 1266: while (length >= 0 && newName[length] != URL_SEP &&
1267: newName[length] != DIR_SEP)
1.53 cvs 1268: length--;
1269: if (length < 0)
1.106 cvs 1270: strcpy (docName, newName);
1.53 cvs 1271: else
1.106 cvs 1272: strcpy (docName, &newName[length+1]);
1.53 cvs 1273: }
1274: }
1275: else
1.106 cvs 1276: docName[0] = EOS;
1.18 cvs 1277: }
1.3 cvs 1278:
1.4 cvs 1279: /*----------------------------------------------------------------------
1.9 cvs 1280: IsSameHost
1.4 cvs 1281: ----------------------------------------------------------------------*/
1.106 cvs 1282: ThotBool IsSameHost (const char *url1, const char *url2)
1.3 cvs 1283: {
1.106 cvs 1284: char *basename_ptr1, *basename_ptr2;
1285: ThotBool result;
1.3 cvs 1286:
1.106 cvs 1287: basename_ptr1 = AmayaParseUrl (url1, "",
1288: AMAYA_PARSE_ACCESS | AMAYA_PARSE_HOST | AMAYA_PARSE_PUNCTUATION);
1289: basename_ptr2 = AmayaParseUrl (url2, "",
1290: AMAYA_PARSE_ACCESS | AMAYA_PARSE_HOST | AMAYA_PARSE_PUNCTUATION);
1.3 cvs 1291:
1.106 cvs 1292: if (strcmp (basename_ptr1, basename_ptr2))
1293: result = FALSE;
1294: else
1295: result = TRUE;
1296: TtaFreeMemory (basename_ptr1);
1297: TtaFreeMemory (basename_ptr2);
1298: return (result);
1.3 cvs 1299: }
1300:
1301:
1.4 cvs 1302: /*----------------------------------------------------------------------
1.22 cvs 1303: HasKnownFileSuffix
1304: returns TRUE if path points to a file ending with a suffix.
1305: ----------------------------------------------------------------------*/
1.106 cvs 1306: ThotBool HasKnownFileSuffix (const char *path)
1307: {
1308: char *root;
1309: char temppath[MAX_LENGTH];
1310: char suffix[MAX_LENGTH];
1.22 cvs 1311:
1.106 cvs 1312: if (!path || path[0] == EOS || path[strlen(path)] == DIR_SEP)
1.22 cvs 1313: return (FALSE);
1314:
1.106 cvs 1315: root = AmayaParseUrl(path, "", AMAYA_PARSE_PATH | AMAYA_PARSE_PUNCTUATION);
1.22 cvs 1316:
1317: if (root)
1318: {
1.106 cvs 1319: strcpy (temppath, root);
1.25 cvs 1320: TtaFreeMemory (root);
1.22 cvs 1321: /* Get the suffix */
1322: ExtractSuffix (temppath, suffix);
1323:
1.106 cvs 1324: if( suffix[0] == EOS)
1.22 cvs 1325: /* no suffix */
1326: return (FALSE);
1327:
1328: /* Normalize the suffix */
1329: ConvertToLowerCase (suffix);
1330:
1.106 cvs 1331: if (!strcmp (suffix, "gz"))
1.22 cvs 1332: /* skip the compressed suffix */
1333: {
1334: ExtractSuffix (temppath, suffix);
1.106 cvs 1335: if(suffix[0] == EOS)
1.22 cvs 1336: /* no suffix */
1337: return (FALSE);
1338: /* Normalize the suffix */
1339: ConvertToLowerCase (suffix);
1340: }
1341:
1.106 cvs 1342: if (strcmp (suffix, "gif") &&
1343: strcmp (suffix, "xbm") &&
1344: strcmp (suffix, "xpm") &&
1345: strcmp (suffix, "jpg") &&
1346: strcmp (suffix, "pdf") &&
1347: strcmp (suffix, "png") &&
1348: strcmp (suffix, "tgz") &&
1349: strcmp (suffix, "xpg") &&
1350: strcmp (suffix, "xpd") &&
1351: strcmp (suffix, "ps") &&
1352: strcmp (suffix, "au") &&
1353: strcmp (suffix, "html") &&
1354: strcmp (suffix, "htm") &&
1355: strcmp (suffix, "shtml") &&
1356: strcmp (suffix, "xht") &&
1357: strcmp (suffix, "xhtm") &&
1358: strcmp (suffix, "xhtml") &&
1359: strcmp (suffix, "txt") &&
1360: strcmp (suffix, "css") &&
1361: strcmp (suffix, "eps"))
1.22 cvs 1362: return (FALSE);
1363: else
1364: return (TRUE);
1365: }
1366: else
1367: return (FALSE);
1368: }
1369:
1370:
1371: /*----------------------------------------------------------------------
1.24 cvs 1372: ChopURL
1373: Gives back a URL no longer than MAX_PRINT_URL_LENGTH chars (outputURL).
1374: If inputURL is bigger than that size, outputURL receives
1375: MAX_PRINT_URL_LENGTH / 2 chars from the beginning of inputURL, "...",
1376: and MAX_PRINT_URL_LENGTH / 2 chars from the end of inputURL.
1377: If inputURL is not longer than MAX_PRINT_URL_LENGTH chars, it gets
1378: copied into outputURL.
1379: N.B.: outputURL must point to a memory block of MAX_PRINT_URL_LENGTH
1380: chars.
1381: ----------------------------------------------------------------------*/
1.106 cvs 1382: void ChopURL (char *outputURL, const char *inputURL)
1.24 cvs 1383: {
1384: int len;
1.9 cvs 1385:
1.106 cvs 1386: len = strlen (inputURL);
1.24 cvs 1387: if (len <= MAX_PRINT_URL_LENGTH)
1.106 cvs 1388: strcpy (outputURL, inputURL);
1.24 cvs 1389: else
1390: /* make a truncated urlName on the status window */
1391: {
1.106 cvs 1392: strncpy (outputURL, inputURL, MAX_PRINT_URL_LENGTH / 2);
1393: outputURL [MAX_PRINT_URL_LENGTH / 2] = EOS;
1394: strcat (outputURL, "...");
1395: strcat (outputURL, &(inputURL[len - MAX_PRINT_URL_LENGTH / 2 ]));
1.24 cvs 1396: }
1.25 cvs 1397: }
1398:
1399:
1400: /*----------------------------------------------------------------------
1401: scan
1.47 cvs 1402: Scan a filename for its constituents
1.25 cvs 1403: -----------------------------------
1404:
1405: On entry,
1406: name points to a document name which may be incomplete.
1407: On exit,
1408: absolute or relative may be nonzero (but not both).
1409: host, fragment and access may be nonzero if they were specified.
1410: Any which are nonzero point to zero terminated strings.
1411: ----------------------------------------------------------------------*/
1.106 cvs 1412: static void scan (char *name, HTURI *parts)
1.25 cvs 1413: {
1.106 cvs 1414: char * p;
1415: char * after_access = name;
1.32 cvs 1416:
1.43 cvs 1417: memset (parts, '\0', sizeof (HTURI));
1.28 cvs 1418: /* Look for fragment identifier */
1.106 cvs 1419: if ((p = strchr(name, '#')) != NULL)
1.28 cvs 1420: {
1.106 cvs 1421: *p++ = '\0';
1.28 cvs 1422: parts->fragment = p;
1.25 cvs 1423: }
1424:
1.28 cvs 1425: for (p=name; *p; p++)
1426: {
1.106 cvs 1427: if (*p == URL_SEP || *p == DIR_SEP || *p == '#' || *p == '?')
1.28 cvs 1428: break;
1.106 cvs 1429: if (*p == ':')
1.28 cvs 1430: {
1431: *p = 0;
1432: parts->access = after_access; /* Scheme has been specified */
1433:
1434: /* The combination of gcc, the "-O" flag and the HP platform is
1435: unhealthy. The following three lines is a quick & dirty fix, but is
1436: not recommended. Rather, turn off "-O". */
1437:
1438: /* after_access = p;*/
1439: /* while (*after_access == 0)*/
1440: /* after_access++;*/
1441: after_access = p+1;
1.106 cvs 1442: if (!strcasecmp("URL", parts->access))
1.28 cvs 1443: /* Ignore IETF's URL: pre-prefix */
1444: parts->access = NULL;
1445: else
1.25 cvs 1446: break;
1447: }
1448: }
1449:
1450: p = after_access;
1.43 cvs 1451: if (*p == URL_SEP || *p == DIR_SEP)
1.28 cvs 1452: {
1.43 cvs 1453: if (p[1] == URL_SEP)
1.28 cvs 1454: {
1.25 cvs 1455: parts->host = p+2; /* host has been specified */
1.28 cvs 1456: *p = 0; /* Terminate access */
1457: /* look for end of host name if any */
1.106 cvs 1458: p = strchr (parts->host, URL_SEP);
1.28 cvs 1459: if (p)
1460: {
1.106 cvs 1461: *p = EOS; /* Terminate host */
1.25 cvs 1462: parts->absolute = p+1; /* Root has been found */
1.28 cvs 1463: }
1464: }
1465: else
1466: /* Root found but no host */
1467: parts->absolute = p+1;
1468: }
1469: else
1470: {
1.25 cvs 1471: parts->relative = (*after_access) ? after_access : 0; /* zero for "" */
1.28 cvs 1472: }
1.25 cvs 1473: }
1474:
1475:
1476: /*----------------------------------------------------------------------
1.28 cvs 1477: AmayaParseUrl: parse a Name relative to another name
1478:
1479: This returns those parts of a name which are given (and requested)
1480: substituting bits from the related name where necessary.
1.25 cvs 1481:
1.28 cvs 1482: On entry,
1.25 cvs 1483: aName A filename given
1484: relatedName A name relative to which aName is to be parsed. Give
1485: it an empty string if aName is absolute.
1486: wanted A mask for the bits which are wanted.
1487:
1.28 cvs 1488: On exit,
1.25 cvs 1489: returns A pointer to a malloc'd string which MUST BE FREED
1490: ----------------------------------------------------------------------*/
1.106 cvs 1491: char *AmayaParseUrl (const char *aName, char *relatedName, int wanted)
1492: {
1493: char *return_value;
1494: char result[MAX_LENGTH];
1495: char name[MAX_LENGTH];
1496: char rel[MAX_LENGTH];
1497: char *p, *access;
1.29 cvs 1498: HTURI given, related;
1499: int len;
1.106 cvs 1500: char used_sep;
1501: char *used_str;
1.32 cvs 1502:
1.106 cvs 1503: if (strchr (aName, DIR_SEP) || strchr (relatedName, DIR_SEP))
1.33 cvs 1504: {
1.106 cvs 1505: used_str = DIR_STR;
1506: used_sep = DIR_SEP;
1.33 cvs 1507: }
1.32 cvs 1508: else
1.33 cvs 1509: {
1.106 cvs 1510: used_str = URL_STR;
1511: used_sep = URL_SEP;
1.33 cvs 1512: }
1.32 cvs 1513:
1.29 cvs 1514: /* Make working copies of input strings to cut up: */
1515: return_value = NULL;
1516: result[0] = 0; /* Clear string */
1.106 cvs 1517: strcpy (name, aName);
1.29 cvs 1518: if (relatedName != NULL)
1.106 cvs 1519: strcpy (rel, relatedName);
1.29 cvs 1520: else
1.106 cvs 1521: relatedName[0] = EOS;
1.29 cvs 1522:
1523: scan (name, &given);
1524: scan (rel, &related);
1525: access = given.access ? given.access : related.access;
1526: if (wanted & AMAYA_PARSE_ACCESS)
1527: if (access)
1528: {
1.106 cvs 1529: strcat (result, access);
1.29 cvs 1530: if(wanted & AMAYA_PARSE_PUNCTUATION)
1.106 cvs 1531: strcat (result, ":");
1.29 cvs 1532: }
1533:
1534: if (given.access && related.access)
1535: /* If different, inherit nothing. */
1.106 cvs 1536: if (strcmp (given.access, related.access) != 0)
1.29 cvs 1537: {
1538: related.host = 0;
1539: related.absolute = 0;
1540: related.relative = 0;
1541: related.fragment = 0;
1542: }
1543:
1544: if (wanted & AMAYA_PARSE_HOST)
1545: if(given.host || related.host)
1546: {
1547: if(wanted & AMAYA_PARSE_PUNCTUATION)
1.106 cvs 1548: strcat (result, "//");
1549: strcat (result, given.host ? given.host : related.host);
1.29 cvs 1550: }
1551:
1552: if (given.host && related.host)
1553: /* If different hosts, inherit no path. */
1.106 cvs 1554: if (strcmp (given.host, related.host) != 0)
1.29 cvs 1555: {
1556: related.absolute = 0;
1557: related.relative = 0;
1558: related.fragment = 0;
1559: }
1560:
1561: if (wanted & AMAYA_PARSE_PATH)
1562: {
1563: if (given.absolute)
1564: {
1565: /* All is given */
1566: if (wanted & AMAYA_PARSE_PUNCTUATION)
1.106 cvs 1567: strcat (result, used_str);
1568: strcat (result, given.absolute);
1.25 cvs 1569: }
1.29 cvs 1570: else if (related.absolute)
1571: {
1572: /* Adopt path not name */
1.106 cvs 1573: strcat (result, used_str);
1574: strcat (result, related.absolute);
1.29 cvs 1575: if (given.relative)
1576: {
1577: /* Search part? */
1.106 cvs 1578: p = strchr (result, '?');
1.29 cvs 1579: if (!p)
1.106 cvs 1580: p=result+strlen(result)-1;
1.33 cvs 1581: for (; *p!=used_sep; p--); /* last / */
1.29 cvs 1582: /* Remove filename */
1583: p[1]=0;
1584: /* Add given one */
1.106 cvs 1585: strcat (result, given.relative);
1.25 cvs 1586: }
1587: }
1.29 cvs 1588: else if (given.relative)
1589: /* what we've got */
1.106 cvs 1590: strcat (result, given.relative);
1.29 cvs 1591: else if (related.relative)
1.106 cvs 1592: strcat (result, related.relative);
1.29 cvs 1593: else
1594: /* No inheritance */
1.106 cvs 1595: strcat (result, used_str);
1.25 cvs 1596: }
1.29 cvs 1597:
1598: if (wanted & AMAYA_PARSE_ANCHOR)
1599: if (given.fragment || related.fragment)
1600: {
1601: if (given.absolute && given.fragment)
1602: {
1603: /*Fixes for relURLs...*/
1604: if (wanted & AMAYA_PARSE_PUNCTUATION)
1.106 cvs 1605: strcat (result, "#");
1606: strcat (result, given.fragment);
1.29 cvs 1607: }
1608: else if (!(given.absolute) && !(given.fragment))
1.106 cvs 1609: strcat (result, "");
1.29 cvs 1610: else
1611: {
1.110 cvs 1612: if (wanted & AMAYA_PARSE_PUNCTUATION)
1.106 cvs 1613: strcat (result, "#");
1.110 cvs 1614: strcat (result, given.fragment ? given.fragment : related.fragment);
1.29 cvs 1615: }
1616: }
1.106 cvs 1617: len = strlen (result);
1618: if ((return_value = TtaGetMemory (len + 1)) != NULL)
1619: strcpy (return_value, result);
1.29 cvs 1620: return (return_value); /* exactly the right length */
1.25 cvs 1621: }
1622:
1623: /*----------------------------------------------------------------------
1624: HTCanon
1625: Canonicalizes the URL in the following manner starting from the host
1626: pointer:
1627:
1628: 1) The host name is converted to lowercase
1629: 2) Chop off port if `:80' (http), `:70' (gopher), or `:21' (ftp)
1630:
1631: Return: OK The position of the current path part of the URL
1632: which might be the old one or a new one.
1633:
1634: ----------------------------------------------------------------------*/
1.106 cvs 1635: static char *HTCanon (char **filename, char *host)
1636: {
1637: char *newname = NULL;
1638: char used_sep;
1639: char *path;
1640: char *strptr;
1641: char *port;
1642: char *access = host-3;
1643:
1644: if (*filename && strchr (*filename, URL_SEP))
1645: used_sep = URL_SEP;
1.33 cvs 1646: else
1.106 cvs 1647: used_sep = DIR_SEP;
1.32 cvs 1648:
1.110 cvs 1649: while (access > *filename && *(access - 1) != used_sep) /* Find access method */
1.25 cvs 1650: access--;
1.110 cvs 1651: if ((path = strchr (host, used_sep)) == NULL) /* Find path */
1.106 cvs 1652: path = host + strlen (host);
1653: if ((strptr = strchr (host, '@')) != NULL && strptr < path) /* UserId */
1.82 cvs 1654: host = strptr;
1.110 cvs 1655: if ((port = strchr (host, ':')) != NULL && port > path) /* Port number */
1.82 cvs 1656: port = NULL;
1.25 cvs 1657:
1658: strptr = host; /* Convert to lower-case */
1.82 cvs 1659: while (strptr < path)
1.33 cvs 1660: {
1.84 cvs 1661: *strptr = utolower (*strptr);
1.82 cvs 1662: strptr++;
1.33 cvs 1663: }
1.25 cvs 1664:
1665: /* Does the URL contain a full domain name? This also works for a
1666: numerical host name. The domain name is already made lower-case
1667: and without a trailing dot. */
1668: {
1.106 cvs 1669: char *dot = port ? port : path;
1670: if (dot > *filename && *--dot == '.')
1.33 cvs 1671: {
1.106 cvs 1672: char *orig = dot;
1673: char *dest = dot + 1;
1.82 cvs 1674: while ((*orig++ = *dest++));
1675: if (port) port--;
1.33 cvs 1676: path--;
1.25 cvs 1677: }
1678: }
1679: /* Chop off port if `:', `:80' (http), `:70' (gopher), or `:21' (ftp) */
1.33 cvs 1680: if (port)
1681: {
1.82 cvs 1682: if (!*(port+1) || *(port+1) == used_sep)
1.33 cvs 1683: {
1684: if (!newname)
1685: {
1.106 cvs 1686: char *orig = port;
1687: char *dest = port + 1;
1.82 cvs 1688: while ((*orig++ = *dest++));
1.33 cvs 1689: }
1690: }
1.106 cvs 1691: else if ((!strncmp (access, "http", 4) &&
1692: (*(port + 1) == '8' &&
1693: *(port+2) == '0' &&
1.82 cvs 1694: (*(port+3) == used_sep || !*(port + 3)))) ||
1.106 cvs 1695: (!strncmp (access, "gopher", 6) &&
1696: (*(port+1) == '7' &&
1697: *(port+2) == '0' &&
1.82 cvs 1698: (*(port+3) == used_sep || !*(port+3)))) ||
1.106 cvs 1699: (!strncmp (access, "ftp", 3) &&
1700: (*(port+1) == '2' &&
1701: *(port + 2) == '1' &&
1.82 cvs 1702: (*(port+3) == used_sep || !*(port+3))))) {
1.33 cvs 1703: if (!newname)
1704: {
1.106 cvs 1705: char *orig = port;
1706: char *dest = port + 3;
1.33 cvs 1707: while((*orig++ = *dest++));
1708: /* Update path position, Henry Minsky */
1709: path -= 3;
1.25 cvs 1710: }
1.33 cvs 1711: }
1712: else if (newname)
1.106 cvs 1713: strncat (newname, port, (int) (path - port));
1.33 cvs 1714: }
1.25 cvs 1715:
1.33 cvs 1716: if (newname)
1717: {
1.106 cvs 1718: char *newpath = newname + strlen (newname);
1719: strcat (newname, path);
1.25 cvs 1720: path = newpath;
1.28 cvs 1721: /* Free old copy */
1722: TtaFreeMemory(*filename);
1.25 cvs 1723: *filename = newname;
1.33 cvs 1724: }
1.25 cvs 1725: return path;
1726: }
1727:
1728:
1729: /*----------------------------------------------------------------------
1.29 cvs 1730: SimplifyUrl: simplify a URI
1.32 cvs 1731: A URI is allowed to contain the sequence xxx/../ which may be
1732: replaced by "" , and the sequence "/./" which may be replaced by DIR_STR.
1.28 cvs 1733: Simplification helps us recognize duplicate URIs.
1.25 cvs 1734:
1.28 cvs 1735: Thus, /etc/junk/../fred becomes /etc/fred
1736: /etc/junk/./fred becomes /etc/junk/fred
1.25 cvs 1737:
1.28 cvs 1738: but we should NOT change
1739: http://fred.xxx.edu/../..
1.25 cvs 1740:
1741: or ../../albert.html
1742:
1.28 cvs 1743: In order to avoid empty URLs the following URLs become:
1.25 cvs 1744:
1745: /fred/.. becomes /fred/..
1746: /fred/././.. becomes /fred/..
1747: /fred/.././junk/.././ becomes /fred/..
1748:
1.28 cvs 1749: If more than one set of `://' is found (several proxies in cascade) then
1750: only the part after the last `://' is simplified.
1.25 cvs 1751:
1.28 cvs 1752: Returns: A string which might be the old one or a new one.
1.25 cvs 1753: ----------------------------------------------------------------------*/
1.106 cvs 1754: void SimplifyUrl (char **url)
1755: {
1756: char *path;
1757: char *access;
1758: char *newptr;
1759: char *p;
1760: char *orig, *dest, *end;
1.28 cvs 1761:
1.106 cvs 1762: char used_sep;
1.77 cvs 1763: ThotBool ddot_simplify; /* used to desactivate the double dot simplifcation:
1764: something/../ simplification in relative URLs when they start with a ../ */
1.32 cvs 1765:
1766:
1.28 cvs 1767: if (!url || !*url)
1768: return;
1769:
1.106 cvs 1770: if (strchr (*url, URL_SEP))
1771: used_sep = URL_SEP;
1.32 cvs 1772: else
1.106 cvs 1773: used_sep = DIR_SEP;
1.32 cvs 1774:
1.77 cvs 1775: /* should we simplify double dot? */
1776: path = *url;
1.106 cvs 1777: if (*path == '.' && *(path + 1) == '.')
1.77 cvs 1778: ddot_simplify = FALSE;
1779: else
1780: ddot_simplify = TRUE;
1781:
1.28 cvs 1782: /* Find any scheme name */
1.106 cvs 1783: if ((path = strstr (*url, "://")) != NULL)
1.33 cvs 1784: {
1785: /* Find host name */
1.28 cvs 1786: access = *url;
1.84 cvs 1787: while (access < path && (*access = utolower (*access)))
1.82 cvs 1788: access++;
1.28 cvs 1789: path += 3;
1.106 cvs 1790: while ((newptr = strstr (path, "://")) != NULL)
1.82 cvs 1791: /* For proxies */
1.106 cvs 1792: path = newptr + 3;
1.82 cvs 1793: /* We have a host name */
1.84 cvs 1794: path = HTCanon (url, path);
1.25 cvs 1795: }
1.106 cvs 1796: else if ((path = strstr (*url, ":/")) != NULL)
1.28 cvs 1797: path += 2;
1798: else
1799: path = *url;
1.25 cvs 1800:
1.84 cvs 1801: if (*path == used_sep && *(path+1) == used_sep)
1.28 cvs 1802: /* Some URLs start //<foo> */
1803: path += 1;
1.94 cvs 1804: else if (IsFilePath (path))
1805: {
1806: /* doesn't need to do anything more */
1807: return;
1808: }
1.106 cvs 1809: else if (!strncmp (path, "news:", 5))
1.28 cvs 1810: {
1.106 cvs 1811: newptr = strchr (path+5, '@');
1.28 cvs 1812: if (!newptr)
1813: newptr = path + 5;
1814: while (*newptr)
1815: {
1816: /* Make group or host lower case */
1.84 cvs 1817: *newptr = utolower (*newptr);
1.28 cvs 1818: newptr++;
1.25 cvs 1819: }
1.28 cvs 1820: /* Doesn't need to do any more */
1821: return;
1.25 cvs 1822: }
1.28 cvs 1823:
1824: if ((p = path))
1825: {
1.106 cvs 1826: if (!((end = strchr (path, ';')) || (end = strchr (path, '?')) ||
1827: (end = strchr (path, '#'))))
1828: end = path + strlen (path);
1.28 cvs 1829:
1830: /* Parse string second time to simplify */
1831: p = path;
1832: while (p < end)
1833: {
1.110 cvs 1834: /* if we're pointing to a char, it's safe to reactivate the
1835: ../ convertion */
1.106 cvs 1836: if (!ddot_simplify && *p != '.' && *p != used_sep)
1.77 cvs 1837: ddot_simplify = TRUE;
1838:
1.33 cvs 1839: if (*p==used_sep)
1.28 cvs 1840: {
1.106 cvs 1841: if (p > *url && *(p+1) == '.' && (*(p+2) == used_sep || !*(p+2)))
1.28 cvs 1842: {
1843: orig = p + 1;
1.84 cvs 1844: dest = (*(p+2) != used_sep) ? p+2 : p+3;
1.52 cvs 1845: while ((*orig++ = *dest++)); /* Remove a used_sep and a dot*/
1.28 cvs 1846: end = orig - 1;
1847: }
1.106 cvs 1848: else if (ddot_simplify && *(p+1) == '.' && *(p+2) == '.'
1.77 cvs 1849: && (*(p+3) == used_sep || !*(p+3)))
1.28 cvs 1850: {
1851: newptr = p;
1.52 cvs 1852: while (newptr>path && *--newptr!=used_sep); /* prev used_sep */
1853: if (*newptr == used_sep)
1854: orig = newptr + 1;
1.28 cvs 1855: else
1.52 cvs 1856: orig = newptr;
1857:
1858: dest = (*(p+3) != used_sep) ? p+3 : p+4;
1859: while ((*orig++ = *dest++)); /* Remove /xxx/.. */
1860: end = orig-1;
1861: /* Start again with prev slash */
1862: p = newptr;
1.28 cvs 1863: }
1.33 cvs 1864: else if (*(p+1) == used_sep)
1.28 cvs 1865: {
1.33 cvs 1866: while (*(p+1) == used_sep)
1.28 cvs 1867: {
1868: orig = p;
1869: dest = p + 1;
1870: while ((*orig++ = *dest++)); /* Remove multiple /'s */
1871: end = orig-1;
1872: }
1873: }
1874: else
1.25 cvs 1875: p++;
1.28 cvs 1876: }
1877: else
1878: p++;
1.25 cvs 1879: }
1880: }
1.51 cvs 1881:
1882: /*
1883: ** Check for host/../.. kind of things
1884: */
1.106 cvs 1885: if (*path == used_sep && *(path+1) == '.' && *(path+2) == '.'
1.77 cvs 1886: && (!*(path+3) || *(path+3) == used_sep))
1.106 cvs 1887: *(path+1) = EOS;
1.51 cvs 1888:
1.28 cvs 1889: return;
1890: }
1891:
1892:
1893: /*----------------------------------------------------------------------
1.96 cvs 1894: NormalizeFile normalizes local names.
1.28 cvs 1895: Return TRUE if target and src differ.
1896: ----------------------------------------------------------------------*/
1.106 cvs 1897: ThotBool NormalizeFile (char *src, char *target, ConvertionType convertion)
1.28 cvs 1898: {
1.110 cvs 1899: #ifndef _WINDOWS
1.106 cvs 1900: char *s;
1.93 cvs 1901: int i;
1.110 cvs 1902: #endif /* !_WINDOWS */
1.82 cvs 1903: ThotBool change;
1.90 cvs 1904: int start_index; /* the first char that we'll copy */
1.28 cvs 1905:
1.54 cvs 1906: change = FALSE;
1.90 cvs 1907: start_index = 0;
1908:
1.106 cvs 1909: if (!src || src[0] == EOS)
1.96 cvs 1910: {
1.106 cvs 1911: target[0] = EOS;
1.96 cvs 1912: return FALSE;
1913: }
1.90 cvs 1914:
1915: /* @@ do I need file: or file:/ here? */
1.106 cvs 1916: if (strncmp (src, "file:", 5) == 0)
1.28 cvs 1917: {
1.90 cvs 1918: /* remove the prefix file: */
1919: start_index += 5;
1920:
1921: /* remove the localhost prefix */
1.106 cvs 1922: if (strncmp (&src[start_index], "//localhost/", 12) == 0)
1.94 cvs 1923: start_index += 11;
1924:
1925: /* remove the first two slashes in / / /path */
1926: while (src[start_index] &&
1.106 cvs 1927: src[start_index] == '/'
1928: && src[start_index + 1] == '/')
1.94 cvs 1929: start_index++;
1930:
1931: #ifdef _WINDOWS
1932: /* remove any extra slash before the drive name */
1.106 cvs 1933: if (src[start_index] == '/'
1934: &&src[start_index+2] == ':')
1.94 cvs 1935: start_index++;
1936: #endif /* _WINDOWS */
1.90 cvs 1937:
1.106 cvs 1938: if (src[start_index] == EOS)
1.90 cvs 1939: /* if there's nothing afterwards, add a DIR_STR */
1.106 cvs 1940: strcpy (target, DIR_STR);
1.90 cvs 1941: else
1.97 cvs 1942: /* as we're inside a file: URL, we'll apply all the convertions
1943: we know */
1944: CleanCopyFileURL (target, &src[start_index], AM_CONV_ALL);
1.96 cvs 1945:
1946: change = TRUE;
1947: }
1.97 cvs 1948: else if (convertion != AM_CONV_NONE)
1.96 cvs 1949: {
1950: /* we are following a "local" relative link, we do all the
1951: convertions except for the HOME_DIR ~ one */
1.97 cvs 1952: CleanCopyFileURL (target, src, convertion);
1.28 cvs 1953: }
1.90 cvs 1954: #ifndef _WINDOWS
1.106 cvs 1955: else if (src[0] == '~')
1.53 cvs 1956: {
1.96 cvs 1957: /* it must be a URL typed in a text input field */
1958: /* do the HOME_DIR ~ substitution */
1.82 cvs 1959: s = TtaGetEnvString ("HOME");
1.106 cvs 1960: strcpy (target, s);
1.90 cvs 1961: #if 0
1.96 cvs 1962: /* JK: invalidated this part of the code as it's simpler
1963: to add the DIR_SEP whenever we have something to add
1964: to the path rather than adding it systematically */
1.106 cvs 1965: if (src[1] != DIR_SEP)
1966: strcat (target, DIR_STR);
1.90 cvs 1967: #endif
1.106 cvs 1968: i = strlen (target);
1969: strcpy (&target[i], &src[1]);
1.54 cvs 1970: change = TRUE;
1.53 cvs 1971: }
1.90 cvs 1972: #endif /* _WINDOWS */
1.28 cvs 1973: else
1.96 cvs 1974: /* leave it as it is */
1.106 cvs 1975: strcpy (target, src);
1.96 cvs 1976:
1.28 cvs 1977: /* remove /../ and /./ */
1.29 cvs 1978: SimplifyUrl (&target);
1.54 cvs 1979: if (!change)
1.106 cvs 1980: change = strcmp (src, target);
1.28 cvs 1981: return (change);
1.25 cvs 1982: }
1983:
1.28 cvs 1984:
1.25 cvs 1985: /*----------------------------------------------------------------------
1.31 cvs 1986: MakeRelativeURL: make relative name
1.25 cvs 1987:
1.28 cvs 1988: This function creates and returns a string which gives an expression of
1989: one address as related to another. Where there is no relation, an absolute
1990: address is retured.
1.25 cvs 1991:
1.28 cvs 1992: On entry,
1.25 cvs 1993: Both names must be absolute, fully qualified names of nodes
1994: (no fragment bits)
1995:
1.28 cvs 1996: On exit,
1.25 cvs 1997: The return result points to a newly allocated name which, if
1998: parsed by AmayaParseUrl relative to relatedName, will yield aName.
1999: The caller is responsible for freeing the resulting name later.
2000: ----------------------------------------------------------------------*/
1.106 cvs 2001: char *MakeRelativeURL (char *aName, char *relatedName)
2002: {
2003: char *return_value;
2004: char result[MAX_LENGTH];
2005: char *p;
2006: char *q;
2007: char *after_access;
2008: char *last_slash = NULL;
2009: int slashes, levels, len;
1.110 cvs 2010: #ifdef _WINDOWS
1.44 cvs 2011: int ndx;
1.110 cvs 2012: #endif /* _WINDOWS */
1.44 cvs 2013:
1.29 cvs 2014: if (aName == NULL || relatedName == NULL)
2015: return (NULL);
2016:
2017: slashes = 0;
2018: after_access = NULL;
2019: p = aName;
2020: q = relatedName;
2021: for (; *p && (*p == *q); p++, q++)
1.27 cvs 2022: {
2023: /* Find extent of match */
1.106 cvs 2024: if (*p == ':')
1.29 cvs 2025: after_access = p + 1;
1.28 cvs 2026: if (*p == DIR_SEP)
1.27 cvs 2027: {
1.29 cvs 2028: /* memorize the last slash position and count them */
1.27 cvs 2029: last_slash = p;
2030: slashes++;
1.25 cvs 2031: }
2032: }
2033:
1.31 cvs 2034: /* q, p point to the first non-matching character or zero */
1.106 cvs 2035: if (*q == EOS)
1.31 cvs 2036: {
2037: /* New name is a subset of the related name */
2038: /* exactly the right length */
1.106 cvs 2039: len = strlen (p);
2040: if ((return_value = TtaGetMemory (len + 1)) != NULL)
2041: strcpy (return_value, p);
1.31 cvs 2042: }
2043: else if ((slashes < 2 && after_access == NULL)
2044: || (slashes < 3 && after_access != NULL))
2045: {
2046: /* Two names whitout common path */
2047: /* exactly the right length */
1.106 cvs 2048: len = strlen (aName);
2049: if ((return_value = TtaGetMemory (len + 1)) != NULL)
2050: strcpy (return_value, aName);
1.31 cvs 2051: }
2052: else
2053: {
2054: /* Some path in common */
1.106 cvs 2055: if (slashes == 3 && strncmp (aName, "http:", 5) == 0)
1.31 cvs 2056: /* just the same server */
1.106 cvs 2057: strcpy (result, last_slash);
1.31 cvs 2058: else
2059: {
2060: levels= 0;
1.106 cvs 2061: for (; *q && *q != '#' && *q != ';' && *q != '?'; q++)
1.31 cvs 2062: if (*q == DIR_SEP)
2063: levels++;
2064:
1.106 cvs 2065: result[0] = EOS;
1.31 cvs 2066: for (;levels; levels--)
1.106 cvs 2067: strcat (result, "../");
2068: strcat (result, last_slash+1);
1.31 cvs 2069: }
1.52 cvs 2070:
2071: if (!*result)
1.106 cvs 2072: strcat (result, "./");
1.52 cvs 2073:
1.31 cvs 2074: /* exactly the right length */
1.106 cvs 2075: len = strlen (result);
2076: if ((return_value = TtaGetMemory (len + 1)) != NULL)
2077: strcpy (return_value, result);
1.52 cvs 2078:
1.25 cvs 2079: }
1.110 cvs 2080: #ifdef _WINDOWS
1.106 cvs 2081: len = strlen (return_value);
1.44 cvs 2082: for (ndx = 0; ndx < len; ndx ++)
1.106 cvs 2083: if (return_value[ndx] == '\\')
2084: return_value[ndx] = '/' ;
1.110 cvs 2085: #endif /* _WINDOWS */
1.29 cvs 2086: return (return_value);
1.24 cvs 2087: }
1.35 cvs 2088:
1.104 kahan 2089: /*----------------------------------------------------------------------
2090: AM_GetFileSize
2091: Returns TRUE and the filesize in the 2nd parameter.
2092: Otherwise, in case of a system error, returns FALSE, with a
2093: filesize of 0L.
2094: ---------------------------------------------------------------------*/
1.106 cvs 2095: ThotBool AM_GetFileSize (char *filename, unsigned long *file_size)
1.104 kahan 2096: {
1.106 cvs 2097: ThotFileHandle handle = ThotFile_BADHANDLE;
2098: ThotFileInfo info;
1.35 cvs 2099:
1.104 kahan 2100: *file_size = 0L;
2101: if (!TtaFileExist (filename))
2102: return FALSE;
2103:
2104: handle = TtaFileOpen (filename, ThotFile_READWRITE);
2105: if (handle == ThotFile_BADHANDLE)
2106: /* ThotFile_BADHANDLE */
2107: return FALSE;
2108: if (TtaFileStat (handle, &info) == 0)
2109: /* bad stat */
2110: info.size = 0L;
2111: TtaFileClose (handle);
2112: *file_size = (unsigned long) info.size;
2113: return TRUE;
2114: }
Webmaster