Annotation of Amaya/amaya/AHTURLTools.c, revision 1.146
1.7 cvs 1: /*
2: *
1.133 vatton 3: * (c) COPYRIGHT MIT and INRIA, 1996-2002
1.7 cvs 4: * Please first read the full copyright statement in file COPYRIGHT.
5: *
6: */
1.9 cvs 7:
1.10 cvs 8: /*
9: * AHTURLTools.c: contains all the functions for testing, manipulating,
1.25 cvs 10: * and normalizing URLs. It also contains a local copy of the libWWW
11: * URL parsing functions.
1.10 cvs 12: *
13: * Authors: J. Kahan, I. Vatton
1.106 cvs 14: * R. Guetari: Windows.
1.10 cvs 15: *
16: */
1.7 cvs 17:
1.15 cvs 18: #define THOT_EXPORT extern
1.3 cvs 19: #include "amaya.h"
20:
1.8 cvs 21: #include "init_f.h"
22: #include "AHTURLTools_f.h"
1.100 kahan 23: #include "query_f.h"
1.8 cvs 24:
1.24 cvs 25: #define MAX_PRINT_URL_LENGTH 50
/* Components of a parsed URI, one string pointer per part.
   NOTE(review): the meaning of each member beyond its name is not visible
   at this declaration; confirm against the URL parsing functions. */
typedef struct _HTURI
{
  char *access;   /* Now known as "scheme" */
  char *host;     /* host part */
  char *absolute; /* presumably the absolute path part -- confirm */
  char *relative; /* presumably the relative path part -- confirm */
  char *fragment; /* fragment part */
} HTURI;
1.24 cvs 34:
1.28 cvs 35:
/*----------------------------------------------------------------------
  ConvertToLowerCase
  Converts a NUL-terminated string to lowercase, in place.
  A NULL string is ignored.
  ----------------------------------------------------------------------*/
void ConvertToLowerCase (char *string)
{
  char *p;

  if (!string)
    return;

  /* tolower() is only defined for EOF and for values representable as
     unsigned char, so a plain (possibly negative) char is converted
     before the call. */
  for (p = string; *p != '\0'; p++)
    *p = (char) tolower ((unsigned char) *p);
}
1.22 cvs 50:
/*----------------------------------------------------------------------
  EscapeChar
  Writes the two-digit lowercase hexadecimal code of the byte c into
  string, followed by a terminating NUL. string must have room for
  three chars.
  ----------------------------------------------------------------------*/
void EscapeChar (char *string, char c)
{
  static const char hex_digits[] = "0123456789abcdef";
  unsigned int      byte;

  /* work on the unsigned byte value so chars above 127 give 80..ff */
  byte = (unsigned char) c;
  string[0] = hex_digits[(byte >> 4) & 0xF];
  string[1] = hex_digits[byte & 0xF];
  string[2] = '\0';
}
62:
/*----------------------------------------------------------------------
  UnEscapeChar
  Returns the numeric value (0..15) of the hexadecimal digit c.
  The argument is not validated: any char that is neither '0'..'9' nor
  'A'..'F' is handled as if it were a lowercase hex digit.
  ----------------------------------------------------------------------*/
static char UnEscapeChar (char c)
{
  if (c >= '0' && c <= '9')
    return c - '0';
  if (c >= 'A' && c <= 'F')
    return c - 'A' + 10;
  /* accept small letters just in case */
  return c - 'a' + 10;
}
73:
74: /*----------------------------------------------------------------------
1.75 cvs 75: EscapeURL
76: Takes a URL and escapes all protected chars into
77: %xx sequences. Also, removes any leading white spaces
78: Returns either NULL or a new buffer, which must be freed by the caller
79: ----------------------------------------------------------------------*/
1.106 cvs 80: char *EscapeURL (const char *url)
81: {
82: char *buffer;
83: int buffer_len;
84: int buffer_free_mem;
85: char *ptr;
86: int new_chars;
1.75 cvs 87: void *status;
88:
89: if (url && *url)
90: {
1.106 cvs 91: buffer_free_mem = strlen (url) + 20;
92: buffer = TtaGetMemory (buffer_free_mem + 1);
1.107 kahan 93: ptr = (char *) url;
1.75 cvs 94: buffer_len = 0;
95:
96: while (*ptr)
97: {
98: switch (*ptr)
99: {
100: /* put here below all the chars that need to
101: be escaped into %xx */
1.81 cvs 102: case 0x20: /* space */
1.143 vatton 103: /*case 0x26:*/ /* & */
1.140 kahan 104: case 0x27: /* antislash */
1.75 cvs 105: new_chars = 3;
106: break;
107:
108: default:
1.122 kahan 109: if ((unsigned char )*ptr > 127)
110: new_chars = 3;
111: else
112: new_chars = 1;
1.75 cvs 113: break;
114: }
115:
116: /* see if we need extra room in the buffer */
117: if (new_chars > buffer_free_mem)
118: {
1.76 cvs 119: buffer_free_mem = 20;
1.106 cvs 120: status = TtaRealloc (buffer, sizeof (char)
1.75 cvs 121: * (buffer_len + buffer_free_mem + 1));
122: if (status)
1.114 cvs 123: buffer = (char *) status;
1.106 cvs 124: else
125: {
126: /* @@ maybe we should do some other behavior here, like
127: freeing the buffer and return a void thing */
128: buffer[buffer_len] = EOS;
129: break;
130: }
1.75 cvs 131: }
132: /* escape the char */
1.140 kahan 133: if (new_chars == 3)
134: {
1.106 cvs 135: buffer[buffer_len] = '%';
1.75 cvs 136: EscapeChar (&buffer[buffer_len+1], *ptr);
137: }
1.140 kahan 138: else
139: buffer[buffer_len] = *ptr;
140:
141: /* update the status */
142: buffer_len += new_chars;
143: buffer_free_mem -= new_chars;
144: /* examine the next char */
145: ptr++;
146: }
147: buffer[buffer_len] = EOS;
148: }
149: else
150: buffer = NULL;
151:
152: return (buffer);
153: }
154:
155: /*----------------------------------------------------------------------
156: EscapeXML
157: Takes a string and escapes all protected chars into entity
158: sequences.
159: Returns either NULL or a new buffer, which must be freed by the caller
160: ----------------------------------------------------------------------*/
161: char *EscapeXML (const char *string)
162: {
163: char *buffer;
164: int buffer_len;
165: int buffer_free_mem;
166: char *ptr;
167: char *entity = NULL;
168: int new_chars;
169: void *status;
170:
171: if (string && *string)
172: {
173: buffer_free_mem = strlen (string) + 20;
174: buffer = TtaGetMemory (buffer_free_mem + 1);
175: ptr = (char *) string;
176: buffer_len = 0;
177:
178: while (*ptr)
179: {
180: switch (*ptr)
181: {
182: case 0x26: /* & */
183: entity = "&";
1.141 kahan 184: new_chars = sizeof (entity) - 1;
185: break;
1.140 kahan 186:
1.141 kahan 187: case '<': /* < */
1.140 kahan 188: entity = "<";
1.141 kahan 189: new_chars = sizeof (entity) - 1;
1.140 kahan 190: break;
191:
1.141 kahan 192: case '>': /* > */
1.140 kahan 193: entity = ">";
1.141 kahan 194: new_chars = sizeof (entity) - 1;
195: break;
196:
197: case '"': /* "e; */
198: entity = ""e;";
199: new_chars = sizeof (entity) - 1;
1.140 kahan 200: break;
201:
202: default:
203: new_chars = 1;
204: break;
205: }
206:
207: /* see if we need extra room in the buffer */
208: if (new_chars > buffer_free_mem)
209: {
210: buffer_free_mem = 20;
211: status = TtaRealloc (buffer, sizeof (char)
212: * (buffer_len + buffer_free_mem + 1));
213: if (status)
214: buffer = (char *) status;
215: else
216: {
217: /* @@ maybe we should do some other behavior here, like
218: freeing the buffer and return a void thing */
219: buffer[buffer_len] = EOS;
220: break;
221: }
222: }
223: /* escape the char */
224: if (entity)
225: {
226: sprintf (&buffer[buffer_len], "%s", entity);
227: entity = NULL;
228: }
1.75 cvs 229: else
230: buffer[buffer_len] = *ptr;
231:
232: /* update the status */
233: buffer_len += new_chars;
234: buffer_free_mem -= new_chars;
235: /* examine the next char */
236: ptr++;
237: }
1.106 cvs 238: buffer[buffer_len] = EOS;
1.75 cvs 239: }
1.76 cvs 240: else
241: buffer = NULL;
242:
1.75 cvs 243: return (buffer);
1.122 kahan 244: }
245:
1.75 cvs 246:
247: /*----------------------------------------------------------------------
1.11 cvs 248: ExplodeURL
1.8 cvs 249: ----------------------------------------------------------------------*/
1.106 cvs 250: void ExplodeURL (char *url, char **proto, char **host, char **dir,
251: char **file)
1.8 cvs 252: {
1.33 cvs 253: char *curr, *temp;
254: char used_sep;
1.32 cvs 255:
1.33 cvs 256: if (url && strchr (url, URL_SEP))
257: used_sep = URL_SEP;
258: else
259: used_sep = DIR_SEP;
1.8 cvs 260:
261: if ((url == NULL) || (proto == NULL) || (host == NULL) ||
262: (dir == NULL) || (file == NULL))
263: return;
264:
265: /* initialize every pointer */
266: *proto = *host = *dir = *file = NULL;
267:
268: /* skip any leading space */
269: while ((*url == SPACE) || (*url == TAB))
270: url++;
1.9 cvs 271: curr = url;
272: if (*curr == 0)
1.8 cvs 273: goto finished;
274:
275: /* go to the end of the URL */
1.68 cvs 276: while ((*curr != EOS) && (*curr != SPACE) && (*curr != BSPACE) &&
277: (*curr != __CR__) && (*curr != EOL))
1.9 cvs 278: curr++;
1.8 cvs 279:
280: /* mark the end of the chain */
1.9 cvs 281: *curr = EOS;
282: curr--;
283: if (curr <= url)
1.8 cvs 284: goto finished;
285:
286: /* search the next DIR_SEP indicating the beginning of the file name */
287: do
1.11 cvs 288: curr--;
1.33 cvs 289: while ((curr >= url) && (*curr != used_sep));
1.11 cvs 290:
1.9 cvs 291: if (curr < url)
1.8 cvs 292: goto finished;
1.9 cvs 293: *file = curr + 1;
1.8 cvs 294:
295: /* mark the end of the dir */
1.9 cvs 296: *curr = EOS;
297: curr--;
298: if (curr < url)
1.8 cvs 299: goto finished;
300:
1.29 cvs 301: /* search for the DIR_STR indicating the host name start */
1.33 cvs 302: while ((curr > url) && ((*curr != used_sep) || (*(curr + 1) != used_sep)))
1.9 cvs 303: curr--;
1.8 cvs 304:
305: /* if we found it, separate the host name from the directory */
1.102 kahan 306: if ((*curr == used_sep) && (*(curr + 1) == used_sep))
1.8 cvs 307: {
1.9 cvs 308: *host = temp = curr + 2;
1.33 cvs 309: while ((*temp != 0) && (*temp != used_sep))
1.8 cvs 310: temp++;
1.33 cvs 311: if (*temp == used_sep)
1.8 cvs 312: {
313: *temp = EOS;
314: *dir = temp + 1;
315: }
316: }
317: else
1.11 cvs 318: *dir = curr;
319:
1.9 cvs 320: if (curr <= url)
1.8 cvs 321: goto finished;
322:
323: /* mark the end of the proto */
1.9 cvs 324: *curr = EOS;
325: curr--;
326: if (curr < url)
1.8 cvs 327: goto finished;
328:
1.106 cvs 329: if (*curr == ':')
1.8 cvs 330: {
1.9 cvs 331: *curr = EOS;
332: curr--;
1.8 cvs 333: }
334: else
335: goto finished;
1.11 cvs 336:
1.9 cvs 337: if (curr < url)
1.8 cvs 338: goto finished;
1.9 cvs 339: while ((curr > url) && (isalpha (*curr)))
340: curr--;
341: *proto = curr;
1.8 cvs 342:
343: finished:;
344:
345: #ifdef AMAYA_DEBUG
346: fprintf (stderr, "ExplodeURL(%s)\n\t", url);
347: if (*proto)
348: fprintf (stderr, "proto : %s, ", *proto);
349: if (*host)
350: fprintf (stderr, "host : %s, ", *host);
351: if (*dir)
352: fprintf (stderr, "dir : %s, ", *dir);
353: if (*file)
354: fprintf (stderr, "file : %s ", *file);
355: fprintf (stderr, "\n");
356: #endif
357:
358: }
1.3 cvs 359:
1.116 kahan 360: /*----------------------------------------------------------------------
361: PicTypeToMime
362: Converts a Thot PicType into the equivalent MIME type. If no convertion
363: is possible, it returns NULL.
364: ----------------------------------------------------------------------*/
365: char *PicTypeToMIME (PicType contentType)
366: {
367: char *mime_type;
368:
369: switch (contentType)
370: {
371: case xbm_type:
372: mime_type ="image/x-xbitmap";
373: break;
374: case eps_type:
375: mime_type ="application/postscript";
376: break;
377: case xpm_type:
378: mime_type ="image/x-xpicmap";
379: break;
380: case gif_type:
381: mime_type ="image/gif";
382: break;
383: case jpeg_type:
384: mime_type ="image/jpeg";
385: break;
386: case png_type:
387: mime_type ="image/png";
388: break;
389: case svg_type:
1.138 kahan 390: mime_type =AM_SVG_MIME_TYPE;
1.116 kahan 391: break;
392: case unknown_type:
393: default:
394: mime_type = NULL;
395: }
396:
397: return mime_type;
398: }
1.61 cvs 399:
400: /*----------------------------------------------------------------------
1.117 kahan 401: ImageElement
402: Returns the element (image parameter) and URL (url parameter) of an
403: image in a docImage document. The user must free the memory associated
1.120 kahan 404: with the url parameter if the function is succesful.
405: If the url parameter is NULL, we won't initialize it.
1.117 kahan 406: Returns TRUE if succesful, FALSE otherwise.
407: ----------------------------------------------------------------------*/
408: ThotBool ImageElement (Document doc, char **url, Element *image)
409: {
410: Element el, imgEl;
411: Attribute attr, srcAttr;
412: AttributeType attrType;
413: int length;
414: char *value;
415:
416: if (DocumentTypes[doc] != docImage)
417: return FALSE;
418:
419: /* find the value of the src attribute */
420: attrType.AttrSSchema = TtaGetSSchema ("HTML", doc);
421: attrType.AttrTypeNum = HTML_ATTR_SRC;
422: el = TtaGetRootElement (doc);
423: TtaSearchAttribute (attrType, SearchInTree, el, &imgEl, &srcAttr);
424:
425: if (!imgEl)
426: return FALSE;
427: *image = imgEl;
428:
1.120 kahan 429: if (url)
430: {
431: attr = TtaGetAttribute (imgEl, attrType);
432: length = TtaGetTextAttributeLength (srcAttr) + 1;
433: value = TtaGetMemory (length);
434: TtaGiveTextAttributeValue (srcAttr, value, &length);
435: *url = value;
436: }
1.117 kahan 437: return TRUE;
438: }
439:
440: /*----------------------------------------------------------------------
441: DocImageMimeType
442: Returns the MIME type of a docImage document.
443: ----------------------------------------------------------------------*/
444: char *DocImageMimeType (Document doc)
445: {
446: char *mime_type;
447: LoadedImageDesc *pImage;
448: PicType type;
449: Element image;
450:
451: if (DocumentTypes[doc] != docImage)
452: return NULL;
453:
454: mime_type = NULL;
455: if (!IsHTTPPath (DocumentURLs[doc]))
456: {
457: /* it is a local image */
1.120 kahan 458: if (ImageElement (doc, NULL, &image))
1.117 kahan 459: {
460: type = TtaGetPictureType (image);
461: mime_type = PicTypeToMIME (type);
462: }
463: }
464: else
465: {
466: /* find the value of the src attribute */
467: pImage = ImageURLs;
468: while (pImage != NULL)
469: {
470: if (pImage->document == doc)
471: {
472: if (pImage->content_type)
473: mime_type = pImage->content_type;
474: else if (pImage->elImage && pImage->elImage->currentElement)
475: {
476: type = TtaGetPictureType (pImage->elImage->currentElement);
477: mime_type = PicTypeToMIME (type);
478: }
479: break;
480: }
481: }
482: }
483: return (mime_type);
484: }
485:
1.4 cvs 486: /*----------------------------------------------------------------------
1.9 cvs 487: IsHTMLName
488: returns TRUE if path points to an HTML resource.
1.4 cvs 489: ----------------------------------------------------------------------*/
1.109 cvs 490: ThotBool IsHTMLName (const char *path)
1.106 cvs 491: {
1.136 cvs 492: char temppath[MAX_LENGTH];
493: char suffix[MAX_LENGTH];
494: char nsuffix[MAX_LENGTH];
495: int i;
1.5 cvs 496:
1.101 cvs 497: if (!path)
498: return (FALSE);
1.5 cvs 499:
1.106 cvs 500: strcpy (temppath, path);
1.124 vatton 501: TtaExtractSuffix (temppath, suffix);
1.101 cvs 502: i = 0;
1.106 cvs 503: while (suffix[i] != EOS)
1.101 cvs 504: {
505: /* Normalize the suffix */
506: i = 0;
1.106 cvs 507: while (suffix[i] != EOS && i < MAX_LENGTH -1)
1.101 cvs 508: {
1.123 vatton 509: nsuffix[i] = tolower (suffix[i]);
1.101 cvs 510: i++;
511: }
1.106 cvs 512: nsuffix[i] = EOS;
513: if (!strcmp (nsuffix, "html") ||
514: !strcmp (nsuffix, "htm") ||
515: !strcmp (nsuffix, "shtml") ||
516: !strcmp (nsuffix, "jsp") ||
517: !strcmp (nsuffix, "xht") ||
518: !strcmp (nsuffix, "xhtm") ||
1.144 cvs 519: !strcmp (nsuffix, "lhtml") ||
1.106 cvs 520: !strcmp (nsuffix, "xhtml"))
1.101 cvs 521: return (TRUE);
1.106 cvs 522: else if (!strcmp (nsuffix, "gz"))
1.101 cvs 523: {
524: /* take into account compressed files */
1.124 vatton 525: TtaExtractSuffix (temppath, suffix);
1.101 cvs 526: /* Normalize the suffix */
527: i = 0;
1.106 cvs 528: while (suffix[i] != EOS && i < MAX_LENGTH -1)
1.101 cvs 529: {
1.123 vatton 530: nsuffix[i] = tolower (suffix[i]);
1.101 cvs 531: i++;
532: }
1.106 cvs 533: nsuffix[i] = EOS;
534: if (!strcmp (nsuffix, "html") ||
535: !strcmp (nsuffix, "htm") ||
536: !strcmp (nsuffix, "shtml") ||
537: !strcmp (nsuffix, "jsp") ||
538: !strcmp (nsuffix, "xht") ||
539: !strcmp (nsuffix, "xhtm") ||
1.144 cvs 540: !strcmp (nsuffix, "lhtml") ||
1.106 cvs 541: !strcmp (nsuffix, "xhtml"))
1.101 cvs 542: return (TRUE);
543: else
544: return (FALSE);
545: }
546: else
547: /* check if there is another suffix */
1.124 vatton 548: TtaExtractSuffix (temppath, suffix);
1.101 cvs 549: }
1.88 cvs 550: return (FALSE);
1.3 cvs 551: }
552:
1.4 cvs 553: /*----------------------------------------------------------------------
1.136 cvs 554: IsMathMLName
555: returns TRUE if path points to an MathML resource.
1.56 cvs 556: ----------------------------------------------------------------------*/
1.136 cvs 557: ThotBool IsMathMLName (const char *path)
1.56 cvs 558: {
1.136 cvs 559: char temppath[MAX_LENGTH];
560: char suffix[MAX_LENGTH];
1.56 cvs 561:
562: if (!path)
563: return (FALSE);
564:
1.106 cvs 565: strcpy (temppath, path);
1.124 vatton 566: TtaExtractSuffix (temppath, suffix);
1.56 cvs 567:
1.136 cvs 568: if (!strcasecmp (suffix, "mml"))
1.56 cvs 569: return (TRUE);
1.106 cvs 570: else if (!strcmp (suffix, "gz"))
1.56 cvs 571: {
572: /* take into account compressed files */
1.124 vatton 573: TtaExtractSuffix (temppath, suffix);
1.136 cvs 574: if (!strcasecmp (suffix, "mml"))
1.60 cvs 575: return (TRUE);
576: else
577: return (FALSE);
578: }
579: else
580: return (FALSE);
581: }
582:
583: /*----------------------------------------------------------------------
1.136 cvs 584: IsSVGName
585: returns TRUE if path points to an SVG resource.
1.133 vatton 586: ----------------------------------------------------------------------*/
1.136 cvs 587: ThotBool IsSVGName (const char *path)
1.133 vatton 588: {
1.136 cvs 589: char temppath[MAX_LENGTH];
590: char suffix[MAX_LENGTH];
1.133 vatton 591:
592: if (!path)
593: return (FALSE);
594:
595: strcpy (temppath, path);
596: TtaExtractSuffix (temppath, suffix);
597:
1.136 cvs 598: if (!strcasecmp (suffix, "svg"))
1.133 vatton 599: return (TRUE);
600: else if (!strcmp (suffix, "gz"))
601: {
602: /* take into account compressed files */
603: TtaExtractSuffix (temppath, suffix);
1.136 cvs 604: if (!strcasecmp (suffix, "svg"))
1.133 vatton 605: return (TRUE);
606: else
607: return (FALSE);
608: }
609: else
610: return (FALSE);
611: }
612:
613: /*----------------------------------------------------------------------
1.136 cvs 614: IsXMLName
615: returns TRUE if path points to an XML resource.
1.103 cvs 616: ----------------------------------------------------------------------*/
1.136 cvs 617: ThotBool IsXMLName (const char *path)
1.103 cvs 618: {
1.136 cvs 619: char temppath[MAX_LENGTH];
620: char suffix[MAX_LENGTH];
1.103 cvs 621:
622: if (!path)
623: return (FALSE);
624:
1.106 cvs 625: strcpy (temppath, path);
1.124 vatton 626: TtaExtractSuffix (temppath, suffix);
1.103 cvs 627:
1.136 cvs 628: if (!strcasecmp (suffix, "xml") ||
629: !strcasecmp (suffix, "xht") ||
630: !strcmp (suffix, "xhtm") ||
1.145 kahan 631: !strcmp (suffix, "xhtml") ||
632: !strcmp (suffix, "smi"))
1.103 cvs 633: return (TRUE);
1.106 cvs 634: else if (!strcmp (suffix, "gz"))
1.103 cvs 635: {
636: /* take into account compressed files */
1.124 vatton 637: TtaExtractSuffix (temppath, suffix);
1.136 cvs 638: if (!strcasecmp (suffix, "xml") ||
639: !strcasecmp (suffix, "xht") ||
640: !strcmp (suffix, "xhtm") ||
1.145 kahan 641: !strcmp (suffix, "xhtml") ||
642: !strcmp (suffix, "smi"))
1.103 cvs 643: return (TRUE);
644: else
645: return (FALSE);
646: }
647: else
648: return (FALSE);
649: }
650:
651: /*----------------------------------------------------------------------
1.136 cvs 652: IsUndisplayedName
653: returns TRUE if path points to an undisplayed resource.
1.103 cvs 654: ----------------------------------------------------------------------*/
1.136 cvs 655: ThotBool IsUndisplayedName (const char *path)
1.103 cvs 656: {
1.106 cvs 657: char temppath[MAX_LENGTH];
658: char suffix[MAX_LENGTH];
1.103 cvs 659:
660: if (!path)
661: return (FALSE);
662:
1.106 cvs 663: strcpy (temppath, path);
1.124 vatton 664: TtaExtractSuffix (temppath, suffix);
1.103 cvs 665:
1.136 cvs 666: if (!strcasecmp (suffix, "exe") ||
667: !strcasecmp (suffix, "zip") ||
668: !strcasecmp (suffix, "ppt") ||
669: !strcasecmp (suffix, "pdf") ||
670: !strcasecmp (suffix, "ps") ||
671: !strcasecmp (suffix, "eps") ||
672: !strcasecmp (suffix, "tar") ||
673: !strcasecmp (suffix, "tgz") ||
674: !strcasecmp (suffix, "ddl") ||
675: !strcasecmp (suffix, "o"))
1.103 cvs 676: return (TRUE);
1.106 cvs 677: else if (!strcmp (suffix, "gz"))
1.103 cvs 678: {
679: /* take into account compressed files */
1.124 vatton 680: TtaExtractSuffix (temppath, suffix);
1.136 cvs 681: if (!strcasecmp (suffix, "exe") ||
682: !strcasecmp (suffix, "zip") ||
683: !strcasecmp (suffix, "ppt") ||
684: !strcasecmp (suffix, "pdf") ||
685: !strcasecmp (suffix, "ps") ||
686: !strcasecmp (suffix, "eps") ||
687: !strcasecmp (suffix, "tar") ||
688: !strcasecmp (suffix, "ddl") ||
689: !strcasecmp (suffix, "o"))
1.103 cvs 690: return (TRUE);
691: else
692: return (FALSE);
693: }
694: else
695: return (FALSE);
696: }
697:
698: /*----------------------------------------------------------------------
1.60 cvs 699: IsCSSName
700: returns TRUE if path points to an XML resource.
701: ----------------------------------------------------------------------*/
1.111 cvs 702: ThotBool IsCSSName (const char *path)
1.60 cvs 703: {
1.106 cvs 704: char temppath[MAX_LENGTH];
705: char suffix[MAX_LENGTH];
1.60 cvs 706:
707: if (!path)
708: return (FALSE);
709:
1.106 cvs 710: strcpy (temppath, path);
1.124 vatton 711: TtaExtractSuffix (temppath, suffix);
1.60 cvs 712:
1.106 cvs 713: if (!strcasecmp (suffix, "css"))
1.60 cvs 714: return (TRUE);
1.106 cvs 715: else if (!strcmp (suffix, "gz"))
1.60 cvs 716: {
717: /* take into account compressed files */
1.124 vatton 718: TtaExtractSuffix (temppath, suffix);
1.106 cvs 719: if (!strcasecmp (suffix, "css"))
1.56 cvs 720: return (TRUE);
721: else
722: return (FALSE);
723: }
724: else
725: return (FALSE);
726: }
727:
728: /*----------------------------------------------------------------------
1.9 cvs 729: IsImageName
730: returns TRUE if path points to an image resource.
1.4 cvs 731: ----------------------------------------------------------------------*/
1.111 cvs 732: ThotBool IsImageName (const char *path)
1.106 cvs 733: {
734: char temppath[MAX_LENGTH];
735: char suffix[MAX_LENGTH];
736: char nsuffix[MAX_LENGTH];
1.5 cvs 737: int i;
738:
739: if (!path)
1.13 cvs 740: return (FALSE);
1.5 cvs 741:
1.106 cvs 742: strcpy (temppath, path);
1.124 vatton 743: TtaExtractSuffix (temppath, suffix);
1.5 cvs 744:
745: /* Normalize the suffix */
746: i = 0;
1.106 cvs 747: while (suffix[i] != EOS && i < MAX_LENGTH -1)
1.13 cvs 748: {
1.123 vatton 749: nsuffix[i] = tolower (suffix[i]);
1.13 cvs 750: i++;
751: }
1.106 cvs 752: nsuffix[i] = EOS;
753: if ((!strcmp (nsuffix, "gif")) || (!strcmp (nsuffix, "xbm")) ||
754: (!strcmp (nsuffix, "xpm")) || (!strcmp (nsuffix, "jpg")) ||
755: (!strcmp (nsuffix, "png")) || (!strcmp (nsuffix, "au")))
1.39 cvs 756: return (TRUE);
757: return (FALSE);
1.3 cvs 758: }
759:
1.4 cvs 760: /*----------------------------------------------------------------------
1.58 cvs 761: IsImageType
762: returns TRUE if type points to an image resource.
763: ----------------------------------------------------------------------*/
1.111 cvs 764: ThotBool IsImageType (const char *type)
1.58 cvs 765: {
1.106 cvs 766: char temptype[MAX_LENGTH];
1.58 cvs 767: int i;
768:
769: if (!type)
770: return (FALSE);
771:
1.106 cvs 772: strcpy (temptype, type);
1.58 cvs 773: /* Normalize the type */
774: i = 0;
1.106 cvs 775: while (temptype[i] != EOS)
1.58 cvs 776: {
777: temptype[i] = tolower (temptype[i]);
778: i++;
779: }
1.111 cvs 780: if (!strcmp (temptype, "gif") || !strcmp (temptype, "x-xbitmap") ||
781: !strcmp (temptype, "x-xpixmap") || !strcmp (temptype, "jpeg") ||
782: !strcmp (temptype, "png"))
1.58 cvs 783: return (TRUE);
784: return (FALSE);
785: }
786:
787: /*----------------------------------------------------------------------
1.9 cvs 788: IsTextName
1.4 cvs 789: ----------------------------------------------------------------------*/
1.111 cvs 790: ThotBool IsTextName (const char *path)
1.106 cvs 791: {
792: char temppath[MAX_LENGTH];
793: char suffix[MAX_LENGTH];
794: char nsuffix[MAX_LENGTH];
1.5 cvs 795: int i;
796:
797: if (!path)
1.13 cvs 798: return (FALSE);
1.5 cvs 799:
1.106 cvs 800: strcpy (temppath, path);
1.124 vatton 801: TtaExtractSuffix (temppath, suffix);
1.5 cvs 802:
803: /* Normalize the suffix */
804: i = 0;
1.106 cvs 805: while (suffix[i] != EOS && i < MAX_LENGTH -1)
1.5 cvs 806: {
1.25 cvs 807: nsuffix[i] = tolower (suffix[i]);
1.5 cvs 808: i++;
809: }
1.106 cvs 810: nsuffix[i] = EOS;
1.5 cvs 811:
1.111 cvs 812: if (!strcmp (nsuffix, "txt") || !strcmp (nsuffix, "dtd"))
1.13 cvs 813: return (TRUE);
1.106 cvs 814: else if (!strcmp (nsuffix, "gz"))
1.13 cvs 815: {
1.39 cvs 816: /* take into account compressed files */
1.124 vatton 817: TtaExtractSuffix (temppath, suffix);
1.13 cvs 818: /* Normalize the suffix */
819: i = 0;
1.106 cvs 820: while (suffix[i] != EOS && i < MAX_LENGTH -1)
1.13 cvs 821: {
1.25 cvs 822: nsuffix[i] = tolower (suffix[i]);
1.13 cvs 823: i++;
824: }
1.106 cvs 825: nsuffix[i] = EOS;
1.111 cvs 826: if (!strcmp (nsuffix, "txt") || !strcmp (nsuffix, "dtd"))
1.13 cvs 827: return (TRUE);
828: else
829: return (FALSE);
830: }
831: else
832: return (FALSE);
1.3 cvs 833: }
834:
1.4 cvs 835: /*----------------------------------------------------------------------
1.9 cvs 836: IsHTTPPath
837: returns TRUE if path is in fact an http URL.
1.4 cvs 838: ----------------------------------------------------------------------*/
1.112 cvs 839: ThotBool IsHTTPPath (const char *path)
1.3 cvs 840: {
1.5 cvs 841: if (!path)
842: return FALSE;
1.3 cvs 843:
1.106 cvs 844: if ((!strncmp (path, "http:", 5) != 0)
845: || (AHTFTPURL_flag () && !strncmp (path, "ftp:", 4))
846: || !strncmp (path, "internal:", 9))
1.58 cvs 847: return TRUE;
848: return FALSE;
1.3 cvs 849: }
850:
1.4 cvs 851: /*----------------------------------------------------------------------
1.9 cvs 852: IsWithParameters
853: returns TRUE if url has a concatenated query string.
1.4 cvs 854: ----------------------------------------------------------------------*/
1.133 vatton 855: ThotBool IsWithParameters (const char *url)
1.3 cvs 856: {
1.5 cvs 857: int i;
1.3 cvs 858:
1.9 cvs 859: if ((!url) || (url[0] == EOS))
1.5 cvs 860: return FALSE;
1.3 cvs 861:
1.9 cvs 862: i = strlen (url) - 1;
863: while (i > 0 && url[i--] != '?')
1.5 cvs 864: if (i < 0)
865: return FALSE;
1.3 cvs 866:
1.5 cvs 867: /* There is a parameter */
868: return TRUE;
1.3 cvs 869: }
870:
1.4 cvs 871: /*----------------------------------------------------------------------
1.9 cvs 872: IsW3Path
873: returns TRUE if path is in fact a URL.
1.4 cvs 874: ----------------------------------------------------------------------*/
1.133 vatton 875: ThotBool IsW3Path (const char *path)
1.106 cvs 876: {
877: if (strncmp (path, "http:", 5) &&
878: strncmp (path, "ftp:", 4) &&
879: strncmp (path, "telnet:", 7) &&
880: strncmp (path, "wais:", 5) &&
881: strncmp (path, "news:", 5) &&
882: strncmp (path, "gopher:", 7) &&
883: strncmp (path, "mailto:", 7) &&
1.132 cheyroul 884: strncmp (path, "archie:", 7) &&
885: strncmp (path, "https:", 6))
1.72 cvs 886: return FALSE;
887: return TRUE;
1.3 cvs 888: }
889:
1.4 cvs 890: /*----------------------------------------------------------------------
1.90 cvs 891: IsFilePath
892: returns TRUE if path is in fact a URL.
893: ----------------------------------------------------------------------*/
1.133 vatton 894: ThotBool IsFilePath (const char *path)
1.90 cvs 895: {
1.106 cvs 896: if (strncmp (path, "file:", 5))
1.90 cvs 897: return FALSE;
898: return TRUE;
899: }
900:
901: /*----------------------------------------------------------------------
1.9 cvs 902: IsValidProtocol
903: returns true if the url protocol is supported by Amaya.
1.4 cvs 904: ----------------------------------------------------------------------*/
1.133 vatton 905: ThotBool IsValidProtocol (const char *url)
1.106 cvs 906: {
907: if (!strncmp (url, "http:", 5)
908: || !strncmp (url, "internal:", 9)
909: || (AHTFTPURL_flag () && !strncmp (url, "ftp:", 4)))
1.22 cvs 910: /* experimental */
1.24 cvs 911: /*** || !strncmp (path, "news:", 5)***/
1.8 cvs 912: return (TRUE);
1.5 cvs 913: else
1.8 cvs 914: return (FALSE);
1.3 cvs 915: }
916:
1.31 cvs 917:
918: /*----------------------------------------------------------------------
919: GetBaseURL
920: normalizes orgName according to a base associated with doc, and
921: following the standard URL format rules.
922: The function returns the base used to solve relative URL and SRC:
923: - the base of the document,
924: - or the document path (without document name).
925: ----------------------------------------------------------------------*/
1.106 cvs 926: char *GetBaseURL (Document doc)
1.31 cvs 927: {
928: Element el;
929: ElementType elType;
930: AttributeType attrType;
931: Attribute attr;
1.106 cvs 932: char *ptr, *basename;
1.31 cvs 933: int length;
934:
1.113 cvs 935: if (doc == 0 || !DocumentURLs[doc])
1.110 cvs 936: return NULL;
1.106 cvs 937: basename = TtaGetMemory (MAX_LENGTH);
938: strncpy (basename, DocumentURLs[doc], MAX_LENGTH-1);
939: basename[MAX_LENGTH-1] = EOS;
1.31 cvs 940: length = MAX_LENGTH -1;
1.113 cvs 941: /* is it a HTML document ? */
1.31 cvs 942: elType.ElSSchema = TtaGetDocumentSSchema (doc);
1.113 cvs 943: if (!strcmp (TtaGetSSchemaName (elType.ElSSchema), "HTML"))
944: /* it's a HTML document */
1.65 cvs 945: {
1.113 cvs 946: /* get the document element */
947: el = TtaGetMainRoot (doc);
948: /* search the BASE element */
949: elType.ElTypeNum = HTML_EL_HEAD;
950: el = TtaSearchTypedElement (elType, SearchForward, el);
951: if (el)
952: /* there is a HEAD element */
953: {
954: /* look for a BASE element within the HEAD */
955: elType.ElTypeNum = HTML_EL_BASE;
956: el = TtaSearchTypedElement (elType, SearchInTree, el);
957: }
958: if (el)
1.31 cvs 959: {
1.113 cvs 960: /* The document has a BASE element. Get the HREF attribute of the
961: BASE element */
962: attrType.AttrSSchema = elType.ElSSchema;
963: attrType.AttrTypeNum = HTML_ATTR_HREF_;
964: attr = TtaGetAttribute (el, attrType);
965: if (attr)
1.31 cvs 966: {
1.113 cvs 967: /* Use the base path of the document */
968: TtaGiveTextAttributeValue (attr, basename, &length);
969: /* base and orgName have to be separated by a DIR_SEP */
970: length--;
971: if (basename[0] != EOS && basename[length] != URL_SEP &&
972: basename[length] != DIR_SEP)
973: /* verify if the base has the form "protocol://server:port" */
1.31 cvs 974: {
1.113 cvs 975: ptr = AmayaParseUrl (basename, "", AMAYA_PARSE_ACCESS |
976: AMAYA_PARSE_HOST |
977: AMAYA_PARSE_PUNCTUATION);
978: if (ptr && !strcmp (ptr, basename))
979: {
980: /* it has this form, complete it by adding a URL_STR */
981: if (strchr (basename, DIR_SEP))
982: strcat (basename, DIR_STR);
983: else
984: strcat (basename, URL_STR);
985: length++;
986: }
987: if (ptr)
988: TtaFreeMemory (ptr);
1.31 cvs 989: }
990: }
991: }
1.113 cvs 992: }
993:
1.31 cvs 994: /* Remove anything after the last DIR_SEP char. If no such char is found,
995: * then search for the first ":" char, hoping that what's before that is a
996: * protocol. If found, end the string there. If neither char is found,
997: * then discard the whole base element.
998: */
1.106 cvs 999: length = strlen (basename) - 1;
1.31 cvs 1000: /* search for the last DIR_SEP char */
1.106 cvs 1001: while (length >= 0 && basename[length] != URL_SEP && basename[length] != DIR_SEP)
1.31 cvs 1002: length--;
1003: if (length >= 0)
1004: /* found the last DIR_SEP char, end the string there */
1.106 cvs 1005: basename[length + 1] = EOS;
1.31 cvs 1006: else
1007: /* search for the first PATH_STR char */
1008: {
1.106 cvs 1009: for (length = 0; basename[length] != ':' &&
1010: basename[length] != EOS; length ++);
1011: if (basename[length] == ':')
1.31 cvs 1012: /* found, so end the string there */
1.106 cvs 1013: basename[length + 1] = EOS;
1.31 cvs 1014: else
1015: /* not found, discard the base */
1.106 cvs 1016: basename[0] = EOS;
1.31 cvs 1017: }
1018: return (basename);
1019: }
1020:
1021:
1.4 cvs 1022: /*----------------------------------------------------------------------
1.40 cvs 1023: GetLocalPath
1024: Allocate and return the local document path associated to the url
1025: ----------------------------------------------------------------------*/
1.106 cvs 1026: char *GetLocalPath (Document doc, char *url)
1027: {
1028: char *ptr;
1029: char *n;
1030: char *documentname;
1031: char url_sep;
1.83 cvs 1032: int len;
1.67 cvs 1033: ThotBool noFile;
1.40 cvs 1034:
1035: if (url != NULL)
1036: {
1037: /* check whether the file name exists */
1.106 cvs 1038: len = strlen (url) - 1;
1.71 cvs 1039: if (IsW3Path (url))
1.106 cvs 1040: url_sep = '/';
1.41 cvs 1041: else
1.106 cvs 1042: url_sep = DIR_SEP;
1.41 cvs 1043: noFile = (url[len] == url_sep);
1.40 cvs 1044: if (noFile)
1.106 cvs 1045: url[len] = EOS;
1046: ptr = TtaGetMemory (MAX_LENGTH);
1047: documentname = TtaGetMemory (MAX_LENGTH);
1.78 cvs 1048: TtaExtractName (url, ptr, documentname);
1.106 cvs 1049: sprintf (ptr, "%s%s%d%s", TempFileDirectory, DIR_STR, doc, DIR_STR);
1.40 cvs 1050: if (!TtaCheckDirectory (ptr))
1051: /* directory did not exist */
1.72 cvs 1052: TtaMakeDirectory (ptr);
1.47 cvs 1053:
1054: /* don't include the query string within document name */
1.106 cvs 1055: n = strrchr (documentname, '?');
1.47 cvs 1056: if (n != NULL)
1.106 cvs 1057: *n = EOS;
1.46 cvs 1058: /* don't include ':' within document name */
1.106 cvs 1059: n = strchr (documentname, ':');
1.46 cvs 1060: if (n != NULL)
1.106 cvs 1061: *n = EOS;
1.69 cvs 1062: /* if after all this operations document name
1063: is empty, let's use noname.html instead */
1.106 cvs 1064: if (documentname[0] == EOS)
1065: strcat (ptr, "noname.html");
1.69 cvs 1066: else
1.106 cvs 1067: strcat (ptr, documentname);
1.40 cvs 1068: TtaFreeMemory (documentname);
1069: /* restore the url */
1070: if (noFile)
1.41 cvs 1071: url[len] = url_sep;
1.40 cvs 1072: return (ptr);
1073: }
1074: else
1075: return (NULL);
1076: }
1077:
/*----------------------------------------------------------------------
  ExtractTarget extracts the target name from a document name.
  aName   in/out: the document name; when it contains a '#' that is not
          its first character, it is cut at the last '#'.
  target  out: receives the text following the last '#', or an empty
          string when there is no '#', when '#' is the first character
          of aName, or when nothing follows it.
  Nothing is done if aName or target is NULL.
  ----------------------------------------------------------------------*/
void ExtractTarget (char *aName, char *target)
{
  int   lg, i;
  char *hash;

  if (!target || !aName)
    /* bad target */
    return;

  target[0] = EOS;
  lg = (int) strlen (aName);
  if (lg == 0)
    return;

  /* i is the index of the character following the last '#', 0 if none.
     It is computed as a pointer difference: casting the pointers
     themselves to int truncates them on 64-bit platforms. */
  hash = strrchr (aName, '#');
  i = hash ? (int) (hash + 1 - aName) : 0;
  if (i > 1)
    {
      /* cut the name at the '#' */
      aName[i - 1] = EOS;
      if (i != lg)
        /* something follows the '#' */
        strcpy (target, hash + 1);
    }
}
1114:
/*----------------------------------------------------------------------
  RemoveNewLines (text)
  Strips, in place, the leading space characters of text and every
  '\n' character found in it. Other characters (including inner and
  trailing spaces) are kept.
  Returns TRUE if text was modified, FALSE otherwise (also when text
  is NULL).
  ----------------------------------------------------------------------*/
ThotBool RemoveNewLines (char *text)
{
  ThotBool  change = FALSE;
  char     *src;
  char     *dest;

  if (text == NULL)
    return (FALSE);

  src = text;
  dest = text;

  /* remove any preceding whitespace */
  while (*src == ' ')
    {
      src++;
      change = TRUE;
    }

  for (; *src; src++)
    {
      if (*src == '\n')
        /* don't copy the newline */
        change = TRUE;
      else
        *dest++ = *src;
    }
  /* terminate the compacted string */
  *dest = *src;

  return (change);
}
1156:
1157: /*----------------------------------------------------------------------
1158: CleanCopyFileURL
1159: Copies a file url from a src string to destination string.
1.97 cvs 1160: convertion says which type of convertion (none, %xx, URL_SEP into DIR_SEP
1161: we want to do).
1.90 cvs 1162: ----------------------------------------------------------------------*/
1.106 cvs 1163: static void CleanCopyFileURL (char *dest, char *src,
1164: ConvertionType convertion)
1.90 cvs 1165: {
1166: while (*src)
1.89 cvs 1167: {
1.90 cvs 1168: switch (*src)
1.89 cvs 1169: {
1170: #ifdef _WINDOWS
1.106 cvs 1171: case URL_SEP:
1.96 cvs 1172: /* make DIR_SEP transformation */
1.97 cvs 1173: if (convertion & AM_CONV_URL_SEP)
1.106 cvs 1174: *dest = DIR_SEP;
1.96 cvs 1175: else
1176: *dest = *src;
1.90 cvs 1177: dest++;
1.96 cvs 1178: src++;
1.90 cvs 1179: break;
1.89 cvs 1180: #endif /* _WINDOWS */
1.96 cvs 1181:
1.106 cvs 1182: case '%':
1.97 cvs 1183: if (convertion & AM_CONV_PERCENT)
1.96 cvs 1184: {
1.97 cvs 1185: /* (code adapted from libwww's HTUnEscape function */
1.96 cvs 1186: src++;
1.106 cvs 1187: if (*src != EOS)
1.97 cvs 1188: {
1189: *dest = UnEscapeChar (*src) * 16;
1190: src++;
1191: }
1.106 cvs 1192: if (*src != EOS)
1.97 cvs 1193: {
1194: *dest = *dest + UnEscapeChar (*src);
1195: src++;
1196: }
1197: dest++;
1.96 cvs 1198: }
1.97 cvs 1199: else
1.96 cvs 1200: {
1.97 cvs 1201: *dest = *src;
1202: dest++;
1.96 cvs 1203: src++;
1204: }
1205: break;
1206:
1.90 cvs 1207: default:
1208: *dest = *src;
1.89 cvs 1209: dest++;
1.96 cvs 1210: src++;
1.90 cvs 1211: break;
1.89 cvs 1212: }
1213: }
1.90 cvs 1214: /* copy the EOS char */
1215: *dest = *src;
1.73 cvs 1216: }
1.40 cvs 1217:
1218: /*----------------------------------------------------------------------
1.9 cvs 1219: NormalizeURL
1220: normalizes orgName according to a base associated with doc, and
1221: following the standard URL format rules.
1.113 cvs 1222: if doc is < 0, use as a base the URL of the document that contains
1223: (or contained) the elements that are now in the copy/cut buffer.
1.53 cvs 1224: if doc is 0 and otherPath not NULL, normalizes orgName according to this
1225: other path.
1.9 cvs 1226: The function returns the new complete and normalized URL
1.12 cvs 1227: or file name path (newName) and the name of the document (docName).
1.9 cvs 1228: N.B. If the function can't find out what's the docName, it assigns
1229: the name "noname.html".
1.4 cvs 1230: ----------------------------------------------------------------------*/
1.106 cvs 1231: void NormalizeURL (char *orgName, Document doc, char *newName,
1232: char *docName, char *otherPath)
1233: {
1234: char *basename;
1235: char tempOrgName[MAX_LENGTH];
1236: char *ptr;
1237: char used_sep;
1.84 cvs 1238: int length;
1239: ThotBool check;
1.5 cvs 1240:
1.110 cvs 1241: #ifdef _WINDOWS
1.44 cvs 1242: int ndx;
1.110 cvs 1243: #endif /* _WINDOWS */
1.44 cvs 1244:
1.5 cvs 1245: if (!newName || !docName)
1246: return;
1.18 cvs 1247:
1.113 cvs 1248: if (doc < 0)
1249: basename = TtaStrdup (SavedDocumentURL);
1250: else if (doc > 0)
1.53 cvs 1251: basename = GetBaseURL (doc);
1252: else if (otherPath != NULL)
1.108 cvs 1253: basename = TtaStrdup (otherPath);
1.32 cvs 1254: else
1.53 cvs 1255: basename = NULL;
1.32 cvs 1256:
1.18 cvs 1257: /*
1.31 cvs 1258: * Clean orgName
1259: * Make sure we have a complete orgName, without any leading or trailing
1260: * white spaces, or trailinbg new lines
1261: */
1.5 cvs 1262: ptr = orgName;
1.18 cvs 1263: /* skip leading white space and new line characters */
1.106 cvs 1264: while ((*ptr == SPACE || *ptr == EOL) && *ptr++ != EOS);
1265: strncpy (tempOrgName, ptr, MAX_LENGTH -1);
1266: tempOrgName[MAX_LENGTH -1] = EOS;
1.18 cvs 1267: /*
1.31 cvs 1268: * Make orgName a complete URL
1269: * If the URL does not include a protocol, then try to calculate
1270: * one using the doc's base element (if it exists),
1271: */
1.106 cvs 1272: if (tempOrgName[0] == EOS)
1.53 cvs 1273: {
1.106 cvs 1274: newName[0] = EOS;
1275: docName[0] = EOS;
1.53 cvs 1276: TtaFreeMemory (basename);
1277: return;
1278: }
1.49 cvs 1279:
1280: /* clean trailing white space */
1.106 cvs 1281: length = strlen (tempOrgName) - 1;
1282: while (tempOrgName[length] == SPACE && tempOrgName[length] == EOL)
1.53 cvs 1283: {
1.106 cvs 1284: tempOrgName[length] = EOS;
1.53 cvs 1285: length--;
1286: }
1.50 cvs 1287:
1.55 cvs 1288: /* remove extra dot (which dot???) */
1289: /* ugly, but faster than a strcmp */
1.106 cvs 1290: if (tempOrgName[length] == '.'
1291: && (length == 0 || tempOrgName[length-1] != '.'))
1292: tempOrgName[length] = EOS;
1.50 cvs 1293:
1.94 cvs 1294: if (IsW3Path (tempOrgName))
1.53 cvs 1295: {
1296: /* the name is complete, go to the Sixth Step */
1.106 cvs 1297: strcpy (newName, tempOrgName);
1.53 cvs 1298: SimplifyUrl (&newName);
1299: /* verify if the URL has the form "protocol://server:port" */
1.110 cvs 1300: ptr = AmayaParseUrl (newName, "", AMAYA_PARSE_ACCESS |
1301: AMAYA_PARSE_HOST |
1302: AMAYA_PARSE_PUNCTUATION);
1303: if (ptr && !strcmp (ptr, newName))
1304: /* it has this form, we complete it by adding a DIR_STR */
1.106 cvs 1305: strcat (newName, URL_STR);
1.49 cvs 1306:
1.53 cvs 1307: if (ptr)
1.50 cvs 1308: TtaFreeMemory (ptr);
1.53 cvs 1309: }
1.113 cvs 1310: else if (basename == NULL)
1.53 cvs 1311: /* the name is complete, go to the Sixth Step */
1.106 cvs 1312: strcpy (newName, tempOrgName);
1.53 cvs 1313: else
1314: {
1.31 cvs 1315: /* Calculate the absolute URL, using the base or document URL */
1.110 cvs 1316: #ifdef _WINDOWS
1.53 cvs 1317: if (!IsW3Path (basename))
1318: {
1.106 cvs 1319: length = strlen (tempOrgName);
1.53 cvs 1320: for (ndx = 0; ndx < length; ndx++)
1.106 cvs 1321: if (tempOrgName [ndx] == '/')
1322: tempOrgName [ndx] = '\\';
1.53 cvs 1323: }
1.110 cvs 1324: #endif /* _WINDOWS */
1.25 cvs 1325: ptr = AmayaParseUrl (tempOrgName, basename, AMAYA_PARSE_ALL);
1.53 cvs 1326: if (ptr)
1327: {
1328: SimplifyUrl (&ptr);
1.106 cvs 1329: strcpy (newName, ptr);
1.53 cvs 1330: TtaFreeMemory (ptr);
1331: }
1332: else
1.106 cvs 1333: newName[0] = EOS;
1.53 cvs 1334: }
1.36 cvs 1335:
1336: TtaFreeMemory (basename);
1.18 cvs 1337: /*
1.31 cvs 1338: * Prepare the docname that will refer to this ressource in the
1339: * .amaya directory. If the new URL finishes on DIR_SEP, then use
1340: * noname.html as a default ressource name
1.18 cvs 1341: */
1.106 cvs 1342: if (newName[0] != EOS)
1.53 cvs 1343: {
1.106 cvs 1344: length = strlen (newName) - 1;
1345: if (newName[length] == URL_SEP || newName[length] == DIR_SEP)
1.53 cvs 1346: {
1347: used_sep = newName[length];
1348: check = TRUE;
1349: while (check)
1350: {
1.50 cvs 1351: length--;
1352: while (length >= 0 && newName[length] != used_sep)
1.53 cvs 1353: length--;
1.106 cvs 1354: if (!strncmp (&newName[length+1], "..", 2))
1.53 cvs 1355: {
1.106 cvs 1356: newName[length+1] = EOS;
1.53 cvs 1357: /* remove also previous directory */
1358: length--;
1359: while (length >= 0 && newName[length] != used_sep)
1360: length--;
1.106 cvs 1361: if (strncmp (&newName[length+1], "//", 2))
1.131 cheyroul 1362: /* don't remove server name */
1.106 cvs 1363: newName[length+1] = EOS;
1.53 cvs 1364: }
1.106 cvs 1365: else if (!strncmp (&newName[length+1], ".", 1))
1366: newName[length+1] = EOS;
1.50 cvs 1367: else
1.53 cvs 1368: check = FALSE;
1369: }
1370: /* docname was not comprised inside the URL, so let's */
1371: /* assign the default ressource name */
1.106 cvs 1372: strcpy (docName, "noname.html");
1.53 cvs 1373: }
1374: else
1375: { /* docname is comprised inside the URL */
1.110 cvs 1376: while (length >= 0 && newName[length] != URL_SEP &&
1377: newName[length] != DIR_SEP)
1.53 cvs 1378: length--;
1379: if (length < 0)
1.106 cvs 1380: strcpy (docName, newName);
1.53 cvs 1381: else
1.106 cvs 1382: strcpy (docName, &newName[length+1]);
1.53 cvs 1383: }
1384: }
1385: else
1.106 cvs 1386: docName[0] = EOS;
1.18 cvs 1387: }
1.3 cvs 1388:
1.4 cvs 1389: /*----------------------------------------------------------------------
1.9 cvs 1390: IsSameHost
1.4 cvs 1391: ----------------------------------------------------------------------*/
1.106 cvs 1392: ThotBool IsSameHost (const char *url1, const char *url2)
1.3 cvs 1393: {
1.106 cvs 1394: char *basename_ptr1, *basename_ptr2;
1395: ThotBool result;
1.3 cvs 1396:
1.106 cvs 1397: basename_ptr1 = AmayaParseUrl (url1, "",
1398: AMAYA_PARSE_ACCESS | AMAYA_PARSE_HOST | AMAYA_PARSE_PUNCTUATION);
1399: basename_ptr2 = AmayaParseUrl (url2, "",
1400: AMAYA_PARSE_ACCESS | AMAYA_PARSE_HOST | AMAYA_PARSE_PUNCTUATION);
1.3 cvs 1401:
1.106 cvs 1402: if (strcmp (basename_ptr1, basename_ptr2))
1403: result = FALSE;
1404: else
1405: result = TRUE;
1406: TtaFreeMemory (basename_ptr1);
1407: TtaFreeMemory (basename_ptr2);
1408: return (result);
1.3 cvs 1409: }
1410:
1411:
1.4 cvs 1412: /*----------------------------------------------------------------------
1.22 cvs 1413: HasKnownFileSuffix
1414: returns TRUE if path points to a file ending with a suffix.
1415: ----------------------------------------------------------------------*/
1.106 cvs 1416: ThotBool HasKnownFileSuffix (const char *path)
1417: {
1418: char *root;
1419: char temppath[MAX_LENGTH];
1420: char suffix[MAX_LENGTH];
1.22 cvs 1421:
1.106 cvs 1422: if (!path || path[0] == EOS || path[strlen(path)] == DIR_SEP)
1.22 cvs 1423: return (FALSE);
1424:
1.106 cvs 1425: root = AmayaParseUrl(path, "", AMAYA_PARSE_PATH | AMAYA_PARSE_PUNCTUATION);
1.22 cvs 1426:
1427: if (root)
1428: {
1.106 cvs 1429: strcpy (temppath, root);
1.25 cvs 1430: TtaFreeMemory (root);
1.22 cvs 1431: /* Get the suffix */
1.124 vatton 1432: TtaExtractSuffix (temppath, suffix);
1.22 cvs 1433:
1.106 cvs 1434: if( suffix[0] == EOS)
1.22 cvs 1435: /* no suffix */
1436: return (FALSE);
1437:
1438: /* Normalize the suffix */
1439: ConvertToLowerCase (suffix);
1440:
1.106 cvs 1441: if (!strcmp (suffix, "gz"))
1.22 cvs 1442: /* skip the compressed suffix */
1443: {
1.124 vatton 1444: TtaExtractSuffix (temppath, suffix);
1.106 cvs 1445: if(suffix[0] == EOS)
1.22 cvs 1446: /* no suffix */
1447: return (FALSE);
1448: /* Normalize the suffix */
1449: ConvertToLowerCase (suffix);
1450: }
1451:
1.106 cvs 1452: if (strcmp (suffix, "gif") &&
1453: strcmp (suffix, "xbm") &&
1454: strcmp (suffix, "xpm") &&
1455: strcmp (suffix, "jpg") &&
1456: strcmp (suffix, "pdf") &&
1457: strcmp (suffix, "png") &&
1458: strcmp (suffix, "tgz") &&
1459: strcmp (suffix, "xpg") &&
1460: strcmp (suffix, "xpd") &&
1461: strcmp (suffix, "ps") &&
1462: strcmp (suffix, "au") &&
1463: strcmp (suffix, "html") &&
1464: strcmp (suffix, "htm") &&
1465: strcmp (suffix, "shtml") &&
1466: strcmp (suffix, "xht") &&
1467: strcmp (suffix, "xhtm") &&
1468: strcmp (suffix, "xhtml") &&
1469: strcmp (suffix, "txt") &&
1470: strcmp (suffix, "css") &&
1471: strcmp (suffix, "eps"))
1.22 cvs 1472: return (FALSE);
1473: else
1474: return (TRUE);
1475: }
1476: else
1477: return (FALSE);
1478: }
1479:
1480:
/*----------------------------------------------------------------------
  ChopURL
  Gives back in outputURL a printable version of inputURL that always
  fits, with its terminating EOS, in MAX_PRINT_URL_LENGTH bytes.
  If inputURL is shorter than MAX_PRINT_URL_LENGTH chars, it is copied
  unchanged. Otherwise outputURL receives the beginning of inputURL,
  "...", and the end of inputURL, the same number of characters being
  kept on both sides.
  N.B.: outputURL must point to a memory block of at least
  MAX_PRINT_URL_LENGTH chars.
  Nothing is done if outputURL is NULL; a NULL inputURL gives an empty
  outputURL.
  ----------------------------------------------------------------------*/
void ChopURL (char *outputURL, const char *inputURL)
{
  /* number of characters kept on each side of the "..." marker, so
     that both sides + marker + EOS fit in MAX_PRINT_URL_LENGTH bytes */
  const int  side = (MAX_PRINT_URL_LENGTH - 1 - 3) / 2;
  int        len;

  if (!outputURL)
    return;
  if (!inputURL)
    {
      outputURL[0] = EOS;
      return;
    }

  len = (int) strlen (inputURL);
  if (len < MAX_PRINT_URL_LENGTH)
    strcpy (outputURL, inputURL);
  else
    /* make a truncated urlName on the status window */
    {
      memcpy (outputURL, inputURL, side);
      memcpy (outputURL + side, "...", 3);
      /* copies the last side chars and the EOS */
      strcpy (outputURL + side + 3, inputURL + len - side);
    }
}
1508:
1509:
1510: /*----------------------------------------------------------------------
1511: scan
1.47 cvs 1512: Scan a filename for its constituents
1.25 cvs 1513: -----------------------------------
1514:
1515: On entry,
1516: name points to a document name which may be incomplete.
1517: On exit,
1518: absolute or relative may be nonzero (but not both).
1519: host, fragment and access may be nonzero if they were specified.
1520: Any which are nonzero point to zero terminated strings.
1521: ----------------------------------------------------------------------*/
1.106 cvs 1522: static void scan (char *name, HTURI *parts)
1.25 cvs 1523: {
1.106 cvs 1524: char * p;
1525: char * after_access = name;
1.32 cvs 1526:
1.43 cvs 1527: memset (parts, '\0', sizeof (HTURI));
1.28 cvs 1528: /* Look for fragment identifier */
1.106 cvs 1529: if ((p = strchr(name, '#')) != NULL)
1.28 cvs 1530: {
1.106 cvs 1531: *p++ = '\0';
1.28 cvs 1532: parts->fragment = p;
1.25 cvs 1533: }
1534:
1.28 cvs 1535: for (p=name; *p; p++)
1536: {
1.106 cvs 1537: if (*p == URL_SEP || *p == DIR_SEP || *p == '#' || *p == '?')
1.28 cvs 1538: break;
1.106 cvs 1539: if (*p == ':')
1.28 cvs 1540: {
1541: *p = 0;
1542: parts->access = after_access; /* Scheme has been specified */
1543:
1544: /* The combination of gcc, the "-O" flag and the HP platform is
1545: unhealthy. The following three lines is a quick & dirty fix, but is
1546: not recommended. Rather, turn off "-O". */
1547:
1548: /* after_access = p;*/
1549: /* while (*after_access == 0)*/
1550: /* after_access++;*/
1551: after_access = p+1;
1.106 cvs 1552: if (!strcasecmp("URL", parts->access))
1.28 cvs 1553: /* Ignore IETF's URL: pre-prefix */
1554: parts->access = NULL;
1555: else
1.25 cvs 1556: break;
1557: }
1558: }
1559:
1560: p = after_access;
1.43 cvs 1561: if (*p == URL_SEP || *p == DIR_SEP)
1.28 cvs 1562: {
1.43 cvs 1563: if (p[1] == URL_SEP)
1.28 cvs 1564: {
1.25 cvs 1565: parts->host = p+2; /* host has been specified */
1.28 cvs 1566: *p = 0; /* Terminate access */
1567: /* look for end of host name if any */
1.106 cvs 1568: p = strchr (parts->host, URL_SEP);
1.28 cvs 1569: if (p)
1570: {
1.106 cvs 1571: *p = EOS; /* Terminate host */
1.25 cvs 1572: parts->absolute = p+1; /* Root has been found */
1.28 cvs 1573: }
1574: }
1575: else
1576: /* Root found but no host */
1577: parts->absolute = p+1;
1578: }
1579: else
1580: {
1.25 cvs 1581: parts->relative = (*after_access) ? after_access : 0; /* zero for "" */
1.28 cvs 1582: }
1.25 cvs 1583: }
1584:
1585:
1586: /*----------------------------------------------------------------------
1.28 cvs 1587: AmayaParseUrl: parse a Name relative to another name
1588:
1589: This returns those parts of a name which are given (and requested)
1590: substituting bits from the related name where necessary.
1.25 cvs 1591:
1.28 cvs 1592: On entry,
1.25 cvs 1593: aName A filename given
1594: relatedName A name relative to which aName is to be parsed. Give
1595: it an empty string if aName is absolute.
1596: wanted A mask for the bits which are wanted.
1597:
1.28 cvs 1598: On exit,
1.25 cvs 1599: returns A pointer to a malloc'd string which MUST BE FREED
1600: ----------------------------------------------------------------------*/
1.106 cvs 1601: char *AmayaParseUrl (const char *aName, char *relatedName, int wanted)
1602: {
1603: char *return_value;
1604: char result[MAX_LENGTH];
1605: char name[MAX_LENGTH];
1606: char rel[MAX_LENGTH];
1607: char *p, *access;
1.29 cvs 1608: HTURI given, related;
1609: int len;
1.106 cvs 1610: char used_sep;
1611: char *used_str;
1.32 cvs 1612:
1.106 cvs 1613: if (strchr (aName, DIR_SEP) || strchr (relatedName, DIR_SEP))
1.33 cvs 1614: {
1.106 cvs 1615: used_str = DIR_STR;
1616: used_sep = DIR_SEP;
1.33 cvs 1617: }
1.32 cvs 1618: else
1.33 cvs 1619: {
1.106 cvs 1620: used_str = URL_STR;
1621: used_sep = URL_SEP;
1.33 cvs 1622: }
1.32 cvs 1623:
1.29 cvs 1624: /* Make working copies of input strings to cut up: */
1625: return_value = NULL;
1626: result[0] = 0; /* Clear string */
1.106 cvs 1627: strcpy (name, aName);
1.29 cvs 1628: if (relatedName != NULL)
1.106 cvs 1629: strcpy (rel, relatedName);
1.29 cvs 1630: else
1.106 cvs 1631: relatedName[0] = EOS;
1.29 cvs 1632:
1633: scan (name, &given);
1634: scan (rel, &related);
1635: access = given.access ? given.access : related.access;
1636: if (wanted & AMAYA_PARSE_ACCESS)
1637: if (access)
1638: {
1.106 cvs 1639: strcat (result, access);
1.29 cvs 1640: if(wanted & AMAYA_PARSE_PUNCTUATION)
1.106 cvs 1641: strcat (result, ":");
1.29 cvs 1642: }
1643:
1644: if (given.access && related.access)
1645: /* If different, inherit nothing. */
1.106 cvs 1646: if (strcmp (given.access, related.access) != 0)
1.29 cvs 1647: {
1648: related.host = 0;
1649: related.absolute = 0;
1650: related.relative = 0;
1651: related.fragment = 0;
1652: }
1653:
1654: if (wanted & AMAYA_PARSE_HOST)
1655: if(given.host || related.host)
1656: {
1657: if(wanted & AMAYA_PARSE_PUNCTUATION)
1.106 cvs 1658: strcat (result, "//");
1659: strcat (result, given.host ? given.host : related.host);
1.29 cvs 1660: }
1661:
1662: if (given.host && related.host)
1663: /* If different hosts, inherit no path. */
1.106 cvs 1664: if (strcmp (given.host, related.host) != 0)
1.29 cvs 1665: {
1666: related.absolute = 0;
1667: related.relative = 0;
1668: related.fragment = 0;
1669: }
1670:
1671: if (wanted & AMAYA_PARSE_PATH)
1672: {
1673: if (given.absolute)
1674: {
1675: /* All is given */
1676: if (wanted & AMAYA_PARSE_PUNCTUATION)
1.106 cvs 1677: strcat (result, used_str);
1678: strcat (result, given.absolute);
1.25 cvs 1679: }
1.29 cvs 1680: else if (related.absolute)
1681: {
1682: /* Adopt path not name */
1.106 cvs 1683: strcat (result, used_str);
1684: strcat (result, related.absolute);
1.29 cvs 1685: if (given.relative)
1686: {
1687: /* Search part? */
1.106 cvs 1688: p = strchr (result, '?');
1.29 cvs 1689: if (!p)
1.106 cvs 1690: p=result+strlen(result)-1;
1.33 cvs 1691: for (; *p!=used_sep; p--); /* last / */
1.29 cvs 1692: /* Remove filename */
1693: p[1]=0;
1694: /* Add given one */
1.106 cvs 1695: strcat (result, given.relative);
1.25 cvs 1696: }
1697: }
1.29 cvs 1698: else if (given.relative)
1699: /* what we've got */
1.106 cvs 1700: strcat (result, given.relative);
1.29 cvs 1701: else if (related.relative)
1.106 cvs 1702: strcat (result, related.relative);
1.29 cvs 1703: else
1704: /* No inheritance */
1.106 cvs 1705: strcat (result, used_str);
1.25 cvs 1706: }
1.29 cvs 1707:
1708: if (wanted & AMAYA_PARSE_ANCHOR)
1709: if (given.fragment || related.fragment)
1710: {
1711: if (given.absolute && given.fragment)
1712: {
1713: /*Fixes for relURLs...*/
1714: if (wanted & AMAYA_PARSE_PUNCTUATION)
1.106 cvs 1715: strcat (result, "#");
1716: strcat (result, given.fragment);
1.29 cvs 1717: }
1718: else if (!(given.absolute) && !(given.fragment))
1.106 cvs 1719: strcat (result, "");
1.29 cvs 1720: else
1721: {
1.110 cvs 1722: if (wanted & AMAYA_PARSE_PUNCTUATION)
1.106 cvs 1723: strcat (result, "#");
1.110 cvs 1724: strcat (result, given.fragment ? given.fragment : related.fragment);
1.29 cvs 1725: }
1726: }
1.106 cvs 1727: len = strlen (result);
1728: if ((return_value = TtaGetMemory (len + 1)) != NULL)
1729: strcpy (return_value, result);
1.29 cvs 1730: return (return_value); /* exactly the right length */
1.25 cvs 1731: }
1732:
/*----------------------------------------------------------------------
  HTCanon
  Canonicalizes, in place, the URL *filename in the following manner
  starting from the host pointer (which must point inside *filename,
  just after the "://" of the scheme):

  1) The host name is converted to lowercase
  2) A dot ending the host name is removed
  3) The port is chopped off if it is empty (`:') or if it is the
     default one: `:80' (http), `:70' (gopher), or `:21' (ftp)

  Return: the position of the path part of the URL (the separator that
          follows the host, or the end of the string when there is no
          path). *filename itself is never replaced.
  ----------------------------------------------------------------------*/
static char *HTCanon (char **filename, char *host)
{
  char     used_sep;
  char    *path;
  char    *strptr;
  char    *port;
  char    *dot;
  char    *orig, *dest;
  char    *access = host - 3;

  if (*filename && strchr (*filename, URL_SEP))
    used_sep = URL_SEP;
  else
    used_sep = DIR_SEP;

  /* Find access method: go back to the beginning of the scheme */
  while (access > *filename && *(access - 1) != used_sep)
    access--;
  /* Find path */
  if ((path = strchr (host, used_sep)) == NULL)
    path = host + strlen (host);
  /* UserId */
  if ((strptr = strchr (host, '@')) != NULL && strptr < path)
    host = strptr;
  /* Port number: a ':' located in the path is not a port */
  if ((port = strchr (host, ':')) != NULL && port > path)
    port = NULL;

  /* Convert to lower-case (the cast keeps tolower defined for bytes
     above 127 when char is signed) */
  for (strptr = host; strptr < path; strptr++)
    *strptr = tolower ((unsigned char) *strptr);

  /* Does the URL contain a full domain name? This also works for a
     numerical host name. The domain name is already made lower-case;
     remove its trailing dot if any. */
  dot = port ? port : path;
  if (dot > *filename && *--dot == '.')
    {
      orig = dot;
      dest = dot + 1;
      while ((*orig++ = *dest++));
      if (port)
        port--;
      path--;
    }

  /* Chop off port if `:', `:80' (http), `:70' (gopher), or `:21' (ftp) */
  if (port)
    {
      if (!*(port + 1) || *(port + 1) == used_sep)
        {
          /* empty port: remove the ':' */
          orig = port;
          dest = port + 1;
          while ((*orig++ = *dest++));
          /* the path moved one char backwards too */
          path--;
        }
      /* the ':' is part of the comparison so that "https" or "ftps"
         don't lose a port which is not their default one */
      else if ((!strncmp (access, "http:", 5) &&
                (*(port + 1) == '8' &&
                 *(port + 2) == '0' &&
                 (*(port + 3) == used_sep || !*(port + 3)))) ||
               (!strncmp (access, "gopher:", 7) &&
                (*(port + 1) == '7' &&
                 *(port + 2) == '0' &&
                 (*(port + 3) == used_sep || !*(port + 3)))) ||
               (!strncmp (access, "ftp:", 4) &&
                (*(port + 1) == '2' &&
                 *(port + 2) == '1' &&
                 (*(port + 3) == used_sep || !*(port + 3)))))
        {
          orig = port;
          dest = port + 3;
          while ((*orig++ = *dest++));
          /* Update path position, Henry Minsky */
          path -= 3;
        }
    }

  return path;
}
1837:
1838:
1839: /*----------------------------------------------------------------------
1.29 cvs 1840: SimplifyUrl: simplify a URI
1.32 cvs 1841: A URI is allowed to contain the sequence xxx/../ which may be
1842: replaced by "" , and the sequence "/./" which may be replaced by DIR_STR.
1.28 cvs 1843: Simplification helps us recognize duplicate URIs.
1.25 cvs 1844:
1.28 cvs 1845: Thus, /etc/junk/../fred becomes /etc/fred
1846: /etc/junk/./fred becomes /etc/junk/fred
1.25 cvs 1847:
1.28 cvs 1848: but we should NOT change
1849: http://fred.xxx.edu/../..
1.25 cvs 1850:
1851: or ../../albert.html
1852:
1.28 cvs 1853: In order to avoid empty URLs the following URLs become:
1.25 cvs 1854:
1855: /fred/.. becomes /fred/..
1856: /fred/././.. becomes /fred/..
1857: /fred/.././junk/.././ becomes /fred/..
1858:
1.28 cvs 1859: If more than one set of `://' is found (several proxies in cascade) then
1860: only the part after the last `://' is simplified.
1.25 cvs 1861:
1.28 cvs 1862: Returns: A string which might be the old one or a new one.
1.25 cvs 1863: ----------------------------------------------------------------------*/
1.106 cvs 1864: void SimplifyUrl (char **url)
1865: {
1866: char *path;
1867: char *access;
1868: char *newptr;
1869: char *p;
1870: char *orig, *dest, *end;
1.28 cvs 1871:
1.106 cvs 1872: char used_sep;
1.77 cvs 1873: ThotBool ddot_simplify; /* used to desactivate the double dot simplifcation:
1874: something/../ simplification in relative URLs when they start with a ../ */
1.32 cvs 1875:
1.28 cvs 1876: if (!url || !*url)
1877: return;
1878:
1.106 cvs 1879: if (strchr (*url, URL_SEP))
1880: used_sep = URL_SEP;
1.32 cvs 1881: else
1.106 cvs 1882: used_sep = DIR_SEP;
1.32 cvs 1883:
1.77 cvs 1884: /* should we simplify double dot? */
1885: path = *url;
1.106 cvs 1886: if (*path == '.' && *(path + 1) == '.')
1.77 cvs 1887: ddot_simplify = FALSE;
1888: else
1889: ddot_simplify = TRUE;
1890:
1.28 cvs 1891: /* Find any scheme name */
1.106 cvs 1892: if ((path = strstr (*url, "://")) != NULL)
1.33 cvs 1893: {
1894: /* Find host name */
1.28 cvs 1895: access = *url;
1.123 vatton 1896: while (access < path && (*access = tolower (*access)))
1.82 cvs 1897: access++;
1.28 cvs 1898: path += 3;
1.106 cvs 1899: while ((newptr = strstr (path, "://")) != NULL)
1.82 cvs 1900: /* For proxies */
1.106 cvs 1901: path = newptr + 3;
1.82 cvs 1902: /* We have a host name */
1.84 cvs 1903: path = HTCanon (url, path);
1.25 cvs 1904: }
1.106 cvs 1905: else if ((path = strstr (*url, ":/")) != NULL)
1.28 cvs 1906: path += 2;
1907: else
1908: path = *url;
1.84 cvs 1909: if (*path == used_sep && *(path+1) == used_sep)
1.28 cvs 1910: /* Some URLs start //<foo> */
1911: path += 1;
1.94 cvs 1912: else if (IsFilePath (path))
1913: {
1914: /* doesn't need to do anything more */
1915: return;
1916: }
1.106 cvs 1917: else if (!strncmp (path, "news:", 5))
1.28 cvs 1918: {
1.106 cvs 1919: newptr = strchr (path+5, '@');
1.28 cvs 1920: if (!newptr)
1921: newptr = path + 5;
1922: while (*newptr)
1923: {
1924: /* Make group or host lower case */
1.123 vatton 1925: *newptr = tolower (*newptr);
1.28 cvs 1926: newptr++;
1.25 cvs 1927: }
1.28 cvs 1928: /* Doesn't need to do any more */
1929: return;
1.25 cvs 1930: }
1.130 cheyroul 1931:
1.126 cheyroul 1932:
1.28 cvs 1933: if ((p = path))
1934: {
1.106 cvs 1935: if (!((end = strchr (path, ';')) || (end = strchr (path, '?')) ||
1936: (end = strchr (path, '#'))))
1937: end = path + strlen (path);
1.28 cvs 1938:
1939: /* Parse string second time to simplify */
1940: p = path;
1941: while (p < end)
1942: {
1.110 cvs 1943: /* if we're pointing to a char, it's safe to reactivate the
1944: ../ convertion */
1.106 cvs 1945: if (!ddot_simplify && *p != '.' && *p != used_sep)
1.77 cvs 1946: ddot_simplify = TRUE;
1947:
1.33 cvs 1948: if (*p==used_sep)
1.28 cvs 1949: {
1.106 cvs 1950: if (p > *url && *(p+1) == '.' && (*(p+2) == used_sep || !*(p+2)))
1.28 cvs 1951: {
1952: orig = p + 1;
1.84 cvs 1953: dest = (*(p+2) != used_sep) ? p+2 : p+3;
1.52 cvs 1954: while ((*orig++ = *dest++)); /* Remove a used_sep and a dot*/
1.28 cvs 1955: end = orig - 1;
1956: }
1.106 cvs 1957: else if (ddot_simplify && *(p+1) == '.' && *(p+2) == '.'
1.77 cvs 1958: && (*(p+3) == used_sep || !*(p+3)))
1.28 cvs 1959: {
1960: newptr = p;
1.52 cvs 1961: while (newptr>path && *--newptr!=used_sep); /* prev used_sep */
1962: if (*newptr == used_sep)
1963: orig = newptr + 1;
1.28 cvs 1964: else
1.52 cvs 1965: orig = newptr;
1966:
1967: dest = (*(p+3) != used_sep) ? p+3 : p+4;
1968: while ((*orig++ = *dest++)); /* Remove /xxx/.. */
1969: end = orig-1;
1970: /* Start again with prev slash */
1971: p = newptr;
1.28 cvs 1972: }
1.33 cvs 1973: else if (*(p+1) == used_sep)
1.28 cvs 1974: {
1.33 cvs 1975: while (*(p+1) == used_sep)
1.28 cvs 1976: {
1977: orig = p;
1978: dest = p + 1;
1979: while ((*orig++ = *dest++)); /* Remove multiple /'s */
1980: end = orig-1;
1981: }
1982: }
1983: else
1.25 cvs 1984: p++;
1.28 cvs 1985: }
1986: else
1987: p++;
1.25 cvs 1988: }
1989: }
1.51 cvs 1990: /*
1991: ** Check for host/../.. kind of things
1992: */
1.106 cvs 1993: if (*path == used_sep && *(path+1) == '.' && *(path+2) == '.'
1.77 cvs 1994: && (!*(path+3) || *(path+3) == used_sep))
1.106 cvs 1995: *(path+1) = EOS;
1.28 cvs 1996: return;
1997: }
1998:
1999:
2000: /*----------------------------------------------------------------------
1.96 cvs 2001: NormalizeFile normalizes local names.
1.28 cvs 2002: Return TRUE if target and src differ.
2003: ----------------------------------------------------------------------*/
1.106 cvs 2004: ThotBool NormalizeFile (char *src, char *target, ConvertionType convertion)
1.28 cvs 2005: {
1.110 cvs 2006: #ifndef _WINDOWS
1.106 cvs 2007: char *s;
1.93 cvs 2008: int i;
1.110 cvs 2009: #endif /* !_WINDOWS */
1.82 cvs 2010: ThotBool change;
1.90 cvs 2011: int start_index; /* the first char that we'll copy */
1.28 cvs 2012:
1.54 cvs 2013: change = FALSE;
1.90 cvs 2014: start_index = 0;
2015:
1.106 cvs 2016: if (!src || src[0] == EOS)
1.96 cvs 2017: {
1.106 cvs 2018: target[0] = EOS;
1.96 cvs 2019: return FALSE;
2020: }
1.90 cvs 2021:
2022: /* @@ do I need file: or file:/ here? */
1.106 cvs 2023: if (strncmp (src, "file:", 5) == 0)
1.28 cvs 2024: {
1.90 cvs 2025: /* remove the prefix file: */
2026: start_index += 5;
2027:
2028: /* remove the localhost prefix */
1.106 cvs 2029: if (strncmp (&src[start_index], "//localhost/", 12) == 0)
1.94 cvs 2030: start_index += 11;
2031:
2032: /* remove the first two slashes in / / /path */
2033: while (src[start_index] &&
1.106 cvs 2034: src[start_index] == '/'
2035: && src[start_index + 1] == '/')
1.94 cvs 2036: start_index++;
2037:
2038: #ifdef _WINDOWS
2039: /* remove any extra slash before the drive name */
1.106 cvs 2040: if (src[start_index] == '/'
2041: &&src[start_index+2] == ':')
1.94 cvs 2042: start_index++;
2043: #endif /* _WINDOWS */
1.90 cvs 2044:
1.106 cvs 2045: if (src[start_index] == EOS)
1.90 cvs 2046: /* if there's nothing afterwards, add a DIR_STR */
1.106 cvs 2047: strcpy (target, DIR_STR);
1.90 cvs 2048: else
1.97 cvs 2049: /* as we're inside a file: URL, we'll apply all the convertions
2050: we know */
2051: CleanCopyFileURL (target, &src[start_index], AM_CONV_ALL);
1.96 cvs 2052:
2053: change = TRUE;
2054: }
1.97 cvs 2055: else if (convertion != AM_CONV_NONE)
1.96 cvs 2056: {
2057: /* we are following a "local" relative link, we do all the
2058: convertions except for the HOME_DIR ~ one */
1.97 cvs 2059: CleanCopyFileURL (target, src, convertion);
1.28 cvs 2060: }
1.90 cvs 2061: #ifndef _WINDOWS
1.106 cvs 2062: else if (src[0] == '~')
1.53 cvs 2063: {
1.96 cvs 2064: /* it must be a URL typed in a text input field */
2065: /* do the HOME_DIR ~ substitution */
1.82 cvs 2066: s = TtaGetEnvString ("HOME");
1.106 cvs 2067: strcpy (target, s);
1.90 cvs 2068: #if 0
1.96 cvs 2069: /* JK: invalidated this part of the code as it's simpler
2070: to add the DIR_SEP whenever we have something to add
2071: to the path rather than adding it systematically */
1.106 cvs 2072: if (src[1] != DIR_SEP)
2073: strcat (target, DIR_STR);
1.90 cvs 2074: #endif
1.106 cvs 2075: i = strlen (target);
2076: strcpy (&target[i], &src[1]);
1.54 cvs 2077: change = TRUE;
1.53 cvs 2078: }
1.90 cvs 2079: #endif /* _WINDOWS */
1.28 cvs 2080: else
1.96 cvs 2081: /* leave it as it is */
1.106 cvs 2082: strcpy (target, src);
1.96 cvs 2083:
1.28 cvs 2084: /* remove /../ and /./ */
1.29 cvs 2085: SimplifyUrl (&target);
1.54 cvs 2086: if (!change)
1.106 cvs 2087: change = strcmp (src, target);
1.28 cvs 2088: return (change);
1.25 cvs 2089: }
2090:
1.28 cvs 2091:
1.25 cvs 2092: /*----------------------------------------------------------------------
1.31 cvs 2093: MakeRelativeURL: make relative name
1.25 cvs 2094:
1.28 cvs 2095: This function creates and returns a string which gives an expression of
2096: one address as related to another. Where there is no relation, an absolute
2097: address is retured.
1.25 cvs 2098:
1.28 cvs 2099: On entry,
1.25 cvs 2100: Both names must be absolute, fully qualified names of nodes
2101: (no fragment bits)
2102:
1.28 cvs 2103: On exit,
1.25 cvs 2104: The return result points to a newly allocated name which, if
2105: parsed by AmayaParseUrl relative to relatedName, will yield aName.
2106: The caller is responsible for freeing the resulting name later.
2107: ----------------------------------------------------------------------*/
1.106 cvs 2108: char *MakeRelativeURL (char *aName, char *relatedName)
2109: {
2110: char *return_value;
2111: char result[MAX_LENGTH];
2112: char *p;
2113: char *q;
2114: char *after_access;
2115: char *last_slash = NULL;
2116: int slashes, levels, len;
1.110 cvs 2117: #ifdef _WINDOWS
1.44 cvs 2118: int ndx;
1.110 cvs 2119: #endif /* _WINDOWS */
1.44 cvs 2120:
1.29 cvs 2121: if (aName == NULL || relatedName == NULL)
2122: return (NULL);
2123:
2124: slashes = 0;
2125: after_access = NULL;
2126: p = aName;
2127: q = relatedName;
1.146 ! cvs 2128: for (; *p && !strncasecmp (p, q, 1); p++, q++)
1.27 cvs 2129: {
2130: /* Find extent of match */
1.106 cvs 2131: if (*p == ':')
1.146 ! cvs 2132: {
! 2133: after_access = p + 1;
! 2134: slashes++;
! 2135: }
1.28 cvs 2136: if (*p == DIR_SEP)
1.27 cvs 2137: {
1.29 cvs 2138: /* memorize the last slash position and count them */
1.27 cvs 2139: last_slash = p;
2140: slashes++;
1.25 cvs 2141: }
2142: }
2143:
1.31 cvs 2144: /* q, p point to the first non-matching character or zero */
1.106 cvs 2145: if (*q == EOS)
1.31 cvs 2146: {
2147: /* New name is a subset of the related name */
2148: /* exactly the right length */
1.106 cvs 2149: len = strlen (p);
2150: if ((return_value = TtaGetMemory (len + 1)) != NULL)
2151: strcpy (return_value, p);
1.31 cvs 2152: }
2153: else if ((slashes < 2 && after_access == NULL)
2154: || (slashes < 3 && after_access != NULL))
2155: {
2156: /* Two names whitout common path */
2157: /* exactly the right length */
1.106 cvs 2158: len = strlen (aName);
2159: if ((return_value = TtaGetMemory (len + 1)) != NULL)
2160: strcpy (return_value, aName);
1.31 cvs 2161: }
2162: else
2163: {
2164: /* Some path in common */
1.106 cvs 2165: if (slashes == 3 && strncmp (aName, "http:", 5) == 0)
1.31 cvs 2166: /* just the same server */
1.106 cvs 2167: strcpy (result, last_slash);
1.31 cvs 2168: else
2169: {
2170: levels= 0;
1.106 cvs 2171: for (; *q && *q != '#' && *q != ';' && *q != '?'; q++)
1.31 cvs 2172: if (*q == DIR_SEP)
2173: levels++;
2174:
1.106 cvs 2175: result[0] = EOS;
1.31 cvs 2176: for (;levels; levels--)
1.106 cvs 2177: strcat (result, "../");
2178: strcat (result, last_slash+1);
1.31 cvs 2179: }
1.52 cvs 2180:
2181: if (!*result)
1.106 cvs 2182: strcat (result, "./");
1.52 cvs 2183:
1.31 cvs 2184: /* exactly the right length */
1.106 cvs 2185: len = strlen (result);
2186: if ((return_value = TtaGetMemory (len + 1)) != NULL)
2187: strcpy (return_value, result);
1.52 cvs 2188:
1.25 cvs 2189: }
1.110 cvs 2190: #ifdef _WINDOWS
1.106 cvs 2191: len = strlen (return_value);
1.44 cvs 2192: for (ndx = 0; ndx < len; ndx ++)
1.106 cvs 2193: if (return_value[ndx] == '\\')
2194: return_value[ndx] = '/' ;
1.110 cvs 2195: #endif /* _WINDOWS */
1.29 cvs 2196: return (return_value);
1.24 cvs 2197: }
1.35 cvs 2198:
1.104 kahan 2199: /*----------------------------------------------------------------------
2200: AM_GetFileSize
2201: Returns TRUE and the filesize in the 2nd parameter.
2202: Otherwise, in case of a system error, returns FALSE, with a
2203: filesize of 0L.
2204: ---------------------------------------------------------------------*/
1.106 cvs 2205: ThotBool AM_GetFileSize (char *filename, unsigned long *file_size)
1.104 kahan 2206: {
1.106 cvs 2207: ThotFileHandle handle = ThotFile_BADHANDLE;
2208: ThotFileInfo info;
1.35 cvs 2209:
1.104 kahan 2210: *file_size = 0L;
2211: if (!TtaFileExist (filename))
2212: return FALSE;
2213:
2214: handle = TtaFileOpen (filename, ThotFile_READWRITE);
2215: if (handle == ThotFile_BADHANDLE)
2216: /* ThotFile_BADHANDLE */
2217: return FALSE;
2218: if (TtaFileStat (handle, &info) == 0)
2219: /* bad stat */
2220: info.size = 0L;
2221: TtaFileClose (handle);
2222: *file_size = (unsigned long) info.size;
2223: return TRUE;
2224: }
1.139 kahan 2225:
2226: /*----------------------------------------------------------------------
2227: AM_UseXHTMLMimeType
2228: Returns TRUE if the user has configured Amaya to use this MIME type,
2229: FALSE otherwise.
2230: ---------------------------------------------------------------------*/
2231: ThotBool AM_UseXHTMLMimeType (void)
2232: {
2233: ThotBool xhtml_mimetype;
2234:
2235: /* does the user wants to use the new MIME type? */
2236: TtaGetEnvBoolean ("ENABLE_XHTML_MIMETYPE", &xhtml_mimetype);
2237:
2238: return (xhtml_mimetype);
2239: }
Webmaster