Annotation of Amaya/amaya/AHTURLTools.c, revision 1.8
1.7 cvs 1: /*
2: *
3: * (c) COPYRIGHT MIT and INRIA, 1996.
4: * Please first read the full copyright statement in file COPYRIGHT.
5: *
6: */
7:
1.8 ! cvs 8: /* Amaya includes */
! 9: #define EXPORT extern
1.3 cvs 10: #include "amaya.h"
11:
1.8 ! cvs 12:
! 13: #include "init_f.h"
! 14: #include "AHTURLTools_f.h"
! 15:
! 16: /*----------------------------------------------------------------------
! 17: ExplodeURL :
! 18: ----------------------------------------------------------------------*/
! 19:
! 20: #ifdef __STDC__
! 21: void ExplodeURL (char *url, char **proto, char **host, char **dir, char **file)
! 22: #else
! 23: void ExplodeURL (url, proto, host, dir, file)
! 24: char *url;
! 25: char **proto;
! 26: char **host;
! 27: char **dir;
! 28: char **file;
! 29:
! 30: #endif
! 31: {
! 32: char *cour, *temp;
! 33:
! 34: if ((url == NULL) || (proto == NULL) || (host == NULL) ||
! 35: (dir == NULL) || (file == NULL))
! 36: return;
! 37:
! 38: /* initialize every pointer */
! 39: *proto = *host = *dir = *file = NULL;
! 40:
! 41: /* skip any leading space */
! 42: while ((*url == SPACE) || (*url == TAB))
! 43: url++;
! 44: cour = url;
! 45: if (*cour == 0)
! 46: goto finished;
! 47:
! 48: /* go to the end of the URL */
! 49: while ((*cour != 0) && (*cour != SPACE) && (*cour != '\b') &&
! 50: (*cour != '\r') && (*cour != EOL))
! 51: cour++;
! 52:
! 53: /* mark the end of the chain */
! 54: *cour = EOS;
! 55: cour--;
! 56: if (cour <= url)
! 57: goto finished;
! 58:
! 59: /* search the next DIR_SEP indicating the beginning of the file name */
! 60: do
! 61: {
! 62: cour--;
! 63: }
! 64: while ((cour >= url) && (*cour != DIR_SEP));
! 65: if (cour < url)
! 66: goto finished;
! 67: *file = cour + 1;
! 68:
! 69: /* mark the end of the dir */
! 70: *cour = EOS;
! 71: cour--;
! 72: if (cour < url)
! 73: goto finished;
! 74:
! 75: /* search for the "/" indicating the host name start */
! 76: while ((cour > url) && ((*cour != DIR_SEP) || (*(cour + 1) != DIR_SEP)))
! 77: cour--;
! 78:
! 79: /* if we found it, separate the host name from the directory */
! 80: if ((*cour == DIR_SEP) && (*(cour + 1) == DIR_SEP))
! 81: {
! 82: *host = temp = cour + 2;
! 83: while ((*temp != 0) && (*temp != DIR_SEP))
! 84: temp++;
! 85: if (*temp == DIR_SEP)
! 86: {
! 87: *temp = EOS;
! 88: *dir = temp + 1;
! 89: }
! 90: }
! 91: else
! 92: {
! 93: *dir = cour;
! 94: }
! 95: if (cour <= url)
! 96: goto finished;
! 97:
! 98: /* mark the end of the proto */
! 99: *cour = EOS;
! 100: cour--;
! 101: if (cour < url)
! 102: goto finished;
! 103:
! 104: if (*cour == ':')
! 105: {
! 106: *cour = EOS;
! 107: cour--;
! 108: }
! 109: else
! 110: goto finished;
! 111: if (cour < url)
! 112: goto finished;
! 113: while ((cour > url) && (isalpha (*cour)))
! 114: cour--;
! 115: *proto = cour;
! 116:
! 117: finished:;
! 118:
! 119: #ifdef AMAYA_DEBUG
! 120: fprintf (stderr, "ExplodeURL(%s)\n\t", url);
! 121: if (*proto)
! 122: fprintf (stderr, "proto : %s, ", *proto);
! 123: if (*host)
! 124: fprintf (stderr, "host : %s, ", *host);
! 125: if (*dir)
! 126: fprintf (stderr, "dir : %s, ", *dir);
! 127: if (*file)
! 128: fprintf (stderr, "file : %s ", *file);
! 129: fprintf (stderr, "\n");
! 130: #endif
! 131:
! 132: }
1.3 cvs 133:
1.4 cvs 134: /*----------------------------------------------------------------------
135: IsHTMLName
136: ----------------------------------------------------------------------*/
1.3 cvs 137:
138: #ifdef __STDC__
139: boolean IsHTMLName (char *path)
140: #else /* __STDC__ */
141: boolean IsHTMLName (path)
142: char *path;
143:
144: #endif /* __STDC__ */
145: {
1.5 cvs 146: char temppath[MAX_LENGTH];
147: char suffix[MAX_LENGTH];
148: char nsuffix[MAX_LENGTH];
149: int i;
150:
151: if (!path)
152: return FALSE;
153:
154: strcpy (temppath, path);
155: ExtractSuffix (temppath, suffix);
156:
157: /* Normalize the suffix */
158: i = 0;
159: while (suffix[i] != EOS)
160: nsuffix[i] = TOLOWER (suffix[i++]);
161: nsuffix[i] = EOS;
162: if ((strcmp (nsuffix, "html")) &&
163: (strcmp (nsuffix, "htm")) &&
164: (strcmp (nsuffix, "shtml")))
165: return FALSE;
166: return TRUE;
1.3 cvs 167: }
168:
1.4 cvs 169: /*----------------------------------------------------------------------
170: IsImageName
171: ----------------------------------------------------------------------*/
1.3 cvs 172:
173: #ifdef __STDC__
174: boolean IsImageName (char *path)
175: #else /* __STDC__ */
176: boolean IsImageName (path)
177: char *path;
178:
179: #endif /* __STDC__ */
180: {
1.5 cvs 181: char temppath[MAX_LENGTH];
182: char suffix[MAX_LENGTH];
183: char nsuffix[MAX_LENGTH];
184: int i;
185:
186: if (!path)
187: return FALSE;
188:
189: strcpy (temppath, path);
190: ExtractSuffix (temppath, suffix);
191:
192: /* Normalize the suffix */
193: i = 0;
194: while (suffix[i] != EOS)
195: nsuffix[i] = TOLOWER (suffix[i++]);
196: nsuffix[i] = EOS;
197: if ((strcmp (nsuffix, "gif")) && (strcmp (nsuffix, "xbm")) &&
198: (strcmp (nsuffix, "xpm")) && (strcmp (nsuffix, "jpg")) &&
199: (strcmp (nsuffix, "png")) && (strcmp (nsuffix, "au")))
200: return FALSE;
201: return TRUE;
1.3 cvs 202: }
203:
1.4 cvs 204: /*----------------------------------------------------------------------
205: IsTextName
206: ----------------------------------------------------------------------*/
1.3 cvs 207:
208: #ifdef __STDC__
209: boolean IsTextName (char *path)
210: #else /* __STDC__ */
211: boolean IsTextName (path)
212: char *path;
213:
214: #endif /* __STDC__ */
215: {
1.5 cvs 216: char temppath[MAX_LENGTH];
217: char suffix[MAX_LENGTH];
218: char nsuffix[MAX_LENGTH];
219: int i;
220:
221: if (!path)
222: return FALSE;
223:
224: strcpy (temppath, path);
225: ExtractSuffix (temppath, suffix);
226:
227: /* Normalize the suffix */
228: i = 0;
229: while (suffix[i] != EOS)
230: {
231: nsuffix[i] = TOLOWER (suffix[i]);
232: i++;
233: }
234: nsuffix[i] = EOS;
235:
236: if ((strcmp (nsuffix, "gif")) && (strcmp (nsuffix, "xbm")) &&
237: (strcmp (nsuffix, "xpm")) && (strcmp (nsuffix, "jpg")) &&
238: (strcmp (nsuffix, "pdf")) && (strcmp (nsuffix, "png")) &&
239: (strcmp (nsuffix, "Z")) && (strcmp (nsuffix, "gz")) &&
240: (strcmp (nsuffix, "tgz")) && (strcmp (nsuffix, "xpg")) &&
241: (strcmp (nsuffix, "xpd")) && (strcmp (nsuffix, "ps")) &&
242: (strcmp (nsuffix, "au")))
243: return TRUE;
244: return FALSE;
1.3 cvs 245: }
246:
1.4 cvs 247: /*----------------------------------------------------------------------
248: IsHTTPPath
249: ----------------------------------------------------------------------*/
1.3 cvs 250:
251: #ifdef __STDC__
252: boolean IsHTTPPath (char *path)
253: #else /* __STDC__ */
254: boolean IsHTTPPath (path)
255: char *path;
256:
257: #endif /* __STDC__ */
258: {
1.5 cvs 259: if (!path)
260: return FALSE;
1.3 cvs 261:
1.5 cvs 262: if (strncmp (path, "http:", 5) != 0)
263: return FALSE;
264: return TRUE;
1.3 cvs 265: }
266:
1.4 cvs 267: /*----------------------------------------------------------------------
268: IsWithParameters
269: ----------------------------------------------------------------------*/
1.3 cvs 270:
271: #ifdef __STDC__
272: boolean IsWithParameters (char *path)
273: #else /* __STDC__ */
274: boolean IsWithParameters (path)
275: char *path;
276:
277: #endif /* __STDC__ */
278: {
1.5 cvs 279: int i;
1.3 cvs 280:
1.5 cvs 281: if ((!path) || (path[0] == EOS))
282: return FALSE;
1.3 cvs 283:
1.5 cvs 284: i = strlen (path) - 1;
285: while (i > 0 && path[i--] != '?')
286: if (i < 0)
287: return FALSE;
1.3 cvs 288:
1.5 cvs 289: /* There is a parameter */
290: return TRUE;
1.3 cvs 291: }
292:
1.4 cvs 293: /*----------------------------------------------------------------------
294: IsW3Path
295: ----------------------------------------------------------------------*/
1.3 cvs 296:
297: #ifdef __STDC__
298: boolean IsW3Path (char *path)
299: #else /* __STDC__ */
300: boolean IsW3Path (path)
301: char *path;
302:
303: #endif /* __STDC__ */
304: {
1.5 cvs 305: if ((strncmp (path, "http:", 5)) && (strncmp (path, "ftp:", 4)) &&
306: (strncmp (path, "telnet:", 7)) && (strncmp (path, "wais:", 5)) &&
307: (strncmp (path, "news:", 5)) && (strncmp (path, "gopher:", 7)) &&
308: (strncmp (path, "mailto:", 7)) && (strncmp (path, "archie:", 7)))
309: return FALSE;
310: return TRUE;
1.3 cvs 311: }
312:
1.4 cvs 313: /*----------------------------------------------------------------------
314: IsValidProtocol
315: ----------------------------------------------------------------------*/
1.3 cvs 316:
317: #ifdef __STDC__
318: boolean IsValidProtocol (char *path)
319: #else /* __STDC__ */
320: boolean IsValidProtocol (path)
321: char *path;
322:
323: #endif /* __STDC__ */
324: {
1.5 cvs 325: if (!strncmp (path, "http:", 5)
1.3 cvs 326: /***|| !strncmp (path, "ftp:", 4)
1.5 cvs 327: || !strncmp (path, "news:", 5)***/ )
1.8 ! cvs 328: return (TRUE);
1.5 cvs 329: else
1.8 ! cvs 330: return (FALSE);
1.3 cvs 331: }
332:
1.4 cvs 333: /*----------------------------------------------------------------------
334: IsValidNormalizeURL says which URL's may be normalized
335: ----------------------------------------------------------------------*/
1.3 cvs 336:
337: #ifdef __STDC__
338: boolean IsValidNormalizeURL (char *path)
339: #else /* __STDC__ */
340: boolean IsValidNormalizeURL (path)
341: char *path;
342:
343: #endif /* __STDC__ */
344: {
1.5 cvs 345: if (strchr (path, ':') && !strncmp (path, "http:", 5))
1.8 ! cvs 346: return (TRUE);
1.5 cvs 347: else
1.8 ! cvs 348: return (FALSE);
1.3 cvs 349: }
350:
351:
1.4 cvs 352: /*----------------------------------------------------------------------
353: NormalizeURL provides the new complete and normalized URL or file
354: name path and the name of the document.
355: orgName is the original requested name.
356: doc identifies the document which provides the original
357: name.
358: newName is the resulting URL of file name.
359: docName is the resulting document name.
360: ----------------------------------------------------------------------*/
1.3 cvs 361:
362: #ifdef __STDC__
363: void NormalizeURL (char *orgName, Document doc, char *newName, char *docName)
364: #else /* __STDC__ */
365: void NormalizeURL (orgName, doc, newName, docName)
366: char *orgName;
367: Document doc;
368: char *newName;
369: char *docName;
370:
371: #endif /* __STDC__ */
372: {
1.5 cvs 373: char basename[MAX_LENGTH];
374: char tempname[MAX_LENGTH];
375: int i;
376: char *ptr;
377: char *basename_ptr;
378: int basename_flag;
379: Element el;
380: ElementType elType;
381: AttributeType attrType;
382: Attribute attrHREF;
383: int length;
384:
385: /* Fix up orgName, by erasing leading and trailing white space */
386: if (!newName || !docName)
387: return;
388: ptr = orgName;
389: while (*ptr == ' ' && *ptr++ != EOS) ;
390: strcpy (tempname, ptr);
391: ptr = strchr (tempname, ' ');
392: if (ptr)
393: *ptr = EOS;
394:
395: /*
396: ** the following block to take into account the BASE element.
397: ** This is not very optimized, as this procedure is repeated for
398: ** each element which is retrieved. A better way would be to
399: ** move this higher up in the function call hierarchy.
400: */
401: if (IsValidNormalizeURL (tempname) && doc)
402: {
403: length = MAX_LENGTH;
404: /* get the root element */
405: el = TtaGetMainRoot (doc);
406:
407: /* search the BASE element */
408: elType.ElSSchema = TtaGetDocumentSSchema (doc);
409: elType.ElTypeNum = HTML_EL_BASE;
410: el = TtaSearchTypedElement (elType, SearchInTree, el);
411: if (el)
412: {
413: /*
414: ** The document has a BASE element
415: ** Get the HREF attribute of the BASE Element
1.3 cvs 416: */
1.5 cvs 417: attrType.AttrSSchema = elType.ElSSchema;
418: attrType.AttrTypeNum = HTML_ATTR_HREF_;
419: attrHREF = TtaGetAttribute (el, attrType);
420: if (attrHREF)
421: {
422: /*
423: ** Use the base path of the document
424: ** To do: verify length of the buffer
425: ** length > TtaGetTextAttributeLength (attrHREF) + strlen (orgName)
426: */
427: TtaGiveTextAttributeValue (attrHREF, basename, &length);
428:
429: /*
430: ** base and orgName have to be separated by a DIR_SEP
431: */
432: if (basename[strlen (basename) - 1] != DIR_SEP && tempname[0] != DIR_SEP)
433: strcat (basename, DIR_STR);
434: }
435: }
436: else
437: basename[0] = EOS;
438: }
439: else
440: basename[0] = EOS;
441:
442: if (basename[0] == EOS)
443: {
444: /*
445: ** There is no BASE element in that document.
446: ** A temporary fix as TtaExtractName does not tolerate a name
447: ** ending in /. Here, we reinsert the slash, in order to
448: ** parse the name in the following two lines. A bit
449: ** redundant and has to be reviewed.
450: */
451: if (DocumentURLs[(int) doc])
452: {
453: basename_ptr = HTParse (DocumentURLs[(int) doc], "", PARSE_ALL);
454: basename_flag = TRUE;
455: }
456: else
457: {
458: basename_ptr = "";
459: basename_flag = FALSE;
460: }
461: }
462: else
463: {
464: basename_ptr = HTParse (basename, "", PARSE_ALL);
465: basename_flag = TRUE;
466: } /* if-else tempname */
467:
468: ptr = HTParse (tempname, basename_ptr, PARSE_ALL);
469: if (basename_flag)
470: HT_FREE (basename_ptr);
471: if (ptr)
472: {
473: ptr = HTSimplify (&ptr);
474: strcpy (newName, ptr);
475: HT_FREE (ptr);
476: }
477: else
478: newName[0] = EOS;
479:
480: i = strlen (newName) - 1;
481: if (i > 0)
482: {
483: /*
484: ** A temporary fix for an interfacing problem:
485: ** TtaExtractName does not tolerate url's finished on DIR_SEP
486: */
487: ptr = strrchr (newName, DIR_SEP);
488: if (ptr)
489: ptr++;
490: if (ptr && *ptr != EOS)
491: strcpy (docName, ptr);
492: else
493: /*
494: ** The docname was not comprised inside the URL, so let's
495: ** assign a "noname.html" name :)
496: */
497: strcpy (docName, "noname.html");
498:
499: /*
500: ** A temporary fix for an interfacing problem:
501: ** TtaExtractName does not tolerate url's finished on DIR_SEP
502: */
503: if (newName[i] == DIR_SEP)
504: newName[i] = EOS;
505: }
1.3 cvs 506: }
507:
1.4 cvs 508: /*----------------------------------------------------------------------
509: IsSameHost
510: ----------------------------------------------------------------------*/
1.3 cvs 511:
512: #ifdef __STDC__
513: boolean IsSameHost (char *url1, char *url2)
514: #else /* __STDC__ */
515: boolean IsSameHost (url1, url2)
516: char *path;
517:
518: #endif /* __STDC__ */
519: {
1.5 cvs 520: char *basename_ptr1, *basename_ptr2;
521: boolean result;
1.3 cvs 522:
1.5 cvs 523: basename_ptr1 = HTParse (url1, "", PARSE_ACCESS | PARSE_HOST | PARSE_PUNCTUATION);
524: basename_ptr2 = HTParse (url2, "", PARSE_ACCESS | PARSE_HOST | PARSE_PUNCTUATION);
1.3 cvs 525:
1.5 cvs 526: if (strcmp (basename_ptr1, basename_ptr2))
1.8 ! cvs 527: result = FALSE;
1.5 cvs 528: else
1.8 ! cvs 529: result = TRUE;
1.3 cvs 530:
1.5 cvs 531: HT_FREE (basename_ptr1);
532: HT_FREE (basename_ptr2);
1.3 cvs 533:
1.5 cvs 534: return (result);
1.3 cvs 535: }
536:
537:
1.4 cvs 538: /*----------------------------------------------------------------------
539: AHTMakeRelativeURL
540: ----------------------------------------------------------------------*/
1.3 cvs 541:
542: #ifdef __STDC__
1.5 cvs 543: char *AHTMakeRelativeName (char *url, char *base_url)
1.3 cvs 544: #else /* __STDC__ */
1.5 cvs 545: char *AHTMakeRelativeName (url, base_url)
546: char url;
547: char base_url;
548:
1.3 cvs 549: #endif /* __STDC__ */
550: {
1.5 cvs 551: char *base_ptr, *url_ptr;
552: char *result;
553:
554: /* verify if we are in the same host */
1.3 cvs 555:
1.5 cvs 556: base_ptr = HTParse (base_url, "", PARSE_ACCESS | PARSE_HOST | PARSE_PUNCTUATION);
557: url_ptr = HTParse (url, "", PARSE_ACCESS | PARSE_HOST | PARSE_PUNCTUATION);
1.3 cvs 558:
1.5 cvs 559: if (!strcmp (base_ptr, url_ptr))
560: {
561: HT_FREE (base_ptr);
562: HT_FREE (url_ptr);
1.3 cvs 563:
1.5 cvs 564: /* Normalize the URLs */
1.3 cvs 565:
1.5 cvs 566: base_ptr = HTParse (base_url, "", PARSE_ALL);
567: url_ptr = HTParse (url, "", PARSE_ALL);
1.3 cvs 568:
1.5 cvs 569: /* Use libwww to make relative name */
1.3 cvs 570:
1.5 cvs 571: result = HTRelative (url_ptr, base_ptr);
572: HT_FREE (base_ptr);
573: HT_FREE (url_ptr);
574: }
575: else
576: result = (char *) NULL;
1.3 cvs 577:
1.5 cvs 578: return (result);
1.3 cvs 579: }
Webmaster