Annotation of Amaya/amaya/AHTURLTools.c, revision 1.1.1.1
1.1 cvs 1: /*
2: ** -- Copyright (c) 1996-1997 Inria/CNRS All rights reserved. --
3: */
4:
5: #include "amaya.h"
6:
7: #include "dialog.h"
8: #include "content.h"
9: #include "view.h"
10: #include "interface.h"
11: #include "message.h"
12: #include "conststr.h"
13: #include "AHTMemConv.h"
14: #include "init.h"
15: #include "AHTURLTools.h" /** defined here **/
16:
17: /*+--------------------------------------------------------------------+ */
18: /*| IsHTMLName | */
19: /*+--------------------------------------------------------------------+ */
20:
21: #ifdef __STDC__
22: boolean IsHTMLName (char *path)
23: #else /* __STDC__ */
24: boolean IsHTMLName (path)
25: char *path;
26:
27: #endif /* __STDC__ */
28: {
29: char temppath[MAX_LENGTH];
30: char suffix[MAX_LENGTH];
31: char nsuffix[MAX_LENGTH];
32: int i;
33:
34: if (!path)
35: return FALSE;
36:
37: strcpy (temppath, path);
38: ExtractSuffix (temppath, suffix);
39:
40: /* Normalize the suffix */
41: i = 0;
42: while (suffix[i] != EOS)
43: nsuffix[i] = TOLOWER (suffix[i++]);
44: nsuffix[i] = EOS;
45: if ((strcmp (nsuffix, "html")) &&
46: (strcmp (nsuffix, "htm")) &&
47: (strcmp (nsuffix, "shtml")))
48: return FALSE;
49: return TRUE;
50: }
51:
52: /*+--------------------------------------------------------------------+ */
53: /*| IsImageName | */
54: /*+--------------------------------------------------------------------+ */
55:
56: #ifdef __STDC__
57: boolean IsImageName (char *path)
58: #else /* __STDC__ */
59: boolean IsImageName (path)
60: char *path;
61:
62: #endif /* __STDC__ */
63: {
64: char temppath[MAX_LENGTH];
65: char suffix[MAX_LENGTH];
66: char nsuffix[MAX_LENGTH];
67: int i;
68:
69: if (!path)
70: return FALSE;
71:
72: strcpy (temppath, path);
73: ExtractSuffix (temppath, suffix);
74:
75: /* Normalize the suffix */
76: i = 0;
77: while (suffix[i] != EOS)
78: nsuffix[i] = TOLOWER (suffix[i++]);
79: nsuffix[i] = EOS;
80: if ((strcmp (nsuffix, "gif")) && (strcmp (nsuffix, "xbm")) &&
81: (strcmp (nsuffix, "xpm")) && (strcmp (nsuffix, "jpg")) &&
82: (strcmp (nsuffix, "png")) && (strcmp (nsuffix, "au")))
83: return FALSE;
84: return TRUE;
85: }
86:
87: /*+--------------------------------------------------------------------+ */
88: /*| IsTextName | */
89: /*+--------------------------------------------------------------------+ */
90:
91: #ifdef __STDC__
92: boolean IsTextName (char *path)
93: #else /* __STDC__ */
94: boolean IsTextName (path)
95: char *path;
96:
97: #endif /* __STDC__ */
98: {
99: char temppath[MAX_LENGTH];
100: char suffix[MAX_LENGTH];
101: char nsuffix[MAX_LENGTH];
102: int i;
103:
104: if (!path)
105: return FALSE;
106:
107: strcpy (temppath, path);
108: ExtractSuffix (temppath, suffix);
109:
110: /* Normalize the suffix */
111: i = 0;
112: while (suffix[i] != EOS)
113: {
114: nsuffix[i] = TOLOWER (suffix[i]);
115: i++;
116: }
117: nsuffix[i] = EOS;
118:
119: if ((strcmp (nsuffix, "gif")) && (strcmp (nsuffix, "xbm")) &&
120: (strcmp (nsuffix, "xpm")) && (strcmp (nsuffix, "jpg")) &&
121: (strcmp (nsuffix, "pdf")) && (strcmp (nsuffix, "png")) &&
122: (strcmp (nsuffix, "Z")) && (strcmp (nsuffix, "gz")) &&
123: (strcmp (nsuffix, "tgz")) && (strcmp (nsuffix, "xpg")) &&
124: (strcmp (nsuffix, "xpd")) && (strcmp (nsuffix, "ps")) &&
125: (strcmp (nsuffix, "au")))
126: return TRUE;
127: return FALSE;
128: }
129:
130: /*+--------------------------------------------------------------------+ */
131: /*| IsHTTPPath | */
132: /*+--------------------------------------------------------------------+ */
133:
134: #ifdef __STDC__
135: boolean IsHTTPPath (char *path)
136: #else /* __STDC__ */
137: boolean IsHTTPPath (path)
138: char *path;
139:
140: #endif /* __STDC__ */
141: {
142: if (!path)
143: return FALSE;
144:
145: if (strncmp (path, "http:", 5) != 0)
146: return FALSE;
147: return TRUE;
148: }
149:
150: /*+--------------------------------------------------------------------+ */
151: /*| IsWithParameters | */
152: /*+--------------------------------------------------------------------+ */
153:
154: #ifdef __STDC__
155: boolean IsWithParameters (char *path)
156: #else /* __STDC__ */
157: boolean IsWithParameters (path)
158: char *path;
159:
160: #endif /* __STDC__ */
161: {
162: int i;
163:
164: if ((!path) || (path[0] == EOS))
165: return FALSE;
166:
167: i = strlen (path) - 1;
168: while (i > 0 && path[i--] != '?')
169: if (i < 0)
170: return FALSE;
171:
172: /* There is a parameter */
173: return TRUE;
174: }
175:
176: /*+--------------------------------------------------------------------+ */
177: /*| IsW3Path | */
178: /*+--------------------------------------------------------------------+ */
179:
180: #ifdef __STDC__
181: boolean IsW3Path (char *path)
182: #else /* __STDC__ */
183: boolean IsW3Path (path)
184: char *path;
185:
186: #endif /* __STDC__ */
187: {
188: if ((strncmp (path, "http:", 5)) && (strncmp (path, "ftp:", 4)) &&
189: (strncmp (path, "telnet:", 7)) && (strncmp (path, "wais:", 5)) &&
190: (strncmp (path, "news:", 5)) && (strncmp (path, "gopher:", 7)) &&
191: (strncmp (path, "mailto:", 7)) && (strncmp (path, "archie:", 7)))
192: return FALSE;
193: return TRUE;
194: }
195:
196: /*+--------------------------------------------------------------------+ */
197: /*| IsValidProtocol | */
198: /*+--------------------------------------------------------------------+ */
199:
200: #ifdef __STDC__
201: boolean IsValidProtocol (char *path)
202: #else /* __STDC__ */
203: boolean IsValidProtocol (path)
204: char *path;
205:
206: #endif /* __STDC__ */
207: {
208: if (!strncmp (path, "http:", 5)
209: /***|| !strncmp (path, "ftp:", 4)
210: || !strncmp (path, "news:", 5)***/)
211: return (YES);
212: else
213: return (NO);
214: }
215:
216: /*+--------------------------------------------------------------------+ */
217: /*| IsValidNormalizeURL says which URL's may be normalized | */
218: /*+--------------------------------------------------------------------+ */
219:
220: #ifdef __STDC__
221: boolean IsValidNormalizeURL (char *path)
222: #else /* __STDC__ */
223: boolean IsValidNormalizeURL (path)
224: char *path;
225:
226: #endif /* __STDC__ */
227: {
228: if (strchr(path,':') && !strncmp (path, "http:", 5))
229: return (YES);
230: else
231: return (NO);
232: }
233:
234:
235: /*+--------------------------------------------------------------------+ */
236: /*| NormalizeURL provides the new complete and normalized URL or file | */
237: /*| name path and the name of the document. | */
238: /*| orgName is the original requested name. | */
239: /*| doc identifies the document which provides the original | */
240: /*| name. | */
241: /*| newName is the resulting URL of file name. | */
242: /*| docName is the resulting document name. | */
243: /*+--------------------------------------------------------------------+ */
244:
245: #ifdef __STDC__
246: void NormalizeURL (char *orgName, Document doc, char *newName, char *docName)
247: #else /* __STDC__ */
248: void NormalizeURL (orgName, doc, newName, docName)
249: char *orgName;
250: Document doc;
251: char *newName;
252: char *docName;
253:
254: #endif /* __STDC__ */
255: {
256: char basename[MAX_LENGTH];
257: char tempname[MAX_LENGTH];
258: int i;
259: char *ptr;
260: char *basename_ptr;
261: int basename_flag;
262: Element el;
263: ElementType elType;
264: AttributeType attrType;
265: Attribute attrHREF;
266: int length;
267:
268: /* Fix up orgName, by erasing leading and trailing white space */
269: if (!newName || !docName)
270: return;
271: ptr = orgName;
272: while (*ptr == ' ' && *ptr++ != EOS) ;
273: strcpy (tempname, ptr);
274: ptr = strchr (tempname, ' ');
275: if (ptr)
276: *ptr = EOS;
277:
278: /*
279: ** the following block to take into account the BASE element.
280: ** This is not very optimized, as this procedure is repeated for
281: ** each element which is retrieved. A better way would be to
282: ** move this higher up in the function call hierarchy.
283: */
284: if (IsValidNormalizeURL (tempname) && doc)
285: {
286: length = MAX_LENGTH;
287: /* get the root element */
288: el = TtaGetMainRoot (doc);
289:
290: /* search the BASE element */
291: elType.ElSSchema = TtaGetDocumentSSchema (doc);
292: elType.ElTypeNum = HTML_EL_BASE;
293: el = TtaSearchTypedElement (elType, SearchInTree, el);
294: if (el)
295: {
296: /*
297: ** The document has a BASE element
298: ** Get the HREF attribute of the BASE Element
299: */
300: attrType.AttrSSchema = elType.ElSSchema;
301: attrType.AttrTypeNum = HTML_ATTR_HREF_;
302: attrHREF = TtaGetAttribute (el, attrType);
303: if (attrHREF)
304: {
305: /*
306: ** Use the base path of the document
307: ** To do: verify length of the buffer
308: ** length > TtaGetTextAttributeLength (attrHREF) + strlen (orgName)
309: */
310: TtaGiveTextAttributeValue (attrHREF, basename, &length);
311:
312: /*
313: ** base and orgName have to be separated by a DIR_SEP
314: */
315: if (basename[strlen (basename) - 1] != DIR_SEP && tempname[0] != DIR_SEP)
316: strcat (basename, DIR_STR);
317: }
318: }
319: else
320: basename[0] = EOS;
321: }
322: else
323: basename[0] = EOS;
324:
325: if (basename[0] == EOS)
326: {
327: /*
328: ** There is no BASE element in that document.
329: ** A temporary fix as TtaExtractName does not tolerate a name
330: ** ending in /. Here, we reinsert the slash, in order to
331: ** parse the name in the following two lines. A bit
332: ** redundant and has to be reviewed.
333: */
334: if (DocumentURLs[(int) doc])
335: {
336: basename_ptr = HTParse (DocumentURLs[(int) doc], "", PARSE_ALL);
337: basename_flag = TRUE;
338: }
339: else
340: {
341: basename_ptr = "";
342: basename_flag = FALSE;
343: }
344: }
345: else
346: {
347: basename_ptr = HTParse (basename, "", PARSE_ALL);
348: basename_flag = TRUE;
349: } /* if-else tempname */
350:
351: ptr = HTParse (tempname, basename_ptr, PARSE_ALL);
352: if (basename_flag)
353: HT_FREE (basename_ptr);
354: if (ptr)
355: {
356: ptr = HTSimplify (&ptr);
357: strcpy (newName, ptr);
358: HT_FREE (ptr);
359: }
360: else
361: newName[0] = EOS;
362:
363: i = strlen (newName) - 1;
364: if (i > 0)
365: {
366: /*
367: ** A temporary fix for an interfacing problem:
368: ** TtaExtractName does not tolerate url's finished on DIR_SEP
369: */
370: ptr = strrchr (newName, DIR_SEP);
371: if (ptr)
372: ptr++;
373: if (ptr && *ptr != EOS)
374: strcpy (docName, ptr);
375: else
376: /*
377: ** The docname was not comprised inside the URL, so let's
378: ** assign a "noname.html" name :)
379: */
380: strcpy (docName, "noname.html");
381:
382: /*
383: ** A temporary fix for an interfacing problem:
384: ** TtaExtractName does not tolerate url's finished on DIR_SEP
385: */
386: if (newName[i] == DIR_SEP)
387: newName[i] = EOS;
388: }
389: }
390:
391: /*+--------------------------------------------------------------------+ */
392: /*| IsSameHost | */
393: /*+--------------------------------------------------------------------+ */
394:
395: #ifdef __STDC__
396: boolean IsSameHost (char *url1, char *url2)
397: #else /* __STDC__ */
398: boolean IsSameHost (url1, url2)
399: char *path;
400:
401: #endif /* __STDC__ */
402: {
403: char *basename_ptr1, *basename_ptr2;
404: boolean result;
405:
406: basename_ptr1 = HTParse(url1, "", PARSE_ACCESS | PARSE_HOST | PARSE_PUNCTUATION);
407: basename_ptr2 = HTParse(url2, "", PARSE_ACCESS | PARSE_HOST | PARSE_PUNCTUATION);
408:
409: if (strcmp (basename_ptr1, basename_ptr2))
410: result = NO;
411: else
412: result = YES;
413:
414: HT_FREE(basename_ptr1);
415: HT_FREE(basename_ptr2);
416:
417: return(result);
418: }
419:
420:
421: /*+--------------------------------------------------------------------+ */
422: /*| AHTMakeRelativeURL | */
423: /*+--------------------------------------------------------------------+ */
424:
425: #ifdef __STDC__
426: char *AHTMakeRelativeName(char *url, char *base_url)
427: #else /* __STDC__ */
428: char *AHTMakeRelativeName(url, base_url)
429: char url;
430: char base_url;
431: #endif /* __STDC__ */
432: {
433: char *base_ptr, *url_ptr;
434: char *result;
435:
436: /* verify if we are in the same host */
437:
438: base_ptr = HTParse(base_url, "", PARSE_ACCESS | PARSE_HOST | PARSE_PUNCTUATION);
439: url_ptr = HTParse(url, "", PARSE_ACCESS | PARSE_HOST | PARSE_PUNCTUATION);
440:
441: if (!strcmp (base_ptr, url_ptr)) {
442: HT_FREE(base_ptr);
443: HT_FREE(url_ptr);
444:
445: /* Normalize the URLs */
446:
447: base_ptr = HTParse(base_url, "", PARSE_ALL);
448: url_ptr = HTParse(url, "", PARSE_ALL);
449:
450: /* Use libwww to make relative name */
451:
452: result = HTRelative(url_ptr, base_ptr);
453: HT_FREE(base_ptr);
454: HT_FREE(url_ptr);
455: }
456: else
457: result = (char *) NULL;
458:
459: return(result);
460: }
461:
462:
463:
464:
465:
466:
467:
Webmaster