Annotation of Amaya/amaya/AHTURLTools.c, revision 1.3
1.3 ! cvs 1: /*
! 2: ** -- Copyright (c) 1996-1997 Inria/CNRS All rights reserved. --
! 3: */
! 4:
! 5: #include "amaya.h"
! 6:
! 7: #include "dialog.h"
! 8: #include "content.h"
! 9: #include "view.h"
! 10: #include "interface.h"
! 11: #include "message.h"
! 12: #include "conststr.h"
! 13: #include "AHTMemConv.h"
! 14: #include "init.h"
! 15: #include "AHTURLTools.h" /** defined here **/
! 16:
! 17: /*+--------------------------------------------------------------------+ */
! 18: /*| IsHTMLName | */
! 19: /*+--------------------------------------------------------------------+ */
! 20:
! 21: #ifdef __STDC__
! 22: boolean IsHTMLName (char *path)
! 23: #else /* __STDC__ */
! 24: boolean IsHTMLName (path)
! 25: char *path;
! 26:
! 27: #endif /* __STDC__ */
! 28: {
! 29: char temppath[MAX_LENGTH];
! 30: char suffix[MAX_LENGTH];
! 31: char nsuffix[MAX_LENGTH];
! 32: int i;
! 33:
! 34: if (!path)
! 35: return FALSE;
! 36:
! 37: strcpy (temppath, path);
! 38: ExtractSuffix (temppath, suffix);
! 39:
! 40: /* Normalize the suffix */
! 41: i = 0;
! 42: while (suffix[i] != EOS)
! 43: nsuffix[i] = TOLOWER (suffix[i++]);
! 44: nsuffix[i] = EOS;
! 45: if ((strcmp (nsuffix, "html")) &&
! 46: (strcmp (nsuffix, "htm")) &&
! 47: (strcmp (nsuffix, "shtml")))
! 48: return FALSE;
! 49: return TRUE;
! 50: }
! 51:
! 52: /*+--------------------------------------------------------------------+ */
! 53: /*| IsImageName | */
! 54: /*+--------------------------------------------------------------------+ */
! 55:
! 56: #ifdef __STDC__
! 57: boolean IsImageName (char *path)
! 58: #else /* __STDC__ */
! 59: boolean IsImageName (path)
! 60: char *path;
! 61:
! 62: #endif /* __STDC__ */
! 63: {
! 64: char temppath[MAX_LENGTH];
! 65: char suffix[MAX_LENGTH];
! 66: char nsuffix[MAX_LENGTH];
! 67: int i;
! 68:
! 69: if (!path)
! 70: return FALSE;
! 71:
! 72: strcpy (temppath, path);
! 73: ExtractSuffix (temppath, suffix);
! 74:
! 75: /* Normalize the suffix */
! 76: i = 0;
! 77: while (suffix[i] != EOS)
! 78: nsuffix[i] = TOLOWER (suffix[i++]);
! 79: nsuffix[i] = EOS;
! 80: if ((strcmp (nsuffix, "gif")) && (strcmp (nsuffix, "xbm")) &&
! 81: (strcmp (nsuffix, "xpm")) && (strcmp (nsuffix, "jpg")) &&
! 82: (strcmp (nsuffix, "png")) && (strcmp (nsuffix, "au")))
! 83: return FALSE;
! 84: return TRUE;
! 85: }
! 86:
! 87: /*+--------------------------------------------------------------------+ */
! 88: /*| IsTextName | */
! 89: /*+--------------------------------------------------------------------+ */
! 90:
! 91: #ifdef __STDC__
! 92: boolean IsTextName (char *path)
! 93: #else /* __STDC__ */
! 94: boolean IsTextName (path)
! 95: char *path;
! 96:
! 97: #endif /* __STDC__ */
! 98: {
! 99: char temppath[MAX_LENGTH];
! 100: char suffix[MAX_LENGTH];
! 101: char nsuffix[MAX_LENGTH];
! 102: int i;
! 103:
! 104: if (!path)
! 105: return FALSE;
! 106:
! 107: strcpy (temppath, path);
! 108: ExtractSuffix (temppath, suffix);
! 109:
! 110: /* Normalize the suffix */
! 111: i = 0;
! 112: while (suffix[i] != EOS)
! 113: {
! 114: nsuffix[i] = TOLOWER (suffix[i]);
! 115: i++;
! 116: }
! 117: nsuffix[i] = EOS;
! 118:
! 119: if ((strcmp (nsuffix, "gif")) && (strcmp (nsuffix, "xbm")) &&
! 120: (strcmp (nsuffix, "xpm")) && (strcmp (nsuffix, "jpg")) &&
! 121: (strcmp (nsuffix, "pdf")) && (strcmp (nsuffix, "png")) &&
! 122: (strcmp (nsuffix, "Z")) && (strcmp (nsuffix, "gz")) &&
! 123: (strcmp (nsuffix, "tgz")) && (strcmp (nsuffix, "xpg")) &&
! 124: (strcmp (nsuffix, "xpd")) && (strcmp (nsuffix, "ps")) &&
! 125: (strcmp (nsuffix, "au")))
! 126: return TRUE;
! 127: return FALSE;
! 128: }
! 129:
! 130: /*+--------------------------------------------------------------------+ */
! 131: /*| IsHTTPPath | */
! 132: /*+--------------------------------------------------------------------+ */
! 133:
! 134: #ifdef __STDC__
! 135: boolean IsHTTPPath (char *path)
! 136: #else /* __STDC__ */
! 137: boolean IsHTTPPath (path)
! 138: char *path;
! 139:
! 140: #endif /* __STDC__ */
! 141: {
! 142: if (!path)
! 143: return FALSE;
! 144:
! 145: if (strncmp (path, "http:", 5) != 0)
! 146: return FALSE;
! 147: return TRUE;
! 148: }
! 149:
! 150: /*+--------------------------------------------------------------------+ */
! 151: /*| IsWithParameters | */
! 152: /*+--------------------------------------------------------------------+ */
! 153:
! 154: #ifdef __STDC__
! 155: boolean IsWithParameters (char *path)
! 156: #else /* __STDC__ */
! 157: boolean IsWithParameters (path)
! 158: char *path;
! 159:
! 160: #endif /* __STDC__ */
! 161: {
! 162: int i;
! 163:
! 164: if ((!path) || (path[0] == EOS))
! 165: return FALSE;
! 166:
! 167: i = strlen (path) - 1;
! 168: while (i > 0 && path[i--] != '?')
! 169: if (i < 0)
! 170: return FALSE;
! 171:
! 172: /* There is a parameter */
! 173: return TRUE;
! 174: }
! 175:
! 176: /*+--------------------------------------------------------------------+ */
! 177: /*| IsW3Path | */
! 178: /*+--------------------------------------------------------------------+ */
! 179:
! 180: #ifdef __STDC__
! 181: boolean IsW3Path (char *path)
! 182: #else /* __STDC__ */
! 183: boolean IsW3Path (path)
! 184: char *path;
! 185:
! 186: #endif /* __STDC__ */
! 187: {
! 188: if ((strncmp (path, "http:", 5)) && (strncmp (path, "ftp:", 4)) &&
! 189: (strncmp (path, "telnet:", 7)) && (strncmp (path, "wais:", 5)) &&
! 190: (strncmp (path, "news:", 5)) && (strncmp (path, "gopher:", 7)) &&
! 191: (strncmp (path, "mailto:", 7)) && (strncmp (path, "archie:", 7)))
! 192: return FALSE;
! 193: return TRUE;
! 194: }
! 195:
! 196: /*+--------------------------------------------------------------------+ */
! 197: /*| IsValidProtocol | */
! 198: /*+--------------------------------------------------------------------+ */
! 199:
! 200: #ifdef __STDC__
! 201: boolean IsValidProtocol (char *path)
! 202: #else /* __STDC__ */
! 203: boolean IsValidProtocol (path)
! 204: char *path;
! 205:
! 206: #endif /* __STDC__ */
! 207: {
! 208: if (!strncmp (path, "http:", 5)
! 209: /***|| !strncmp (path, "ftp:", 4)
! 210: || !strncmp (path, "news:", 5)***/)
! 211: return (YES);
! 212: else
! 213: return (NO);
! 214: }
! 215:
! 216: /*+--------------------------------------------------------------------+ */
! 217: /*| IsValidNormalizeURL says which URL's may be normalized | */
! 218: /*+--------------------------------------------------------------------+ */
! 219:
! 220: #ifdef __STDC__
! 221: boolean IsValidNormalizeURL (char *path)
! 222: #else /* __STDC__ */
! 223: boolean IsValidNormalizeURL (path)
! 224: char *path;
! 225:
! 226: #endif /* __STDC__ */
! 227: {
! 228: if (strchr(path,':') && !strncmp (path, "http:", 5))
! 229: return (YES);
! 230: else
! 231: return (NO);
! 232: }
! 233:
! 234:
! 235: /*+--------------------------------------------------------------------+ */
! 236: /*| NormalizeURL provides the new complete and normalized URL or file | */
! 237: /*| name path and the name of the document. | */
! 238: /*| orgName is the original requested name. | */
! 239: /*| doc identifies the document which provides the original | */
! 240: /*| name. | */
! 241: /*| newName is the resulting URL of file name. | */
! 242: /*| docName is the resulting document name. | */
! 243: /*+--------------------------------------------------------------------+ */
! 244:
! 245: #ifdef __STDC__
! 246: void NormalizeURL (char *orgName, Document doc, char *newName, char *docName)
! 247: #else /* __STDC__ */
! 248: void NormalizeURL (orgName, doc, newName, docName)
! 249: char *orgName;
! 250: Document doc;
! 251: char *newName;
! 252: char *docName;
! 253:
! 254: #endif /* __STDC__ */
! 255: {
! 256: char basename[MAX_LENGTH];
! 257: char tempname[MAX_LENGTH];
! 258: int i;
! 259: char *ptr;
! 260: char *basename_ptr;
! 261: int basename_flag;
! 262: Element el;
! 263: ElementType elType;
! 264: AttributeType attrType;
! 265: Attribute attrHREF;
! 266: int length;
! 267:
! 268: /* Fix up orgName, by erasing leading and trailing white space */
! 269: if (!newName || !docName)
! 270: return;
! 271: ptr = orgName;
! 272: while (*ptr == ' ' && *ptr++ != EOS) ;
! 273: strcpy (tempname, ptr);
! 274: ptr = strchr (tempname, ' ');
! 275: if (ptr)
! 276: *ptr = EOS;
! 277:
! 278: /*
! 279: ** the following block to take into account the BASE element.
! 280: ** This is not very optimized, as this procedure is repeated for
! 281: ** each element which is retrieved. A better way would be to
! 282: ** move this higher up in the function call hierarchy.
! 283: */
! 284: if (IsValidNormalizeURL (tempname) && doc)
! 285: {
! 286: length = MAX_LENGTH;
! 287: /* get the root element */
! 288: el = TtaGetMainRoot (doc);
! 289:
! 290: /* search the BASE element */
! 291: elType.ElSSchema = TtaGetDocumentSSchema (doc);
! 292: elType.ElTypeNum = HTML_EL_BASE;
! 293: el = TtaSearchTypedElement (elType, SearchInTree, el);
! 294: if (el)
! 295: {
! 296: /*
! 297: ** The document has a BASE element
! 298: ** Get the HREF attribute of the BASE Element
! 299: */
! 300: attrType.AttrSSchema = elType.ElSSchema;
! 301: attrType.AttrTypeNum = HTML_ATTR_HREF_;
! 302: attrHREF = TtaGetAttribute (el, attrType);
! 303: if (attrHREF)
! 304: {
! 305: /*
! 306: ** Use the base path of the document
! 307: ** To do: verify length of the buffer
! 308: ** length > TtaGetTextAttributeLength (attrHREF) + strlen (orgName)
! 309: */
! 310: TtaGiveTextAttributeValue (attrHREF, basename, &length);
! 311:
! 312: /*
! 313: ** base and orgName have to be separated by a DIR_SEP
! 314: */
! 315: if (basename[strlen (basename) - 1] != DIR_SEP && tempname[0] != DIR_SEP)
! 316: strcat (basename, DIR_STR);
! 317: }
! 318: }
! 319: else
! 320: basename[0] = EOS;
! 321: }
! 322: else
! 323: basename[0] = EOS;
! 324:
! 325: if (basename[0] == EOS)
! 326: {
! 327: /*
! 328: ** There is no BASE element in that document.
! 329: ** A temporary fix as TtaExtractName does not tolerate a name
! 330: ** ending in /. Here, we reinsert the slash, in order to
! 331: ** parse the name in the following two lines. A bit
! 332: ** redundant and has to be reviewed.
! 333: */
! 334: if (DocumentURLs[(int) doc])
! 335: {
! 336: basename_ptr = HTParse (DocumentURLs[(int) doc], "", PARSE_ALL);
! 337: basename_flag = TRUE;
! 338: }
! 339: else
! 340: {
! 341: basename_ptr = "";
! 342: basename_flag = FALSE;
! 343: }
! 344: }
! 345: else
! 346: {
! 347: basename_ptr = HTParse (basename, "", PARSE_ALL);
! 348: basename_flag = TRUE;
! 349: } /* if-else tempname */
! 350:
! 351: ptr = HTParse (tempname, basename_ptr, PARSE_ALL);
! 352: if (basename_flag)
! 353: HT_FREE (basename_ptr);
! 354: if (ptr)
! 355: {
! 356: ptr = HTSimplify (&ptr);
! 357: strcpy (newName, ptr);
! 358: HT_FREE (ptr);
! 359: }
! 360: else
! 361: newName[0] = EOS;
! 362:
! 363: i = strlen (newName) - 1;
! 364: if (i > 0)
! 365: {
! 366: /*
! 367: ** A temporary fix for an interfacing problem:
! 368: ** TtaExtractName does not tolerate url's finished on DIR_SEP
! 369: */
! 370: ptr = strrchr (newName, DIR_SEP);
! 371: if (ptr)
! 372: ptr++;
! 373: if (ptr && *ptr != EOS)
! 374: strcpy (docName, ptr);
! 375: else
! 376: /*
! 377: ** The docname was not comprised inside the URL, so let's
! 378: ** assign a "noname.html" name :)
! 379: */
! 380: strcpy (docName, "noname.html");
! 381:
! 382: /*
! 383: ** A temporary fix for an interfacing problem:
! 384: ** TtaExtractName does not tolerate url's finished on DIR_SEP
! 385: */
! 386: if (newName[i] == DIR_SEP)
! 387: newName[i] = EOS;
! 388: }
! 389: }
! 390:
! 391: /*+--------------------------------------------------------------------+ */
! 392: /*| IsSameHost | */
! 393: /*+--------------------------------------------------------------------+ */
! 394:
! 395: #ifdef __STDC__
! 396: boolean IsSameHost (char *url1, char *url2)
! 397: #else /* __STDC__ */
! 398: boolean IsSameHost (url1, url2)
! 399: char *path;
! 400:
! 401: #endif /* __STDC__ */
! 402: {
! 403: char *basename_ptr1, *basename_ptr2;
! 404: boolean result;
! 405:
! 406: basename_ptr1 = HTParse(url1, "", PARSE_ACCESS | PARSE_HOST | PARSE_PUNCTUATION);
! 407: basename_ptr2 = HTParse(url2, "", PARSE_ACCESS | PARSE_HOST | PARSE_PUNCTUATION);
! 408:
! 409: if (strcmp (basename_ptr1, basename_ptr2))
! 410: result = NO;
! 411: else
! 412: result = YES;
! 413:
! 414: HT_FREE(basename_ptr1);
! 415: HT_FREE(basename_ptr2);
! 416:
! 417: return(result);
! 418: }
! 419:
! 420:
! 421: /*+--------------------------------------------------------------------+ */
! 422: /*| AHTMakeRelativeName | */
! 423: /*+--------------------------------------------------------------------+ */
! 424:
! 425: #ifdef __STDC__
! 426: char *AHTMakeRelativeName(char *url, char *base_url)
! 427: #else /* __STDC__ */
! 428: char *AHTMakeRelativeName(url, base_url)
! 429: char url;
! 430: char base_url;
! 431: #endif /* __STDC__ */
! 432: {
! 433: char *base_ptr, *url_ptr;
! 434: char *result;
! 435:
! 436: /* verify if we are in the same host */
! 437:
! 438: base_ptr = HTParse(base_url, "", PARSE_ACCESS | PARSE_HOST | PARSE_PUNCTUATION);
! 439: url_ptr = HTParse(url, "", PARSE_ACCESS | PARSE_HOST | PARSE_PUNCTUATION);
! 440:
! 441: if (!strcmp (base_ptr, url_ptr)) {
! 442: HT_FREE(base_ptr);
! 443: HT_FREE(url_ptr);
! 444:
! 445: /* Normalize the URLs */
! 446:
! 447: base_ptr = HTParse(base_url, "", PARSE_ALL);
! 448: url_ptr = HTParse(url, "", PARSE_ALL);
! 449:
! 450: /* Use libwww to make relative name */
! 451:
! 452: result = HTRelative(url_ptr, base_ptr);
! 453: HT_FREE(base_ptr);
! 454: HT_FREE(url_ptr);
! 455: }
! 456: else
! 457: result = (char *) NULL;
! 458:
! 459: return(result);
! 460: }
! 461:
! 462:
! 463:
! 464:
! 465:
! 466:
! 467:
Webmaster