Return to AHTURLTools.c CVS log | Up to [Public] / Amaya / amaya |
1.7 cvs 1: /* 2: * 3: * (c) COPYRIGHT MIT and INRIA, 1996. 4: * Please first read the full copyright statement in file COPYRIGHT. 5: * 6: */ 1.9 cvs 7: 1.10 cvs 8: /* 9: * AHTURLTools.c: contains all the functions for testing, manipulating, 1.25 cvs 10: * and normalizing URLs. It also contains a local copy of the libWWW 11: * URL parsing functions. 1.10 cvs 12: * 13: * Authors: J. Kahan, I. Vatton 1.87 cvs 14: * R. Guetari: Windows & Unicode. 1.10 cvs 15: * 16: */ 1.7 cvs 17: 1.15 cvs 18: #define THOT_EXPORT extern 1.3 cvs 19: #include "amaya.h" 20: 1.8 cvs 21: #include "init_f.h" 22: #include "AHTURLTools_f.h" 23: 1.24 cvs 24: #define MAX_PRINT_URL_LENGTH 50 1.29 cvs 25: typedef struct _HTURI { 1.67 cvs 26: STRING access; /* Now known as "scheme" */ 27: STRING host; 28: STRING absolute; 29: STRING relative; 30: STRING fragment; 1.29 cvs 31: } HTURI; 1.24 cvs 32: 1.28 cvs 33: 34: /*---------------------------------------------------------------------- 35: ConvertToLowerCase 36: Converts a string to lowercase. 37: ----------------------------------------------------------------------*/ 1.22 cvs 38: #ifdef __STDC__ 1.67 cvs 39: void ConvertToLowerCase (STRING string) 1.28 cvs 40: #else /* __STDC__ */ 1.38 cvs 41: void ConvertToLowerCase (string) 1.67 cvs 42: STRING string; 1.28 cvs 43: 44: #endif /* __STDC__ */ 45: { 46: int i; 1.93 cvs 47: 1.28 cvs 48: if (!string) 49: return; 50: 1.87 cvs 51: for (i = 0; string[i] != WC_EOS; i++) 1.67 cvs 52: string[i] = utolower (string[i]); 1.28 cvs 53: } 1.22 cvs 54: 1.8 cvs 55: /*---------------------------------------------------------------------- 1.75 cvs 56: EscapeChar 57: writes the equivalent escape code of a char in a string 58: ----------------------------------------------------------------------*/ 59: #ifdef __STDC__ 60: void EscapeChar (STRING string, UCHAR_T c) 61: #else 62: void EscapeChar (string, c) 63: STRING string; 64: UCHAR_T c; 65: 66: #endif 67: { 68: c &= 0xFF; /* strange behavior under solaris? */ 69: usprintf (string, TEXT("%02x"), (unsigned int) c); 70: } 71: 72: /*---------------------------------------------------------------------- 1.96 cvs 73: UnEscapeChar 74: writes the equivalent hex code to a %xx coded char 75: ----------------------------------------------------------------------*/ 76: #ifdef __STDC__ 77: static CHAR_T UnEscapeChar (CHAR_T c) 78: #else 79: static CHAR_T UnEscapeChar (c) 80: CHAR_T c; 81: #endif 82: { 83: return c >= TEXT('0') && c <= TEXT('9') ? c - TEXT('0') 84: : c >= TEXT('A') && c <= TEXT('F') ? c - TEXT('A') + 10 85: : c - TEXT('a') + 10; /* accept small letters just in case */ 86: } 87: 88: /*---------------------------------------------------------------------- 1.75 cvs 89: EscapeURL 90: Takes a URL and escapes all protected chars into 91: %xx sequences. Also, removes any leading white spaces 92: Returns either NULL or a new buffer, which must be freed by the caller 93: ----------------------------------------------------------------------*/ 94: #ifdef __STDC__ 95: STRING EscapeURL (const STRING url) 96: #else 97: STRING EscapeURL (url) 98: STRING url; 99: #endif /* __STDC__ */ 100: { 101: STRING buffer; 102: int buffer_len; 103: int buffer_free_mem; 104: PCHAR_T ptr; 105: int new_chars; 106: void *status; 107: 108: if (url && *url) 109: { 1.76 cvs 110: buffer_free_mem = ustrlen (url) + 20; 111: buffer = TtaAllocString (buffer_free_mem + 1); 1.75 cvs 112: ptr = url; 113: buffer_len = 0; 114: 115: while (*ptr) 116: { 117: switch (*ptr) 118: { 119: /* put here below all the chars that need to 120: be escaped into %xx */ 1.81 cvs 121: case 0x27: /* & */ 122: case 0x20: /* space */ 1.75 cvs 123: new_chars = 3; 124: break; 125: 126: default: 127: new_chars = 1; 128: break; 129: } 130: 131: /* see if we need extra room in the buffer */ 132: if (new_chars > buffer_free_mem) 133: { 1.76 cvs 134: buffer_free_mem = 20; 1.75 cvs 135: status = TtaRealloc (buffer, sizeof (CHAR_T) 136: * (buffer_len + buffer_free_mem + 1)); 137: if (status) 138: buffer = (STRING) status; 139: else { 140: /* @@ maybe we should do some other behavior here, like 141: freeing the buffer and return a void thing */ 1.87 cvs 142: buffer[buffer_len] = WC_EOS; 1.75 cvs 143: break; 144: } 145: } 146: /* escape the char */ 147: if (new_chars == 3) 148: { 149: buffer[buffer_len] = TEXT('%'); 150: EscapeChar (&buffer[buffer_len+1], *ptr); 151: } 152: else 153: buffer[buffer_len] = *ptr; 154: 155: /* update the status */ 156: buffer_len += new_chars; 157: buffer_free_mem -= new_chars; 158: /* examine the next char */ 159: ptr++; 160: } 1.87 cvs 161: buffer[buffer_len] = WC_EOS; 1.75 cvs 162: } 1.76 cvs 163: else 164: buffer = NULL; 165: 1.75 cvs 166: return (buffer); 167: } 168: 169: 170: /*---------------------------------------------------------------------- 1.11 cvs 171: ExplodeURL 1.8 cvs 172: ----------------------------------------------------------------------*/ 173: #ifdef __STDC__ 174: void ExplodeURL (char *url, char **proto, char **host, char **dir, char **file) 175: #else 176: void ExplodeURL (url, proto, host, dir, file) 177: char *url; 178: char **proto; 179: char **host; 180: char **dir; 181: char **file; 182: 183: #endif 184: { 1.33 cvs 185: char *curr, *temp; 186: char used_sep; 1.32 cvs 187: 1.33 cvs 188: if (url && strchr (url, URL_SEP)) 189: used_sep = URL_SEP; 190: else 191: used_sep = DIR_SEP; 1.8 cvs 192: 193: if ((url == NULL) || (proto == NULL) || (host == NULL) || 194: (dir == NULL) || (file == NULL)) 195: return; 196: 197: /* initialize every pointer */ 198: *proto = *host = *dir = *file = NULL; 199: 200: /* skip any leading space */ 201: while ((*url == SPACE) || (*url == TAB)) 202: url++; 1.9 cvs 203: curr = url; 204: if (*curr == 0) 1.8 cvs 205: goto finished; 206: 207: /* go to the end of the URL */ 1.68 cvs 208: while ((*curr != EOS) && (*curr != SPACE) && (*curr != BSPACE) && 209: (*curr != __CR__) && (*curr != EOL)) 1.9 cvs 210: curr++; 1.8 cvs 211: 212: /* mark the end of the chain */ 1.9 cvs 213: *curr = EOS; 214: curr--; 215: if (curr <= url) 1.8 cvs 216: goto finished; 217: 218: /* search the next DIR_SEP indicating the beginning of the file name */ 219: do 1.11 cvs 220: curr--; 1.33 cvs 221: while ((curr >= url) && (*curr != used_sep)); 1.11 cvs 222: 1.9 cvs 223: if (curr < url) 1.8 cvs 224: goto finished; 1.9 cvs 225: *file = curr + 1; 1.8 cvs 226: 227: /* mark the end of the dir */ 1.9 cvs 228: *curr = EOS; 229: curr--; 230: if (curr < url) 1.8 cvs 231: goto finished; 232: 1.29 cvs 233: /* search for the DIR_STR indicating the host name start */ 1.33 cvs 234: while ((curr > url) && ((*curr != used_sep) || (*(curr + 1) != used_sep))) 1.9 cvs 235: curr--; 1.8 cvs 236: 237: /* if we found it, separate the host name from the directory */ 1.33 cvs 238: if ((*curr == DIR_SEP) && (*(curr + 1) == used_sep)) 1.8 cvs 239: { 1.9 cvs 240: *host = temp = curr + 2; 1.33 cvs 241: while ((*temp != 0) && (*temp != used_sep)) 1.8 cvs 242: temp++; 1.33 cvs 243: if (*temp == used_sep) 1.8 cvs 244: { 245: *temp = EOS; 246: *dir = temp + 1; 247: } 248: } 249: else 1.11 cvs 250: *dir = curr; 251: 1.9 cvs 252: if (curr <= url) 1.8 cvs 253: goto finished; 254: 255: /* mark the end of the proto */ 1.9 cvs 256: *curr = EOS; 257: curr--; 258: if (curr < url) 1.8 cvs 259: goto finished; 260: 1.67 cvs 261: if (*curr == TEXT(':')) 1.8 cvs 262: { 1.9 cvs 263: *curr = EOS; 264: curr--; 1.8 cvs 265: } 266: else 267: goto finished; 1.11 cvs 268: 1.9 cvs 269: if (curr < url) 1.8 cvs 270: goto finished; 1.9 cvs 271: while ((curr > url) && (isalpha (*curr))) 272: curr--; 273: *proto = curr; 1.8 cvs 274: 275: finished:; 276: 277: #ifdef AMAYA_DEBUG 278: fprintf (stderr, "ExplodeURL(%s)\n\t", url); 279: if (*proto) 280: fprintf (stderr, "proto : %s, ", *proto); 281: if (*host) 282: fprintf (stderr, "host : %s, ", *host); 283: if (*dir) 284: fprintf (stderr, "dir : %s, ", *dir); 285: if (*file) 286: fprintf (stderr, "file : %s ", *file); 287: fprintf (stderr, "\n"); 288: #endif 289: 290: } 1.3 cvs 291: 1.61 cvs 292: 293: /*---------------------------------------------------------------------- 294: ExtractSuffix extract suffix from document nane. 295: ----------------------------------------------------------------------*/ 296: #ifdef __STDC__ 1.91 cvs 297: void ExtractSuffix (CHAR_T* aName, CHAR_T* aSuffix) 1.61 cvs 298: #else 299: void ExtractSuffix (aName, aSuffix) 1.91 cvs 300: CHAR_T* aName; 301: CHAR_T* aSuffix; 1.61 cvs 302: 303: #endif 304: { 305: int lg, i; 306: STRING ptr, oldptr; 307: 308: if (!aSuffix || !aName) 309: /* bad suffix */ 310: return; 311: 1.87 cvs 312: aSuffix[0] = WC_EOS; 1.61 cvs 313: lg = ustrlen (aName); 314: if (lg) 315: { 316: /* the name is not empty */ 317: oldptr = ptr = &aName[0]; 318: do 319: { 1.67 cvs 320: ptr = ustrrchr (oldptr, TEXT('.')); 1.61 cvs 321: if (ptr) 322: oldptr = &ptr[1]; 323: } 324: while (ptr); 325: 326: i = (int) (oldptr) - (int) (aName); /* name length */ 327: if (i > 1) 328: { 1.87 cvs 329: aName[i - 1] = WC_EOS; 1.61 cvs 330: if (i != lg) 331: ustrcpy (aSuffix, oldptr); 332: } 333: } 334: } 335: 1.4 cvs 336: /*---------------------------------------------------------------------- 1.9 cvs 337: IsHTMLName 338: returns TRUE if path points to an HTML resource. 1.4 cvs 339: ----------------------------------------------------------------------*/ 1.3 cvs 340: #ifdef __STDC__ 1.84 cvs 341: ThotBool IsHTMLName (const CHAR_T* path) 1.3 cvs 342: #else /* __STDC__ */ 1.67 cvs 343: ThotBool IsHTMLName (path) 1.84 cvs 344: const CHAR_T* path; 1.3 cvs 345: #endif /* __STDC__ */ 346: { 1.84 cvs 347: CHAR_T temppath[MAX_LENGTH]; 348: CHAR_T suffix[MAX_LENGTH]; 349: CHAR_T nsuffix[MAX_LENGTH]; 350: int i; 1.5 cvs 351: 352: if (!path) 1.37 cvs 353: return (FALSE); 1.5 cvs 354: 1.84 cvs 355: ustrcpy (temppath, path); 1.5 cvs 356: ExtractSuffix (temppath, suffix); 1.91 cvs 357: /* while (suffix[0] != WC_EOS) { */ 358: i = 0; 359: while (suffix[i] != WC_EOS) { 360: /* Normalize the suffix */ 361: i = 0; 362: while (suffix[i] != WC_EOS && i < MAX_LENGTH -1) { 363: nsuffix[i] = utolower (suffix[i]); 364: i++; 365: } 366: nsuffix[i] = WC_EOS; 367: if (!ustrcmp (nsuffix, TEXT("html")) || 368: !ustrcmp (nsuffix, TEXT("htm")) || 369: !ustrcmp (nsuffix, TEXT("shtml")) || 370: !ustrcmp (nsuffix, TEXT("jsp")) || 371: !ustrcmp (nsuffix, TEXT("xht")) || 372: !ustrcmp (nsuffix, TEXT("xhtm")) || 373: !ustrcmp (nsuffix, TEXT("xhtml"))) 374: return (TRUE); 375: else if (!ustrcmp (nsuffix, TEXT("gz"))) { 376: /* take into account compressed files */ 377: ExtractSuffix (temppath, suffix); 378: /* Normalize the suffix */ 379: i = 0; 380: while (suffix[i] != WC_EOS && i < MAX_LENGTH -1) { 381: nsuffix[i] = utolower (suffix[i]); 382: i++; 383: } 384: nsuffix[i] = WC_EOS; 385: if (!ustrcmp (nsuffix, TEXT("html")) || 386: !ustrcmp (nsuffix, TEXT("htm")) || 387: !ustrcmp (nsuffix, TEXT("shtml")) || 388: !ustrcmp (nsuffix, TEXT("jsp")) || 389: !ustrcmp (nsuffix, TEXT("xht")) || 390: !ustrcmp (nsuffix, TEXT("xhtm")) || 391: !ustrcmp (nsuffix, TEXT("xhtml"))) 392: return (TRUE); 393: else 394: return (FALSE); 395: } else 396: /* check if there is another suffix */ 397: ExtractSuffix (temppath, suffix); 398: } 1.88 cvs 399: 400: return (FALSE); 1.3 cvs 401: } 402: 1.4 cvs 403: /*---------------------------------------------------------------------- 1.56 cvs 404: IsXMLName 405: returns TRUE if path points to an XML resource. 406: ----------------------------------------------------------------------*/ 407: #ifdef __STDC__ 1.67 cvs 408: ThotBool IsXMLName (const STRING path) 1.56 cvs 409: #else /* __STDC__ */ 1.67 cvs 410: ThotBool IsXMLName (path) 411: const STRING path; 1.56 cvs 412: #endif /* __STDC__ */ 413: { 1.67 cvs 414: CHAR_T temppath[MAX_LENGTH]; 415: CHAR_T suffix[MAX_LENGTH]; 1.56 cvs 416: 417: if (!path) 418: return (FALSE); 419: 1.67 cvs 420: ustrcpy (temppath, path); 1.56 cvs 421: ExtractSuffix (temppath, suffix); 422: 1.67 cvs 423: if (!ustrcasecmp (suffix, TEXT("xml")) || 424: !ustrcasecmp (suffix, TEXT("xht")) || 425: !ustrcmp (suffix, TEXT("xhtm")) || 426: !ustrcmp (suffix, TEXT("xhtml"))) 1.56 cvs 427: return (TRUE); 1.67 cvs 428: else if (!ustrcmp (suffix, TEXT("gz"))) 1.56 cvs 429: { 430: /* take into account compressed files */ 431: ExtractSuffix (temppath, suffix); 1.67 cvs 432: if (!ustrcasecmp (suffix, TEXT("xml")) || 433: !ustrcasecmp (suffix, TEXT("xht")) || 434: !ustrcmp (suffix, TEXT("xhtm")) || 435: !ustrcmp (suffix, TEXT("xhtml"))) 1.60 cvs 436: return (TRUE); 437: else 438: return (FALSE); 439: } 440: else 441: return (FALSE); 442: } 443: 444: /*---------------------------------------------------------------------- 445: IsCSSName 446: returns TRUE if path points to an XML resource. 447: ----------------------------------------------------------------------*/ 448: #ifdef __STDC__ 1.67 cvs 449: ThotBool IsCSSName (const STRING path) 1.60 cvs 450: #else /* __STDC__ */ 1.67 cvs 451: ThotBool IsCSSName (path) 452: const STRING path; 1.60 cvs 453: #endif /* __STDC__ */ 454: { 1.67 cvs 455: CHAR_T temppath[MAX_LENGTH]; 456: CHAR_T suffix[MAX_LENGTH]; 1.60 cvs 457: 458: if (!path) 459: return (FALSE); 460: 1.67 cvs 461: ustrcpy (temppath, path); 1.60 cvs 462: ExtractSuffix (temppath, suffix); 463: 1.67 cvs 464: if (!ustrcasecmp (suffix, TEXT("css"))) 1.60 cvs 465: return (TRUE); 1.67 cvs 466: else if (!ustrcmp (suffix, TEXT("gz"))) 1.60 cvs 467: { 468: /* take into account compressed files */ 469: ExtractSuffix (temppath, suffix); 1.67 cvs 470: if (!ustrcasecmp (suffix, TEXT("css"))) 1.56 cvs 471: return (TRUE); 472: else 473: return (FALSE); 474: } 475: else 476: return (FALSE); 477: } 478: 479: /*---------------------------------------------------------------------- 1.9 cvs 480: IsImageName 481: returns TRUE if path points to an image resource. 1.4 cvs 482: ----------------------------------------------------------------------*/ 1.3 cvs 483: #ifdef __STDC__ 1.67 cvs 484: ThotBool IsImageName (const STRING path) 1.3 cvs 485: #else /* __STDC__ */ 1.67 cvs 486: ThotBool IsImageName (path) 487: const STRING path; 1.3 cvs 488: #endif /* __STDC__ */ 489: { 1.67 cvs 490: CHAR_T temppath[MAX_LENGTH]; 491: CHAR_T suffix[MAX_LENGTH]; 492: CHAR_T nsuffix[MAX_LENGTH]; 1.5 cvs 493: int i; 494: 495: if (!path) 1.13 cvs 496: return (FALSE); 1.5 cvs 497: 1.67 cvs 498: ustrcpy (temppath, path); 1.5 cvs 499: ExtractSuffix (temppath, suffix); 500: 501: /* Normalize the suffix */ 502: i = 0; 1.87 cvs 503: while (suffix[i] != WC_EOS && i < MAX_LENGTH -1) 1.13 cvs 504: { 1.67 cvs 505: nsuffix[i] = utolower (suffix[i]); 1.13 cvs 506: i++; 507: } 1.87 cvs 508: nsuffix[i] = WC_EOS; 1.67 cvs 509: if ((!ustrcmp (nsuffix, TEXT("gif"))) || (!ustrcmp (nsuffix, TEXT("xbm"))) || 510: (!ustrcmp (nsuffix, TEXT("xpm"))) || (!ustrcmp (nsuffix, TEXT("jpg"))) || 511: (!ustrcmp (nsuffix, TEXT("png"))) || (!ustrcmp (nsuffix, TEXT("au")))) 1.39 cvs 512: return (TRUE); 513: return (FALSE); 1.3 cvs 514: } 515: 1.4 cvs 516: /*---------------------------------------------------------------------- 1.58 cvs 517: IsImageType 518: returns TRUE if type points to an image resource. 519: ----------------------------------------------------------------------*/ 520: #ifdef __STDC__ 1.67 cvs 521: ThotBool IsImageType (const STRING type) 1.58 cvs 522: #else /* __STDC__ */ 1.67 cvs 523: ThotBool IsImageType (type) 524: const STRING type; 1.58 cvs 525: #endif /* __STDC__ */ 526: { 1.67 cvs 527: CHAR_T temptype[MAX_LENGTH]; 1.58 cvs 528: int i; 529: 530: if (!type) 531: return (FALSE); 532: 1.67 cvs 533: ustrcpy (temptype, type); 1.58 cvs 534: /* Normalize the type */ 535: i = 0; 1.87 cvs 536: while (temptype[i] != WC_EOS) 1.58 cvs 537: { 538: temptype[i] = tolower (temptype[i]); 539: i++; 540: } 1.67 cvs 541: if ((!ustrcmp (temptype, TEXT("gif"))) || (!ustrcmp (temptype, TEXT("x-xbitmap"))) || 542: (!ustrcmp (temptype, TEXT("x-xpixmap"))) || (!ustrcmp (temptype, TEXT("jpeg"))) || 543: (!ustrcmp (temptype, TEXT("png")))) 1.58 cvs 544: return (TRUE); 545: return (FALSE); 546: } 547: 548: /*---------------------------------------------------------------------- 1.9 cvs 549: IsTextName 1.4 cvs 550: ----------------------------------------------------------------------*/ 1.3 cvs 551: #ifdef __STDC__ 1.67 cvs 552: ThotBool IsTextName (const STRING path) 1.3 cvs 553: #else /* __STDC__ */ 1.67 cvs 554: ThotBool IsTextName (path) 555: const STRING path; 1.3 cvs 556: 557: #endif /* __STDC__ */ 558: { 1.67 cvs 559: CHAR_T temppath[MAX_LENGTH]; 560: CHAR_T suffix[MAX_LENGTH]; 561: CHAR_T nsuffix[MAX_LENGTH]; 1.5 cvs 562: int i; 563: 564: if (!path) 1.13 cvs 565: return (FALSE); 1.5 cvs 566: 1.67 cvs 567: ustrcpy (temppath, path); 1.5 cvs 568: ExtractSuffix (temppath, suffix); 569: 570: /* Normalize the suffix */ 571: i = 0; 1.87 cvs 572: while (suffix[i] != WC_EOS && i < MAX_LENGTH -1) 1.5 cvs 573: { 1.25 cvs 574: nsuffix[i] = tolower (suffix[i]); 1.5 cvs 575: i++; 576: } 1.87 cvs 577: nsuffix[i] = WC_EOS; 1.5 cvs 578: 1.67 cvs 579: if ((!ustrcmp (nsuffix, TEXT("txt"))) || (!ustrcmp (nsuffix, TEXT("dtd")))) 1.13 cvs 580: return (TRUE); 1.67 cvs 581: else if (!ustrcmp (nsuffix, TEXT("gz"))) 1.13 cvs 582: { 1.39 cvs 583: /* take into account compressed files */ 1.13 cvs 584: ExtractSuffix (temppath, suffix); 585: /* Normalize the suffix */ 586: i = 0; 1.87 cvs 587: while (suffix[i] != WC_EOS && i < MAX_LENGTH -1) 1.13 cvs 588: { 1.25 cvs 589: nsuffix[i] = tolower (suffix[i]); 1.13 cvs 590: i++; 591: } 1.87 cvs 592: nsuffix[i] = WC_EOS; 1.67 cvs 593: if ((!ustrcmp (nsuffix, TEXT("txt"))) || (!ustrcmp (nsuffix, TEXT("dtd")))) 1.13 cvs 594: return (TRUE); 595: else 596: return (FALSE); 597: } 598: else 599: return (FALSE); 1.3 cvs 600: } 601: 1.4 cvs 602: /*---------------------------------------------------------------------- 1.9 cvs 603: IsHTTPPath 604: returns TRUE if path is in fact an http URL. 1.4 cvs 605: ----------------------------------------------------------------------*/ 1.3 cvs 606: #ifdef __STDC__ 1.67 cvs 607: ThotBool IsHTTPPath (const STRING path) 1.3 cvs 608: #else /* __STDC__ */ 1.67 cvs 609: ThotBool IsHTTPPath (path) 610: const STRING path; 1.3 cvs 611: #endif /* __STDC__ */ 612: { 1.5 cvs 613: if (!path) 614: return FALSE; 1.3 cvs 615: 1.67 cvs 616: if ((!ustrncmp (path, TEXT("http:"), 5) != 0) 617: || !ustrncmp (path, TEXT("internal:"), 9)) 1.58 cvs 618: return TRUE; 619: return FALSE; 1.3 cvs 620: } 621: 1.4 cvs 622: /*---------------------------------------------------------------------- 1.9 cvs 623: IsWithParameters 624: returns TRUE if url has a concatenated query string. 1.4 cvs 625: ----------------------------------------------------------------------*/ 1.3 cvs 626: #ifdef __STDC__ 1.66 cvs 627: ThotBool IsWithParameters (const char *url) 1.3 cvs 628: #else /* __STDC__ */ 1.66 cvs 629: ThotBool IsWithParameters (url) 1.34 cvs 630: const char *url; 1.3 cvs 631: #endif /* __STDC__ */ 632: { 1.5 cvs 633: int i; 1.3 cvs 634: 1.9 cvs 635: if ((!url) || (url[0] == EOS)) 1.5 cvs 636: return FALSE; 1.3 cvs 637: 1.9 cvs 638: i = strlen (url) - 1; 639: while (i > 0 && url[i--] != '?') 1.5 cvs 640: if (i < 0) 641: return FALSE; 1.3 cvs 642: 1.5 cvs 643: /* There is a parameter */ 644: return TRUE; 1.3 cvs 645: } 646: 1.4 cvs 647: /*---------------------------------------------------------------------- 1.9 cvs 648: IsW3Path 649: returns TRUE if path is in fact a URL. 1.4 cvs 650: ----------------------------------------------------------------------*/ 1.3 cvs 651: #ifdef __STDC__ 1.84 cvs 652: ThotBool IsW3Path (const CHAR_T* path) 1.3 cvs 653: #else /* __STDC__ */ 1.67 cvs 654: ThotBool IsW3Path (path) 1.84 cvs 655: const CHAR_T* path; 1.3 cvs 656: #endif /* __STDC__ */ 657: { 1.84 cvs 658: if (ustrncmp (path, TEXT("http:"), 5) && 659: ustrncmp (path, TEXT("ftp:"), 4) && 660: ustrncmp (path, TEXT("telnet:"), 7) && 661: ustrncmp (path, TEXT("wais:"), 5) && 662: ustrncmp (path, TEXT("news:"), 5) && 663: ustrncmp (path, TEXT("gopher:"), 7) && 664: ustrncmp (path, TEXT("mailto:"), 7) && 665: ustrncmp (path, TEXT("archie:"), 7)) 1.72 cvs 666: return FALSE; 667: return TRUE; 1.3 cvs 668: } 669: 1.4 cvs 670: /*---------------------------------------------------------------------- 1.90 cvs 671: IsFilePath 672: returns TRUE if path is in fact a URL. 673: ----------------------------------------------------------------------*/ 674: #ifdef __STDC__ 675: static ThotBool IsFilePath (const CHAR_T* path) 676: #else /* __STDC__ */ 677: static ThotBool IsFilePath (path) 678: const CHAR_T* path; 679: #endif /* __STDC__ */ 680: { 681: if (ustrncmp (path, TEXT("file:"), 5)) 682: return FALSE; 683: return TRUE; 684: } 685: 686: /*---------------------------------------------------------------------- 1.9 cvs 687: IsValidProtocol 688: returns true if the url protocol is supported by Amaya. 1.4 cvs 689: ----------------------------------------------------------------------*/ 1.3 cvs 690: #ifdef __STDC__ 1.67 cvs 691: ThotBool IsValidProtocol (const STRING url) 1.3 cvs 692: #else /* __STDC__ */ 1.67 cvs 693: ThotBool IsValidProtocol (url) 694: const STRING url; 1.3 cvs 695: #endif /* __STDC__ */ 696: { 1.67 cvs 697: if (!ustrncmp (url, TEXT("http:"), 5) 1.69 cvs 698: || !ustrncmp (url, TEXT("internal:"), 9) 1.70 cvs 699: || !ustrncmp (url, TEXT("ftp:"), 4)) 1.22 cvs 700: /* experimental */ 1.58 cvs 701: /*** || !strncmp (url, "ftp:", 4) ***/ 1.24 cvs 702: /*** || !strncmp (path, "news:", 5)***/ 1.8 cvs 703: return (TRUE); 1.5 cvs 704: else 1.8 cvs 705: return (FALSE); 1.3 cvs 706: } 707: 1.31 cvs 708: 709: /*---------------------------------------------------------------------- 710: GetBaseURL 711: normalizes orgName according to a base associated with doc, and 712: following the standard URL format rules. 713: The function returns the base used to solve relative URL and SRC: 714: - the base of the document, 715: - or the document path (without document name). 716: ----------------------------------------------------------------------*/ 717: #ifdef __STDC__ 1.84 cvs 718: CHAR_T* GetBaseURL (Document doc) 1.31 cvs 719: #else /* __STDC__ */ 1.84 cvs 720: CHAR_T* GetBaseURL (doc) 1.31 cvs 721: Document doc; 722: #endif /* __STDC__ */ 723: { 724: Element el; 725: ElementType elType; 726: AttributeType attrType; 727: Attribute attr; 1.84 cvs 728: CHAR_T *ptr, *basename; 1.31 cvs 729: int length; 730: 1.57 cvs 731: /* @@@ irene */ 732: if (!DocumentURLs[doc]) 733: return NULL; 1.67 cvs 734: basename = TtaAllocString (MAX_LENGTH); 735: ustrncpy (basename, DocumentURLs[doc], MAX_LENGTH-1); 1.84 cvs 736: basename[MAX_LENGTH-1] = WC_EOS; 1.31 cvs 737: length = MAX_LENGTH -1; 738: /* get the root element */ 739: el = TtaGetMainRoot (doc); 740: /* search the BASE element */ 741: elType.ElSSchema = TtaGetDocumentSSchema (doc); 1.65 cvs 742: elType.ElTypeNum = HTML_EL_HEAD; 743: el = TtaSearchTypedElement (elType, SearchForward, el); 744: if (el) 745: { 746: elType.ElTypeNum = HTML_EL_BASE; 747: el = TtaSearchTypedElement (elType, SearchInTree, el); 748: } 1.31 cvs 749: if (el) 750: { 751: /* The document has a BASE element -> Get the HREF attribute */ 752: attrType.AttrSSchema = elType.ElSSchema; 753: attrType.AttrTypeNum = HTML_ATTR_HREF_; 754: attr = TtaGetAttribute (el, attrType); 755: if (attr) 756: { 757: /* Use the base path of the document */ 758: TtaGiveTextAttributeValue (attr, basename, &length); 759: /* base and orgName have to be separated by a DIR_SEP */ 760: length--; 1.84 cvs 761: if (basename[0] != WC_EOS && basename[length] != WC_URL_SEP && basename[length] != WC_DIR_SEP) 1.31 cvs 762: /* verify if the base has the form "protocol://server:port" */ 763: { 1.84 cvs 764: ptr = AmayaParseUrl (basename, TEXT(""), AMAYA_PARSE_ACCESS | 1.33 cvs 765: AMAYA_PARSE_HOST | 766: AMAYA_PARSE_PUNCTUATION); 1.67 cvs 767: if (ptr && !ustrcmp (ptr, basename)) 1.31 cvs 768: { 1.43 cvs 769: /* it has this form, we complete it by adding a URL_STR */ 1.84 cvs 770: if (ustrchr (basename, WC_DIR_SEP)) 771: ustrcat (basename, WC_DIR_STR); 1.43 cvs 772: else 1.84 cvs 773: ustrcat (basename, WC_URL_STR); 1.31 cvs 774: length++; 775: } 776: if (ptr) 777: TtaFreeMemory (ptr); 778: } 779: } 1.33 cvs 780: } 781: 1.31 cvs 782: /* Remove anything after the last DIR_SEP char. If no such char is found, 783: * then search for the first ":" char, hoping that what's before that is a 784: * protocol. If found, end the string there. If neither char is found, 785: * then discard the whole base element. 786: */ 1.67 cvs 787: length = ustrlen (basename) - 1; 1.31 cvs 788: /* search for the last DIR_SEP char */ 1.84 cvs 789: while (length >= 0 && basename[length] != WC_URL_SEP && basename[length] != WC_DIR_SEP) 1.31 cvs 790: length--; 791: if (length >= 0) 792: /* found the last DIR_SEP char, end the string there */ 1.84 cvs 793: basename[length + 1] = WC_EOS; 1.31 cvs 794: else 795: /* search for the first PATH_STR char */ 796: { 1.67 cvs 797: for (length = 0; basename[length] != TEXT(':') && 1.84 cvs 798: basename[length] != WC_EOS; length ++); 1.67 cvs 799: if (basename[length] == TEXT(':')) 1.31 cvs 800: /* found, so end the string there */ 1.84 cvs 801: basename[length + 1] = WC_EOS; 1.31 cvs 802: else 803: /* not found, discard the base */ 1.84 cvs 804: basename[0] = WC_EOS; 1.31 cvs 805: } 806: return (basename); 807: } 808: 809: 1.4 cvs 810: /*---------------------------------------------------------------------- 1.40 cvs 811: GetLocalPath 812: Allocate and return the local document path associated to the url 813: ----------------------------------------------------------------------*/ 814: #ifdef __STDC__ 1.84 cvs 815: CHAR_T* GetLocalPath (Document doc, CHAR_T* url) 1.40 cvs 816: #else /* __STDC__ */ 1.84 cvs 817: CHAR_T* GetLocalPath (doc, url) 1.40 cvs 818: Document doc; 1.84 cvs 819: CHAR_T* url; 1.40 cvs 820: #endif /* __STDC__ */ 821: { 1.84 cvs 822: CHAR_T* ptr; 823: CHAR_T* n; 824: CHAR_T* documentname; 825: CHAR_T url_sep; 1.83 cvs 826: int len; 1.67 cvs 827: ThotBool noFile; 1.40 cvs 828: 829: if (url != NULL) 830: { 831: /* check whether the file name exists */ 1.84 cvs 832: len = ustrlen (url) - 1; 1.71 cvs 833: if (IsW3Path (url)) 1.84 cvs 834: url_sep = TEXT('/'); 1.41 cvs 835: else 1.84 cvs 836: url_sep = WC_DIR_SEP; 1.41 cvs 837: noFile = (url[len] == url_sep); 1.40 cvs 838: if (noFile) 1.84 cvs 839: url[len] = WC_EOS; 840: ptr = TtaAllocString (MAX_LENGTH); 841: documentname = TtaAllocString (MAX_LENGTH); 1.78 cvs 842: TtaExtractName (url, ptr, documentname); 1.84 cvs 843: usprintf (ptr, TEXT("%s%s%d%s"), TempFileDirectory, WC_DIR_STR, doc, WC_DIR_STR); 1.40 cvs 844: if (!TtaCheckDirectory (ptr)) 845: /* directory did not exist */ 1.72 cvs 846: TtaMakeDirectory (ptr); 1.47 cvs 847: 848: /* don't include the query string within document name */ 1.84 cvs 849: n = ustrrchr (documentname, TEXT('?')); 1.47 cvs 850: if (n != NULL) 1.84 cvs 851: *n = WC_EOS; 1.46 cvs 852: /* don't include ':' within document name */ 1.84 cvs 853: n = ustrchr (documentname, TEXT(':')); 1.46 cvs 854: if (n != NULL) 1.84 cvs 855: *n = WC_EOS; 1.69 cvs 856: /* if after all this operations document name 857: is empty, let's use noname.html instead */ 1.84 cvs 858: if (documentname[0] == WC_EOS) 859: ustrcat (ptr, TEXT("noname.html")); 1.69 cvs 860: else 1.84 cvs 861: ustrcat (ptr, documentname); 1.40 cvs 862: TtaFreeMemory (documentname); 863: /* restore the url */ 864: if (noFile) 1.41 cvs 865: url[len] = url_sep; 1.40 cvs 866: return (ptr); 867: } 868: else 869: return (NULL); 870: } 871: 1.73 cvs 872: /*---------------------------------------------------------------------- 1.79 cvs 873: ExtractTarget extract the target name from document nane. 874: ----------------------------------------------------------------------*/ 875: #ifdef __STDC__ 1.84 cvs 876: void ExtractTarget (CHAR_T* aName, CHAR_T* target) 1.79 cvs 877: #else 878: void ExtractTarget (aName, target) 1.84 cvs 879: CHAR_T* aName; 880: CHAR_T* target; 1.79 cvs 881: #endif 882: { 1.82 cvs 883: int lg, i; 1.84 cvs 884: CHAR_T* ptr; 885: CHAR_T* oldptr; 1.79 cvs 886: 887: if (!target || !aName) 888: /* bad target */ 889: return; 890: 1.84 cvs 891: target[0] = WC_EOS; 892: lg = ustrlen (aName); 1.79 cvs 893: if (lg) 894: { 895: /* the name is not empty */ 896: oldptr = ptr = &aName[0]; 897: do 898: { 1.84 cvs 899: ptr = ustrrchr (oldptr, TEXT('#')); 1.79 cvs 900: if (ptr) 901: oldptr = &ptr[1]; 902: } 903: while (ptr); 904: 905: i = (int) (oldptr) - (int) (aName); /* name length */ 906: if (i > 1) 907: { 1.84 cvs 908: aName[i - 1] = WC_EOS; 1.79 cvs 909: if (i != lg) 1.84 cvs 910: ustrcpy (target, oldptr); 1.79 cvs 911: } 912: } 913: } 914: 915: /*---------------------------------------------------------------------- 1.90 cvs 916: RemoveNewLines (text) 917: Removes any '\n' chars that are found in text. 918: Returns TRUE if it did the operation, FALSE otherwise. 1.73 cvs 919: ----------------------------------------------------------------------*/ 920: #ifdef __STDC__ 1.90 cvs 921: ThotBool RemoveNewLines (CHAR_T *text) 1.73 cvs 922: #else 1.90 cvs 923: ThotBool RemoveNewLines (text) 924: CHAR_T *text; 925: 1.73 cvs 926: #endif /* __STDC__ */ 927: { 1.90 cvs 928: ThotBool change = FALSE; 929: CHAR_T *src; 930: CHAR_T *dest; 931: 932: src = text; 933: dest = text; 934: 935: while (*src) 936: { 937: switch (*src) 938: { 939: case TEXT('\n'): 940: /* don't copy the newline */ 941: change = 1; 942: break; 943: default: 944: *dest = *src; 945: dest++; 946: break; 947: } 948: src++; 949: } 950: /* copy the last EOS char */ 951: *dest = *src; 952: 953: return (change); 954: } 955: 956: /*---------------------------------------------------------------------- 957: CleanCopyFileURL 958: Copies a file url from a src string to destination string. 1.97 ! cvs 959: convertion says which type of convertion (none, %xx, URL_SEP into DIR_SEP ! 960: we want to do). 1.90 cvs 961: ----------------------------------------------------------------------*/ 962: #ifdef __STDC__ 1.97 ! cvs 963: static void CleanCopyFileURL (CHAR_T *dest, CHAR_T *src, ConvertionType convertion) 1.90 cvs 964: #else 1.97 ! cvs 965: static void CleanCopyFileURL (dest, src, convertion) 1.90 cvs 966: CHAR_T* dest; 967: CHAR_T* src; 1.97 ! cvs 968: ConvertionType convertion; 1.89 cvs 969: 1.90 cvs 970: #endif /* __STDC__ */ 971: { 972: while (*src) 1.89 cvs 973: { 1.90 cvs 974: switch (*src) 1.89 cvs 975: { 976: #ifdef _WINDOWS 1.90 cvs 977: case WC_URL_SEP: 1.96 cvs 978: /* make DIR_SEP transformation */ 1.97 ! cvs 979: if (convertion & AM_CONV_URL_SEP) 1.96 cvs 980: *dest = WC_DIR_SEP; 981: else 982: *dest = *src; 1.90 cvs 983: dest++; 1.96 cvs 984: src++; 1.90 cvs 985: break; 1.89 cvs 986: #endif /* _WINDOWS */ 1.96 cvs 987: 988: case TEXT('%'): 1.97 ! cvs 989: if (convertion & AM_CONV_PERCENT) 1.96 cvs 990: { 1.97 ! cvs 991: /* (code adapted from libwww's HTUnEscape function */ 1.96 cvs 992: src++; 1.97 ! cvs 993: if (*src != WC_EOS) ! 994: { ! 995: *dest = UnEscapeChar (*src) * 16; ! 996: src++; ! 997: } ! 998: if (*src != WC_EOS) ! 999: { ! 1000: *dest = *dest + UnEscapeChar (*src); ! 1001: src++; ! 1002: } ! 1003: dest++; 1.96 cvs 1004: } 1.97 ! cvs 1005: else 1.96 cvs 1006: { 1.97 ! cvs 1007: *dest = *src; ! 1008: dest++; 1.96 cvs 1009: src++; 1010: } 1011: break; 1012: 1.90 cvs 1013: default: 1014: *dest = *src; 1.89 cvs 1015: dest++; 1.96 cvs 1016: src++; 1.90 cvs 1017: break; 1.89 cvs 1018: } 1019: } 1.90 cvs 1020: /* copy the EOS char */ 1021: *dest = *src; 1.73 cvs 1022: } 1.40 cvs 1023: 1024: /*---------------------------------------------------------------------- 1.9 cvs 1025: NormalizeURL 1026: normalizes orgName according to a base associated with doc, and 1027: following the standard URL format rules. 1.53 cvs 1028: if doc is 0 and otherPath not NULL, normalizes orgName according to this 1029: other path. 1.9 cvs 1030: The function returns the new complete and normalized URL 1.12 cvs 1031: or file name path (newName) and the name of the document (docName). 1.9 cvs 1032: N.B. If the function can't find out what's the docName, it assigns 1033: the name "noname.html". 1.4 cvs 1034: ----------------------------------------------------------------------*/ 1.3 cvs 1035: #ifdef __STDC__ 1.84 cvs 1036: void NormalizeURL (CHAR_T* orgName, Document doc, CHAR_T* newName, CHAR_T* docName, CHAR_T* otherPath) 1.3 cvs 1037: #else /* __STDC__ */ 1.53 cvs 1038: void NormalizeURL (orgName, doc, newName, docName, otherPath) 1.84 cvs 1039: CHAR_T* orgName; 1.3 cvs 1040: Document doc; 1.84 cvs 1041: CHAR_T* newName; 1042: CHAR_T* docName; 1043: CHAR_T* otherPath; 1.3 cvs 1044: #endif /* __STDC__ */ 1045: { 1.84 cvs 1046: CHAR_T* basename; 1047: CHAR_T tempOrgName[MAX_LENGTH]; 1048: CHAR_T* ptr; 1049: CHAR_T used_sep; 1050: int length; 1051: ThotBool check; 1.5 cvs 1052: 1.44 cvs 1053: # ifdef _WINDOWS 1054: int ndx; 1055: # endif /* _WINDOWS */ 1056: 1.5 cvs 1057: if (!newName || !docName) 1058: return; 1.18 cvs 1059: 1.32 cvs 1060: if (doc != 0) 1.53 cvs 1061: basename = GetBaseURL (doc); 1062: else if (otherPath != NULL) 1.84 cvs 1063: basename = TtaWCSdup (otherPath); 1.32 cvs 1064: else 1.53 cvs 1065: basename = NULL; 1.32 cvs 1066: 1.18 cvs 1067: /* 1.31 cvs 1068: * Clean orgName 1069: * Make sure we have a complete orgName, without any leading or trailing 1070: * white spaces, or trailinbg new lines 1071: */ 1.5 cvs 1072: ptr = orgName; 1.18 cvs 1073: /* skip leading white space and new line characters */ 1.84 cvs 1074: while ((*ptr == WC_SPACE || *ptr == WC_EOL) && *ptr++ != WC_EOS); 1075: ustrncpy (tempOrgName, ptr, MAX_LENGTH -1); 1076: tempOrgName[MAX_LENGTH -1] = WC_EOS; 1.18 cvs 1077: /* 1.31 cvs 1078: * Make orgName a complete URL 1079: * If the URL does not include a protocol, then try to calculate 1080: * one using the doc's base element (if it exists), 1081: */ 1.84 cvs 1082: if (tempOrgName[0] == WC_EOS) 1.53 cvs 1083: { 1.84 cvs 1084: newName[0] = WC_EOS; 1.53 cvs 1085: TtaFreeMemory (basename); 1086: return; 1087: } 1.49 cvs 1088: 1089: /* clean trailing white space */ 1.84 cvs 1090: length = ustrlen (tempOrgName) - 1; 1091: while (tempOrgName[length] == WC_SPACE && tempOrgName[length] == WC_EOL) 1.53 cvs 1092: { 1.84 cvs 1093: tempOrgName[length] = WC_EOS; 1.53 cvs 1094: length--; 1095: } 1.50 cvs 1096: 1.55 cvs 1097: /* remove extra dot (which dot???) */ 1098: /* ugly, but faster than a strcmp */ 1.84 cvs 1099: if (tempOrgName[length] == TEXT('.') 1100: && (length == 0 || tempOrgName[length-1] != TEXT('.'))) 1101: tempOrgName[length] = WC_EOS; 1.50 cvs 1102: 1.94 cvs 1103: if (IsW3Path (tempOrgName)) 1.53 cvs 1104: { 1105: /* the name is complete, go to the Sixth Step */ 1.84 cvs 1106: ustrcpy (newName, tempOrgName); 1.53 cvs 1107: SimplifyUrl (&newName); 1108: /* verify if the URL has the form "protocol://server:port" */ 1.84 cvs 1109: ptr = AmayaParseUrl (newName, TEXT(""), AMAYA_PARSE_ACCESS | AMAYA_PARSE_HOST | AMAYA_PARSE_PUNCTUATION); 1110: if (ptr && !ustrcmp (ptr, newName)) /* it has this form, we complete it by adding a DIR_STR */ 1111: ustrcat (newName, WC_URL_STR); 1.49 cvs 1112: 1.53 cvs 1113: if (ptr) 1.50 cvs 1114: TtaFreeMemory (ptr); 1.53 cvs 1115: } 1116: else if ( basename == NULL) 1117: /* the name is complete, go to the Sixth Step */ 1.84 cvs 1118: ustrcpy (newName, tempOrgName); 1.53 cvs 1119: else 1120: { 1.31 cvs 1121: /* Calculate the absolute URL, using the base or document URL */ 1.44 cvs 1122: # ifdef _WINDOWS 1.53 cvs 1123: if (!IsW3Path (basename)) 1124: { 1.67 cvs 1125: length = ustrlen (tempOrgName); 1.53 cvs 1126: for (ndx = 0; ndx < length; ndx++) 1.67 cvs 1127: if (tempOrgName [ndx] == TEXT('/')) 1128: tempOrgName [ndx] = TEXT('\\'); 1.53 cvs 1129: } 1.44 cvs 1130: # endif /* _WINDOWS */ 1.25 cvs 1131: ptr = AmayaParseUrl (tempOrgName, basename, AMAYA_PARSE_ALL); 1.53 cvs 1132: if (ptr) 1133: { 1134: SimplifyUrl (&ptr); 1.84 cvs 1135: ustrcpy (newName, ptr); 1.53 cvs 1136: TtaFreeMemory (ptr); 1137: } 1138: else 1.84 cvs 1139: newName[0] = WC_EOS; 1.53 cvs 1140: } 1.36 cvs 1141: 1142: TtaFreeMemory (basename); 1.18 cvs 1143: /* 1.31 cvs 1144: * Prepare the docname that will refer to this ressource in the 1145: * .amaya directory. If the new URL finishes on DIR_SEP, then use 1146: * noname.html as a default ressource name 1.18 cvs 1147: */ 1.84 cvs 1148: if (newName[0] != WC_EOS) 1.53 cvs 1149: { 1.84 cvs 1150: length = ustrlen (newName) - 1; 1151: if (newName[length] == WC_URL_SEP || newName[length] == WC_DIR_SEP) 1.53 cvs 1152: { 1153: used_sep = newName[length]; 1154: check = TRUE; 1155: while (check) 1156: { 1.50 cvs 1157: length--; 1158: while (length >= 0 && newName[length] != used_sep) 1.53 cvs 1159: length--; 1.84 cvs 1160: if (!ustrncmp (&newName[length+1], TEXT(".."), 2)) 1.53 cvs 1161: { 1.84 cvs 1162: newName[length+1] = WC_EOS; 1.53 cvs 1163: /* remove also previous directory */ 1164: length--; 1165: while (length >= 0 && newName[length] != used_sep) 1166: length--; 1.84 cvs 1167: if (ustrncmp (&newName[length+1], TEXT("//"), 2)) 1.53 cvs 1168: /* don't remove server name */ 1.84 cvs 1169: newName[length+1] = WC_EOS; 1.53 cvs 1170: } 1.84 cvs 1171: else if (!ustrncmp (&newName[length+1], TEXT("."), 1)) 1172: newName[length+1] = WC_EOS; 1.50 cvs 1173: else 1.53 cvs 1174: check = FALSE; 1175: } 1.84 cvs 1176: ustrcpy (docName, TEXT("noname.html")); 1.53 cvs 1177: /* docname was not comprised inside the URL, so let's */ 1178: /* assign the default ressource name */ 1.84 cvs 1179: ustrcpy (docName, TEXT("noname.html")); 1.53 cvs 1180: } 1181: else 1182: { /* docname is comprised inside the URL */ 1.84 cvs 1183: while (length >= 0 && newName[length] != WC_URL_SEP && newName[length] != WC_DIR_SEP) 1.53 cvs 1184: length--; 1185: if (length < 0) 1.84 cvs 1186: ustrcpy (docName, newName); 1.53 cvs 1187: else 1.84 cvs 1188: ustrcpy (docName, &newName[length+1]); 1.53 cvs 1189: } 1190: } 1191: else 1.84 cvs 1192: docName[0] = WC_EOS; 1.18 cvs 1193: } 1.3 cvs 1194: 1.4 cvs 1195: /*---------------------------------------------------------------------- 1.9 cvs 1196: IsSameHost 1.4 cvs 1197: ----------------------------------------------------------------------*/ 1.3 cvs 1198: #ifdef __STDC__ 1.67 cvs 1199: ThotBool IsSameHost (const STRING url1, const STRING url2) 1.3 cvs 1200: #else /* __STDC__ */ 1.67 cvs 1201: ThotBool IsSameHost (url1, url2) 1202: const STRING url1; 1203: const STRING url2; 1.3 cvs 1204: #endif /* __STDC__ */ 1205: { 1.67 cvs 1206: STRING basename_ptr1, basename_ptr2; 1207: ThotBool result; 1.3 cvs 1208: 1.84 cvs 1209: basename_ptr1 = AmayaParseUrl (url1, TEXT(""), AMAYA_PARSE_ACCESS | AMAYA_PARSE_HOST | AMAYA_PARSE_PUNCTUATION); 1.86 cvs 1210: basename_ptr2 = AmayaParseUrl (url2, TEXT(""), AMAYA_PARSE_ACCESS | AMAYA_PARSE_HOST | AMAYA_PARSE_PUNCTUATION); 1.3 cvs 1211: 1.67 cvs 1212: if (ustrcmp (basename_ptr1, basename_ptr2)) 1.8 cvs 1213: result = FALSE; 1.5 cvs 1214: else 1.8 cvs 1215: result = TRUE; 1.3 cvs 1216: 1.25 cvs 1217: TtaFreeMemory (basename_ptr1); 1218: TtaFreeMemory (basename_ptr2); 1.5 cvs 1219: return (result); 1.3 cvs 1220: } 1221: 1222: 1.4 cvs 1223: /*---------------------------------------------------------------------- 1.22 cvs 1224: HasKnownFileSuffix 1225: returns TRUE if path points to a file ending with a suffix. 1226: ----------------------------------------------------------------------*/ 1227: #ifdef __STDC__ 1.67 cvs 1228: ThotBool HasKnownFileSuffix (const STRING path) 1.22 cvs 1229: #else /* __STDC__ */ 1.67 cvs 1230: ThotBool HasKnownFileSuffix (path) 1231: const STRING path; 1.22 cvs 1232: #endif /* __STDC__ */ 1233: { 1.67 cvs 1234: STRING root; 1235: CHAR_T temppath[MAX_LENGTH]; 1236: CHAR_T suffix[MAX_LENGTH]; 1.22 cvs 1237: 1.87 cvs 1238: if (!path || path[0] == WC_EOS || path[ustrlen(path)] == WC_DIR_SEP) 1.22 cvs 1239: return (FALSE); 1240: 1.84 cvs 1241: root = AmayaParseUrl(path, TEXT(""), AMAYA_PARSE_PATH | AMAYA_PARSE_PUNCTUATION); 1.22 cvs 1242: 1243: if (root) 1244: { 1.67 cvs 1245: ustrcpy (temppath, root); 1.25 cvs 1246: TtaFreeMemory (root); 1.22 cvs 1247: /* Get the suffix */ 1248: ExtractSuffix (temppath, suffix); 1249: 1.87 cvs 1250: if( suffix[0] == WC_EOS) 1.22 cvs 1251: /* no suffix */ 1252: return (FALSE); 1253: 1254: /* Normalize the suffix */ 1255: ConvertToLowerCase (suffix); 1256: 1.67 cvs 1257: if (!ustrcmp (suffix, TEXT("gz"))) 1.22 cvs 1258: /* skip the compressed suffix */ 1259: { 1260: ExtractSuffix (temppath, suffix); 1.87 cvs 1261: if(suffix[0] == WC_EOS) 1.22 cvs 1262: /* no suffix */ 1263: return (FALSE); 1264: /* Normalize the suffix */ 1265: ConvertToLowerCase (suffix); 1266: } 1267: 1.67 cvs 1268: if (ustrcmp (suffix, TEXT("gif")) && 1269: ustrcmp (suffix, TEXT("xbm")) && 1270: ustrcmp (suffix, TEXT("xpm")) && 1271: ustrcmp (suffix, TEXT("jpg")) && 1272: ustrcmp (suffix, TEXT("pdf")) && 1273: ustrcmp (suffix, TEXT("png")) && 1274: ustrcmp (suffix, TEXT("tgz")) && 1275: ustrcmp (suffix, TEXT("xpg")) && 1276: ustrcmp (suffix, TEXT("xpd")) && 1277: ustrcmp (suffix, TEXT("ps")) && 1278: ustrcmp (suffix, TEXT("au")) && 1279: ustrcmp (suffix, TEXT("html")) && 1280: ustrcmp (suffix, TEXT("htm")) && 1281: ustrcmp (suffix, TEXT("shtml")) && 1282: ustrcmp (suffix, TEXT("xht")) && 1283: ustrcmp (suffix, TEXT("xhtm")) && 1284: ustrcmp (suffix, TEXT("xhtml")) && 1285: ustrcmp (suffix, TEXT("txt")) && 1286: ustrcmp (suffix, TEXT("css")) && 1287: ustrcmp (suffix, TEXT("eps"))) 1.22 cvs 1288: return (FALSE); 1289: else 1290: return (TRUE); 1291: } 1292: else 1293: return (FALSE); 1294: } 1295: 1296: 1297: /*---------------------------------------------------------------------- 1.24 cvs 1298: ChopURL 1299: Gives back a URL no longer than MAX_PRINT_URL_LENGTH chars (outputURL). 1300: If inputURL is bigger than that size, outputURL receives 1301: MAX_PRINT_URL_LENGTH / 2 chars from the beginning of inputURL, "...", 1302: and MAX_PRINT_URL_LENGTH / 2 chars from the end of inputURL. 1303: If inputURL is not longer than MAX_PRINT_URL_LENGTH chars, it gets 1304: copied into outputURL. 1305: N.B.: outputURL must point to a memory block of MAX_PRINT_URL_LENGTH 1306: chars. 1307: ----------------------------------------------------------------------*/ 1308: #ifdef __STDC__ 1.86 cvs 1309: void ChopURL (CHAR_T* outputURL, const CHAR_T* inputURL) 1.24 cvs 1310: #else 1311: void ChopURL (outputURL, inputURL) 1.86 cvs 1312: CHAR_T* outputURL; 1313: const CHAR_T* inputURL; 1.24 cvs 1314: #endif 1.22 cvs 1315: 1.24 cvs 1316: { 1317: int len; 1.9 cvs 1318: 1.86 cvs 1319: len = ustrlen (inputURL); 1.24 cvs 1320: if (len <= MAX_PRINT_URL_LENGTH) 1.86 cvs 1321: ustrcpy (outputURL, inputURL); 1.24 cvs 1322: else 1323: /* make a truncated urlName on the status window */ 1324: { 1.86 cvs 1325: ustrncpy (outputURL, inputURL, MAX_PRINT_URL_LENGTH / 2); 1326: outputURL [MAX_PRINT_URL_LENGTH / 2] = WC_EOS; 1327: ustrcat (outputURL, TEXT("...")); 1328: ustrcat (outputURL, &(inputURL[len - MAX_PRINT_URL_LENGTH / 2 ])); 1.24 cvs 1329: } 1.25 cvs 1330: } 1331: 1332: 1333: /*---------------------------------------------------------------------- 1334: scan 1.47 cvs 1335: Scan a filename for its constituents 1.25 cvs 1336: ----------------------------------- 1337: 1338: On entry, 1339: name points to a document name which may be incomplete. 1340: On exit, 1341: absolute or relative may be nonzero (but not both). 1342: host, fragment and access may be nonzero if they were specified. 1343: Any which are nonzero point to zero terminated strings. 1344: ----------------------------------------------------------------------*/ 1345: #ifdef __STDC__ 1.67 cvs 1346: static void scan (STRING name, HTURI * parts) 1.25 cvs 1347: #else /* __STDC__ */ 1348: static void scan (name, parts) 1.67 cvs 1349: STRING name; 1350: HTURI *parts; 1.25 cvs 1351: 1352: #endif /* __STDC__ */ 1353: { 1.67 cvs 1354: STRING p; 1355: STRING after_access = name; 1.32 cvs 1356: 1.43 cvs 1357: memset (parts, '\0', sizeof (HTURI)); 1.28 cvs 1358: /* Look for fragment identifier */ 1.67 cvs 1359: if ((p = ustrchr(name, TEXT('#'))) != NULL) 1.28 cvs 1360: { 1.67 cvs 1361: *p++ = TEXT('\0'); 1.28 cvs 1362: parts->fragment = p; 1.25 cvs 1363: } 1364: 1.28 cvs 1365: for (p=name; *p; p++) 1366: { 1.67 cvs 1367: if (*p == URL_SEP || *p == DIR_SEP || *p == TEXT('#') || *p == TEXT('?')) 1.28 cvs 1368: break; 1.67 cvs 1369: if (*p == TEXT(':')) 1.28 cvs 1370: { 1371: *p = 0; 1372: parts->access = after_access; /* Scheme has been specified */ 1373: 1374: /* The combination of gcc, the "-O" flag and the HP platform is 1375: unhealthy. The following three lines is a quick & dirty fix, but is 1376: not recommended. Rather, turn off "-O". */ 1377: 1378: /* after_access = p;*/ 1379: /* while (*after_access == 0)*/ 1380: /* after_access++;*/ 1381: after_access = p+1; 1.67 cvs 1382: if (!ustrcasecmp(TEXT("URL"), parts->access)) 1.28 cvs 1383: /* Ignore IETF's URL: pre-prefix */ 1384: parts->access = NULL; 1385: else 1.25 cvs 1386: break; 1387: } 1388: } 1389: 1390: p = after_access; 1.43 cvs 1391: if (*p == URL_SEP || *p == DIR_SEP) 1.28 cvs 1392: { 1.43 cvs 1393: if (p[1] == URL_SEP) 1.28 cvs 1394: { 1.25 cvs 1395: parts->host = p+2; /* host has been specified */ 1.28 cvs 1396: *p = 0; /* Terminate access */ 1397: /* look for end of host name if any */ 1.67 cvs 1398: p = ustrchr (parts->host, URL_SEP); 1.28 cvs 1399: if (p) 1400: { 1.87 cvs 1401: *p = WC_EOS; /* Terminate host */ 1.25 cvs 1402: parts->absolute = p+1; /* Root has been found */ 1.28 cvs 1403: } 1404: } 1405: else 1406: /* Root found but no host */ 1407: parts->absolute = p+1; 1408: } 1409: else 1410: { 1.25 cvs 1411: parts->relative = (*after_access) ? after_access : 0; /* zero for "" */ 1.28 cvs 1412: } 1.25 cvs 1413: } 1414: 1415: 1416: /*---------------------------------------------------------------------- 1.28 cvs 1417: AmayaParseUrl: parse a Name relative to another name 1418: 1419: This returns those parts of a name which are given (and requested) 1420: substituting bits from the related name where necessary. 1.25 cvs 1421: 1.28 cvs 1422: On entry, 1.25 cvs 1423: aName A filename given 1424: relatedName A name relative to which aName is to be parsed. Give 1425: it an empty string if aName is absolute. 1426: wanted A mask for the bits which are wanted. 1427: 1.28 cvs 1428: On exit, 1.25 cvs 1429: returns A pointer to a malloc'd string which MUST BE FREED 1430: ----------------------------------------------------------------------*/ 1431: #ifdef __STDC__ 1.84 cvs 1432: CHAR_T* AmayaParseUrl (const CHAR_T* aName, CHAR_T* relatedName, int wanted) 1.25 cvs 1433: #else /* __STDC__ */ 1.84 cvs 1434: CHAR_T* AmayaParseUrl (aName, relatedName, wanted) 1435: const CHAR_T* aName; 1436: CHAR_T* relatedName; 1.28 cvs 1437: int wanted; 1.25 cvs 1438: 1439: #endif /* __STDC__ */ 1440: { 1.84 cvs 1441: CHAR_T* return_value; 1.67 cvs 1442: CHAR_T result[MAX_LENGTH]; 1443: CHAR_T name[MAX_LENGTH]; 1444: CHAR_T rel[MAX_LENGTH]; 1.84 cvs 1445: CHAR_T *p, *access; 1.29 cvs 1446: HTURI given, related; 1447: int len; 1.67 cvs 1448: CHAR_T used_sep; 1.84 cvs 1449: CHAR_T* used_str; 1.32 cvs 1450: 1.84 cvs 1451: if (ustrchr (aName, WC_DIR_SEP) || ustrchr (relatedName, WC_DIR_SEP)) 1.33 cvs 1452: { 1.84 cvs 1453: used_str = WC_DIR_STR; 1454: used_sep = WC_DIR_SEP; 1.33 cvs 1455: } 1.32 cvs 1456: else 1.33 cvs 1457: { 1.84 cvs 1458: used_str = WC_URL_STR; 1459: used_sep = WC_URL_SEP; 1.33 cvs 1460: } 1.32 cvs 1461: 1.29 cvs 1462: /* Make working copies of input strings to cut up: */ 1463: return_value = NULL; 1464: result[0] = 0; /* Clear string */ 1.67 cvs 1465: ustrcpy (name, aName); 1.29 cvs 1466: if (relatedName != NULL) 1.67 cvs 1467: ustrcpy (rel, relatedName); 1.29 cvs 1468: else 1.84 cvs 1469: relatedName[0] = WC_EOS; 1.29 cvs 1470: 1471: scan (name, &given); 1472: scan (rel, &related); 1473: access = given.access ? given.access : related.access; 1474: if (wanted & AMAYA_PARSE_ACCESS) 1475: if (access) 1476: { 1.67 cvs 1477: ustrcat (result, access); 1.29 cvs 1478: if(wanted & AMAYA_PARSE_PUNCTUATION) 1.67 cvs 1479: ustrcat (result, TEXT(":")); 1.29 cvs 1480: } 1481: 1482: if (given.access && related.access) 1483: /* If different, inherit nothing. */ 1.67 cvs 1484: if (ustrcmp (given.access, related.access) != 0) 1.29 cvs 1485: { 1486: related.host = 0; 1487: related.absolute = 0; 1488: related.relative = 0; 1489: related.fragment = 0; 1490: } 1491: 1492: if (wanted & AMAYA_PARSE_HOST) 1493: if(given.host || related.host) 1494: { 1495: if(wanted & AMAYA_PARSE_PUNCTUATION) 1.67 cvs 1496: ustrcat (result, TEXT("//")); 1497: ustrcat (result, given.host ? given.host : related.host); 1.29 cvs 1498: } 1499: 1500: if (given.host && related.host) 1501: /* If different hosts, inherit no path. */ 1.67 cvs 1502: if (ustrcmp (given.host, related.host) != 0) 1.29 cvs 1503: { 1504: related.absolute = 0; 1505: related.relative = 0; 1506: related.fragment = 0; 1507: } 1508: 1509: if (wanted & AMAYA_PARSE_PATH) 1510: { 1511: if (given.absolute) 1512: { 1513: /* All is given */ 1514: if (wanted & AMAYA_PARSE_PUNCTUATION) 1.67 cvs 1515: ustrcat (result, used_str); 1516: ustrcat (result, given.absolute); 1.25 cvs 1517: } 1.29 cvs 1518: else if (related.absolute) 1519: { 1520: /* Adopt path not name */ 1.67 cvs 1521: ustrcat (result, used_str); 1522: ustrcat (result, related.absolute); 1.29 cvs 1523: if (given.relative) 1524: { 1525: /* Search part? */ 1.67 cvs 1526: p = ustrchr (result, TEXT('?')); 1.29 cvs 1527: if (!p) 1.67 cvs 1528: p=result+ustrlen(result)-1; 1.33 cvs 1529: for (; *p!=used_sep; p--); /* last / */ 1.29 cvs 1530: /* Remove filename */ 1531: p[1]=0; 1532: /* Add given one */ 1.67 cvs 1533: ustrcat (result, given.relative); 1.25 cvs 1534: } 1535: } 1.29 cvs 1536: else if (given.relative) 1537: /* what we've got */ 1.67 cvs 1538: ustrcat (result, given.relative); 1.29 cvs 1539: else if (related.relative) 1.67 cvs 1540: ustrcat (result, related.relative); 1.29 cvs 1541: else 1542: /* No inheritance */ 1.67 cvs 1543: ustrcat (result, used_str); 1.25 cvs 1544: } 1.29 cvs 1545: 1546: if (wanted & AMAYA_PARSE_ANCHOR) 1547: if (given.fragment || related.fragment) 1548: { 1549: if (given.absolute && given.fragment) 1550: { 1551: /*Fixes for relURLs...*/ 1552: if (wanted & AMAYA_PARSE_PUNCTUATION) 1.67 cvs 1553: ustrcat (result, TEXT("#")); 1554: ustrcat (result, given.fragment); 1.29 cvs 1555: } 1556: else if (!(given.absolute) && !(given.fragment)) 1.84 cvs 1557: ustrcat (result, TEXT("")); 1.29 cvs 1558: else 1559: { 1560: if (wanted & AMAYA_PARSE_PUNCTUATION) 1.67 cvs 1561: ustrcat (result, TEXT("#")); 1562: ustrcat (result, given.fragment ? given.fragment : related.fragment); 1.29 cvs 1563: } 1564: } 1.67 cvs 1565: len = ustrlen (result); 1566: if ((return_value = TtaAllocString (len + 1)) != NULL) 1567: ustrcpy (return_value, result); 1.29 cvs 1568: return (return_value); /* exactly the right length */ 1.25 cvs 1569: } 1570: 1571: /*---------------------------------------------------------------------- 1572: HTCanon 1573: Canonicalizes the URL in the following manner starting from the host 1574: pointer: 1575: 1576: 1) The host name is converted to lowercase 1577: 2) Chop off port if `:80' (http), `:70' (gopher), or `:21' (ftp) 1578: 1579: Return: OK The position of the current path part of the URL 1580: which might be the old one or a new one. 1581: 1582: ----------------------------------------------------------------------*/ 1583: #ifdef __STDC__ 1.84 cvs 1584: static CHAR_T* HTCanon (CHAR_T** filename, CHAR_T* host) 1.25 cvs 1585: #else /* __STDC__ */ 1.67 cvs 1586: static STRING HTCanon (filename, host) 1.84 cvs 1587: CHAR_T** filename; 1588: CHAR_T* host; 1.25 cvs 1589: #endif /* __STDC__ */ 1590: { 1.84 cvs 1591: CHAR_T* newname = NULL; 1592: CHAR_T used_sep; 1593: CHAR_T* path; 1594: CHAR_T* strptr; 1595: CHAR_T* port; 1596: CHAR_T* access = host-3; 1.32 cvs 1597: 1598: 1.84 cvs 1599: if (*filename && ustrchr (*filename, WC_URL_SEP)) 1.33 cvs 1600: { 1.84 cvs 1601: used_sep = WC_URL_SEP; 1.33 cvs 1602: } 1603: else 1604: { 1.84 cvs 1605: used_sep = WC_DIR_SEP; 1.33 cvs 1606: } 1.32 cvs 1607: 1.82 cvs 1608: while (access > *filename && *(access - 1) != used_sep) /* Find access method */ 1.25 cvs 1609: access--; 1.84 cvs 1610: if ((path = ustrchr (host, used_sep)) == NULL) /* Find path */ 1611: path = host + ustrlen (host); 1612: if ((strptr = ustrchr (host, TEXT('@'))) != NULL && strptr < path) /* UserId */ 1.82 cvs 1613: host = strptr; 1.84 cvs 1614: if ((port = ustrchr (host, TEXT(':'))) != NULL && port > path) /* Port number */ 1.82 cvs 1615: port = NULL; 1.25 cvs 1616: 1617: strptr = host; /* Convert to lower-case */ 1.82 cvs 1618: while (strptr < path) 1.33 cvs 1619: { 1.84 cvs 1620: *strptr = utolower (*strptr); 1.82 cvs 1621: strptr++; 1.33 cvs 1622: } 1.25 cvs 1623: 1624: /* Does the URL contain a full domain name? This also works for a 1625: numerical host name. The domain name is already made lower-case 1626: and without a trailing dot. */ 1627: { 1.84 cvs 1628: CHAR_T* dot = port ? port : path; 1629: if (dot > *filename && *--dot == TEXT('.')) 1.33 cvs 1630: { 1.84 cvs 1631: CHAR_T* orig = dot; 1632: CHAR_T* dest = dot + 1; 1.82 cvs 1633: while ((*orig++ = *dest++)); 1634: if (port) port--; 1.33 cvs 1635: path--; 1.25 cvs 1636: } 1637: } 1638: /* Chop off port if `:', `:80' (http), `:70' (gopher), or `:21' (ftp) */ 1.33 cvs 1639: if (port) 1640: { 1.82 cvs 1641: if (!*(port+1) || *(port+1) == used_sep) 1.33 cvs 1642: { 1643: if (!newname) 1644: { 1.84 cvs 1645: CHAR_T* orig = port; 1646: CHAR_T* dest = port + 1; 1.82 cvs 1647: while ((*orig++ = *dest++)); 1.33 cvs 1648: } 1649: } 1.84 cvs 1650: else if ((!ustrncmp (access, TEXT("http"), 4) && 1651: (*(port + 1) == TEXT('8') && 1652: *(port+2) == TEXT('0') && 1.82 cvs 1653: (*(port+3) == used_sep || !*(port + 3)))) || 1.84 cvs 1654: (!ustrncmp (access, TEXT("gopher"), 6) && 1655: (*(port+1) == TEXT('7') && 1656: *(port+2) == TEXT('0') && 1.82 cvs 1657: (*(port+3) == used_sep || !*(port+3)))) || 1.84 cvs 1658: (!ustrncmp (access, TEXT("ftp"), 3) && 1659: (*(port+1) == TEXT('2') && 1660: *(port + 2) == TEXT('1') && 1.82 cvs 1661: (*(port+3) == used_sep || !*(port+3))))) { 1.33 cvs 1662: if (!newname) 1663: { 1.84 cvs 1664: CHAR_T* orig = port; 1665: CHAR_T* dest = port + 3; 1.33 cvs 1666: while((*orig++ = *dest++)); 1667: /* Update path position, Henry Minsky */ 1668: path -= 3; 1.25 cvs 1669: } 1.33 cvs 1670: } 1671: else if (newname) 1.84 cvs 1672: ustrncat (newname, port, (int) (path - port)); 1.33 cvs 1673: } 1.25 cvs 1674: 1.33 cvs 1675: if (newname) 1676: { 1.84 cvs 1677: CHAR_T* newpath = newname + ustrlen (newname); 1678: ustrcat (newname, path); 1.25 cvs 1679: path = newpath; 1.28 cvs 1680: /* Free old copy */ 1681: TtaFreeMemory(*filename); 1.25 cvs 1682: *filename = newname; 1.33 cvs 1683: } 1.25 cvs 1684: return path; 1685: } 1686: 1687: 1688: /*---------------------------------------------------------------------- 1.29 cvs 1689: SimplifyUrl: simplify a URI 1.32 cvs 1690: A URI is allowed to contain the sequence xxx/../ which may be 1691: replaced by "" , and the sequence "/./" which may be replaced by DIR_STR. 1.28 cvs 1692: Simplification helps us recognize duplicate URIs. 1.25 cvs 1693: 1.28 cvs 1694: Thus, /etc/junk/../fred becomes /etc/fred 1695: /etc/junk/./fred becomes /etc/junk/fred 1.25 cvs 1696: 1.28 cvs 1697: but we should NOT change 1698: http://fred.xxx.edu/../.. 1.25 cvs 1699: 1700: or ../../albert.html 1701: 1.28 cvs 1702: In order to avoid empty URLs the following URLs become: 1.25 cvs 1703: 1704: /fred/.. becomes /fred/.. 1705: /fred/././.. becomes /fred/.. 1706: /fred/.././junk/.././ becomes /fred/.. 1707: 1.28 cvs 1708: If more than one set of `://' is found (several proxies in cascade) then 1709: only the part after the last `://' is simplified. 1.25 cvs 1710: 1.28 cvs 1711: Returns: A string which might be the old one or a new one. 1.25 cvs 1712: ----------------------------------------------------------------------*/ 1713: #ifdef __STDC__ 1.84 cvs 1714: void SimplifyUrl (CHAR_T** url) 1.25 cvs 1715: #else /* __STDC__ */ 1.29 cvs 1716: void SimplifyUrl (url) 1.84 cvs 1717: CHAR_T** url; 1.25 cvs 1718: #endif /* __STDC__ */ 1719: { 1.84 cvs 1720: CHAR_T* path; 1721: CHAR_T* access; 1722: CHAR_T* newptr; 1723: CHAR_T* p; 1724: CHAR_T *orig, *dest, *end; 1.28 cvs 1725: 1.85 cvs 1726: CHAR_T used_sep; 1.77 cvs 1727: ThotBool ddot_simplify; /* used to desactivate the double dot simplifcation: 1728: something/../ simplification in relative URLs when they start with a ../ */ 1.32 cvs 1729: 1730: 1.28 cvs 1731: if (!url || !*url) 1732: return; 1733: 1.84 cvs 1734: if (ustrchr (*url, WC_URL_SEP)) 1.33 cvs 1735: { 1.84 cvs 1736: used_sep = WC_URL_SEP; 1.33 cvs 1737: } 1.32 cvs 1738: else 1.33 cvs 1739: { 1.84 cvs 1740: used_sep = WC_DIR_SEP; 1.33 cvs 1741: } 1.32 cvs 1742: 1.77 cvs 1743: /* should we simplify double dot? */ 1744: path = *url; 1.84 cvs 1745: if (*path == TEXT('.') && *(path + 1) == TEXT('.')) 1.77 cvs 1746: ddot_simplify = FALSE; 1747: else 1748: ddot_simplify = TRUE; 1749: 1.28 cvs 1750: /* Find any scheme name */ 1.84 cvs 1751: if ((path = ustrstr (*url, TEXT("://"))) != NULL) 1.33 cvs 1752: { 1753: /* Find host name */ 1.28 cvs 1754: access = *url; 1.84 cvs 1755: while (access < path && (*access = utolower (*access))) 1.82 cvs 1756: access++; 1.28 cvs 1757: path += 3; 1.84 cvs 1758: while ((newptr = ustrstr (path, TEXT ("://"))) != NULL) 1.82 cvs 1759: /* For proxies */ 1760: path = newptr+3; 1761: /* We have a host name */ 1.84 cvs 1762: path = HTCanon (url, path); 1.25 cvs 1763: } 1.84 cvs 1764: else if ((path = ustrstr (*url, TEXT(":/"))) != NULL) 1.28 cvs 1765: path += 2; 1766: else 1767: path = *url; 1.25 cvs 1768: 1.84 cvs 1769: if (*path == used_sep && *(path+1) == used_sep) 1.28 cvs 1770: /* Some URLs start //<foo> */ 1771: path += 1; 1.94 cvs 1772: else if (IsFilePath (path)) 1773: { 1774: /* doesn't need to do anything more */ 1775: return; 1776: } 1.84 cvs 1777: else if (!ustrncmp (path, TEXT("news:"), 5)) 1.28 cvs 1778: { 1.84 cvs 1779: newptr = ustrchr (path+5, TEXT('@')); 1.28 cvs 1780: if (!newptr) 1781: newptr = path + 5; 1782: while (*newptr) 1783: { 1784: /* Make group or host lower case */ 1.84 cvs 1785: *newptr = utolower (*newptr); 1.28 cvs 1786: newptr++; 1.25 cvs 1787: } 1.28 cvs 1788: /* Doesn't need to do any more */ 1789: return; 1.25 cvs 1790: } 1.28 cvs 1791: 1792: if ((p = path)) 1793: { 1.67 cvs 1794: if (!((end = ustrchr (path, TEXT(';'))) || (end = ustrchr (path, TEXT('?'))) || 1795: (end = ustrchr (path, TEXT('#'))))) 1796: end = path + ustrlen (path); 1.28 cvs 1797: 1798: /* Parse string second time to simplify */ 1799: p = path; 1800: while (p < end) 1801: { 1.77 cvs 1802: /* if we're pointing to a char, it's safe to reactivate the ../ convertion */ 1803: if (!ddot_simplify && *p != TEXT('.') && *p != used_sep) 1804: ddot_simplify = TRUE; 1805: 1.33 cvs 1806: if (*p==used_sep) 1.28 cvs 1807: { 1.67 cvs 1808: if (p > *url && *(p+1) == TEXT('.') && (*(p+2) == used_sep || !*(p+2))) 1.28 cvs 1809: { 1810: orig = p + 1; 1.84 cvs 1811: dest = (*(p+2) != used_sep) ? p+2 : p+3; 1.52 cvs 1812: while ((*orig++ = *dest++)); /* Remove a used_sep and a dot*/ 1.28 cvs 1813: end = orig - 1; 1814: } 1.77 cvs 1815: else if (ddot_simplify && *(p+1) == TEXT('.') && *(p+2) == TEXT('.') 1816: && (*(p+3) == used_sep || !*(p+3))) 1.28 cvs 1817: { 1818: newptr = p; 1.52 cvs 1819: while (newptr>path && *--newptr!=used_sep); /* prev used_sep */ 1820: if (*newptr == used_sep) 1821: orig = newptr + 1; 1.28 cvs 1822: else 1.52 cvs 1823: orig = newptr; 1824: 1825: dest = (*(p+3) != used_sep) ? p+3 : p+4; 1826: while ((*orig++ = *dest++)); /* Remove /xxx/.. */ 1827: end = orig-1; 1828: /* Start again with prev slash */ 1829: p = newptr; 1.28 cvs 1830: } 1.33 cvs 1831: else if (*(p+1) == used_sep) 1.28 cvs 1832: { 1.33 cvs 1833: while (*(p+1) == used_sep) 1.28 cvs 1834: { 1835: orig = p; 1836: dest = p + 1; 1837: while ((*orig++ = *dest++)); /* Remove multiple /'s */ 1838: end = orig-1; 1839: } 1840: } 1841: else 1.25 cvs 1842: p++; 1.28 cvs 1843: } 1844: else 1845: p++; 1.25 cvs 1846: } 1847: } 1.51 cvs 1848: 1849: /* 1850: ** Check for host/../.. kind of things 1851: */ 1.77 cvs 1852: if (*path == used_sep && *(path+1) == TEXT('.') && *(path+2) == TEXT('.') 1853: && (!*(path+3) || *(path+3) == used_sep)) 1.87 cvs 1854: *(path+1) = WC_EOS; 1.51 cvs 1855: 1.28 cvs 1856: return; 1857: } 1858: 1859: 1860: /*---------------------------------------------------------------------- 1.96 cvs 1861: NormalizeFile normalizes local names. 1.28 cvs 1862: Return TRUE if target and src differ. 1863: ----------------------------------------------------------------------*/ 1864: #ifdef __STDC__ 1.97 ! cvs 1865: ThotBool NormalizeFile (CHAR_T* src, CHAR_T* target, ConvertionType convertion) 1.28 cvs 1866: #else 1.97 ! cvs 1867: ThotBool NormalizeFile (src, target, convertion) 1.84 cvs 1868: CHAR_T* src; 1869: CHAR_T* target; 1.97 ! cvs 1870: ConvertionType convertion; 1.28 cvs 1871: 1872: #endif 1873: { 1.93 cvs 1874: # ifndef _WINDOWS 1.90 cvs 1875: CHAR_T *s; 1.93 cvs 1876: int i; 1877: # endif /* !_WINDOWS */ 1.82 cvs 1878: ThotBool change; 1.90 cvs 1879: int start_index; /* the first char that we'll copy */ 1.28 cvs 1880: 1.54 cvs 1881: change = FALSE; 1.90 cvs 1882: start_index = 0; 1883: 1884: if (!src || src[0] == WC_EOS) 1.96 cvs 1885: { 1886: target[0] = WC_EOS; 1887: return FALSE; 1888: } 1.90 cvs 1889: 1890: /* @@ do I need file: or file:/ here? */ 1.84 cvs 1891: if (ustrncmp (src, TEXT("file:"), 5) == 0) 1.28 cvs 1892: { 1.90 cvs 1893: /* remove the prefix file: */ 1894: start_index += 5; 1895: 1896: /* remove the localhost prefix */ 1.94 cvs 1897: if (ustrncmp (&src[start_index], TEXT("//localhost/"), 12) == 0) 1898: start_index += 11; 1899: 1900: /* remove the first two slashes in / / /path */ 1901: while (src[start_index] && 1902: src[start_index] == TEXT('/') 1903: && src[start_index + 1] == TEXT('/')) 1904: start_index++; 1905: 1906: #ifdef _WINDOWS 1907: /* remove any extra slash before the drive name */ 1908: if (src[start_index] == TEXT('/') 1909: &&src[start_index+2] == TEXT(':')) 1910: start_index++; 1911: #endif /* _WINDOWS */ 1.90 cvs 1912: 1913: if (src[start_index] == WC_EOS) 1914: /* if there's nothing afterwards, add a DIR_STR */ 1915: ustrcpy (target, WC_DIR_STR); 1916: else 1.97 ! cvs 1917: /* as we're inside a file: URL, we'll apply all the convertions ! 1918: we know */ ! 1919: CleanCopyFileURL (target, &src[start_index], AM_CONV_ALL); 1.96 cvs 1920: 1921: change = TRUE; 1922: } 1.97 ! cvs 1923: else if (convertion != AM_CONV_NONE) 1.96 cvs 1924: { 1925: /* we are following a "local" relative link, we do all the 1926: convertions except for the HOME_DIR ~ one */ 1.97 ! cvs 1927: CleanCopyFileURL (target, src, convertion); 1.28 cvs 1928: } 1.90 cvs 1929: #ifndef _WINDOWS 1.84 cvs 1930: else if (src[0] == TEXT('~')) 1.53 cvs 1931: { 1.96 cvs 1932: /* it must be a URL typed in a text input field */ 1933: /* do the HOME_DIR ~ substitution */ 1.82 cvs 1934: s = TtaGetEnvString ("HOME"); 1.84 cvs 1935: ustrcpy (target, s); 1.90 cvs 1936: #if 0 1.96 cvs 1937: /* JK: invalidated this part of the code as it's simpler 1938: to add the DIR_SEP whenever we have something to add 1939: to the path rather than adding it systematically */ 1.84 cvs 1940: if (src[1] != WC_DIR_SEP) 1941: ustrcat (target, WC_DIR_STR); 1.90 cvs 1942: #endif 1943: i = ustrlen (target); 1.92 cvs 1944: ustrcpy (&target[i], &src[1]); 1.54 cvs 1945: change = TRUE; 1.53 cvs 1946: } 1.90 cvs 1947: #endif /* _WINDOWS */ 1.28 cvs 1948: else 1.96 cvs 1949: /* leave it as it is */ 1.92 cvs 1950: ustrcpy (target, src); 1.96 cvs 1951: 1.28 cvs 1952: /* remove /../ and /./ */ 1.29 cvs 1953: SimplifyUrl (&target); 1.54 cvs 1954: if (!change) 1.84 cvs 1955: change = ustrcmp (src, target); 1.28 cvs 1956: return (change); 1.25 cvs 1957: } 1958: 1.28 cvs 1959: 1.25 cvs 1960: /*---------------------------------------------------------------------- 1.31 cvs 1961: MakeRelativeURL: make relative name 1.25 cvs 1962: 1.28 cvs 1963: This function creates and returns a string which gives an expression of 1964: one address as related to another. Where there is no relation, an absolute 1965: address is retured. 1.25 cvs 1966: 1.28 cvs 1967: On entry, 1.25 cvs 1968: Both names must be absolute, fully qualified names of nodes 1969: (no fragment bits) 1970: 1.28 cvs 1971: On exit, 1.25 cvs 1972: The return result points to a newly allocated name which, if 1973: parsed by AmayaParseUrl relative to relatedName, will yield aName. 1974: The caller is responsible for freeing the resulting name later. 1975: ----------------------------------------------------------------------*/ 1976: #ifdef __STDC__ 1.67 cvs 1977: STRING MakeRelativeURL (STRING aName, STRING relatedName) 1.25 cvs 1978: #else /* __STDC__ */ 1.67 cvs 1979: STRING MakeRelativeURL (aName, relatedName) 1980: STRING aName; 1981: STRING relatedName; 1.25 cvs 1982: #endif /* __STDC__ */ 1983: { 1.87 cvs 1984: CHAR_T* return_value; 1.67 cvs 1985: CHAR_T result[MAX_LENGTH]; 1.87 cvs 1986: CHAR_T* p; 1987: CHAR_T* q; 1988: CHAR_T* after_access; 1989: CHAR_T* last_slash = NULL; 1.29 cvs 1990: int slashes, levels, len; 1991: 1.44 cvs 1992: # ifdef _WINDOWS 1993: int ndx; 1994: # endif /* _WINDOWS */ 1995: 1.29 cvs 1996: if (aName == NULL || relatedName == NULL) 1997: return (NULL); 1998: 1999: slashes = 0; 2000: after_access = NULL; 2001: p = aName; 2002: q = relatedName; 2003: for (; *p && (*p == *q); p++, q++) 1.27 cvs 2004: { 2005: /* Find extent of match */ 1.67 cvs 2006: if (*p == TEXT(':')) 1.29 cvs 2007: after_access = p + 1; 1.28 cvs 2008: if (*p == DIR_SEP) 1.27 cvs 2009: { 1.29 cvs 2010: /* memorize the last slash position and count them */ 1.27 cvs 2011: last_slash = p; 2012: slashes++; 1.25 cvs 2013: } 2014: } 2015: 1.31 cvs 2016: /* q, p point to the first non-matching character or zero */ 1.87 cvs 2017: if (*q == WC_EOS) 1.31 cvs 2018: { 2019: /* New name is a subset of the related name */ 2020: /* exactly the right length */ 1.67 cvs 2021: len = ustrlen (p); 2022: if ((return_value = TtaAllocString (len + 1)) != NULL) 2023: ustrcpy (return_value, p); 1.31 cvs 2024: } 2025: else if ((slashes < 2 && after_access == NULL) 2026: || (slashes < 3 && after_access != NULL)) 2027: { 2028: /* Two names whitout common path */ 2029: /* exactly the right length */ 1.67 cvs 2030: len = ustrlen (aName); 2031: if ((return_value = TtaAllocString (len + 1)) != NULL) 2032: ustrcpy (return_value, aName); 1.31 cvs 2033: } 2034: else 2035: { 2036: /* Some path in common */ 1.67 cvs 2037: if (slashes == 3 && ustrncmp (aName, TEXT("http:"), 5) == 0) 1.31 cvs 2038: /* just the same server */ 1.67 cvs 2039: ustrcpy (result, last_slash); 1.31 cvs 2040: else 2041: { 2042: levels= 0; 1.67 cvs 2043: for (; *q && *q != TEXT('#') && *q != TEXT(';') && *q != TEXT('?'); q++) 1.31 cvs 2044: if (*q == DIR_SEP) 2045: levels++; 2046: 1.87 cvs 2047: result[0] = WC_EOS; 1.31 cvs 2048: for (;levels; levels--) 1.67 cvs 2049: ustrcat (result, TEXT("../")); 2050: ustrcat (result, last_slash+1); 1.31 cvs 2051: } 1.52 cvs 2052: 2053: if (!*result) 1.67 cvs 2054: ustrcat (result, TEXT("./")); 1.52 cvs 2055: 1.31 cvs 2056: /* exactly the right length */ 1.67 cvs 2057: len = ustrlen (result); 2058: if ((return_value = TtaAllocString (len + 1)) != NULL) 2059: ustrcpy (return_value, result); 1.52 cvs 2060: 1.25 cvs 2061: } 1.44 cvs 2062: # ifdef _WINDOWS 1.67 cvs 2063: len = ustrlen (return_value); 1.44 cvs 2064: for (ndx = 0; ndx < len; ndx ++) 1.67 cvs 2065: if (return_value[ndx] == TEXT('\\')) 2066: return_value[ndx] = TEXT('/') ; 1.44 cvs 2067: # endif /* _WINDOWS */ 1.29 cvs 2068: return (return_value); 1.24 cvs 2069: } 1.35 cvs 2070: 2071: