Annotation of libwww/Library/src/HTAccess.c, revision 1.2
1.1 timbl 1: /* Access Manager HTAccess.c
2: ** ==============
3: **
4: ** Authors
5: ** TBL Tim Berners-Lee timbl@info.cern.ch
6: ** JFG Jean-Francois Groff jgh@next.com
7: ** DD Denis DeLaRoca (310) 825-4580 <CSP1DWD@mvs.oac.ucla.edu>
8: ** History
9: ** 8 Jun 92 Telnet hopping prohibited as telnet is not secure TBL
10: ** 26 Jun 92 When over DECnet, suppressed FTP, Gopher and News. JFG
11: ** 6 Oct 92 Moved HTClientHost and logfile into here. TBL
12: ** 17 Dec 92 Tn3270 added, bug fix. DD
1.2 ! timbl 13: ** 4 Feb 93 Access registration, Search escapes bad chars TBL
! 14: ** PARAMETERS TO HTSEARCH AND HTLOADRELATIVE CHANGED
! 15: **
! 16: ** Bugs
! 17: ** This module assumes that the graphic object is hypertext, as it
! 18: ** needs to select it when it has been loaded. A superclass needs to be
! 19: ** defined which accepts select and select_anchor.
1.1 timbl 20: */
21:
22: /* Implements:
23: */
24: #include "HTAccess.h"
25:
26: /* Uses:
27: */
28:
29: #include "HTParse.h"
30: #include "HTUtils.h"
31: #include "WWW.h"
1.2 ! timbl 32:
! 33: #ifndef NO_RULES
! 34: #include "HTRules.h"
! 35: #endif
! 36:
1.1 timbl 37: #include <stdio.h>
38:
1.2 ! timbl 39: #include "HTList.h"
! 40: #include "HText.h" /* See bugs above */
! 41: #include "HTAlert.h"
! 42:
! 43: #ifdef OLD_CODE
! 44: #include <errno.h>
1.1 timbl 45: #include "tcp.h"
46: #ifndef DECNET
47: #include "HTFTP.h"
48: #include "HTGopher.h"
49: #include "HTNews.h"
50: #endif
1.2 ! timbl 51: #endif
1.1 timbl 52:
53:
54: /* These flags may be set to modify the operation of this module
55: */
56: PUBLIC char * HTClientHost = 0; /* Name of remote login host if any */
57: PUBLIC FILE * logfile = 0; /* File to which to output one-liners */
58:
1.2 ! timbl 59: /* To generate other things, play with these:
! 60: */
! 61:
! 62: PUBLIC HTFormat HTOutputFormat = NULL;
! 63: PUBLIC HTStream* HTOutputStream = NULL; /* For non-interactive, set this */
1.1 timbl 64:
65: PRIVATE HTList * protocols = NULL; /* List of registered protocol descriptors */
66:
67:
68: /* Register a Protocol HTRegisterProtocol
69: ** -------------------
70: */
71:
72: PUBLIC BOOL HTRegisterProtocol(protocol)
73: HTProtocol * protocol;
74: {
75: if (!protocols) protocols = HTList_new();
76: HTList_addObject(protocols, protocol);
77: return YES;
78: }
79:
80:
81: /* Register all known protocols
82: ** ----------------------------
83: **
84: ** Add to or subtract from this list if you add or remove protocol modules.
85: ** This routine is called the first time the protocol list is needed,
86: ** unless any protocols are already registered, in which case it is not called.
87: ** Therefore the application can override this list.
88: **
89: ** Compiling with NO_INIT prevents all known protocols from being forced
90: ** in at link time.
91: */
92: #ifndef NO_INIT
93: PRIVATE void HTAccessInit NOARGS /* Call me once */
94: {
1.2 ! timbl 95: extern HTProtocol HTTP, HTFile, HTTelnet, HTTn3270, HTRlogin;
1.1 timbl 96: #ifndef DECNET
1.2 ! timbl 97: extern HTProtocol HTFTP, HTNews, HTGopher;
! 98: HTRegisterProtocol(&HTFTP);
! 99: HTRegisterProtocol(&HTNews);
! 100: HTRegisterProtocol(&HTGopher);
1.1 timbl 101: #endif
102:
1.2 ! timbl 103: HTRegisterProtocol(&HTTP);
! 104: HTRegisterProtocol(&HTFile);
! 105: HTRegisterProtocol(&HTTelnet);
! 106: HTRegisterProtocol(&HTTn3270);
! 107: HTRegisterProtocol(&HTRlogin);
1.1 timbl 108: }
109: #endif
110:
111:
1.2 ! timbl 112: /* Find physical name and access protocol
! 113: ** --------------------------------------
1.1 timbl 114: **
115: **
116: ** On entry,
117: ** addr must point to the fully qualified hypertext reference.
118: ** anchor a pareent anchor with whose address is addr
119: **
120: ** On exit,
1.2 ! timbl 121: ** returns HT_NO_ACCESS Error has occured.
! 122: ** HT_OK Success
1.1 timbl 123: **
124: */
1.2 ! timbl 125: PRIVATE int get_physical ARGS2(
! 126: CONST char *, addr,
! 127: HTParentAnchor *, anchor)
1.1 timbl 128: {
129: char * access=0; /* Name of access method */
1.2 ! timbl 130: char * physical = 0;
1.1 timbl 131:
1.2 ! timbl 132: #ifndef NO_RULES
! 133: physical = HTTranslate(addr);
! 134: if (!physical) {
! 135: return HT_FORBIDDEN;
! 136: }
! 137: HTAnchor_setPhysical(anchor, physical);
! 138: free(physical); /* free our copy */
! 139: #else
! 140: HTAnchor_setPhysical(anchor, addr);
! 141: #endif
! 142:
! 143: access = HTParse(HTAnchor_physical(anchor),
! 144: "file:", PARSE_ACCESS);
1.1 timbl 145:
146: /* Check whether gateway access has been set up for this
147: */
148: #ifdef USE_GATEWAYS
1.2 ! timbl 149: {
! 150: char * gateway_parameter, gateway;
! 151: gateway_parameter = (char *)malloc(strlen(access)+20);
! 152: if (gateway_parameter == NULL) outofmem(__FILE__, "HTLoad");
! 153: strcpy(gateway_parameter, "WWW_");
! 154: strcat(gateway_parameter, access);
! 155: strcat(gateway_parameter, "_GATEWAY");
! 156: gateway = (char *)getenv(gateway_parameter); /* coerce for decstation */
! 157: free(gateway_parameter);
! 158: if (gateway) {
! 159: status = HTLoadHTTP(addr, gateway, anchor,
! 160: HTOutputFormat ? HTOutputFormat : WWW_PRESENT, sink);
! 161: HTAlert("Cannot retrieve required information from gateway.");
! 162: free(access);
! 163: return status;
! 164: }
! 165: }
1.1 timbl 166: #endif
167:
168:
169:
170: /* Search registered protocols to find suitable one
171: */
172: {
173: int i, n;
174: #ifndef NO_INIT
1.2 ! timbl 175: if (!protocols) HTAccessInit();
1.1 timbl 176: #endif
177: n = HTList_count(protocols);
178: for (i=0; i<n; i++) {
1.2 ! timbl 179: HTProtocol *p = HTList_objectAt(protocols, i);
! 180: if (strcmp(p->name, access)==0) {
! 181: HTAnchor_setProtocol(anchor, p);
! 182: free(access);
! 183: return (HT_OK);
1.1 timbl 184: }
185: }
186: }
187:
188: free(access);
1.2 ! timbl 189: return HT_NO_ACCESS;
1.1 timbl 190: }
191:
192:
193: /* Load a document
194: ** ---------------
195: **
1.2 ! timbl 196: ** This is an internal routine, which has an address AND a matching
! 197: ** anchor. (The public routines are called with one OR the other.)
! 198: **
! 199: ** On entry,
! 200: ** addr must point to the fully qualified hypertext reference.
! 201: ** anchor a pareent anchor with whose address is addr
! 202: **
! 203: ** On exit,
! 204: ** returns <0 Error has occured.
! 205: ** HT_LOADED Success
! 206: ** HT_NO_DATA Success, but no document loaded.
! 207: **
! 208: */
! 209: PRIVATE int HTLoad ARGS4(
! 210: CONST char *, addr,
! 211: HTParentAnchor *, anchor,
! 212: HTFormat, format_out,
! 213: HTStream *, sink)
! 214: {
! 215: HTProtocol* p;
! 216: int status = get_physical(addr, anchor);
! 217: if (status == HT_FORBIDDEN) {
! 218: return HTLoadError(sink, 500, "Access forbidden by rule");
! 219: }
! 220: if (status < 0) return status; /* Can't resolve or forbidden */
! 221:
! 222: p = HTAnchor_protocol(anchor);
! 223: return (*(p->load))(HTAnchor_physical(anchor),
! 224: anchor, format_out, sink);
! 225: }
! 226:
! 227:
! 228: /* Get a save stream for a document
! 229: ** --------------------------------
! 230: */
! 231: PUBLIC HTStream *HTSaveStream ARGS1(HTParentAnchor *, anchor)
! 232: {
! 233: HTProtocol * p = HTAnchor_protocol(anchor);
! 234: if (!p) return NULL;
! 235:
! 236: return (*p->saveStream)(anchor);
! 237:
! 238: }
! 239:
! 240:
! 241: /* Load a document - with logging etc
! 242: ** ----------------------------------
! 243: **
! 244: ** - Checks or documents already loaded
! 245: ** - Logs the access
! 246: ** - Allows stdin filter option
! 247: ** - Trace ouput and error messages
! 248: **
1.1 timbl 249: ** On Entry,
250: ** anchor is the node_anchor for the document
251: ** full_address The address of the document to be accessed.
1.2 ! timbl 252: ** filter if YES, treat stdin as HTML
1.1 timbl 253: **
254: ** On Exit,
255: ** returns YES Success in opening document
256: ** NO Failure
257: **
258: */
259:
1.2 ! timbl 260: PRIVATE BOOL HTLoadDocument ARGS4(
! 261: CONST char *, full_address,
! 262: HTParentAnchor *, anchor,
! 263: HTFormat, format_out,
! 264: HTStream*, sink)
1.1 timbl 265:
266: {
267: int status;
268: HText * text;
269:
270: if (TRACE) fprintf (stderr,
271: "HTAccess: loading document %s\n", full_address);
272:
273: if (text=(HText *)HTAnchor_document(anchor)) { /* Already loaded */
274: if (TRACE) fprintf(stderr, "HTAccess: Document already in memory.\n");
275: HText_select(text);
276: return YES;
277: }
278:
1.2 ! timbl 279: status = HTLoad(full_address, anchor, format_out, sink);
! 280:
! 281:
1.1 timbl 282: /* Log the access if necessary
283: */
284: if (logfile) {
285: time_t theTime;
286: time(&theTime);
287: fprintf(logfile, "%24.24s %s %s %s\n",
288: ctime(&theTime),
289: HTClientHost ? HTClientHost : "local",
290: status<0 ? "FAIL" : "GET",
291: full_address);
292: fflush(logfile); /* Actually update it on disk */
293: if (TRACE) fprintf(stderr, "Log: %24.24s %s %s %s\n",
294: ctime(&theTime),
295: HTClientHost ? HTClientHost : "local",
296: status<0 ? "FAIL" : "GET",
297: full_address);
298: }
299:
300:
301: if (status == HT_LOADED) {
302: if (TRACE) {
303: fprintf(stderr, "HTAccess: `%s' has been accessed.\n",
304: full_address);
305: }
306: return YES;
307: }
308:
309: if (status == HT_NO_DATA) {
310: if (TRACE) {
311: fprintf(stderr,
312: "HTAccess: `%s' has been accessed, No data left.\n",
313: full_address);
314: }
315: return NO;
316: }
317:
1.2 ! timbl 318: if (status<0) { /* Failure in accessing a document */
1.1 timbl 319: #ifdef CURSES
320: user_message("Can't access `%s'", full_address);
321: #else
1.2 ! timbl 322: fprintf(stderr, "HTAccess: Can't access `%s'\n", full_address);
1.1 timbl 323: #endif
324:
325: return NO;
326: }
327:
328: fprintf(stderr,
1.2 ! timbl 329: "**** HTAccess: socket or file number returned by obsolete load routine!\n");
1.1 timbl 330: exit(-6996);
331:
1.2 ! timbl 332: } /* HTLoadDocument */
1.1 timbl 333:
334:
335:
336: /* Load a document from absolute name
337: ** ---------------
338: **
339: ** On Entry,
340: ** addr The absolute address of the document to be accessed.
341: ** filter if YES, treat document as HTML
342: **
343: ** On Exit,
344: ** returns YES Success in opening document
345: ** NO Failure
346: **
347: **
348: */
349:
1.2 ! timbl 350: PUBLIC BOOL HTLoadAbsolute ARGS1(CONST char *,addr)
! 351: {
! 352: return HTLoadDocument( addr,
! 353: HTAnchor_parent(HTAnchor_findAddress(addr)),
! 354: HTOutputFormat ? HTOutputFormat : WWW_PRESENT,
! 355: HTOutputStream);
! 356: }
! 357:
! 358:
! 359: /* Load a document from absolute name to stream
! 360: ** --------------------------------------------
! 361: **
! 362: ** On Entry,
! 363: ** addr The absolute address of the document to be accessed.
! 364: ** sink if non-NULL, send data down this stream
! 365: **
! 366: ** On Exit,
! 367: ** returns YES Success in opening document
! 368: ** NO Failure
! 369: **
! 370: **
! 371: */
! 372:
! 373: PUBLIC BOOL HTLoadToStream ARGS3(
! 374: CONST char *, addr,
! 375: BOOL, filter,
! 376: HTStream *, sink)
1.1 timbl 377: {
1.2 ! timbl 378: return HTLoadDocument(addr,
1.1 timbl 379: HTAnchor_parent(HTAnchor_findAddress(addr)),
1.2 ! timbl 380: HTOutputFormat ? HTOutputFormat : WWW_PRESENT,
! 381: sink);
1.1 timbl 382: }
383:
384:
1.2 ! timbl 385:
! 386:
1.1 timbl 387: /* Load a document from relative name
388: ** ---------------
389: **
390: ** On Entry,
1.2 ! timbl 391: ** relative_name The relative address of the document
! 392: ** to be accessed.
1.1 timbl 393: **
394: ** On Exit,
395: ** returns YES Success in opening document
396: ** NO Failure
397: **
398: **
399: */
400:
1.2 ! timbl 401: PUBLIC BOOL HTLoadRelative ARGS2(
! 402: CONST char *, relative_name,
! 403: HTParentAnchor *, here)
1.1 timbl 404: {
405: char * full_address = 0;
406: BOOL result;
407: char * mycopy = 0;
408: char * stripped = 0;
409: char * current_address =
1.2 ! timbl 410: HTAnchor_address((HTAnchor*)here);
1.1 timbl 411:
412: StrAllocCopy(mycopy, relative_name);
413:
414: stripped = HTStrip(mycopy);
415: full_address = HTParse(stripped,
416: current_address,
417: PARSE_ACCESS|PARSE_HOST|PARSE_PATH|PARSE_PUNCTUATION);
1.2 ! timbl 418: result = HTLoadAbsolute(full_address);
1.1 timbl 419: free(full_address);
420: free(current_address);
421: free(mycopy); /* Memory leak fixed 10/7/92 -- JFG */
422: return result;
423: }
424:
425:
426: /* Load if necessary, and select an anchor
427: ** --------------------------------------
428: **
429: ** On Entry,
430: ** destination The child or parenet anchor to be loaded.
431: **
432: ** On Exit,
433: ** returns YES Success
434: ** NO Failure
435: **
436: */
437:
438: PUBLIC BOOL HTLoadAnchor ARGS1(HTAnchor *,destination)
439: {
440: HTParentAnchor * parent;
441: BOOL loaded = NO;
442: if (!destination) return NO; /* No link */
443:
444: parent = HTAnchor_parent(destination);
445:
446: if (HTAnchor_document(parent) == NULL) { /* If not alread loaded */
447: /* TBL 921202 */
1.2 ! timbl 448:
1.1 timbl 449: BOOL result;
450: char * address = HTAnchor_address((HTAnchor*) parent);
1.2 ! timbl 451: result = HTLoadDocument(address, parent,
! 452: HTOutputFormat ? HTOutputFormat : WWW_PRESENT,
! 453: HTOutputStream);
1.1 timbl 454: free(address);
455: if (!result) return NO;
456: loaded = YES;
457: }
458:
459: {
460: HText *text = (HText*)HTAnchor_document(parent);
461: if (destination != (HTAnchor *)parent) { /* If child anchor */
462: HText_selectAnchor(text,
463: (HTChildAnchor*)destination); /* Double display? @@ */
464: } else {
465: if (!loaded) HText_select(text);
466: }
467: }
468: return YES;
469:
470: } /* HTLoadAnchor */
471:
472:
473: /* Search
474: ** ------
475: ** Performs a keyword search on word given by the user. Adds the keyword to
476: ** the end of the current address and attempts to open the new address.
477: **
478: ** On Entry,
479: ** *keywords space-separated keyword list or similar search list
1.2 ! timbl 480: ** here is anchor search is to be done on.
1.1 timbl 481: */
482:
/*      Hexadecimal digit for a 4-bit value
**      -----------------------------------
**
** On entry,
**      i               value to convert; only its low four bits are used,
**                      so the result is always a valid hex digit.
**
** On exit,
**      returns         one of '0'..'9', 'A'..'F'
*/
PRIVATE char hex(i)
    int i;
{
    return "0123456789ABCDEF"[i & 0xF];
}
1.1 timbl 488:
1.2 ! timbl 489: PUBLIC BOOL HTSearch ARGS2(
! 490: CONST char *, keywords,
! 491: HTParentAnchor *, here)
1.1 timbl 492: {
1.2 ! timbl 493:
! 494: #define acceptable \
! 495: "1234567890abcdefghijlkmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ.-_"
! 496:
! 497: char *q, *u;
! 498: CONST char * p, *s, *e; /* Pointers into keywords */
! 499: char * address = HTAnchor_address((HTAnchor*)here);
1.1 timbl 500: BOOL result;
1.2 ! timbl 501: char * escaped = malloc(strlen(keywords)*3+1);
! 502:
! 503: static CONST BOOL isAcceptable[96] =
! 504:
! 505: /* 0 1 2 3 4 5 6 7 8 9 A B C D E F */
! 506: { 0,0,0,0,0,0,0,0,0,0,1,0,0,1,1,0, /* 2x !"#$%&'()*+,-./ */
! 507: 1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0, /* 3x 0123456789:;<=>? */
! 508: 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 4x @ABCDEFGHIJKLMNO */
! 509: 1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,1, /* 5X PQRSTUVWXYZ[\]^_ */
! 510: 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 6x `abcdefghijklmno */
! 511: 1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0 }; /* 7X pqrstuvwxyz{\}~ DEL */
! 512:
! 513: if (escaped == NULL) outofmem(__FILE__, "HTSearch");
! 514:
! 515:
! 516: /* Convert spaces to + and hex escape unacceptable characters
! 517: */
! 518: for(s=keywords; *s && WHITE(*s); s++) /*scan */ ; /* Skip white space */
! 519: for(e = s + strlen(s); e>s && WHITE(*(e-1)) ; e--); /* Skip trailers */
! 520: for(q=escaped, p=s; p<e; p++) { /* scan stripped field */
! 521: int c = (int)TOASCII(*p);
! 522: if (WHITE(*p)) {
! 523: *q++ = '+';
! 524: } else if (c>=32 && c<=(char)127 && isAcceptable[c-32]) {
! 525: *q++ = (char)c;
! 526: } else {
! 527: *q++ = '%';
! 528: *q++ = hex(c / 16);
! 529: *q++ = hex(c % 16);
! 530: }
! 531: } /* Loop over string */
1.1 timbl 532:
1.2 ! timbl 533: *q=0;
! 534: /* terminate escaped sctring */
! 535: u=strchr(address, '?'); /* Find old search string */
! 536: if (u) *u = 0; /* Chop old search off */
1.1 timbl 537:
538: StrAllocCat(address, "?");
1.2 ! timbl 539: StrAllocCat(address, escaped);
! 540: free(escaped);
! 541: result = HTLoadRelative(address, here);
1.1 timbl 542: free(address);
1.2 ! timbl 543:
1.1 timbl 544: return result;
1.2 ! timbl 545: }
! 546:
! 547:
! 548: /* Search Given Indexname
! 549: ** ------
! 550: ** Performs a keyword search on word given by the user. Adds the keyword to
! 551: ** the end of the current address and attempts to open the new address.
! 552: **
! 553: ** On Entry,
! 554: ** *keywords space-separated keyword list or similar search list
! 555: ** *addres is name of object search is to be done on.
! 556: */
! 557:
! 558: PUBLIC BOOL HTSearchAbsolute ARGS2(
! 559: CONST char *, keywords,
! 560: CONST char *, indexname)
! 561: {
! 562: HTParentAnchor * anchor =
! 563: (HTParentAnchor*) HTAnchor_findAddress(indexname);
! 564: return HTSearch(keywords, anchor);
! 565: }
! 566:
! 567:
! 568: /* Generate the anchor for the home page
! 569: ** -------------------------------------
! 570: **
! 571: ** As it involves file access, this should only be done once
! 572: ** when the program first runs.
! 573: ** This is a default algorithm -- browesr don't HAVE to use this.
! 574: **
! 575: */
! 576: PUBLIC HTParentAnchor * HTHomeAnchor NOARGS
! 577: {
! 578: char * my_home = (char *)getenv(LOGICAL_DEFAULT);
! 579: BOOL got_local_default = NO;
! 580: char * ref;
! 581: HTParentAnchor * anchor;
1.1 timbl 582:
1.2 ! timbl 583: #ifdef unix
! 584: {
! 585: FILE * fp = fopen(LOCAL_DEFAULT_FILE, "r");
! 586: if (fp) {
! 587: fclose(fp);
! 588: got_local_default = YES;
! 589: } else {
! 590: if (TRACE) fprintf(stderr,
! 591: "HTBrowse: No local default home %s\n",
! 592: LOCAL_DEFAULT_FILE);
! 593: }
! 594: }
! 595: #endif
! 596: ref = HTParse( my_home ? my_home :
! 597: HTClientHost ? REMOTE_ADDRESS :
! 598: got_local_default ? LOCAL_DEFAULT
! 599: : LAST_RESORT,
! 600: LAST_RESORT,
! 601: PARSE_ACCESS|PARSE_HOST|PARSE_PATH|PARSE_PUNCTUATION);
! 602: if (my_home) {
! 603: if (TRACE) fprintf(stderr,
! 604: "HTAccess: Using custom home page %s i.e. address %s\n",
! 605: my_home, ref);
! 606: }
! 607: anchor = (HTParentAnchor*) HTAnchor_findAddress(ref);
! 608: free(ref);
! 609: return anchor;
1.1 timbl 610: }
611:
612:
Webmaster