Annotation of libwww/Library/src/HTAccess.c, revision 1.10
1.1 timbl 1: /* Access Manager HTAccess.c
2: ** ==============
3: **
4: ** Authors
5: ** TBL Tim Berners-Lee timbl@info.cern.ch
1.4 timbl 6: ** JFG Jean-Francois Groff jfg@dxcern.cern.ch
1.1 timbl 7: ** DD Denis DeLaRoca (310) 825-4580 <CSP1DWD@mvs.oac.ucla.edu>
8: ** History
9: ** 8 Jun 92 Telnet hopping prohibited as telnet is not secure TBL
10: ** 26 Jun 92 When over DECnet, suppressed FTP, Gopher and News. JFG
11: ** 6 Oct 92 Moved HTClientHost and logfile into here. TBL
12: ** 17 Dec 92 Tn3270 added, bug fix. DD
1.2 timbl 13: ** 4 Feb 93 Access registration, Search escapes bad chars TBL
1.9 timbl 14: ** PARAMETERS TO HTSEARCH AND HTLOADRELATIVE CHANGED
15: ** 28 May 93 WAIS gateway explicit if no WAIS library linked in.
1.2 timbl 16: **
17: ** Bugs
18: ** This module assumes that the graphic object is hypertext, as it
1.9 timbl 19: ** needs to select it when it has been loaded. A superclass needs to be
1.2 timbl 20: ** defined which accepts select and select_anchor.
1.1 timbl 21: */
22:
1.9 timbl 23: #ifndef DEFAULT_WAIS_GATEWAY
1.8 timbl 24: #define DEFAULT_WAIS_GATEWAY "http://info.cern.ch:8001/"
1.9 timbl 25: #endif
1.8 timbl 26:
1.1 timbl 27: /* Implements:
28: */
29: #include "HTAccess.h"
30:
31: /* Uses:
32: */
33:
34: #include "HTParse.h"
35: #include "HTUtils.h"
1.4 timbl 36: #include "HTML.h" /* SCW */
1.2 timbl 37:
38: #ifndef NO_RULES
39: #include "HTRules.h"
40: #endif
41:
1.1 timbl 42: #include <stdio.h>
43:
1.2 timbl 44: #include "HTList.h"
45: #include "HText.h" /* See bugs above */
46: #include "HTAlert.h"
47:
1.1 timbl 48:
49: /* These flags may be set to modify the operation of this module
50: */
51: PUBLIC char * HTClientHost = 0; /* Name of remote login host if any */
52: PUBLIC FILE * logfile = 0; /* File to which to output one-liners */
53:
1.2 timbl 54: /* To generate other things, play with these:
55: */
56:
57: PUBLIC HTFormat HTOutputFormat = NULL;
58: PUBLIC HTStream* HTOutputStream = NULL; /* For non-interactive, set this */
1.1 timbl 59:
60: PRIVATE HTList * protocols = NULL; /* List of registered protocol descriptors */
61:
62:
63: /* Register a Protocol HTRegisterProtocol
64: ** -------------------
65: */
66:
67: PUBLIC BOOL HTRegisterProtocol(protocol)
68: HTProtocol * protocol;
69: {
70: if (!protocols) protocols = HTList_new();
71: HTList_addObject(protocols, protocol);
72: return YES;
73: }
74:
75:
76: /* Register all known protocols
77: ** ----------------------------
78: **
79: ** Add to or subtract from this list if you add or remove protocol modules.
80: ** This routine is called the first time the protocol list is needed,
81: ** unless any protocols are already registered, in which case it is not called.
82: ** Therefore the application can override this list.
83: **
84: ** Compiling with NO_INIT prevents all known protocols from being forced
85: ** in at link time.
86: */
87: #ifndef NO_INIT
88: PRIVATE void HTAccessInit NOARGS /* Call me once */
89: {
1.2 timbl 90: extern HTProtocol HTTP, HTFile, HTTelnet, HTTn3270, HTRlogin;
1.1 timbl 91: #ifndef DECNET
1.2 timbl 92: extern HTProtocol HTFTP, HTNews, HTGopher;
1.3 timbl 93: #ifdef DIRECT_WAIS
94: extern HTProtocol HTWAIS;
95: #endif
1.2 timbl 96: HTRegisterProtocol(&HTFTP);
97: HTRegisterProtocol(&HTNews);
98: HTRegisterProtocol(&HTGopher);
1.3 timbl 99: #ifdef DIRECT_WAIS
100: HTRegisterProtocol(&HTWAIS);
101: #endif
1.1 timbl 102: #endif
103:
1.2 timbl 104: HTRegisterProtocol(&HTTP);
105: HTRegisterProtocol(&HTFile);
106: HTRegisterProtocol(&HTTelnet);
107: HTRegisterProtocol(&HTTn3270);
108: HTRegisterProtocol(&HTRlogin);
1.1 timbl 109: }
110: #endif
111:
112:
1.2 timbl 113: /* Find physical name and access protocol
114: ** --------------------------------------
1.1 timbl 115: **
116: **
117: ** On entry,
118: ** addr must point to the fully qualified hypertext reference.
119: ** anchor a pareent anchor with whose address is addr
120: **
121: ** On exit,
1.2 timbl 122: ** returns HT_NO_ACCESS Error has occured.
123: ** HT_OK Success
1.1 timbl 124: **
125: */
1.2 timbl 126: PRIVATE int get_physical ARGS2(
127: CONST char *, addr,
128: HTParentAnchor *, anchor)
1.1 timbl 129: {
130: char * access=0; /* Name of access method */
1.2 timbl 131: char * physical = 0;
1.1 timbl 132:
1.2 timbl 133: #ifndef NO_RULES
134: physical = HTTranslate(addr);
135: if (!physical) {
136: return HT_FORBIDDEN;
137: }
138: HTAnchor_setPhysical(anchor, physical);
139: free(physical); /* free our copy */
140: #else
141: HTAnchor_setPhysical(anchor, addr);
142: #endif
143:
144: access = HTParse(HTAnchor_physical(anchor),
145: "file:", PARSE_ACCESS);
1.1 timbl 146:
147: /* Check whether gateway access has been set up for this
1.8 timbl 148: **
149: ** This function can be replaced by the rule system above.
1.1 timbl 150: */
1.8 timbl 151: #define USE_GATEWAYS
1.1 timbl 152: #ifdef USE_GATEWAYS
1.2 timbl 153: {
1.9 timbl 154: char * gateway_parameter, *gateway;
1.2 timbl 155: gateway_parameter = (char *)malloc(strlen(access)+20);
156: if (gateway_parameter == NULL) outofmem(__FILE__, "HTLoad");
157: strcpy(gateway_parameter, "WWW_");
158: strcat(gateway_parameter, access);
159: strcat(gateway_parameter, "_GATEWAY");
160: gateway = (char *)getenv(gateway_parameter); /* coerce for decstation */
161: free(gateway_parameter);
1.8 timbl 162:
163: #ifndef DIRECT_WAIS
1.9 timbl 164: if (!gateway && 0==strcmp(access, "wais")) {
1.8 timbl 165: gateway = DEFAULT_WAIS_GATEWAY;
166: }
167: #endif
1.2 timbl 168: if (gateway) {
1.9 timbl 169: char * path = HTParse(addr, "",
170: PARSE_HOST + PARSE_PATH + PARSE_PUNCTUATION);
171: /* Chop leading / off to make host into part of path */
172: char * gatewayed = HTParse(path+1, gateway, PARSE_ALL);
173: free(path);
1.8 timbl 174: HTAnchor_setPhysical(anchor, gatewayed);
1.9 timbl 175: free(gatewayed);
1.2 timbl 176: free(access);
1.9 timbl 177:
1.8 timbl 178: access = HTParse(HTAnchor_physical(anchor),
179: "http:", PARSE_ACCESS);
1.2 timbl 180: }
181: }
1.1 timbl 182: #endif
183:
184:
185:
186: /* Search registered protocols to find suitable one
187: */
188: {
189: int i, n;
190: #ifndef NO_INIT
1.2 timbl 191: if (!protocols) HTAccessInit();
1.1 timbl 192: #endif
193: n = HTList_count(protocols);
194: for (i=0; i<n; i++) {
1.2 timbl 195: HTProtocol *p = HTList_objectAt(protocols, i);
196: if (strcmp(p->name, access)==0) {
197: HTAnchor_setProtocol(anchor, p);
198: free(access);
199: return (HT_OK);
1.1 timbl 200: }
201: }
202: }
203:
204: free(access);
1.2 timbl 205: return HT_NO_ACCESS;
1.1 timbl 206: }
207:
208:
209: /* Load a document
210: ** ---------------
211: **
1.2 timbl 212: ** This is an internal routine, which has an address AND a matching
213: ** anchor. (The public routines are called with one OR the other.)
214: **
215: ** On entry,
216: ** addr must point to the fully qualified hypertext reference.
217: ** anchor a pareent anchor with whose address is addr
218: **
219: ** On exit,
220: ** returns <0 Error has occured.
221: ** HT_LOADED Success
222: ** HT_NO_DATA Success, but no document loaded.
1.8 timbl 223: ** (telnet sesssion started etc)
1.2 timbl 224: **
225: */
226: PRIVATE int HTLoad ARGS4(
227: CONST char *, addr,
228: HTParentAnchor *, anchor,
229: HTFormat, format_out,
230: HTStream *, sink)
231: {
232: HTProtocol* p;
233: int status = get_physical(addr, anchor);
234: if (status == HT_FORBIDDEN) {
235: return HTLoadError(sink, 500, "Access forbidden by rule");
236: }
237: if (status < 0) return status; /* Can't resolve or forbidden */
238:
239: p = HTAnchor_protocol(anchor);
240: return (*(p->load))(HTAnchor_physical(anchor),
241: anchor, format_out, sink);
242: }
243:
244:
245: /* Get a save stream for a document
246: ** --------------------------------
247: */
248: PUBLIC HTStream *HTSaveStream ARGS1(HTParentAnchor *, anchor)
249: {
250: HTProtocol * p = HTAnchor_protocol(anchor);
251: if (!p) return NULL;
252:
253: return (*p->saveStream)(anchor);
254:
255: }
256:
257:
258: /* Load a document - with logging etc
259: ** ----------------------------------
260: **
261: ** - Checks or documents already loaded
262: ** - Logs the access
263: ** - Allows stdin filter option
264: ** - Trace ouput and error messages
265: **
1.1 timbl 266: ** On Entry,
267: ** anchor is the node_anchor for the document
268: ** full_address The address of the document to be accessed.
1.2 timbl 269: ** filter if YES, treat stdin as HTML
1.1 timbl 270: **
271: ** On Exit,
272: ** returns YES Success in opening document
273: ** NO Failure
274: **
275: */
276:
1.2 timbl 277: PRIVATE BOOL HTLoadDocument ARGS4(
278: CONST char *, full_address,
279: HTParentAnchor *, anchor,
280: HTFormat, format_out,
281: HTStream*, sink)
1.1 timbl 282:
283: {
284: int status;
285: HText * text;
286:
287: if (TRACE) fprintf (stderr,
288: "HTAccess: loading document %s\n", full_address);
289:
290: if (text=(HText *)HTAnchor_document(anchor)) { /* Already loaded */
291: if (TRACE) fprintf(stderr, "HTAccess: Document already in memory.\n");
292: HText_select(text);
293: return YES;
294: }
295:
1.2 timbl 296: status = HTLoad(full_address, anchor, format_out, sink);
297:
298:
1.1 timbl 299: /* Log the access if necessary
300: */
301: if (logfile) {
302: time_t theTime;
303: time(&theTime);
304: fprintf(logfile, "%24.24s %s %s %s\n",
305: ctime(&theTime),
306: HTClientHost ? HTClientHost : "local",
307: status<0 ? "FAIL" : "GET",
308: full_address);
309: fflush(logfile); /* Actually update it on disk */
310: if (TRACE) fprintf(stderr, "Log: %24.24s %s %s %s\n",
311: ctime(&theTime),
312: HTClientHost ? HTClientHost : "local",
313: status<0 ? "FAIL" : "GET",
314: full_address);
315: }
316:
317:
318: if (status == HT_LOADED) {
319: if (TRACE) {
320: fprintf(stderr, "HTAccess: `%s' has been accessed.\n",
321: full_address);
322: }
323: return YES;
324: }
325:
326: if (status == HT_NO_DATA) {
327: if (TRACE) {
328: fprintf(stderr,
329: "HTAccess: `%s' has been accessed, No data left.\n",
330: full_address);
331: }
332: return NO;
333: }
334:
1.2 timbl 335: if (status<0) { /* Failure in accessing a document */
1.1 timbl 336: #ifdef CURSES
337: user_message("Can't access `%s'", full_address);
338: #else
1.5 timbl 339: if (TRACE) fprintf(stderr,
340: "HTAccess: Can't access `%s'\n", full_address);
1.1 timbl 341: #endif
1.6 timbl 342: HTLoadError(sink, 500, "Unable to access document.");
1.1 timbl 343: return NO;
344: }
1.9 timbl 345:
346: /* If you get this, then please find which routine is returning
347: a positive unrecognised error code! */
348:
1.1 timbl 349: fprintf(stderr,
1.2 timbl 350: "**** HTAccess: socket or file number returned by obsolete load routine!\n");
1.9 timbl 351: fprintf(stderr,
352: "**** HTAccess: Internal software error. Please mail www-bug@info.cern.ch!\n");
1.1 timbl 353: exit(-6996);
354:
1.2 timbl 355: } /* HTLoadDocument */
1.1 timbl 356:
357:
358:
359: /* Load a document from absolute name
360: ** ---------------
361: **
362: ** On Entry,
363: ** addr The absolute address of the document to be accessed.
364: ** filter if YES, treat document as HTML
365: **
366: ** On Exit,
367: ** returns YES Success in opening document
368: ** NO Failure
369: **
370: **
371: */
372:
1.2 timbl 373: PUBLIC BOOL HTLoadAbsolute ARGS1(CONST char *,addr)
374: {
375: return HTLoadDocument( addr,
376: HTAnchor_parent(HTAnchor_findAddress(addr)),
377: HTOutputFormat ? HTOutputFormat : WWW_PRESENT,
378: HTOutputStream);
379: }
380:
381:
382: /* Load a document from absolute name to stream
383: ** --------------------------------------------
384: **
385: ** On Entry,
386: ** addr The absolute address of the document to be accessed.
387: ** sink if non-NULL, send data down this stream
388: **
389: ** On Exit,
390: ** returns YES Success in opening document
391: ** NO Failure
392: **
393: **
394: */
395:
396: PUBLIC BOOL HTLoadToStream ARGS3(
397: CONST char *, addr,
398: BOOL, filter,
399: HTStream *, sink)
1.1 timbl 400: {
1.2 timbl 401: return HTLoadDocument(addr,
1.1 timbl 402: HTAnchor_parent(HTAnchor_findAddress(addr)),
1.2 timbl 403: HTOutputFormat ? HTOutputFormat : WWW_PRESENT,
404: sink);
1.1 timbl 405: }
406:
407:
1.2 timbl 408:
409:
1.1 timbl 410: /* Load a document from relative name
411: ** ---------------
412: **
413: ** On Entry,
1.2 timbl 414: ** relative_name The relative address of the document
415: ** to be accessed.
1.1 timbl 416: **
417: ** On Exit,
418: ** returns YES Success in opening document
419: ** NO Failure
420: **
421: **
422: */
423:
1.2 timbl 424: PUBLIC BOOL HTLoadRelative ARGS2(
425: CONST char *, relative_name,
426: HTParentAnchor *, here)
1.1 timbl 427: {
428: char * full_address = 0;
429: BOOL result;
430: char * mycopy = 0;
431: char * stripped = 0;
432: char * current_address =
1.2 timbl 433: HTAnchor_address((HTAnchor*)here);
1.1 timbl 434:
435: StrAllocCopy(mycopy, relative_name);
436:
437: stripped = HTStrip(mycopy);
438: full_address = HTParse(stripped,
439: current_address,
440: PARSE_ACCESS|PARSE_HOST|PARSE_PATH|PARSE_PUNCTUATION);
1.2 timbl 441: result = HTLoadAbsolute(full_address);
1.1 timbl 442: free(full_address);
443: free(current_address);
444: free(mycopy); /* Memory leak fixed 10/7/92 -- JFG */
445: return result;
446: }
447:
448:
449: /* Load if necessary, and select an anchor
450: ** --------------------------------------
451: **
452: ** On Entry,
453: ** destination The child or parenet anchor to be loaded.
454: **
455: ** On Exit,
456: ** returns YES Success
457: ** NO Failure
458: **
459: */
460:
461: PUBLIC BOOL HTLoadAnchor ARGS1(HTAnchor *,destination)
462: {
463: HTParentAnchor * parent;
464: BOOL loaded = NO;
465: if (!destination) return NO; /* No link */
466:
467: parent = HTAnchor_parent(destination);
468:
469: if (HTAnchor_document(parent) == NULL) { /* If not alread loaded */
470: /* TBL 921202 */
1.2 timbl 471:
1.1 timbl 472: BOOL result;
473: char * address = HTAnchor_address((HTAnchor*) parent);
1.2 timbl 474: result = HTLoadDocument(address, parent,
475: HTOutputFormat ? HTOutputFormat : WWW_PRESENT,
476: HTOutputStream);
1.1 timbl 477: free(address);
478: if (!result) return NO;
479: loaded = YES;
480: }
481:
482: {
483: HText *text = (HText*)HTAnchor_document(parent);
484: if (destination != (HTAnchor *)parent) { /* If child anchor */
485: HText_selectAnchor(text,
486: (HTChildAnchor*)destination); /* Double display? @@ */
487: } else {
488: if (!loaded) HText_select(text);
489: }
490: }
491: return YES;
492:
493: } /* HTLoadAnchor */
494:
495:
496: /* Search
497: ** ------
498: ** Performs a keyword search on word given by the user. Adds the keyword to
499: ** the end of the current address and attempts to open the new address.
500: **
501: ** On Entry,
502: ** *keywords space-separated keyword list or similar search list
1.2 timbl 503: ** here is anchor search is to be done on.
1.1 timbl 504: */
505:
1.2 timbl 506: PRIVATE char hex(i)
507: int i;
508: {
509: return i < 10 ? '0'+i : 'A'+ i - 10;
510: }
1.1 timbl 511:
1.2 timbl 512: PUBLIC BOOL HTSearch ARGS2(
513: CONST char *, keywords,
514: HTParentAnchor *, here)
1.1 timbl 515: {
1.2 timbl 516:
517: #define acceptable \
518: "1234567890abcdefghijlkmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ.-_"
519:
520: char *q, *u;
521: CONST char * p, *s, *e; /* Pointers into keywords */
522: char * address = HTAnchor_address((HTAnchor*)here);
1.1 timbl 523: BOOL result;
1.2 timbl 524: char * escaped = malloc(strlen(keywords)*3+1);
525:
526: static CONST BOOL isAcceptable[96] =
527:
528: /* 0 1 2 3 4 5 6 7 8 9 A B C D E F */
529: { 0,0,0,0,0,0,0,0,0,0,1,0,0,1,1,0, /* 2x !"#$%&'()*+,-./ */
530: 1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0, /* 3x 0123456789:;<=>? */
531: 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 4x @ABCDEFGHIJKLMNO */
532: 1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,1, /* 5X PQRSTUVWXYZ[\]^_ */
533: 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 6x `abcdefghijklmno */
534: 1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0 }; /* 7X pqrstuvwxyz{\}~ DEL */
535:
536: if (escaped == NULL) outofmem(__FILE__, "HTSearch");
537:
538:
539: /* Convert spaces to + and hex escape unacceptable characters
540: */
541: for(s=keywords; *s && WHITE(*s); s++) /*scan */ ; /* Skip white space */
542: for(e = s + strlen(s); e>s && WHITE(*(e-1)) ; e--); /* Skip trailers */
543: for(q=escaped, p=s; p<e; p++) { /* scan stripped field */
544: int c = (int)TOASCII(*p);
545: if (WHITE(*p)) {
546: *q++ = '+';
547: } else if (c>=32 && c<=(char)127 && isAcceptable[c-32]) {
548: *q++ = (char)c;
549: } else {
550: *q++ = '%';
551: *q++ = hex(c / 16);
552: *q++ = hex(c % 16);
553: }
554: } /* Loop over string */
1.1 timbl 555:
1.2 timbl 556: *q=0;
557: /* terminate escaped sctring */
558: u=strchr(address, '?'); /* Find old search string */
559: if (u) *u = 0; /* Chop old search off */
1.1 timbl 560:
561: StrAllocCat(address, "?");
1.2 timbl 562: StrAllocCat(address, escaped);
563: free(escaped);
564: result = HTLoadRelative(address, here);
1.1 timbl 565: free(address);
1.2 timbl 566:
1.1 timbl 567: return result;
1.2 timbl 568: }
569:
570:
571: /* Search Given Indexname
572: ** ------
573: ** Performs a keyword search on word given by the user. Adds the keyword to
574: ** the end of the current address and attempts to open the new address.
575: **
576: ** On Entry,
577: ** *keywords space-separated keyword list or similar search list
578: ** *addres is name of object search is to be done on.
579: */
580:
581: PUBLIC BOOL HTSearchAbsolute ARGS2(
582: CONST char *, keywords,
583: CONST char *, indexname)
584: {
585: HTParentAnchor * anchor =
586: (HTParentAnchor*) HTAnchor_findAddress(indexname);
587: return HTSearch(keywords, anchor);
588: }
589:
590:
591: /* Generate the anchor for the home page
592: ** -------------------------------------
593: **
594: ** As it involves file access, this should only be done once
595: ** when the program first runs.
1.10 ! timbl 596: ** This is a default algorithm -- browser don't HAVE to use this.
! 597: ** But consistency betwen browsers is STRONGLY recommended!
1.2 timbl 598: **
1.10 ! timbl 599: ** Priority order is:
! 600: **
! 601: ** 1 WWW_HOME environment variable (logical name, etc)
! 602: ** 2 ~/WWW/default.html
! 603: ** 3 /usr/local/bin/default.html
! 604: ** 4 http://info.cern.ch/default.html
! 605: **
1.2 timbl 606: */
607: PUBLIC HTParentAnchor * HTHomeAnchor NOARGS
608: {
1.10 ! timbl 609: char * my_home_document = (char *)getenv(LOGICAL_DEFAULT);
1.2 timbl 610: char * ref;
611: HTParentAnchor * anchor;
1.1 timbl 612:
1.2 timbl 613: #ifdef unix
1.10 ! timbl 614: if (!my_home_document) {
! 615: FILE * fp = NULL;
! 616: CONST char * home = (CONST char*)getenv("HOME");
! 617: if (home) {
! 618: my_home_document = (char *)malloc(
! 619: strlen(home)+1+ strlen(PERSONAL_DEFAULT)+1);
! 620: if (my_home_document == NULL) outofmem(__FILE__, "HTLocalName");
! 621: sprintf(my_home_document, "%s/%s", home, PERSONAL_DEFAULT);
! 622: fp = fopen(my_home_document, "r");
! 623: }
! 624:
! 625: if (!fp) {
! 626: StrAllocCopy(my_home_document, LOCAL_DEFAULT_FILE);
! 627: fp = fopen(my_home_document, "r");
! 628: }
1.2 timbl 629: if (fp) {
630: fclose(fp);
631: } else {
632: if (TRACE) fprintf(stderr,
1.10 ! timbl 633: "HTBrowse: No local home document ~/%s or %s\n",
! 634: PERSONAL_DEFAULT, LOCAL_DEFAULT_FILE);
1.2 timbl 635: }
636: }
637: #endif
1.10 ! timbl 638: ref = HTParse( my_home_document ? my_home_document :
! 639: HTClientHost ? REMOTE_ADDRESS
! 640: : LAST_RESORT,
! 641: "file:",
1.2 timbl 642: PARSE_ACCESS|PARSE_HOST|PARSE_PATH|PARSE_PUNCTUATION);
1.10 ! timbl 643: if (my_home_document) {
1.2 timbl 644: if (TRACE) fprintf(stderr,
645: "HTAccess: Using custom home page %s i.e. address %s\n",
1.10 ! timbl 646: my_home_document, ref);
! 647: free(my_home_document);
1.2 timbl 648: }
649: anchor = (HTParentAnchor*) HTAnchor_findAddress(ref);
650: free(ref);
651: return anchor;
1.1 timbl 652: }
653:
654:
Webmaster