Annotation of libwww/Library/src/HTAccess.c, revision 1.8
1.1 timbl 1: /* Access Manager HTAccess.c
2: ** ==============
3: **
4: ** Authors
5: ** TBL Tim Berners-Lee timbl@info.cern.ch
1.4 timbl 6: ** JFG Jean-Francois Groff jfg@dxcern.cern.ch
1.1 timbl 7: ** DD Denis DeLaRoca (310) 825-4580 <CSP1DWD@mvs.oac.ucla.edu>
8: ** History
9: ** 8 Jun 92 Telnet hopping prohibited as telnet is not secure TBL
10: ** 26 Jun 92 When over DECnet, suppressed FTP, Gopher and News. JFG
11: ** 6 Oct 92 Moved HTClientHost and logfile into here. TBL
12: ** 17 Dec 92 Tn3270 added, bug fix. DD
1.2 timbl 13: ** 4 Feb 93 Access registration, Search escapes bad chars TBL
14: ** PARAMETERS TO HTSEARCH AND HTLOADRELATIVE CHANGED
15: **
16: ** Bugs
17: ** This module assumes that the graphic object is hypertext, as it
18: ** needs to select it when it has been loaded. A superclass needs to be
19: ** defined which accepts select and select_anchor.
1.1 timbl 20: */
21:
1.8 ! timbl 22: #define DEFAULT_WAIS_GATEWAY "http://info.cern.ch:8001/"
! 23:
1.1 timbl 24: /* Implements:
25: */
26: #include "HTAccess.h"
27:
28: /* Uses:
29: */
30:
31: #include "HTParse.h"
32: #include "HTUtils.h"
1.4 timbl 33: #include "HTML.h" /* SCW */
1.2 timbl 34:
35: #ifndef NO_RULES
36: #include "HTRules.h"
37: #endif
38:
1.1 timbl 39: #include <stdio.h>
40:
1.2 timbl 41: #include "HTList.h"
42: #include "HText.h" /* See bugs above */
43: #include "HTAlert.h"
44:
1.1 timbl 45:
46: /* These flags may be set to modify the operation of this module
47: */
48: PUBLIC char * HTClientHost = 0; /* Name of remote login host if any */
49: PUBLIC FILE * logfile = 0; /* File to which to output one-liners */
50:
1.2 timbl 51: /* To generate other things, play with these:
52: */
53:
54: PUBLIC HTFormat HTOutputFormat = NULL;
55: PUBLIC HTStream* HTOutputStream = NULL; /* For non-interactive, set this */
1.1 timbl 56:
57: PRIVATE HTList * protocols = NULL; /* List of registered protocol descriptors */
58:
59:
60: /* Register a Protocol HTRegisterProtocol
61: ** -------------------
62: */
63:
64: PUBLIC BOOL HTRegisterProtocol(protocol)
65: HTProtocol * protocol;
66: {
67: if (!protocols) protocols = HTList_new();
68: HTList_addObject(protocols, protocol);
69: return YES;
70: }
71:
72:
73: /* Register all known protocols
74: ** ----------------------------
75: **
76: ** Add to or subtract from this list if you add or remove protocol modules.
77: ** This routine is called the first time the protocol list is needed,
78: ** unless any protocols are already registered, in which case it is not called.
79: ** Therefore the application can override this list.
80: **
81: ** Compiling with NO_INIT prevents all known protocols from being forced
82: ** in at link time.
83: */
84: #ifndef NO_INIT
85: PRIVATE void HTAccessInit NOARGS /* Call me once */
86: {
1.2 timbl 87: extern HTProtocol HTTP, HTFile, HTTelnet, HTTn3270, HTRlogin;
1.1 timbl 88: #ifndef DECNET
1.2 timbl 89: extern HTProtocol HTFTP, HTNews, HTGopher;
1.3 timbl 90: #ifdef DIRECT_WAIS
91: extern HTProtocol HTWAIS;
92: #endif
1.2 timbl 93: HTRegisterProtocol(&HTFTP);
94: HTRegisterProtocol(&HTNews);
95: HTRegisterProtocol(&HTGopher);
1.3 timbl 96: #ifdef DIRECT_WAIS
97: HTRegisterProtocol(&HTWAIS);
98: #endif
1.1 timbl 99: #endif
100:
1.2 timbl 101: HTRegisterProtocol(&HTTP);
102: HTRegisterProtocol(&HTFile);
103: HTRegisterProtocol(&HTTelnet);
104: HTRegisterProtocol(&HTTn3270);
105: HTRegisterProtocol(&HTRlogin);
1.1 timbl 106: }
107: #endif
108:
109:
1.2 timbl 110: /* Find physical name and access protocol
111: ** --------------------------------------
1.1 timbl 112: **
113: **
114: ** On entry,
115: ** addr must point to the fully qualified hypertext reference.
116: ** anchor a pareent anchor with whose address is addr
117: **
118: ** On exit,
1.2 timbl 119: ** returns HT_NO_ACCESS Error has occured.
120: ** HT_OK Success
1.1 timbl 121: **
122: */
1.2 timbl 123: PRIVATE int get_physical ARGS2(
124: CONST char *, addr,
125: HTParentAnchor *, anchor)
1.1 timbl 126: {
127: char * access=0; /* Name of access method */
1.2 timbl 128: char * physical = 0;
1.1 timbl 129:
1.2 timbl 130: #ifndef NO_RULES
131: physical = HTTranslate(addr);
132: if (!physical) {
133: return HT_FORBIDDEN;
134: }
135: HTAnchor_setPhysical(anchor, physical);
136: free(physical); /* free our copy */
137: #else
138: HTAnchor_setPhysical(anchor, addr);
139: #endif
140:
141: access = HTParse(HTAnchor_physical(anchor),
142: "file:", PARSE_ACCESS);
1.1 timbl 143:
144: /* Check whether gateway access has been set up for this
1.8 ! timbl 145: **
! 146: ** This function can be replaced by the rule system above.
1.1 timbl 147: */
1.8 ! timbl 148: #define USE_GATEWAYS
1.1 timbl 149: #ifdef USE_GATEWAYS
1.2 timbl 150: {
151: char * gateway_parameter, gateway;
152: gateway_parameter = (char *)malloc(strlen(access)+20);
153: if (gateway_parameter == NULL) outofmem(__FILE__, "HTLoad");
154: strcpy(gateway_parameter, "WWW_");
155: strcat(gateway_parameter, access);
156: strcat(gateway_parameter, "_GATEWAY");
157: gateway = (char *)getenv(gateway_parameter); /* coerce for decstation */
158: free(gateway_parameter);
1.8 ! timbl 159:
! 160: #ifndef DIRECT_WAIS
! 161: if (!gateway && 0=strcmp(access, "wais")) {
! 162: gateway = DEFAULT_WAIS_GATEWAY;
! 163: }
! 164: #endif
1.2 timbl 165: if (gateway) {
1.8 ! timbl 166: char * path = HTParse(addr, "", PARSE_PATH + PARSE_PUNCTUATION);
! 167: char * gatewayed = HTParse(path, gateway, PARSE_ALL);
! 168: HTAnchor_setPhysical(anchor, gatewayed);
1.2 timbl 169: free(access);
1.8 ! timbl 170: access = HTParse(HTAnchor_physical(anchor),
! 171: "http:", PARSE_ACCESS);
1.2 timbl 172: }
173: }
1.1 timbl 174: #endif
175:
176:
177:
178: /* Search registered protocols to find suitable one
179: */
180: {
181: int i, n;
182: #ifndef NO_INIT
1.2 timbl 183: if (!protocols) HTAccessInit();
1.1 timbl 184: #endif
185: n = HTList_count(protocols);
186: for (i=0; i<n; i++) {
1.2 timbl 187: HTProtocol *p = HTList_objectAt(protocols, i);
188: if (strcmp(p->name, access)==0) {
189: HTAnchor_setProtocol(anchor, p);
190: free(access);
191: return (HT_OK);
1.1 timbl 192: }
193: }
194: }
195:
196: free(access);
1.2 timbl 197: return HT_NO_ACCESS;
1.1 timbl 198: }
199:
200:
201: /* Load a document
202: ** ---------------
203: **
1.2 timbl 204: ** This is an internal routine, which has an address AND a matching
205: ** anchor. (The public routines are called with one OR the other.)
206: **
207: ** On entry,
208: ** addr must point to the fully qualified hypertext reference.
209: ** anchor a pareent anchor with whose address is addr
210: **
211: ** On exit,
212: ** returns <0 Error has occured.
213: ** HT_LOADED Success
214: ** HT_NO_DATA Success, but no document loaded.
1.8 ! timbl 215: ** (telnet sesssion started etc)
1.2 timbl 216: **
217: */
218: PRIVATE int HTLoad ARGS4(
219: CONST char *, addr,
220: HTParentAnchor *, anchor,
221: HTFormat, format_out,
222: HTStream *, sink)
223: {
224: HTProtocol* p;
225: int status = get_physical(addr, anchor);
226: if (status == HT_FORBIDDEN) {
227: return HTLoadError(sink, 500, "Access forbidden by rule");
228: }
229: if (status < 0) return status; /* Can't resolve or forbidden */
230:
231: p = HTAnchor_protocol(anchor);
232: return (*(p->load))(HTAnchor_physical(anchor),
233: anchor, format_out, sink);
234: }
235:
236:
237: /* Get a save stream for a document
238: ** --------------------------------
239: */
240: PUBLIC HTStream *HTSaveStream ARGS1(HTParentAnchor *, anchor)
241: {
242: HTProtocol * p = HTAnchor_protocol(anchor);
243: if (!p) return NULL;
244:
245: return (*p->saveStream)(anchor);
246:
247: }
248:
249:
250: /* Load a document - with logging etc
251: ** ----------------------------------
252: **
253: ** - Checks or documents already loaded
254: ** - Logs the access
255: ** - Allows stdin filter option
256: ** - Trace ouput and error messages
257: **
1.1 timbl 258: ** On Entry,
259: ** anchor is the node_anchor for the document
260: ** full_address The address of the document to be accessed.
1.2 timbl 261: ** filter if YES, treat stdin as HTML
1.1 timbl 262: **
263: ** On Exit,
264: ** returns YES Success in opening document
265: ** NO Failure
266: **
267: */
268:
1.2 timbl 269: PRIVATE BOOL HTLoadDocument ARGS4(
270: CONST char *, full_address,
271: HTParentAnchor *, anchor,
272: HTFormat, format_out,
273: HTStream*, sink)
1.1 timbl 274:
275: {
276: int status;
277: HText * text;
278:
279: if (TRACE) fprintf (stderr,
280: "HTAccess: loading document %s\n", full_address);
281:
282: if (text=(HText *)HTAnchor_document(anchor)) { /* Already loaded */
283: if (TRACE) fprintf(stderr, "HTAccess: Document already in memory.\n");
284: HText_select(text);
285: return YES;
286: }
287:
1.2 timbl 288: status = HTLoad(full_address, anchor, format_out, sink);
289:
290:
1.1 timbl 291: /* Log the access if necessary
292: */
293: if (logfile) {
294: time_t theTime;
295: time(&theTime);
296: fprintf(logfile, "%24.24s %s %s %s\n",
297: ctime(&theTime),
298: HTClientHost ? HTClientHost : "local",
299: status<0 ? "FAIL" : "GET",
300: full_address);
301: fflush(logfile); /* Actually update it on disk */
302: if (TRACE) fprintf(stderr, "Log: %24.24s %s %s %s\n",
303: ctime(&theTime),
304: HTClientHost ? HTClientHost : "local",
305: status<0 ? "FAIL" : "GET",
306: full_address);
307: }
308:
309:
310: if (status == HT_LOADED) {
311: if (TRACE) {
312: fprintf(stderr, "HTAccess: `%s' has been accessed.\n",
313: full_address);
314: }
315: return YES;
316: }
317:
318: if (status == HT_NO_DATA) {
319: if (TRACE) {
320: fprintf(stderr,
321: "HTAccess: `%s' has been accessed, No data left.\n",
322: full_address);
323: }
324: return NO;
325: }
326:
1.2 timbl 327: if (status<0) { /* Failure in accessing a document */
1.1 timbl 328: #ifdef CURSES
329: user_message("Can't access `%s'", full_address);
330: #else
1.5 timbl 331: if (TRACE) fprintf(stderr,
332: "HTAccess: Can't access `%s'\n", full_address);
1.1 timbl 333: #endif
1.6 timbl 334: HTLoadError(sink, 500, "Unable to access document.");
1.1 timbl 335: return NO;
336: }
337:
338: fprintf(stderr,
1.2 timbl 339: "**** HTAccess: socket or file number returned by obsolete load routine!\n");
1.7 secret 340: { char c = *(char*)0; } /* crash */
1.1 timbl 341: exit(-6996);
342:
1.2 timbl 343: } /* HTLoadDocument */
1.1 timbl 344:
345:
346:
347: /* Load a document from absolute name
348: ** ---------------
349: **
350: ** On Entry,
351: ** addr The absolute address of the document to be accessed.
352: ** filter if YES, treat document as HTML
353: **
354: ** On Exit,
355: ** returns YES Success in opening document
356: ** NO Failure
357: **
358: **
359: */
360:
1.2 timbl 361: PUBLIC BOOL HTLoadAbsolute ARGS1(CONST char *,addr)
362: {
363: return HTLoadDocument( addr,
364: HTAnchor_parent(HTAnchor_findAddress(addr)),
365: HTOutputFormat ? HTOutputFormat : WWW_PRESENT,
366: HTOutputStream);
367: }
368:
369:
370: /* Load a document from absolute name to stream
371: ** --------------------------------------------
372: **
373: ** On Entry,
374: ** addr The absolute address of the document to be accessed.
375: ** sink if non-NULL, send data down this stream
376: **
377: ** On Exit,
378: ** returns YES Success in opening document
379: ** NO Failure
380: **
381: **
382: */
383:
384: PUBLIC BOOL HTLoadToStream ARGS3(
385: CONST char *, addr,
386: BOOL, filter,
387: HTStream *, sink)
1.1 timbl 388: {
1.2 timbl 389: return HTLoadDocument(addr,
1.1 timbl 390: HTAnchor_parent(HTAnchor_findAddress(addr)),
1.2 timbl 391: HTOutputFormat ? HTOutputFormat : WWW_PRESENT,
392: sink);
1.1 timbl 393: }
394:
395:
1.2 timbl 396:
397:
1.1 timbl 398: /* Load a document from relative name
399: ** ---------------
400: **
401: ** On Entry,
1.2 timbl 402: ** relative_name The relative address of the document
403: ** to be accessed.
1.1 timbl 404: **
405: ** On Exit,
406: ** returns YES Success in opening document
407: ** NO Failure
408: **
409: **
410: */
411:
1.2 timbl 412: PUBLIC BOOL HTLoadRelative ARGS2(
413: CONST char *, relative_name,
414: HTParentAnchor *, here)
1.1 timbl 415: {
416: char * full_address = 0;
417: BOOL result;
418: char * mycopy = 0;
419: char * stripped = 0;
420: char * current_address =
1.2 timbl 421: HTAnchor_address((HTAnchor*)here);
1.1 timbl 422:
423: StrAllocCopy(mycopy, relative_name);
424:
425: stripped = HTStrip(mycopy);
426: full_address = HTParse(stripped,
427: current_address,
428: PARSE_ACCESS|PARSE_HOST|PARSE_PATH|PARSE_PUNCTUATION);
1.2 timbl 429: result = HTLoadAbsolute(full_address);
1.1 timbl 430: free(full_address);
431: free(current_address);
432: free(mycopy); /* Memory leak fixed 10/7/92 -- JFG */
433: return result;
434: }
435:
436:
437: /* Load if necessary, and select an anchor
438: ** --------------------------------------
439: **
440: ** On Entry,
441: ** destination The child or parenet anchor to be loaded.
442: **
443: ** On Exit,
444: ** returns YES Success
445: ** NO Failure
446: **
447: */
448:
449: PUBLIC BOOL HTLoadAnchor ARGS1(HTAnchor *,destination)
450: {
451: HTParentAnchor * parent;
452: BOOL loaded = NO;
453: if (!destination) return NO; /* No link */
454:
455: parent = HTAnchor_parent(destination);
456:
457: if (HTAnchor_document(parent) == NULL) { /* If not alread loaded */
458: /* TBL 921202 */
1.2 timbl 459:
1.1 timbl 460: BOOL result;
461: char * address = HTAnchor_address((HTAnchor*) parent);
1.2 timbl 462: result = HTLoadDocument(address, parent,
463: HTOutputFormat ? HTOutputFormat : WWW_PRESENT,
464: HTOutputStream);
1.1 timbl 465: free(address);
466: if (!result) return NO;
467: loaded = YES;
468: }
469:
470: {
471: HText *text = (HText*)HTAnchor_document(parent);
472: if (destination != (HTAnchor *)parent) { /* If child anchor */
473: HText_selectAnchor(text,
474: (HTChildAnchor*)destination); /* Double display? @@ */
475: } else {
476: if (!loaded) HText_select(text);
477: }
478: }
479: return YES;
480:
481: } /* HTLoadAnchor */
482:
483:
484: /* Search
485: ** ------
486: ** Performs a keyword search on word given by the user. Adds the keyword to
487: ** the end of the current address and attempts to open the new address.
488: **
489: ** On Entry,
490: ** *keywords space-separated keyword list or similar search list
1.2 timbl 491: ** here is anchor search is to be done on.
1.1 timbl 492: */
493:
1.2 timbl 494: PRIVATE char hex(i)
495: int i;
496: {
497: return i < 10 ? '0'+i : 'A'+ i - 10;
498: }
1.1 timbl 499:
1.2 timbl 500: PUBLIC BOOL HTSearch ARGS2(
501: CONST char *, keywords,
502: HTParentAnchor *, here)
1.1 timbl 503: {
1.2 timbl 504:
505: #define acceptable \
506: "1234567890abcdefghijlkmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ.-_"
507:
508: char *q, *u;
509: CONST char * p, *s, *e; /* Pointers into keywords */
510: char * address = HTAnchor_address((HTAnchor*)here);
1.1 timbl 511: BOOL result;
1.2 timbl 512: char * escaped = malloc(strlen(keywords)*3+1);
513:
514: static CONST BOOL isAcceptable[96] =
515:
516: /* 0 1 2 3 4 5 6 7 8 9 A B C D E F */
517: { 0,0,0,0,0,0,0,0,0,0,1,0,0,1,1,0, /* 2x !"#$%&'()*+,-./ */
518: 1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0, /* 3x 0123456789:;<=>? */
519: 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 4x @ABCDEFGHIJKLMNO */
520: 1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,1, /* 5X PQRSTUVWXYZ[\]^_ */
521: 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 6x `abcdefghijklmno */
522: 1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0 }; /* 7X pqrstuvwxyz{\}~ DEL */
523:
524: if (escaped == NULL) outofmem(__FILE__, "HTSearch");
525:
526:
527: /* Convert spaces to + and hex escape unacceptable characters
528: */
529: for(s=keywords; *s && WHITE(*s); s++) /*scan */ ; /* Skip white space */
530: for(e = s + strlen(s); e>s && WHITE(*(e-1)) ; e--); /* Skip trailers */
531: for(q=escaped, p=s; p<e; p++) { /* scan stripped field */
532: int c = (int)TOASCII(*p);
533: if (WHITE(*p)) {
534: *q++ = '+';
535: } else if (c>=32 && c<=(char)127 && isAcceptable[c-32]) {
536: *q++ = (char)c;
537: } else {
538: *q++ = '%';
539: *q++ = hex(c / 16);
540: *q++ = hex(c % 16);
541: }
542: } /* Loop over string */
1.1 timbl 543:
1.2 timbl 544: *q=0;
545: /* terminate escaped sctring */
546: u=strchr(address, '?'); /* Find old search string */
547: if (u) *u = 0; /* Chop old search off */
1.1 timbl 548:
549: StrAllocCat(address, "?");
1.2 timbl 550: StrAllocCat(address, escaped);
551: free(escaped);
552: result = HTLoadRelative(address, here);
1.1 timbl 553: free(address);
1.2 timbl 554:
1.1 timbl 555: return result;
1.2 timbl 556: }
557:
558:
559: /* Search Given Indexname
560: ** ------
561: ** Performs a keyword search on word given by the user. Adds the keyword to
562: ** the end of the current address and attempts to open the new address.
563: **
564: ** On Entry,
565: ** *keywords space-separated keyword list or similar search list
566: ** *addres is name of object search is to be done on.
567: */
568:
569: PUBLIC BOOL HTSearchAbsolute ARGS2(
570: CONST char *, keywords,
571: CONST char *, indexname)
572: {
573: HTParentAnchor * anchor =
574: (HTParentAnchor*) HTAnchor_findAddress(indexname);
575: return HTSearch(keywords, anchor);
576: }
577:
578:
579: /* Generate the anchor for the home page
580: ** -------------------------------------
581: **
582: ** As it involves file access, this should only be done once
583: ** when the program first runs.
584: ** This is a default algorithm -- browesr don't HAVE to use this.
585: **
586: */
587: PUBLIC HTParentAnchor * HTHomeAnchor NOARGS
588: {
589: char * my_home = (char *)getenv(LOGICAL_DEFAULT);
590: BOOL got_local_default = NO;
591: char * ref;
592: HTParentAnchor * anchor;
1.1 timbl 593:
1.2 timbl 594: #ifdef unix
595: {
596: FILE * fp = fopen(LOCAL_DEFAULT_FILE, "r");
597: if (fp) {
598: fclose(fp);
599: got_local_default = YES;
600: } else {
601: if (TRACE) fprintf(stderr,
602: "HTBrowse: No local default home %s\n",
603: LOCAL_DEFAULT_FILE);
604: }
605: }
606: #endif
607: ref = HTParse( my_home ? my_home :
608: HTClientHost ? REMOTE_ADDRESS :
609: got_local_default ? LOCAL_DEFAULT
610: : LAST_RESORT,
611: LAST_RESORT,
612: PARSE_ACCESS|PARSE_HOST|PARSE_PATH|PARSE_PUNCTUATION);
613: if (my_home) {
614: if (TRACE) fprintf(stderr,
615: "HTAccess: Using custom home page %s i.e. address %s\n",
616: my_home, ref);
617: }
618: anchor = (HTParentAnchor*) HTAnchor_findAddress(ref);
619: free(ref);
620: return anchor;
1.1 timbl 621: }
622:
623:
Webmaster