Annotation of libwww/Library/src/HTAccess.c, revision 1.3
1.1 timbl 1: /* Access Manager HTAccess.c
2: ** ==============
3: **
4: ** Authors
5: ** TBL Tim Berners-Lee timbl@info.cern.ch
6: ** JFG Jean-Francois Groff jgh@next.com
7: ** DD Denis DeLaRoca (310) 825-4580 <CSP1DWD@mvs.oac.ucla.edu>
8: ** History
9: ** 8 Jun 92 Telnet hopping prohibited as telnet is not secure TBL
10: ** 26 Jun 92 When over DECnet, suppressed FTP, Gopher and News. JFG
11: ** 6 Oct 92 Moved HTClientHost and logfile into here. TBL
12: ** 17 Dec 92 Tn3270 added, bug fix. DD
1.2 timbl 13: ** 4 Feb 93 Access registration, Search escapes bad chars TBL
14: ** PARAMETERS TO HTSEARCH AND HTLOADRELATIVE CHANGED
15: **
16: ** Bugs
17: ** This module assumes that the graphic object is hypertext, as it
18: ** needs to select it when it has been loaded. A superclass needs to be
19: ** defined which accepts select and select_anchor.
1.1 timbl 20: */
21:
22: /* Implements:
23: */
24: #include "HTAccess.h"
25:
26: /* Uses:
27: */
28:
29: #include "HTParse.h"
30: #include "HTUtils.h"
1.2 timbl 31:
32: #ifndef NO_RULES
33: #include "HTRules.h"
34: #endif
35:
1.1 timbl 36: #include <stdio.h>
37:
1.2 timbl 38: #include "HTList.h"
39: #include "HText.h" /* See bugs above */
40: #include "HTAlert.h"
41:
1.1 timbl 42:
43: /* These flags may be set to modify the operation of this module
44: */
45: PUBLIC char * HTClientHost = 0; /* Name of remote login host if any */
46: PUBLIC FILE * logfile = 0; /* File to which to output one-liners */
47:
1.2 timbl 48: /* To generate other things, play with these:
49: */
50:
51: PUBLIC HTFormat HTOutputFormat = NULL;
52: PUBLIC HTStream* HTOutputStream = NULL; /* For non-interactive, set this */
1.1 timbl 53:
54: PRIVATE HTList * protocols = NULL; /* List of registered protocol descriptors */
55:
56:
57: /* Register a Protocol HTRegisterProtocol
58: ** -------------------
59: */
60:
61: PUBLIC BOOL HTRegisterProtocol(protocol)
62: HTProtocol * protocol;
63: {
64: if (!protocols) protocols = HTList_new();
65: HTList_addObject(protocols, protocol);
66: return YES;
67: }
68:
69:
70: /* Register all known protocols
71: ** ----------------------------
72: **
73: ** Add to or subtract from this list if you add or remove protocol modules.
74: ** This routine is called the first time the protocol list is needed,
75: ** unless any protocols are already registered, in which case it is not called.
76: ** Therefore the application can override this list.
77: **
78: ** Compiling with NO_INIT prevents all known protocols from being forced
79: ** in at link time.
80: */
81: #ifndef NO_INIT
82: PRIVATE void HTAccessInit NOARGS /* Call me once */
83: {
1.2 timbl 84: extern HTProtocol HTTP, HTFile, HTTelnet, HTTn3270, HTRlogin;
1.1 timbl 85: #ifndef DECNET
1.2 timbl 86: extern HTProtocol HTFTP, HTNews, HTGopher;
1.3 ! timbl 87: #ifdef DIRECT_WAIS
! 88: extern HTProtocol HTWAIS;
! 89: #endif
1.2 timbl 90: HTRegisterProtocol(&HTFTP);
91: HTRegisterProtocol(&HTNews);
92: HTRegisterProtocol(&HTGopher);
1.3 ! timbl 93: #ifdef DIRECT_WAIS
! 94: HTRegisterProtocol(&HTWAIS);
! 95: #endif
1.1 timbl 96: #endif
97:
1.2 timbl 98: HTRegisterProtocol(&HTTP);
99: HTRegisterProtocol(&HTFile);
100: HTRegisterProtocol(&HTTelnet);
101: HTRegisterProtocol(&HTTn3270);
102: HTRegisterProtocol(&HTRlogin);
1.1 timbl 103: }
104: #endif
105:
106:
1.2 timbl 107: /* Find physical name and access protocol
108: ** --------------------------------------
1.1 timbl 109: **
110: **
111: ** On entry,
112: ** addr must point to the fully qualified hypertext reference.
113: ** anchor a pareent anchor with whose address is addr
114: **
115: ** On exit,
1.2 timbl 116: ** returns HT_NO_ACCESS Error has occured.
117: ** HT_OK Success
1.1 timbl 118: **
119: */
1.2 timbl 120: PRIVATE int get_physical ARGS2(
121: CONST char *, addr,
122: HTParentAnchor *, anchor)
1.1 timbl 123: {
124: char * access=0; /* Name of access method */
1.2 timbl 125: char * physical = 0;
1.1 timbl 126:
1.2 timbl 127: #ifndef NO_RULES
128: physical = HTTranslate(addr);
129: if (!physical) {
130: return HT_FORBIDDEN;
131: }
132: HTAnchor_setPhysical(anchor, physical);
133: free(physical); /* free our copy */
134: #else
135: HTAnchor_setPhysical(anchor, addr);
136: #endif
137:
138: access = HTParse(HTAnchor_physical(anchor),
139: "file:", PARSE_ACCESS);
1.1 timbl 140:
141: /* Check whether gateway access has been set up for this
142: */
143: #ifdef USE_GATEWAYS
1.2 timbl 144: {
145: char * gateway_parameter, gateway;
146: gateway_parameter = (char *)malloc(strlen(access)+20);
147: if (gateway_parameter == NULL) outofmem(__FILE__, "HTLoad");
148: strcpy(gateway_parameter, "WWW_");
149: strcat(gateway_parameter, access);
150: strcat(gateway_parameter, "_GATEWAY");
151: gateway = (char *)getenv(gateway_parameter); /* coerce for decstation */
152: free(gateway_parameter);
153: if (gateway) {
154: status = HTLoadHTTP(addr, gateway, anchor,
155: HTOutputFormat ? HTOutputFormat : WWW_PRESENT, sink);
156: HTAlert("Cannot retrieve required information from gateway.");
157: free(access);
158: return status;
159: }
160: }
1.1 timbl 161: #endif
162:
163:
164:
165: /* Search registered protocols to find suitable one
166: */
167: {
168: int i, n;
169: #ifndef NO_INIT
1.2 timbl 170: if (!protocols) HTAccessInit();
1.1 timbl 171: #endif
172: n = HTList_count(protocols);
173: for (i=0; i<n; i++) {
1.2 timbl 174: HTProtocol *p = HTList_objectAt(protocols, i);
175: if (strcmp(p->name, access)==0) {
176: HTAnchor_setProtocol(anchor, p);
177: free(access);
178: return (HT_OK);
1.1 timbl 179: }
180: }
181: }
182:
183: free(access);
1.2 timbl 184: return HT_NO_ACCESS;
1.1 timbl 185: }
186:
187:
188: /* Load a document
189: ** ---------------
190: **
1.2 timbl 191: ** This is an internal routine, which has an address AND a matching
192: ** anchor. (The public routines are called with one OR the other.)
193: **
194: ** On entry,
195: ** addr must point to the fully qualified hypertext reference.
196: ** anchor a pareent anchor with whose address is addr
197: **
198: ** On exit,
199: ** returns <0 Error has occured.
200: ** HT_LOADED Success
201: ** HT_NO_DATA Success, but no document loaded.
202: **
203: */
204: PRIVATE int HTLoad ARGS4(
205: CONST char *, addr,
206: HTParentAnchor *, anchor,
207: HTFormat, format_out,
208: HTStream *, sink)
209: {
210: HTProtocol* p;
211: int status = get_physical(addr, anchor);
212: if (status == HT_FORBIDDEN) {
213: return HTLoadError(sink, 500, "Access forbidden by rule");
214: }
215: if (status < 0) return status; /* Can't resolve or forbidden */
216:
217: p = HTAnchor_protocol(anchor);
218: return (*(p->load))(HTAnchor_physical(anchor),
219: anchor, format_out, sink);
220: }
221:
222:
223: /* Get a save stream for a document
224: ** --------------------------------
225: */
226: PUBLIC HTStream *HTSaveStream ARGS1(HTParentAnchor *, anchor)
227: {
228: HTProtocol * p = HTAnchor_protocol(anchor);
229: if (!p) return NULL;
230:
231: return (*p->saveStream)(anchor);
232:
233: }
234:
235:
236: /* Load a document - with logging etc
237: ** ----------------------------------
238: **
239: ** - Checks or documents already loaded
240: ** - Logs the access
241: ** - Allows stdin filter option
242: ** - Trace ouput and error messages
243: **
1.1 timbl 244: ** On Entry,
245: ** anchor is the node_anchor for the document
246: ** full_address The address of the document to be accessed.
1.2 timbl 247: ** filter if YES, treat stdin as HTML
1.1 timbl 248: **
249: ** On Exit,
250: ** returns YES Success in opening document
251: ** NO Failure
252: **
253: */
254:
1.2 timbl 255: PRIVATE BOOL HTLoadDocument ARGS4(
256: CONST char *, full_address,
257: HTParentAnchor *, anchor,
258: HTFormat, format_out,
259: HTStream*, sink)
1.1 timbl 260:
261: {
262: int status;
263: HText * text;
264:
265: if (TRACE) fprintf (stderr,
266: "HTAccess: loading document %s\n", full_address);
267:
268: if (text=(HText *)HTAnchor_document(anchor)) { /* Already loaded */
269: if (TRACE) fprintf(stderr, "HTAccess: Document already in memory.\n");
270: HText_select(text);
271: return YES;
272: }
273:
1.2 timbl 274: status = HTLoad(full_address, anchor, format_out, sink);
275:
276:
1.1 timbl 277: /* Log the access if necessary
278: */
279: if (logfile) {
280: time_t theTime;
281: time(&theTime);
282: fprintf(logfile, "%24.24s %s %s %s\n",
283: ctime(&theTime),
284: HTClientHost ? HTClientHost : "local",
285: status<0 ? "FAIL" : "GET",
286: full_address);
287: fflush(logfile); /* Actually update it on disk */
288: if (TRACE) fprintf(stderr, "Log: %24.24s %s %s %s\n",
289: ctime(&theTime),
290: HTClientHost ? HTClientHost : "local",
291: status<0 ? "FAIL" : "GET",
292: full_address);
293: }
294:
295:
296: if (status == HT_LOADED) {
297: if (TRACE) {
298: fprintf(stderr, "HTAccess: `%s' has been accessed.\n",
299: full_address);
300: }
301: return YES;
302: }
303:
304: if (status == HT_NO_DATA) {
305: if (TRACE) {
306: fprintf(stderr,
307: "HTAccess: `%s' has been accessed, No data left.\n",
308: full_address);
309: }
310: return NO;
311: }
312:
1.2 timbl 313: if (status<0) { /* Failure in accessing a document */
1.1 timbl 314: #ifdef CURSES
315: user_message("Can't access `%s'", full_address);
316: #else
1.2 timbl 317: fprintf(stderr, "HTAccess: Can't access `%s'\n", full_address);
1.1 timbl 318: #endif
319:
320: return NO;
321: }
322:
323: fprintf(stderr,
1.2 timbl 324: "**** HTAccess: socket or file number returned by obsolete load routine!\n");
1.1 timbl 325: exit(-6996);
326:
1.2 timbl 327: } /* HTLoadDocument */
1.1 timbl 328:
329:
330:
331: /* Load a document from absolute name
332: ** ---------------
333: **
334: ** On Entry,
335: ** addr The absolute address of the document to be accessed.
336: ** filter if YES, treat document as HTML
337: **
338: ** On Exit,
339: ** returns YES Success in opening document
340: ** NO Failure
341: **
342: **
343: */
344:
1.2 timbl 345: PUBLIC BOOL HTLoadAbsolute ARGS1(CONST char *,addr)
346: {
347: return HTLoadDocument( addr,
348: HTAnchor_parent(HTAnchor_findAddress(addr)),
349: HTOutputFormat ? HTOutputFormat : WWW_PRESENT,
350: HTOutputStream);
351: }
352:
353:
354: /* Load a document from absolute name to stream
355: ** --------------------------------------------
356: **
357: ** On Entry,
358: ** addr The absolute address of the document to be accessed.
359: ** sink if non-NULL, send data down this stream
360: **
361: ** On Exit,
362: ** returns YES Success in opening document
363: ** NO Failure
364: **
365: **
366: */
367:
368: PUBLIC BOOL HTLoadToStream ARGS3(
369: CONST char *, addr,
370: BOOL, filter,
371: HTStream *, sink)
1.1 timbl 372: {
1.2 timbl 373: return HTLoadDocument(addr,
1.1 timbl 374: HTAnchor_parent(HTAnchor_findAddress(addr)),
1.2 timbl 375: HTOutputFormat ? HTOutputFormat : WWW_PRESENT,
376: sink);
1.1 timbl 377: }
378:
379:
1.2 timbl 380:
381:
1.1 timbl 382: /* Load a document from relative name
383: ** ---------------
384: **
385: ** On Entry,
1.2 timbl 386: ** relative_name The relative address of the document
387: ** to be accessed.
1.1 timbl 388: **
389: ** On Exit,
390: ** returns YES Success in opening document
391: ** NO Failure
392: **
393: **
394: */
395:
1.2 timbl 396: PUBLIC BOOL HTLoadRelative ARGS2(
397: CONST char *, relative_name,
398: HTParentAnchor *, here)
1.1 timbl 399: {
400: char * full_address = 0;
401: BOOL result;
402: char * mycopy = 0;
403: char * stripped = 0;
404: char * current_address =
1.2 timbl 405: HTAnchor_address((HTAnchor*)here);
1.1 timbl 406:
407: StrAllocCopy(mycopy, relative_name);
408:
409: stripped = HTStrip(mycopy);
410: full_address = HTParse(stripped,
411: current_address,
412: PARSE_ACCESS|PARSE_HOST|PARSE_PATH|PARSE_PUNCTUATION);
1.2 timbl 413: result = HTLoadAbsolute(full_address);
1.1 timbl 414: free(full_address);
415: free(current_address);
416: free(mycopy); /* Memory leak fixed 10/7/92 -- JFG */
417: return result;
418: }
419:
420:
421: /* Load if necessary, and select an anchor
422: ** --------------------------------------
423: **
424: ** On Entry,
425: ** destination The child or parenet anchor to be loaded.
426: **
427: ** On Exit,
428: ** returns YES Success
429: ** NO Failure
430: **
431: */
432:
433: PUBLIC BOOL HTLoadAnchor ARGS1(HTAnchor *,destination)
434: {
435: HTParentAnchor * parent;
436: BOOL loaded = NO;
437: if (!destination) return NO; /* No link */
438:
439: parent = HTAnchor_parent(destination);
440:
441: if (HTAnchor_document(parent) == NULL) { /* If not alread loaded */
442: /* TBL 921202 */
1.2 timbl 443:
1.1 timbl 444: BOOL result;
445: char * address = HTAnchor_address((HTAnchor*) parent);
1.2 timbl 446: result = HTLoadDocument(address, parent,
447: HTOutputFormat ? HTOutputFormat : WWW_PRESENT,
448: HTOutputStream);
1.1 timbl 449: free(address);
450: if (!result) return NO;
451: loaded = YES;
452: }
453:
454: {
455: HText *text = (HText*)HTAnchor_document(parent);
456: if (destination != (HTAnchor *)parent) { /* If child anchor */
457: HText_selectAnchor(text,
458: (HTChildAnchor*)destination); /* Double display? @@ */
459: } else {
460: if (!loaded) HText_select(text);
461: }
462: }
463: return YES;
464:
465: } /* HTLoadAnchor */
466:
467:
468: /* Search
469: ** ------
470: ** Performs a keyword search on word given by the user. Adds the keyword to
471: ** the end of the current address and attempts to open the new address.
472: **
473: ** On Entry,
474: ** *keywords space-separated keyword list or similar search list
1.2 timbl 475: ** here is the anchor the search is to be done on.
1.1 timbl 476: */
477:
1.2 timbl 478: PRIVATE char hex(i)
479: int i;
480: {
481: return i < 10 ? '0'+i : 'A'+ i - 10;
482: }
1.1 timbl 483:
1.2 timbl 484: PUBLIC BOOL HTSearch ARGS2(
485: CONST char *, keywords,
486: HTParentAnchor *, here)
1.1 timbl 487: {
1.2 timbl 488:
489: #define acceptable \
490: "1234567890abcdefghijlkmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ.-_"
491:
492: char *q, *u;
493: CONST char * p, *s, *e; /* Pointers into keywords */
494: char * address = HTAnchor_address((HTAnchor*)here);
1.1 timbl 495: BOOL result;
1.2 timbl 496: char * escaped = malloc(strlen(keywords)*3+1);
497:
498: static CONST BOOL isAcceptable[96] =
499:
500: /* 0 1 2 3 4 5 6 7 8 9 A B C D E F */
501: { 0,0,0,0,0,0,0,0,0,0,1,0,0,1,1,0, /* 2x !"#$%&'()*+,-./ */
502: 1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0, /* 3x 0123456789:;<=>? */
503: 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 4x @ABCDEFGHIJKLMNO */
504: 1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,1, /* 5X PQRSTUVWXYZ[\]^_ */
505: 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 6x `abcdefghijklmno */
506: 1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0 }; /* 7X pqrstuvwxyz{\}~ DEL */
507:
508: if (escaped == NULL) outofmem(__FILE__, "HTSearch");
509:
510:
511: /* Convert spaces to + and hex escape unacceptable characters
512: */
513: for(s=keywords; *s && WHITE(*s); s++) /*scan */ ; /* Skip white space */
514: for(e = s + strlen(s); e>s && WHITE(*(e-1)) ; e--); /* Skip trailers */
515: for(q=escaped, p=s; p<e; p++) { /* scan stripped field */
516: int c = (int)TOASCII(*p);
517: if (WHITE(*p)) {
518: *q++ = '+';
519: } else if (c>=32 && c<=(char)127 && isAcceptable[c-32]) {
520: *q++ = (char)c;
521: } else {
522: *q++ = '%';
523: *q++ = hex(c / 16);
524: *q++ = hex(c % 16);
525: }
526: } /* Loop over string */
1.1 timbl 527:
1.2 timbl 528: *q=0;
529: /* terminate escaped sctring */
530: u=strchr(address, '?'); /* Find old search string */
531: if (u) *u = 0; /* Chop old search off */
1.1 timbl 532:
533: StrAllocCat(address, "?");
1.2 timbl 534: StrAllocCat(address, escaped);
535: free(escaped);
536: result = HTLoadRelative(address, here);
1.1 timbl 537: free(address);
1.2 timbl 538:
1.1 timbl 539: return result;
1.2 timbl 540: }
541:
542:
543: /* Search Given Indexname
544: ** ------
545: ** Performs a keyword search on word given by the user. Adds the keyword to
546: ** the end of the current address and attempts to open the new address.
547: **
548: ** On Entry,
549: ** *keywords space-separated keyword list or similar search list
550: ** *addres is name of object search is to be done on.
551: */
552:
553: PUBLIC BOOL HTSearchAbsolute ARGS2(
554: CONST char *, keywords,
555: CONST char *, indexname)
556: {
557: HTParentAnchor * anchor =
558: (HTParentAnchor*) HTAnchor_findAddress(indexname);
559: return HTSearch(keywords, anchor);
560: }
561:
562:
563: /* Generate the anchor for the home page
564: ** -------------------------------------
565: **
566: ** As it involves file access, this should only be done once
567: ** when the program first runs.
568: ** This is a default algorithm -- browesr don't HAVE to use this.
569: **
570: */
571: PUBLIC HTParentAnchor * HTHomeAnchor NOARGS
572: {
573: char * my_home = (char *)getenv(LOGICAL_DEFAULT);
574: BOOL got_local_default = NO;
575: char * ref;
576: HTParentAnchor * anchor;
1.1 timbl 577:
1.2 timbl 578: #ifdef unix
579: {
580: FILE * fp = fopen(LOCAL_DEFAULT_FILE, "r");
581: if (fp) {
582: fclose(fp);
583: got_local_default = YES;
584: } else {
585: if (TRACE) fprintf(stderr,
586: "HTBrowse: No local default home %s\n",
587: LOCAL_DEFAULT_FILE);
588: }
589: }
590: #endif
591: ref = HTParse( my_home ? my_home :
592: HTClientHost ? REMOTE_ADDRESS :
593: got_local_default ? LOCAL_DEFAULT
594: : LAST_RESORT,
595: LAST_RESORT,
596: PARSE_ACCESS|PARSE_HOST|PARSE_PATH|PARSE_PUNCTUATION);
597: if (my_home) {
598: if (TRACE) fprintf(stderr,
599: "HTAccess: Using custom home page %s i.e. address %s\n",
600: my_home, ref);
601: }
602: anchor = (HTParentAnchor*) HTAnchor_findAddress(ref);
603: free(ref);
604: return anchor;
1.1 timbl 605: }
606:
607:
Webmaster