Annotation of libwww/Library/src/HTAccess.c, revision 1.4
1.1 timbl 1: /* Access Manager HTAccess.c
2: ** ==============
3: **
4: ** Authors
5: ** TBL Tim Berners-Lee timbl@info.cern.ch
1.4 ! timbl 6: ** JFG Jean-Francois Groff jfg@dxcern.cern.ch
1.1 timbl 7: ** DD Denis DeLaRoca (310) 825-4580 <CSP1DWD@mvs.oac.ucla.edu>
8: ** History
9: ** 8 Jun 92 Telnet hopping prohibited as telnet is not secure TBL
10: ** 26 Jun 92 When over DECnet, suppressed FTP, Gopher and News. JFG
11: ** 6 Oct 92 Moved HTClientHost and logfile into here. TBL
12: ** 17 Dec 92 Tn3270 added, bug fix. DD
1.2 timbl 13: ** 4 Feb 93 Access registration, Search escapes bad chars TBL
14: ** PARAMETERS TO HTSEARCH AND HTLOADRELATIVE CHANGED
15: **
16: ** Bugs
17: ** This module assumes that the graphic object is hypertext, as it
18: ** needs to select it when it has been loaded. A superclass needs to be
19: ** defined which accepts select and select_anchor.
1.1 timbl 20: */
21:
22: /* Implements:
23: */
24: #include "HTAccess.h"
25:
26: /* Uses:
27: */
28:
29: #include "HTParse.h"
30: #include "HTUtils.h"
1.4 ! timbl 31: #include "HTML.h" /* SCW */
1.2 timbl 32:
33: #ifndef NO_RULES
34: #include "HTRules.h"
35: #endif
36:
1.1 timbl 37: #include <stdio.h>
38:
1.2 timbl 39: #include "HTList.h"
40: #include "HText.h" /* See bugs above */
41: #include "HTAlert.h"
42:
1.1 timbl 43:
44: /* These flags may be set to modify the operation of this module
45: */
46: PUBLIC char * HTClientHost = 0; /* Name of remote login host if any */
47: PUBLIC FILE * logfile = 0; /* File to which to output one-liners */
48:
1.2 timbl 49: /* To generate other things, play with these:
50: */
51:
52: PUBLIC HTFormat HTOutputFormat = NULL;
53: PUBLIC HTStream* HTOutputStream = NULL; /* For non-interactive, set this */
1.1 timbl 54:
55: PRIVATE HTList * protocols = NULL; /* List of registered protocol descriptors */
56:
57:
58: /* Register a Protocol HTRegisterProtocol
59: ** -------------------
60: */
61:
62: PUBLIC BOOL HTRegisterProtocol(protocol)
63: HTProtocol * protocol;
64: {
65: if (!protocols) protocols = HTList_new();
66: HTList_addObject(protocols, protocol);
67: return YES;
68: }
69:
70:
71: /* Register all known protocols
72: ** ----------------------------
73: **
74: ** Add to or subtract from this list if you add or remove protocol modules.
75: ** This routine is called the first time the protocol list is needed,
76: ** unless any protocols are already registered, in which case it is not called.
77: ** Therefore the application can override this list.
78: **
79: ** Compiling with NO_INIT prevents all known protocols from being forced
80: ** in at link time.
81: */
82: #ifndef NO_INIT
83: PRIVATE void HTAccessInit NOARGS /* Call me once */
84: {
1.2 timbl 85: extern HTProtocol HTTP, HTFile, HTTelnet, HTTn3270, HTRlogin;
1.1 timbl 86: #ifndef DECNET
1.2 timbl 87: extern HTProtocol HTFTP, HTNews, HTGopher;
1.3 timbl 88: #ifdef DIRECT_WAIS
89: extern HTProtocol HTWAIS;
90: #endif
1.2 timbl 91: HTRegisterProtocol(&HTFTP);
92: HTRegisterProtocol(&HTNews);
93: HTRegisterProtocol(&HTGopher);
1.3 timbl 94: #ifdef DIRECT_WAIS
95: HTRegisterProtocol(&HTWAIS);
96: #endif
1.1 timbl 97: #endif
98:
1.2 timbl 99: HTRegisterProtocol(&HTTP);
100: HTRegisterProtocol(&HTFile);
101: HTRegisterProtocol(&HTTelnet);
102: HTRegisterProtocol(&HTTn3270);
103: HTRegisterProtocol(&HTRlogin);
1.1 timbl 104: }
105: #endif
106:
107:
1.2 timbl 108: /* Find physical name and access protocol
109: ** --------------------------------------
1.1 timbl 110: **
111: **
112: ** On entry,
113: ** addr must point to the fully qualified hypertext reference.
114: ** anchor a parent anchor whose address is addr
115: **
116: ** On exit,
1.2 timbl 117: ** returns HT_NO_ACCESS Error has occurred.
118: ** HT_OK Success
1.1 timbl 119: **
120: */
1.2 timbl 121: PRIVATE int get_physical ARGS2(
122: CONST char *, addr,
123: HTParentAnchor *, anchor)
1.1 timbl 124: {
125: char * access=0; /* Name of access method */
1.2 timbl 126: char * physical = 0;
1.1 timbl 127:
1.2 timbl 128: #ifndef NO_RULES
129: physical = HTTranslate(addr);
130: if (!physical) {
131: return HT_FORBIDDEN;
132: }
133: HTAnchor_setPhysical(anchor, physical);
134: free(physical); /* free our copy */
135: #else
136: HTAnchor_setPhysical(anchor, addr);
137: #endif
138:
139: access = HTParse(HTAnchor_physical(anchor),
140: "file:", PARSE_ACCESS);
1.1 timbl 141:
142: /* Check whether gateway access has been set up for this
143: */
144: #ifdef USE_GATEWAYS
1.2 timbl 145: {
146: char * gateway_parameter, gateway;
147: gateway_parameter = (char *)malloc(strlen(access)+20);
148: if (gateway_parameter == NULL) outofmem(__FILE__, "HTLoad");
149: strcpy(gateway_parameter, "WWW_");
150: strcat(gateway_parameter, access);
151: strcat(gateway_parameter, "_GATEWAY");
152: gateway = (char *)getenv(gateway_parameter); /* coerce for decstation */
153: free(gateway_parameter);
154: if (gateway) {
155: status = HTLoadHTTP(addr, gateway, anchor,
156: HTOutputFormat ? HTOutputFormat : WWW_PRESENT, sink);
157: HTAlert("Cannot retrieve required information from gateway.");
158: free(access);
159: return status;
160: }
161: }
1.1 timbl 162: #endif
163:
164:
165:
166: /* Search registered protocols to find suitable one
167: */
168: {
169: int i, n;
170: #ifndef NO_INIT
1.2 timbl 171: if (!protocols) HTAccessInit();
1.1 timbl 172: #endif
173: n = HTList_count(protocols);
174: for (i=0; i<n; i++) {
1.2 timbl 175: HTProtocol *p = HTList_objectAt(protocols, i);
176: if (strcmp(p->name, access)==0) {
177: HTAnchor_setProtocol(anchor, p);
178: free(access);
179: return (HT_OK);
1.1 timbl 180: }
181: }
182: }
183:
184: free(access);
1.2 timbl 185: return HT_NO_ACCESS;
1.1 timbl 186: }
187:
188:
189: /* Load a document
190: ** ---------------
191: **
1.2 timbl 192: ** This is an internal routine, which has an address AND a matching
193: ** anchor. (The public routines are called with one OR the other.)
194: **
195: ** On entry,
196: ** addr must point to the fully qualified hypertext reference.
197: ** anchor a parent anchor whose address is addr
198: **
199: ** On exit,
200: ** returns <0 Error has occurred.
201: ** HT_LOADED Success
202: ** HT_NO_DATA Success, but no document loaded.
203: **
204: */
205: PRIVATE int HTLoad ARGS4(
206: CONST char *, addr,
207: HTParentAnchor *, anchor,
208: HTFormat, format_out,
209: HTStream *, sink)
210: {
211: HTProtocol* p;
212: int status = get_physical(addr, anchor);
213: if (status == HT_FORBIDDEN) {
214: return HTLoadError(sink, 500, "Access forbidden by rule");
215: }
216: if (status < 0) return status; /* Can't resolve or forbidden */
217:
218: p = HTAnchor_protocol(anchor);
219: return (*(p->load))(HTAnchor_physical(anchor),
220: anchor, format_out, sink);
221: }
222:
223:
224: /* Get a save stream for a document
225: ** --------------------------------
226: */
227: PUBLIC HTStream *HTSaveStream ARGS1(HTParentAnchor *, anchor)
228: {
229: HTProtocol * p = HTAnchor_protocol(anchor);
230: if (!p) return NULL;
231:
232: return (*p->saveStream)(anchor);
233:
234: }
235:
236:
237: /* Load a document - with logging etc
238: ** ----------------------------------
239: **
240: ** - Checks for documents already loaded
241: ** - Logs the access
242: ** - Allows stdin filter option
243: ** - Trace output and error messages
244: **
1.1 timbl 245: ** On Entry,
246: ** anchor is the node_anchor for the document
247: ** full_address The address of the document to be accessed.
1.2 timbl 248: ** filter if YES, treat stdin as HTML
1.1 timbl 249: **
250: ** On Exit,
251: ** returns YES Success in opening document
252: ** NO Failure
253: **
254: */
255:
1.2 timbl 256: PRIVATE BOOL HTLoadDocument ARGS4(
257: CONST char *, full_address,
258: HTParentAnchor *, anchor,
259: HTFormat, format_out,
260: HTStream*, sink)
1.1 timbl 261:
262: {
263: int status;
264: HText * text;
265:
266: if (TRACE) fprintf (stderr,
267: "HTAccess: loading document %s\n", full_address);
268:
269: if (text=(HText *)HTAnchor_document(anchor)) { /* Already loaded */
270: if (TRACE) fprintf(stderr, "HTAccess: Document already in memory.\n");
271: HText_select(text);
272: return YES;
273: }
274:
1.2 timbl 275: status = HTLoad(full_address, anchor, format_out, sink);
276:
277:
1.1 timbl 278: /* Log the access if necessary
279: */
280: if (logfile) {
281: time_t theTime;
282: time(&theTime);
283: fprintf(logfile, "%24.24s %s %s %s\n",
284: ctime(&theTime),
285: HTClientHost ? HTClientHost : "local",
286: status<0 ? "FAIL" : "GET",
287: full_address);
288: fflush(logfile); /* Actually update it on disk */
289: if (TRACE) fprintf(stderr, "Log: %24.24s %s %s %s\n",
290: ctime(&theTime),
291: HTClientHost ? HTClientHost : "local",
292: status<0 ? "FAIL" : "GET",
293: full_address);
294: }
295:
296:
297: if (status == HT_LOADED) {
298: if (TRACE) {
299: fprintf(stderr, "HTAccess: `%s' has been accessed.\n",
300: full_address);
301: }
302: return YES;
303: }
304:
305: if (status == HT_NO_DATA) {
306: if (TRACE) {
307: fprintf(stderr,
308: "HTAccess: `%s' has been accessed, No data left.\n",
309: full_address);
310: }
311: return NO;
312: }
313:
1.2 timbl 314: if (status<0) { /* Failure in accessing a document */
1.1 timbl 315: #ifdef CURSES
316: user_message("Can't access `%s'", full_address);
317: #else
1.2 timbl 318: fprintf(stderr, "HTAccess: Can't access `%s'\n", full_address);
1.1 timbl 319: #endif
320:
321: return NO;
322: }
323:
324: fprintf(stderr,
1.2 timbl 325: "**** HTAccess: socket or file number returned by obsolete load routine!\n");
1.1 timbl 326: exit(-6996);
327:
1.2 timbl 328: } /* HTLoadDocument */
1.1 timbl 329:
330:
331:
332: /* Load a document from absolute name
333: ** ---------------
334: **
335: ** On Entry,
336: ** addr The absolute address of the document to be accessed.
337: ** filter if YES, treat document as HTML
338: **
339: ** On Exit,
340: ** returns YES Success in opening document
341: ** NO Failure
342: **
343: **
344: */
345:
1.2 timbl 346: PUBLIC BOOL HTLoadAbsolute ARGS1(CONST char *,addr)
347: {
348: return HTLoadDocument( addr,
349: HTAnchor_parent(HTAnchor_findAddress(addr)),
350: HTOutputFormat ? HTOutputFormat : WWW_PRESENT,
351: HTOutputStream);
352: }
353:
354:
355: /* Load a document from absolute name to stream
356: ** --------------------------------------------
357: **
358: ** On Entry,
359: ** addr The absolute address of the document to be accessed.
360: ** sink if non-NULL, send data down this stream
361: **
362: ** On Exit,
363: ** returns YES Success in opening document
364: ** NO Failure
365: **
366: **
367: */
368:
369: PUBLIC BOOL HTLoadToStream ARGS3(
370: CONST char *, addr,
371: BOOL, filter,
372: HTStream *, sink)
1.1 timbl 373: {
1.2 timbl 374: return HTLoadDocument(addr,
1.1 timbl 375: HTAnchor_parent(HTAnchor_findAddress(addr)),
1.2 timbl 376: HTOutputFormat ? HTOutputFormat : WWW_PRESENT,
377: sink);
1.1 timbl 378: }
379:
380:
1.2 timbl 381:
382:
1.1 timbl 383: /* Load a document from relative name
384: ** ---------------
385: **
386: ** On Entry,
1.2 timbl 387: ** relative_name The relative address of the document
388: ** to be accessed.
1.1 timbl 389: **
390: ** On Exit,
391: ** returns YES Success in opening document
392: ** NO Failure
393: **
394: **
395: */
396:
1.2 timbl 397: PUBLIC BOOL HTLoadRelative ARGS2(
398: CONST char *, relative_name,
399: HTParentAnchor *, here)
1.1 timbl 400: {
401: char * full_address = 0;
402: BOOL result;
403: char * mycopy = 0;
404: char * stripped = 0;
405: char * current_address =
1.2 timbl 406: HTAnchor_address((HTAnchor*)here);
1.1 timbl 407:
408: StrAllocCopy(mycopy, relative_name);
409:
410: stripped = HTStrip(mycopy);
411: full_address = HTParse(stripped,
412: current_address,
413: PARSE_ACCESS|PARSE_HOST|PARSE_PATH|PARSE_PUNCTUATION);
1.2 timbl 414: result = HTLoadAbsolute(full_address);
1.1 timbl 415: free(full_address);
416: free(current_address);
417: free(mycopy); /* Memory leak fixed 10/7/92 -- JFG */
418: return result;
419: }
420:
421:
422: /* Load if necessary, and select an anchor
423: ** --------------------------------------
424: **
425: ** On Entry,
426: ** destination The child or parent anchor to be loaded.
427: **
428: ** On Exit,
429: ** returns YES Success
430: ** NO Failure
431: **
432: */
433:
434: PUBLIC BOOL HTLoadAnchor ARGS1(HTAnchor *,destination)
435: {
436: HTParentAnchor * parent;
437: BOOL loaded = NO;
438: if (!destination) return NO; /* No link */
439:
440: parent = HTAnchor_parent(destination);
441:
442: if (HTAnchor_document(parent) == NULL) { /* If not alread loaded */
443: /* TBL 921202 */
1.2 timbl 444:
1.1 timbl 445: BOOL result;
446: char * address = HTAnchor_address((HTAnchor*) parent);
1.2 timbl 447: result = HTLoadDocument(address, parent,
448: HTOutputFormat ? HTOutputFormat : WWW_PRESENT,
449: HTOutputStream);
1.1 timbl 450: free(address);
451: if (!result) return NO;
452: loaded = YES;
453: }
454:
455: {
456: HText *text = (HText*)HTAnchor_document(parent);
457: if (destination != (HTAnchor *)parent) { /* If child anchor */
458: HText_selectAnchor(text,
459: (HTChildAnchor*)destination); /* Double display? @@ */
460: } else {
461: if (!loaded) HText_select(text);
462: }
463: }
464: return YES;
465:
466: } /* HTLoadAnchor */
467:
468:
469: /* Search
470: ** ------
471: ** Performs a keyword search on word given by the user. Adds the keyword to
472: ** the end of the current address and attempts to open the new address.
473: **
474: ** On Entry,
475: ** *keywords space-separated keyword list or similar search list
1.2 timbl 476: ** here is anchor search is to be done on.
1.1 timbl 477: */
478:
1.2 timbl 479: PRIVATE char hex(i)
480: int i;
481: {
482: return i < 10 ? '0'+i : 'A'+ i - 10;
483: }
1.1 timbl 484:
1.2 timbl 485: PUBLIC BOOL HTSearch ARGS2(
486: CONST char *, keywords,
487: HTParentAnchor *, here)
1.1 timbl 488: {
1.2 timbl 489:
490: #define acceptable \
491: "1234567890abcdefghijlkmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ.-_"
492:
493: char *q, *u;
494: CONST char * p, *s, *e; /* Pointers into keywords */
495: char * address = HTAnchor_address((HTAnchor*)here);
1.1 timbl 496: BOOL result;
1.2 timbl 497: char * escaped = malloc(strlen(keywords)*3+1);
498:
499: static CONST BOOL isAcceptable[96] =
500:
501: /* 0 1 2 3 4 5 6 7 8 9 A B C D E F */
502: { 0,0,0,0,0,0,0,0,0,0,1,0,0,1,1,0, /* 2x !"#$%&'()*+,-./ */
503: 1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0, /* 3x 0123456789:;<=>? */
504: 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 4x @ABCDEFGHIJKLMNO */
505: 1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,1, /* 5X PQRSTUVWXYZ[\]^_ */
506: 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 6x `abcdefghijklmno */
507: 1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0 }; /* 7X pqrstuvwxyz{\}~ DEL */
508:
509: if (escaped == NULL) outofmem(__FILE__, "HTSearch");
510:
511:
512: /* Convert spaces to + and hex escape unacceptable characters
513: */
514: for(s=keywords; *s && WHITE(*s); s++) /*scan */ ; /* Skip white space */
515: for(e = s + strlen(s); e>s && WHITE(*(e-1)) ; e--); /* Skip trailers */
516: for(q=escaped, p=s; p<e; p++) { /* scan stripped field */
517: int c = (int)TOASCII(*p);
518: if (WHITE(*p)) {
519: *q++ = '+';
520: } else if (c>=32 && c<=(char)127 && isAcceptable[c-32]) {
521: *q++ = (char)c;
522: } else {
523: *q++ = '%';
524: *q++ = hex(c / 16);
525: *q++ = hex(c % 16);
526: }
527: } /* Loop over string */
1.1 timbl 528:
1.2 timbl 529: *q=0;
530: /* terminate escaped sctring */
531: u=strchr(address, '?'); /* Find old search string */
532: if (u) *u = 0; /* Chop old search off */
1.1 timbl 533:
534: StrAllocCat(address, "?");
1.2 timbl 535: StrAllocCat(address, escaped);
536: free(escaped);
537: result = HTLoadRelative(address, here);
1.1 timbl 538: free(address);
1.2 timbl 539:
1.1 timbl 540: return result;
1.2 timbl 541: }
542:
543:
544: /* Search Given Indexname
545: ** ------
546: ** Performs a keyword search on word given by the user. Adds the keyword to
547: ** the end of the current address and attempts to open the new address.
548: **
549: ** On Entry,
550: ** *keywords space-separated keyword list or similar search list
551: ** *indexname is name of object search is to be done on.
552: */
553:
554: PUBLIC BOOL HTSearchAbsolute ARGS2(
555: CONST char *, keywords,
556: CONST char *, indexname)
557: {
558: HTParentAnchor * anchor =
559: (HTParentAnchor*) HTAnchor_findAddress(indexname);
560: return HTSearch(keywords, anchor);
561: }
562:
563:
564: /* Generate the anchor for the home page
565: ** -------------------------------------
566: **
567: ** As it involves file access, this should only be done once
568: ** when the program first runs.
569: ** This is a default algorithm -- browsers don't HAVE to use this.
570: **
571: */
572: PUBLIC HTParentAnchor * HTHomeAnchor NOARGS
573: {
574: char * my_home = (char *)getenv(LOGICAL_DEFAULT);
575: BOOL got_local_default = NO;
576: char * ref;
577: HTParentAnchor * anchor;
1.1 timbl 578:
1.2 timbl 579: #ifdef unix
580: {
581: FILE * fp = fopen(LOCAL_DEFAULT_FILE, "r");
582: if (fp) {
583: fclose(fp);
584: got_local_default = YES;
585: } else {
586: if (TRACE) fprintf(stderr,
587: "HTBrowse: No local default home %s\n",
588: LOCAL_DEFAULT_FILE);
589: }
590: }
591: #endif
592: ref = HTParse( my_home ? my_home :
593: HTClientHost ? REMOTE_ADDRESS :
594: got_local_default ? LOCAL_DEFAULT
595: : LAST_RESORT,
596: LAST_RESORT,
597: PARSE_ACCESS|PARSE_HOST|PARSE_PATH|PARSE_PUNCTUATION);
598: if (my_home) {
599: if (TRACE) fprintf(stderr,
600: "HTAccess: Using custom home page %s i.e. address %s\n",
601: my_home, ref);
602: }
603: anchor = (HTParentAnchor*) HTAnchor_findAddress(ref);
604: free(ref);
605: return anchor;
1.1 timbl 606: }
607:
608:
Webmaster