Annotation of libwww/Library/src/HTAccess.c, revision 1.7
1.1 timbl 1: /* Access Manager HTAccess.c
2: ** ==============
3: **
4: ** Authors
5: ** TBL Tim Berners-Lee timbl@info.cern.ch
1.4 timbl 6: ** JFG Jean-Francois Groff jfg@dxcern.cern.ch
1.1 timbl 7: ** DD Denis DeLaRoca (310) 825-4580 <CSP1DWD@mvs.oac.ucla.edu>
8: ** History
9: ** 8 Jun 92 Telnet hopping prohibited as telnet is not secure TBL
10: ** 26 Jun 92 When over DECnet, suppressed FTP, Gopher and News. JFG
11: ** 6 Oct 92 Moved HTClientHost and logfile into here. TBL
12: ** 17 Dec 92 Tn3270 added, bug fix. DD
1.2 timbl 13: ** 4 Feb 93 Access registration, Search escapes bad chars TBL
14: ** PARAMETERS TO HTSEARCH AND HTLOADRELATIVE CHANGED
15: **
16: ** Bugs
17: ** This module assumes that the graphic object is hypertext, as it
18: ** needs to select it when it has been loaded. A superclass needs to be
19: ** defined which accepts select and select_anchor.
1.1 timbl 20: */
21:
22: /* Implements:
23: */
24: #include "HTAccess.h"
25:
26: /* Uses:
27: */
28:
29: #include "HTParse.h"
30: #include "HTUtils.h"
1.4 timbl 31: #include "HTML.h" /* SCW */
1.2 timbl 32:
33: #ifndef NO_RULES
34: #include "HTRules.h"
35: #endif
36:
1.1 timbl 37: #include <stdio.h>
38:
1.2 timbl 39: #include "HTList.h"
40: #include "HText.h" /* See bugs above */
41: #include "HTAlert.h"
42:
1.1 timbl 43:
44: /* These flags may be set to modify the operation of this module
45: */
46: PUBLIC char * HTClientHost = 0; /* Name of remote login host if any */
47: PUBLIC FILE * logfile = 0; /* File to which to output one-liners */
48:
1.2 timbl 49: /* To generate other things, play with these:
50: */
51:
52: PUBLIC HTFormat HTOutputFormat = NULL;
53: PUBLIC HTStream* HTOutputStream = NULL; /* For non-interactive, set this */
1.1 timbl 54:
55: PRIVATE HTList * protocols = NULL; /* List of registered protocol descriptors */
56:
57:
58: /* Register a Protocol HTRegisterProtocol
59: ** -------------------
60: */
61:
62: PUBLIC BOOL HTRegisterProtocol(protocol)
63: HTProtocol * protocol;
64: {
65: if (!protocols) protocols = HTList_new();
66: HTList_addObject(protocols, protocol);
67: return YES;
68: }
69:
70:
71: /* Register all known protocols
72: ** ----------------------------
73: **
74: ** Add to or subtract from this list if you add or remove protocol modules.
75: ** This routine is called the first time the protocol list is needed,
76: ** unless any protocols are already registered, in which case it is not called.
77: ** Therefore the application can override this list.
78: **
79: ** Compiling with NO_INIT prevents all known protocols from being forced
80: ** in at link time.
81: */
82: #ifndef NO_INIT
83: PRIVATE void HTAccessInit NOARGS /* Call me once */
84: {
1.2 timbl 85: extern HTProtocol HTTP, HTFile, HTTelnet, HTTn3270, HTRlogin;
1.1 timbl 86: #ifndef DECNET
1.2 timbl 87: extern HTProtocol HTFTP, HTNews, HTGopher;
1.3 timbl 88: #ifdef DIRECT_WAIS
89: extern HTProtocol HTWAIS;
90: #endif
1.2 timbl 91: HTRegisterProtocol(&HTFTP);
92: HTRegisterProtocol(&HTNews);
93: HTRegisterProtocol(&HTGopher);
1.3 timbl 94: #ifdef DIRECT_WAIS
95: HTRegisterProtocol(&HTWAIS);
96: #endif
1.1 timbl 97: #endif
98:
1.2 timbl 99: HTRegisterProtocol(&HTTP);
100: HTRegisterProtocol(&HTFile);
101: HTRegisterProtocol(&HTTelnet);
102: HTRegisterProtocol(&HTTn3270);
103: HTRegisterProtocol(&HTRlogin);
1.1 timbl 104: }
105: #endif
106:
107:
1.2 timbl 108: /* Find physical name and access protocol
109: ** --------------------------------------
1.1 timbl 110: **
111: **
112: ** On entry,
113: ** addr must point to the fully qualified hypertext reference.
114: ** anchor a pareent anchor with whose address is addr
115: **
116: ** On exit,
1.2 timbl 117: ** returns HT_NO_ACCESS Error has occured.
118: ** HT_OK Success
1.1 timbl 119: **
120: */
1.2 timbl 121: PRIVATE int get_physical ARGS2(
122: CONST char *, addr,
123: HTParentAnchor *, anchor)
1.1 timbl 124: {
125: char * access=0; /* Name of access method */
1.2 timbl 126: char * physical = 0;
1.1 timbl 127:
1.2 timbl 128: #ifndef NO_RULES
129: physical = HTTranslate(addr);
130: if (!physical) {
131: return HT_FORBIDDEN;
132: }
133: HTAnchor_setPhysical(anchor, physical);
134: free(physical); /* free our copy */
135: #else
136: HTAnchor_setPhysical(anchor, addr);
137: #endif
138:
139: access = HTParse(HTAnchor_physical(anchor),
140: "file:", PARSE_ACCESS);
1.1 timbl 141:
142: /* Check whether gateway access has been set up for this
143: */
144: #ifdef USE_GATEWAYS
1.2 timbl 145: {
146: char * gateway_parameter, gateway;
147: gateway_parameter = (char *)malloc(strlen(access)+20);
148: if (gateway_parameter == NULL) outofmem(__FILE__, "HTLoad");
149: strcpy(gateway_parameter, "WWW_");
150: strcat(gateway_parameter, access);
151: strcat(gateway_parameter, "_GATEWAY");
152: gateway = (char *)getenv(gateway_parameter); /* coerce for decstation */
153: free(gateway_parameter);
154: if (gateway) {
155: status = HTLoadHTTP(addr, gateway, anchor,
156: HTOutputFormat ? HTOutputFormat : WWW_PRESENT, sink);
157: HTAlert("Cannot retrieve required information from gateway.");
158: free(access);
159: return status;
160: }
161: }
1.1 timbl 162: #endif
163:
164:
165:
166: /* Search registered protocols to find suitable one
167: */
168: {
169: int i, n;
170: #ifndef NO_INIT
1.2 timbl 171: if (!protocols) HTAccessInit();
1.1 timbl 172: #endif
173: n = HTList_count(protocols);
174: for (i=0; i<n; i++) {
1.2 timbl 175: HTProtocol *p = HTList_objectAt(protocols, i);
176: if (strcmp(p->name, access)==0) {
177: HTAnchor_setProtocol(anchor, p);
178: free(access);
179: return (HT_OK);
1.1 timbl 180: }
181: }
182: }
183:
184: free(access);
1.2 timbl 185: return HT_NO_ACCESS;
1.1 timbl 186: }
187:
188:
189: /* Load a document
190: ** ---------------
191: **
1.2 timbl 192: ** This is an internal routine, which has an address AND a matching
193: ** anchor. (The public routines are called with one OR the other.)
194: **
195: ** On entry,
196: ** addr must point to the fully qualified hypertext reference.
197: ** anchor a pareent anchor with whose address is addr
198: **
199: ** On exit,
200: ** returns <0 Error has occured.
201: ** HT_LOADED Success
202: ** HT_NO_DATA Success, but no document loaded.
203: **
204: */
205: PRIVATE int HTLoad ARGS4(
206: CONST char *, addr,
207: HTParentAnchor *, anchor,
208: HTFormat, format_out,
209: HTStream *, sink)
210: {
211: HTProtocol* p;
212: int status = get_physical(addr, anchor);
213: if (status == HT_FORBIDDEN) {
214: return HTLoadError(sink, 500, "Access forbidden by rule");
215: }
216: if (status < 0) return status; /* Can't resolve or forbidden */
217:
218: p = HTAnchor_protocol(anchor);
219: return (*(p->load))(HTAnchor_physical(anchor),
220: anchor, format_out, sink);
221: }
222:
223:
224: /* Get a save stream for a document
225: ** --------------------------------
226: */
227: PUBLIC HTStream *HTSaveStream ARGS1(HTParentAnchor *, anchor)
228: {
229: HTProtocol * p = HTAnchor_protocol(anchor);
230: if (!p) return NULL;
231:
232: return (*p->saveStream)(anchor);
233:
234: }
235:
236:
237: /* Load a document - with logging etc
238: ** ----------------------------------
239: **
240: ** - Checks or documents already loaded
241: ** - Logs the access
242: ** - Allows stdin filter option
243: ** - Trace ouput and error messages
244: **
1.1 timbl 245: ** On Entry,
246: ** anchor is the node_anchor for the document
247: ** full_address The address of the document to be accessed.
1.2 timbl 248: ** filter if YES, treat stdin as HTML
1.1 timbl 249: **
250: ** On Exit,
251: ** returns YES Success in opening document
252: ** NO Failure
253: **
254: */
255:
1.2 timbl 256: PRIVATE BOOL HTLoadDocument ARGS4(
257: CONST char *, full_address,
258: HTParentAnchor *, anchor,
259: HTFormat, format_out,
260: HTStream*, sink)
1.1 timbl 261:
262: {
263: int status;
264: HText * text;
265:
266: if (TRACE) fprintf (stderr,
267: "HTAccess: loading document %s\n", full_address);
268:
269: if (text=(HText *)HTAnchor_document(anchor)) { /* Already loaded */
270: if (TRACE) fprintf(stderr, "HTAccess: Document already in memory.\n");
271: HText_select(text);
272: return YES;
273: }
274:
1.2 timbl 275: status = HTLoad(full_address, anchor, format_out, sink);
276:
277:
1.1 timbl 278: /* Log the access if necessary
279: */
280: if (logfile) {
281: time_t theTime;
282: time(&theTime);
283: fprintf(logfile, "%24.24s %s %s %s\n",
284: ctime(&theTime),
285: HTClientHost ? HTClientHost : "local",
286: status<0 ? "FAIL" : "GET",
287: full_address);
288: fflush(logfile); /* Actually update it on disk */
289: if (TRACE) fprintf(stderr, "Log: %24.24s %s %s %s\n",
290: ctime(&theTime),
291: HTClientHost ? HTClientHost : "local",
292: status<0 ? "FAIL" : "GET",
293: full_address);
294: }
295:
296:
297: if (status == HT_LOADED) {
298: if (TRACE) {
299: fprintf(stderr, "HTAccess: `%s' has been accessed.\n",
300: full_address);
301: }
302: return YES;
303: }
304:
305: if (status == HT_NO_DATA) {
306: if (TRACE) {
307: fprintf(stderr,
308: "HTAccess: `%s' has been accessed, No data left.\n",
309: full_address);
310: }
311: return NO;
312: }
313:
1.2 timbl 314: if (status<0) { /* Failure in accessing a document */
1.1 timbl 315: #ifdef CURSES
316: user_message("Can't access `%s'", full_address);
317: #else
1.5 timbl 318: if (TRACE) fprintf(stderr,
319: "HTAccess: Can't access `%s'\n", full_address);
1.1 timbl 320: #endif
1.6 timbl 321: HTLoadError(sink, 500, "Unable to access document.");
1.1 timbl 322: return NO;
323: }
324:
325: fprintf(stderr,
1.2 timbl 326: "**** HTAccess: socket or file number returned by obsolete load routine!\n");
1.7 ! secret 327: { char c = *(char*)0; } /* crash */
1.1 timbl 328: exit(-6996);
329:
1.2 timbl 330: } /* HTLoadDocument */
1.1 timbl 331:
332:
333:
334: /* Load a document from absolute name
335: ** ---------------
336: **
337: ** On Entry,
338: ** addr The absolute address of the document to be accessed.
339: ** filter if YES, treat document as HTML
340: **
341: ** On Exit,
342: ** returns YES Success in opening document
343: ** NO Failure
344: **
345: **
346: */
347:
1.2 timbl 348: PUBLIC BOOL HTLoadAbsolute ARGS1(CONST char *,addr)
349: {
350: return HTLoadDocument( addr,
351: HTAnchor_parent(HTAnchor_findAddress(addr)),
352: HTOutputFormat ? HTOutputFormat : WWW_PRESENT,
353: HTOutputStream);
354: }
355:
356:
357: /* Load a document from absolute name to stream
358: ** --------------------------------------------
359: **
360: ** On Entry,
361: ** addr The absolute address of the document to be accessed.
362: ** sink if non-NULL, send data down this stream
363: **
364: ** On Exit,
365: ** returns YES Success in opening document
366: ** NO Failure
367: **
368: **
369: */
370:
371: PUBLIC BOOL HTLoadToStream ARGS3(
372: CONST char *, addr,
373: BOOL, filter,
374: HTStream *, sink)
1.1 timbl 375: {
1.2 timbl 376: return HTLoadDocument(addr,
1.1 timbl 377: HTAnchor_parent(HTAnchor_findAddress(addr)),
1.2 timbl 378: HTOutputFormat ? HTOutputFormat : WWW_PRESENT,
379: sink);
1.1 timbl 380: }
381:
382:
1.2 timbl 383:
384:
1.1 timbl 385: /* Load a document from relative name
386: ** ---------------
387: **
388: ** On Entry,
1.2 timbl 389: ** relative_name The relative address of the document
390: ** to be accessed.
1.1 timbl 391: **
392: ** On Exit,
393: ** returns YES Success in opening document
394: ** NO Failure
395: **
396: **
397: */
398:
1.2 timbl 399: PUBLIC BOOL HTLoadRelative ARGS2(
400: CONST char *, relative_name,
401: HTParentAnchor *, here)
1.1 timbl 402: {
403: char * full_address = 0;
404: BOOL result;
405: char * mycopy = 0;
406: char * stripped = 0;
407: char * current_address =
1.2 timbl 408: HTAnchor_address((HTAnchor*)here);
1.1 timbl 409:
410: StrAllocCopy(mycopy, relative_name);
411:
412: stripped = HTStrip(mycopy);
413: full_address = HTParse(stripped,
414: current_address,
415: PARSE_ACCESS|PARSE_HOST|PARSE_PATH|PARSE_PUNCTUATION);
1.2 timbl 416: result = HTLoadAbsolute(full_address);
1.1 timbl 417: free(full_address);
418: free(current_address);
419: free(mycopy); /* Memory leak fixed 10/7/92 -- JFG */
420: return result;
421: }
422:
423:
424: /* Load if necessary, and select an anchor
425: ** --------------------------------------
426: **
427: ** On Entry,
428: ** destination The child or parenet anchor to be loaded.
429: **
430: ** On Exit,
431: ** returns YES Success
432: ** NO Failure
433: **
434: */
435:
436: PUBLIC BOOL HTLoadAnchor ARGS1(HTAnchor *,destination)
437: {
438: HTParentAnchor * parent;
439: BOOL loaded = NO;
440: if (!destination) return NO; /* No link */
441:
442: parent = HTAnchor_parent(destination);
443:
444: if (HTAnchor_document(parent) == NULL) { /* If not alread loaded */
445: /* TBL 921202 */
1.2 timbl 446:
1.1 timbl 447: BOOL result;
448: char * address = HTAnchor_address((HTAnchor*) parent);
1.2 timbl 449: result = HTLoadDocument(address, parent,
450: HTOutputFormat ? HTOutputFormat : WWW_PRESENT,
451: HTOutputStream);
1.1 timbl 452: free(address);
453: if (!result) return NO;
454: loaded = YES;
455: }
456:
457: {
458: HText *text = (HText*)HTAnchor_document(parent);
459: if (destination != (HTAnchor *)parent) { /* If child anchor */
460: HText_selectAnchor(text,
461: (HTChildAnchor*)destination); /* Double display? @@ */
462: } else {
463: if (!loaded) HText_select(text);
464: }
465: }
466: return YES;
467:
468: } /* HTLoadAnchor */
469:
470:
471: /* Search
472: ** ------
473: ** Performs a keyword search on word given by the user. Adds the keyword to
474: ** the end of the current address and attempts to open the new address.
475: **
476: ** On Entry,
477: ** *keywords space-separated keyword list or similar search list
1.2 timbl 478: ** here is anchor search is to be done on.
1.1 timbl 479: */
480:
1.2 timbl 481: PRIVATE char hex(i)
482: int i;
483: {
484: return i < 10 ? '0'+i : 'A'+ i - 10;
485: }
1.1 timbl 486:
1.2 timbl 487: PUBLIC BOOL HTSearch ARGS2(
488: CONST char *, keywords,
489: HTParentAnchor *, here)
1.1 timbl 490: {
1.2 timbl 491:
492: #define acceptable \
493: "1234567890abcdefghijlkmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ.-_"
494:
495: char *q, *u;
496: CONST char * p, *s, *e; /* Pointers into keywords */
497: char * address = HTAnchor_address((HTAnchor*)here);
1.1 timbl 498: BOOL result;
1.2 timbl 499: char * escaped = malloc(strlen(keywords)*3+1);
500:
501: static CONST BOOL isAcceptable[96] =
502:
503: /* 0 1 2 3 4 5 6 7 8 9 A B C D E F */
504: { 0,0,0,0,0,0,0,0,0,0,1,0,0,1,1,0, /* 2x !"#$%&'()*+,-./ */
505: 1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0, /* 3x 0123456789:;<=>? */
506: 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 4x @ABCDEFGHIJKLMNO */
507: 1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,1, /* 5X PQRSTUVWXYZ[\]^_ */
508: 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 6x `abcdefghijklmno */
509: 1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0 }; /* 7X pqrstuvwxyz{\}~ DEL */
510:
511: if (escaped == NULL) outofmem(__FILE__, "HTSearch");
512:
513:
514: /* Convert spaces to + and hex escape unacceptable characters
515: */
516: for(s=keywords; *s && WHITE(*s); s++) /*scan */ ; /* Skip white space */
517: for(e = s + strlen(s); e>s && WHITE(*(e-1)) ; e--); /* Skip trailers */
518: for(q=escaped, p=s; p<e; p++) { /* scan stripped field */
519: int c = (int)TOASCII(*p);
520: if (WHITE(*p)) {
521: *q++ = '+';
522: } else if (c>=32 && c<=(char)127 && isAcceptable[c-32]) {
523: *q++ = (char)c;
524: } else {
525: *q++ = '%';
526: *q++ = hex(c / 16);
527: *q++ = hex(c % 16);
528: }
529: } /* Loop over string */
1.1 timbl 530:
1.2 timbl 531: *q=0;
532: /* terminate escaped sctring */
533: u=strchr(address, '?'); /* Find old search string */
534: if (u) *u = 0; /* Chop old search off */
1.1 timbl 535:
536: StrAllocCat(address, "?");
1.2 timbl 537: StrAllocCat(address, escaped);
538: free(escaped);
539: result = HTLoadRelative(address, here);
1.1 timbl 540: free(address);
1.2 timbl 541:
1.1 timbl 542: return result;
1.2 timbl 543: }
544:
545:
546: /* Search Given Indexname
547: ** ------
548: ** Performs a keyword search on word given by the user. Adds the keyword to
549: ** the end of the current address and attempts to open the new address.
550: **
551: ** On Entry,
552: ** *keywords space-separated keyword list or similar search list
553: ** *addres is name of object search is to be done on.
554: */
555:
556: PUBLIC BOOL HTSearchAbsolute ARGS2(
557: CONST char *, keywords,
558: CONST char *, indexname)
559: {
560: HTParentAnchor * anchor =
561: (HTParentAnchor*) HTAnchor_findAddress(indexname);
562: return HTSearch(keywords, anchor);
563: }
564:
565:
566: /* Generate the anchor for the home page
567: ** -------------------------------------
568: **
569: ** As it involves file access, this should only be done once
570: ** when the program first runs.
571: ** This is a default algorithm -- browesr don't HAVE to use this.
572: **
573: */
574: PUBLIC HTParentAnchor * HTHomeAnchor NOARGS
575: {
576: char * my_home = (char *)getenv(LOGICAL_DEFAULT);
577: BOOL got_local_default = NO;
578: char * ref;
579: HTParentAnchor * anchor;
1.1 timbl 580:
1.2 timbl 581: #ifdef unix
582: {
583: FILE * fp = fopen(LOCAL_DEFAULT_FILE, "r");
584: if (fp) {
585: fclose(fp);
586: got_local_default = YES;
587: } else {
588: if (TRACE) fprintf(stderr,
589: "HTBrowse: No local default home %s\n",
590: LOCAL_DEFAULT_FILE);
591: }
592: }
593: #endif
594: ref = HTParse( my_home ? my_home :
595: HTClientHost ? REMOTE_ADDRESS :
596: got_local_default ? LOCAL_DEFAULT
597: : LAST_RESORT,
598: LAST_RESORT,
599: PARSE_ACCESS|PARSE_HOST|PARSE_PATH|PARSE_PUNCTUATION);
600: if (my_home) {
601: if (TRACE) fprintf(stderr,
602: "HTAccess: Using custom home page %s i.e. address %s\n",
603: my_home, ref);
604: }
605: anchor = (HTParentAnchor*) HTAnchor_findAddress(ref);
606: free(ref);
607: return anchor;
1.1 timbl 608: }
609:
610:
Webmaster