Annotation of libwww/Library/src/HTAccess.c, revision 1.6
/*		Access Manager					HTAccess.c
**		==============
**
** Authors
**	TBL	Tim Berners-Lee timbl@info.cern.ch
**	JFG	Jean-Francois Groff jfg@dxcern.cern.ch
**	DD	Denis DeLaRoca (310) 825-4580 <CSP1DWD@mvs.oac.ucla.edu>
** History
**	 8 Jun 92 Telnet hopping prohibited as telnet is not secure	TBL
**	26 Jun 92 When over DECnet, suppressed FTP, Gopher and News.	JFG
**	 6 Oct 92 Moved HTClientHost and logfile into here.		TBL
**	17 Dec 92 Tn3270 added, bug fix.				DD
**	 4 Feb 93 Access registration, Search escapes bad chars		TBL
**		  PARAMETERS TO HTSEARCH AND HTLOADRELATIVE CHANGED
**
** Bugs
**	This module assumes that the graphic object is hypertext, as it
**	needs to select it when it has been loaded.  A superclass needs to be
**	defined which accepts select and select_anchor.
*/
21:
22: /* Implements:
23: */
24: #include "HTAccess.h"
25:
26: /* Uses:
27: */
28:
29: #include "HTParse.h"
30: #include "HTUtils.h"
1.4 timbl 31: #include "HTML.h" /* SCW */
1.2 timbl 32:
33: #ifndef NO_RULES
34: #include "HTRules.h"
35: #endif
36:
1.1 timbl 37: #include <stdio.h>
38:
1.2 timbl 39: #include "HTList.h"
40: #include "HText.h" /* See bugs above */
41: #include "HTAlert.h"
42:
1.1 timbl 43:
44: /* These flags may be set to modify the operation of this module
45: */
46: PUBLIC char * HTClientHost = 0; /* Name of remote login host if any */
47: PUBLIC FILE * logfile = 0; /* File to which to output one-liners */
48:
1.2 timbl 49: /* To generate other things, play with these:
50: */
51:
52: PUBLIC HTFormat HTOutputFormat = NULL;
53: PUBLIC HTStream* HTOutputStream = NULL; /* For non-interactive, set this */
1.1 timbl 54:
55: PRIVATE HTList * protocols = NULL; /* List of registered protocol descriptors */
56:
57:
58: /* Register a Protocol HTRegisterProtocol
59: ** -------------------
60: */
61:
62: PUBLIC BOOL HTRegisterProtocol(protocol)
63: HTProtocol * protocol;
64: {
65: if (!protocols) protocols = HTList_new();
66: HTList_addObject(protocols, protocol);
67: return YES;
68: }
69:
70:
71: /* Register all known protocols
72: ** ----------------------------
73: **
74: ** Add to or subtract from this list if you add or remove protocol modules.
75: ** This routine is called the first time the protocol list is needed,
76: ** unless any protocols are already registered, in which case it is not called.
77: ** Therefore the application can override this list.
78: **
79: ** Compiling with NO_INIT prevents all known protocols from being forced
80: ** in at link time.
81: */
82: #ifndef NO_INIT
83: PRIVATE void HTAccessInit NOARGS /* Call me once */
84: {
1.2 timbl 85: extern HTProtocol HTTP, HTFile, HTTelnet, HTTn3270, HTRlogin;
1.1 timbl 86: #ifndef DECNET
1.2 timbl 87: extern HTProtocol HTFTP, HTNews, HTGopher;
1.3 timbl 88: #ifdef DIRECT_WAIS
89: extern HTProtocol HTWAIS;
90: #endif
1.2 timbl 91: HTRegisterProtocol(&HTFTP);
92: HTRegisterProtocol(&HTNews);
93: HTRegisterProtocol(&HTGopher);
1.3 timbl 94: #ifdef DIRECT_WAIS
95: HTRegisterProtocol(&HTWAIS);
96: #endif
1.1 timbl 97: #endif
98:
1.2 timbl 99: HTRegisterProtocol(&HTTP);
100: HTRegisterProtocol(&HTFile);
101: HTRegisterProtocol(&HTTelnet);
102: HTRegisterProtocol(&HTTn3270);
103: HTRegisterProtocol(&HTRlogin);
1.1 timbl 104: }
105: #endif
106:
107:
1.2 timbl 108: /* Find physical name and access protocol
109: ** --------------------------------------
1.1 timbl 110: **
111: **
112: ** On entry,
113: ** addr must point to the fully qualified hypertext reference.
114: ** anchor a pareent anchor with whose address is addr
115: **
116: ** On exit,
1.2 timbl 117: ** returns HT_NO_ACCESS Error has occured.
118: ** HT_OK Success
1.1 timbl 119: **
120: */
1.2 timbl 121: PRIVATE int get_physical ARGS2(
122: CONST char *, addr,
123: HTParentAnchor *, anchor)
1.1 timbl 124: {
125: char * access=0; /* Name of access method */
1.2 timbl 126: char * physical = 0;
1.1 timbl 127:
1.2 timbl 128: #ifndef NO_RULES
129: physical = HTTranslate(addr);
130: if (!physical) {
131: return HT_FORBIDDEN;
132: }
133: HTAnchor_setPhysical(anchor, physical);
134: free(physical); /* free our copy */
135: #else
136: HTAnchor_setPhysical(anchor, addr);
137: #endif
138:
139: access = HTParse(HTAnchor_physical(anchor),
140: "file:", PARSE_ACCESS);
1.1 timbl 141:
142: /* Check whether gateway access has been set up for this
143: */
144: #ifdef USE_GATEWAYS
1.2 timbl 145: {
146: char * gateway_parameter, gateway;
147: gateway_parameter = (char *)malloc(strlen(access)+20);
148: if (gateway_parameter == NULL) outofmem(__FILE__, "HTLoad");
149: strcpy(gateway_parameter, "WWW_");
150: strcat(gateway_parameter, access);
151: strcat(gateway_parameter, "_GATEWAY");
152: gateway = (char *)getenv(gateway_parameter); /* coerce for decstation */
153: free(gateway_parameter);
154: if (gateway) {
155: status = HTLoadHTTP(addr, gateway, anchor,
156: HTOutputFormat ? HTOutputFormat : WWW_PRESENT, sink);
157: HTAlert("Cannot retrieve required information from gateway.");
158: free(access);
159: return status;
160: }
161: }
1.1 timbl 162: #endif
163:
164:
165:
166: /* Search registered protocols to find suitable one
167: */
168: {
169: int i, n;
170: #ifndef NO_INIT
1.2 timbl 171: if (!protocols) HTAccessInit();
1.1 timbl 172: #endif
173: n = HTList_count(protocols);
174: for (i=0; i<n; i++) {
1.2 timbl 175: HTProtocol *p = HTList_objectAt(protocols, i);
176: if (strcmp(p->name, access)==0) {
177: HTAnchor_setProtocol(anchor, p);
178: free(access);
179: return (HT_OK);
1.1 timbl 180: }
181: }
182: }
183:
184: free(access);
1.2 timbl 185: return HT_NO_ACCESS;
1.1 timbl 186: }
187:
188:
189: /* Load a document
190: ** ---------------
191: **
1.2 timbl 192: ** This is an internal routine, which has an address AND a matching
193: ** anchor. (The public routines are called with one OR the other.)
194: **
195: ** On entry,
196: ** addr must point to the fully qualified hypertext reference.
197: ** anchor a pareent anchor with whose address is addr
198: **
199: ** On exit,
200: ** returns <0 Error has occured.
201: ** HT_LOADED Success
202: ** HT_NO_DATA Success, but no document loaded.
203: **
204: */
205: PRIVATE int HTLoad ARGS4(
206: CONST char *, addr,
207: HTParentAnchor *, anchor,
208: HTFormat, format_out,
209: HTStream *, sink)
210: {
211: HTProtocol* p;
212: int status = get_physical(addr, anchor);
213: if (status == HT_FORBIDDEN) {
214: return HTLoadError(sink, 500, "Access forbidden by rule");
215: }
216: if (status < 0) return status; /* Can't resolve or forbidden */
217:
218: p = HTAnchor_protocol(anchor);
219: return (*(p->load))(HTAnchor_physical(anchor),
220: anchor, format_out, sink);
221: }
222:
223:
224: /* Get a save stream for a document
225: ** --------------------------------
226: */
227: PUBLIC HTStream *HTSaveStream ARGS1(HTParentAnchor *, anchor)
228: {
229: HTProtocol * p = HTAnchor_protocol(anchor);
230: if (!p) return NULL;
231:
232: return (*p->saveStream)(anchor);
233:
234: }
235:
236:
237: /* Load a document - with logging etc
238: ** ----------------------------------
239: **
240: ** - Checks or documents already loaded
241: ** - Logs the access
242: ** - Allows stdin filter option
243: ** - Trace ouput and error messages
244: **
1.1 timbl 245: ** On Entry,
246: ** anchor is the node_anchor for the document
247: ** full_address The address of the document to be accessed.
1.2 timbl 248: ** filter if YES, treat stdin as HTML
1.1 timbl 249: **
250: ** On Exit,
251: ** returns YES Success in opening document
252: ** NO Failure
253: **
254: */
255:
1.2 timbl 256: PRIVATE BOOL HTLoadDocument ARGS4(
257: CONST char *, full_address,
258: HTParentAnchor *, anchor,
259: HTFormat, format_out,
260: HTStream*, sink)
1.1 timbl 261:
262: {
263: int status;
264: HText * text;
265:
266: if (TRACE) fprintf (stderr,
267: "HTAccess: loading document %s\n", full_address);
268:
269: if (text=(HText *)HTAnchor_document(anchor)) { /* Already loaded */
270: if (TRACE) fprintf(stderr, "HTAccess: Document already in memory.\n");
271: HText_select(text);
272: return YES;
273: }
274:
1.2 timbl 275: status = HTLoad(full_address, anchor, format_out, sink);
276:
277:
1.1 timbl 278: /* Log the access if necessary
279: */
280: if (logfile) {
281: time_t theTime;
282: time(&theTime);
283: fprintf(logfile, "%24.24s %s %s %s\n",
284: ctime(&theTime),
285: HTClientHost ? HTClientHost : "local",
286: status<0 ? "FAIL" : "GET",
287: full_address);
288: fflush(logfile); /* Actually update it on disk */
289: if (TRACE) fprintf(stderr, "Log: %24.24s %s %s %s\n",
290: ctime(&theTime),
291: HTClientHost ? HTClientHost : "local",
292: status<0 ? "FAIL" : "GET",
293: full_address);
294: }
295:
296:
297: if (status == HT_LOADED) {
298: if (TRACE) {
299: fprintf(stderr, "HTAccess: `%s' has been accessed.\n",
300: full_address);
301: }
302: return YES;
303: }
304:
305: if (status == HT_NO_DATA) {
306: if (TRACE) {
307: fprintf(stderr,
308: "HTAccess: `%s' has been accessed, No data left.\n",
309: full_address);
310: }
311: return NO;
312: }
313:
1.2 timbl 314: if (status<0) { /* Failure in accessing a document */
1.1 timbl 315: #ifdef CURSES
316: user_message("Can't access `%s'", full_address);
317: #else
1.5 timbl 318: if (TRACE) fprintf(stderr,
319: "HTAccess: Can't access `%s'\n", full_address);
1.1 timbl 320: #endif
1.6 ! timbl 321: HTLoadError(sink, 500, "Unable to access document.");
1.1 timbl 322: return NO;
323: }
324:
325: fprintf(stderr,
1.2 timbl 326: "**** HTAccess: socket or file number returned by obsolete load routine!\n");
1.1 timbl 327: exit(-6996);
328:
1.2 timbl 329: } /* HTLoadDocument */
1.1 timbl 330:
331:
332:
333: /* Load a document from absolute name
334: ** ---------------
335: **
336: ** On Entry,
337: ** addr The absolute address of the document to be accessed.
338: ** filter if YES, treat document as HTML
339: **
340: ** On Exit,
341: ** returns YES Success in opening document
342: ** NO Failure
343: **
344: **
345: */
346:
1.2 timbl 347: PUBLIC BOOL HTLoadAbsolute ARGS1(CONST char *,addr)
348: {
349: return HTLoadDocument( addr,
350: HTAnchor_parent(HTAnchor_findAddress(addr)),
351: HTOutputFormat ? HTOutputFormat : WWW_PRESENT,
352: HTOutputStream);
353: }
354:
355:
356: /* Load a document from absolute name to stream
357: ** --------------------------------------------
358: **
359: ** On Entry,
360: ** addr The absolute address of the document to be accessed.
361: ** sink if non-NULL, send data down this stream
362: **
363: ** On Exit,
364: ** returns YES Success in opening document
365: ** NO Failure
366: **
367: **
368: */
369:
370: PUBLIC BOOL HTLoadToStream ARGS3(
371: CONST char *, addr,
372: BOOL, filter,
373: HTStream *, sink)
1.1 timbl 374: {
1.2 timbl 375: return HTLoadDocument(addr,
1.1 timbl 376: HTAnchor_parent(HTAnchor_findAddress(addr)),
1.2 timbl 377: HTOutputFormat ? HTOutputFormat : WWW_PRESENT,
378: sink);
1.1 timbl 379: }
380:
381:
1.2 timbl 382:
383:
1.1 timbl 384: /* Load a document from relative name
385: ** ---------------
386: **
387: ** On Entry,
1.2 timbl 388: ** relative_name The relative address of the document
389: ** to be accessed.
1.1 timbl 390: **
391: ** On Exit,
392: ** returns YES Success in opening document
393: ** NO Failure
394: **
395: **
396: */
397:
1.2 timbl 398: PUBLIC BOOL HTLoadRelative ARGS2(
399: CONST char *, relative_name,
400: HTParentAnchor *, here)
1.1 timbl 401: {
402: char * full_address = 0;
403: BOOL result;
404: char * mycopy = 0;
405: char * stripped = 0;
406: char * current_address =
1.2 timbl 407: HTAnchor_address((HTAnchor*)here);
1.1 timbl 408:
409: StrAllocCopy(mycopy, relative_name);
410:
411: stripped = HTStrip(mycopy);
412: full_address = HTParse(stripped,
413: current_address,
414: PARSE_ACCESS|PARSE_HOST|PARSE_PATH|PARSE_PUNCTUATION);
1.2 timbl 415: result = HTLoadAbsolute(full_address);
1.1 timbl 416: free(full_address);
417: free(current_address);
418: free(mycopy); /* Memory leak fixed 10/7/92 -- JFG */
419: return result;
420: }
421:
422:
423: /* Load if necessary, and select an anchor
424: ** --------------------------------------
425: **
426: ** On Entry,
427: ** destination The child or parenet anchor to be loaded.
428: **
429: ** On Exit,
430: ** returns YES Success
431: ** NO Failure
432: **
433: */
434:
435: PUBLIC BOOL HTLoadAnchor ARGS1(HTAnchor *,destination)
436: {
437: HTParentAnchor * parent;
438: BOOL loaded = NO;
439: if (!destination) return NO; /* No link */
440:
441: parent = HTAnchor_parent(destination);
442:
443: if (HTAnchor_document(parent) == NULL) { /* If not alread loaded */
444: /* TBL 921202 */
1.2 timbl 445:
1.1 timbl 446: BOOL result;
447: char * address = HTAnchor_address((HTAnchor*) parent);
1.2 timbl 448: result = HTLoadDocument(address, parent,
449: HTOutputFormat ? HTOutputFormat : WWW_PRESENT,
450: HTOutputStream);
1.1 timbl 451: free(address);
452: if (!result) return NO;
453: loaded = YES;
454: }
455:
456: {
457: HText *text = (HText*)HTAnchor_document(parent);
458: if (destination != (HTAnchor *)parent) { /* If child anchor */
459: HText_selectAnchor(text,
460: (HTChildAnchor*)destination); /* Double display? @@ */
461: } else {
462: if (!loaded) HText_select(text);
463: }
464: }
465: return YES;
466:
467: } /* HTLoadAnchor */
468:
469:
/*	Search
**	------
**	Performs a keyword search on the words given by the user.  Adds the
**	keywords to the end of the current address and attempts to open the
**	new address.
**
** On Entry,
**	*keywords	space-separated keyword list or similar search list
**	here		is the anchor on which the search is to be done.
*/
479:
1.2 timbl 480: PRIVATE char hex(i)
481: int i;
482: {
483: return i < 10 ? '0'+i : 'A'+ i - 10;
484: }
1.1 timbl 485:
1.2 timbl 486: PUBLIC BOOL HTSearch ARGS2(
487: CONST char *, keywords,
488: HTParentAnchor *, here)
1.1 timbl 489: {
1.2 timbl 490:
491: #define acceptable \
492: "1234567890abcdefghijlkmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ.-_"
493:
494: char *q, *u;
495: CONST char * p, *s, *e; /* Pointers into keywords */
496: char * address = HTAnchor_address((HTAnchor*)here);
1.1 timbl 497: BOOL result;
1.2 timbl 498: char * escaped = malloc(strlen(keywords)*3+1);
499:
500: static CONST BOOL isAcceptable[96] =
501:
502: /* 0 1 2 3 4 5 6 7 8 9 A B C D E F */
503: { 0,0,0,0,0,0,0,0,0,0,1,0,0,1,1,0, /* 2x !"#$%&'()*+,-./ */
504: 1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0, /* 3x 0123456789:;<=>? */
505: 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 4x @ABCDEFGHIJKLMNO */
506: 1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,1, /* 5X PQRSTUVWXYZ[\]^_ */
507: 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 6x `abcdefghijklmno */
508: 1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0 }; /* 7X pqrstuvwxyz{\}~ DEL */
509:
510: if (escaped == NULL) outofmem(__FILE__, "HTSearch");
511:
512:
513: /* Convert spaces to + and hex escape unacceptable characters
514: */
515: for(s=keywords; *s && WHITE(*s); s++) /*scan */ ; /* Skip white space */
516: for(e = s + strlen(s); e>s && WHITE(*(e-1)) ; e--); /* Skip trailers */
517: for(q=escaped, p=s; p<e; p++) { /* scan stripped field */
518: int c = (int)TOASCII(*p);
519: if (WHITE(*p)) {
520: *q++ = '+';
521: } else if (c>=32 && c<=(char)127 && isAcceptable[c-32]) {
522: *q++ = (char)c;
523: } else {
524: *q++ = '%';
525: *q++ = hex(c / 16);
526: *q++ = hex(c % 16);
527: }
528: } /* Loop over string */
1.1 timbl 529:
1.2 timbl 530: *q=0;
531: /* terminate escaped sctring */
532: u=strchr(address, '?'); /* Find old search string */
533: if (u) *u = 0; /* Chop old search off */
1.1 timbl 534:
535: StrAllocCat(address, "?");
1.2 timbl 536: StrAllocCat(address, escaped);
537: free(escaped);
538: result = HTLoadRelative(address, here);
1.1 timbl 539: free(address);
1.2 timbl 540:
1.1 timbl 541: return result;
1.2 timbl 542: }
543:
544:
545: /* Search Given Indexname
546: ** ------
547: ** Performs a keyword search on word given by the user. Adds the keyword to
548: ** the end of the current address and attempts to open the new address.
549: **
550: ** On Entry,
551: ** *keywords space-separated keyword list or similar search list
552: ** *addres is name of object search is to be done on.
553: */
554:
555: PUBLIC BOOL HTSearchAbsolute ARGS2(
556: CONST char *, keywords,
557: CONST char *, indexname)
558: {
559: HTParentAnchor * anchor =
560: (HTParentAnchor*) HTAnchor_findAddress(indexname);
561: return HTSearch(keywords, anchor);
562: }
563:
564:
565: /* Generate the anchor for the home page
566: ** -------------------------------------
567: **
568: ** As it involves file access, this should only be done once
569: ** when the program first runs.
570: ** This is a default algorithm -- browesr don't HAVE to use this.
571: **
572: */
573: PUBLIC HTParentAnchor * HTHomeAnchor NOARGS
574: {
575: char * my_home = (char *)getenv(LOGICAL_DEFAULT);
576: BOOL got_local_default = NO;
577: char * ref;
578: HTParentAnchor * anchor;
1.1 timbl 579:
1.2 timbl 580: #ifdef unix
581: {
582: FILE * fp = fopen(LOCAL_DEFAULT_FILE, "r");
583: if (fp) {
584: fclose(fp);
585: got_local_default = YES;
586: } else {
587: if (TRACE) fprintf(stderr,
588: "HTBrowse: No local default home %s\n",
589: LOCAL_DEFAULT_FILE);
590: }
591: }
592: #endif
593: ref = HTParse( my_home ? my_home :
594: HTClientHost ? REMOTE_ADDRESS :
595: got_local_default ? LOCAL_DEFAULT
596: : LAST_RESORT,
597: LAST_RESORT,
598: PARSE_ACCESS|PARSE_HOST|PARSE_PATH|PARSE_PUNCTUATION);
599: if (my_home) {
600: if (TRACE) fprintf(stderr,
601: "HTAccess: Using custom home page %s i.e. address %s\n",
602: my_home, ref);
603: }
604: anchor = (HTParentAnchor*) HTAnchor_findAddress(ref);
605: free(ref);
606: return anchor;
1.1 timbl 607: }
608:
609:
Webmaster