Annotation of libwww/Library/src/HTAccess.c, revision 1.56
1.1 timbl 1: /* Access Manager HTAccess.c
2: ** ==============
3: **
4: ** Authors
5: ** TBL Tim Berners-Lee timbl@info.cern.ch
1.4 timbl 6: ** JFG Jean-Francois Groff jfg@dxcern.cern.ch
1.1 timbl 7: ** DD Denis DeLaRoca (310) 825-4580 <CSP1DWD@mvs.oac.ucla.edu>
8: ** History
9: ** 8 Jun 92 Telnet hopping prohibited as telnet is not secure TBL
10: ** 26 Jun 92 When over DECnet, suppressed FTP, Gopher and News. JFG
1.42 frystyk 11: ** 6 Oct 92 Moved HTClientHost and HTlogfile into here. TBL
1.1 timbl 12: ** 17 Dec 92 Tn3270 added, bug fix. DD
1.2 timbl 13: ** 4 Feb 93 Access registration, Search escapes bad chars TBL
1.9 timbl 14: ** PARAMETERS TO HTSEARCH AND HTLOADRELATIVE CHANGED
15: ** 28 May 93 WAIS gateway explicit if no WAIS library linked in.
1.19 timbl 16: ** Dec 93 Bug change around, more reentrant, etc
1.42 frystyk 17: ** 09 May 94 logfile renamed to HTlogfile to avoid clash with WAIS
1.53 duns 18: ** 8 Jul 94 Insulate free() from _free structure element.
1.2 timbl 19: ** Bugs
20: ** This module assumes that the graphic object is hypertext, as it
1.9 timbl 21: ** needs to select it when it has been loaded. A superclass needs to be
1.2 timbl 22: ** defined which accepts select and select_anchor.
1.1 timbl 23: */
24:
1.9 timbl 25: #ifndef DEFAULT_WAIS_GATEWAY
1.8 timbl 26: #define DEFAULT_WAIS_GATEWAY "http://info.cern.ch:8001/"
1.54 frystyk 27: #endif
1.8 timbl 28:
1.1 timbl 29: /* Implements:
30: */
31: #include "HTAccess.h"
32:
33: /* Uses:
34: */
35:
36: #include "HTParse.h"
37: #include "HTUtils.h"
1.4 timbl 38: #include "HTML.h" /* SCW */
1.2 timbl 39:
40: #ifndef NO_RULES
41: #include "HTRules.h"
42: #endif
43:
44: #include "HTList.h"
45: #include "HText.h" /* See bugs above */
46: #include "HTAlert.h"
1.17 timbl 47: #include "HTFWriter.h" /* for cache stuff */
48: #include "HTTee.h"
1.46 frystyk 49: #include "HTError.h"
1.2 timbl 50:
1.54 frystyk 51: /* These flags may be set to modify the operation of this module */
52: PUBLIC char * HTCacheDir = NULL; /* Root for cached files or 0 for no cache */
53: PUBLIC char * HTSaveLocallyDir = SAVE_LOCALLY_HOME_DIR; /* Save & exe files */
54: PUBLIC char * HTClientHost = 0; /* Name of remote login host if any */
55: PUBLIC FILE * HTlogfile = 0; /* File to which to output one-liners */
1.41 luotonen 56:
1.34 frystyk 57: PUBLIC BOOL HTForceReload = NO; /* Force reload from cache or net */
1.12 timbl 58: PUBLIC BOOL HTSecure = NO; /* Disable access for telnet users? */
1.27 luotonen 59: PUBLIC BOOL using_proxy = NO; /* are we using a proxy gateway? */
1.43 luotonen 60: PUBLIC char * HTImServer = NULL;/* cern_httpd sets this to the translated URL*/
1.27 luotonen 61: PUBLIC BOOL HTImProxy = NO; /* cern_httpd as a proxy? */
1.1 timbl 62:
1.43 luotonen 63:
1.2 timbl 64: /* To generate other things, play with these:
65: */
66:
1.15 timbl 67: /* PUBLIC HTFormat HTOutputFormat = NULL; use request->output_format */
68: /* PUBLIC HTStream* HTOutputStream = NULL; use request->output_stream */
1.1 timbl 69:
70: PRIVATE HTList * protocols = NULL; /* List of registered protocol descriptors */
71:
1.24 timbl 72: /* Superclass defn */
1.1 timbl 73:
1.24 timbl 74: struct _HTStream {
75: HTStreamClass * isa;
76: /* ... */
77: };
78:
1.15 timbl 79: /* Create a request structure
80: ** ---------------------------
81: */
82:
83: PUBLIC HTRequest * HTRequest_new NOARGS
84: {
1.28 luotonen 85: HTRequest * me = (HTRequest*) calloc(1, sizeof(*me)); /* zero fill */
1.15 timbl 86: if (!me) outofmem(__FILE__, "HTRequest_new()");
87:
1.20 luotonen 88: me->conversions = HTList_new(); /* No conversions registerd yet */
89: me->output_format = WWW_PRESENT; /* default it to present to user */
90:
1.15 timbl 91: return me;
92: }
93:
94:
1.49 frystyk 95: /* Clear a request structure
96: ** ---------------------------
97: ** This function clears the reguest structure so that only the
98: ** conversions remain. Everything else is as if it was created from
99: ** scratch.
100: */
101: PUBLIC void HTRequest_clear ARGS1(HTRequest *, req)
102: {
103: HTList *conversions;
104: if (!req) {
105: if (TRACE)
106: fprintf(stderr, "Clear....... request: Bad argument!\n");
107: return;
108: }
109: conversions = req->conversions; /* Save the conversions */
110: HTErrorFree(req);
111: HTAACleanup(req);
112: FREE(req->from);
113: memset(req, '\0', sizeof(HTRequest));
114:
115: /* Now initialize as from scratch but with the old list of conversions */
116: req->conversions = conversions;
117: req->output_format = WWW_PRESENT; /* default it to present to user */
118: }
119:
120:
1.20 luotonen 121: /* Delete a request structure
122: ** --------------------------
123: */
124: PUBLIC void HTRequest_delete ARGS1(HTRequest *, req)
125: {
126: if (req) {
1.34 frystyk 127: HTFormatDelete(req->conversions);
1.46 frystyk 128: HTErrorFree(req);
1.34 frystyk 129: HTAACleanup(req);
1.37 luotonen 130: FREE(req->from);
1.34 frystyk 131: FREE(req);
1.20 luotonen 132: }
133: }
134:
135:
1.22 luotonen 136: PRIVATE char * method_names[(int)MAX_METHODS + 1] =
137: {
138: "INVALID-METHOD",
139: "GET",
140: "HEAD",
141: "POST",
142: "PUT",
143: "DELETE",
144: "CHECKOUT",
145: "CHECKIN",
146: "SHOWMETHOD",
147: "LINK",
148: "UNLINK",
149: NULL
150: };
151:
152: /* Get method enum value
153: ** ---------------------
154: */
155: PUBLIC HTMethod HTMethod_enum ARGS1(char *, name)
156: {
157: if (name) {
158: int i;
159: for (i=1; i < (int)MAX_METHODS; i++)
160: if (!strcmp(name, method_names[i]))
161: return (HTMethod)i;
162: }
163: return METHOD_INVALID;
164: }
165:
166:
167: /* Get method name
168: ** ---------------
169: */
170: PUBLIC char * HTMethod_name ARGS1(HTMethod, method)
171: {
172: if ((int)method > (int)METHOD_INVALID &&
173: (int)method < (int)MAX_METHODS)
174: return method_names[(int)method];
175: else
176: return method_names[(int)METHOD_INVALID];
177: }
178:
179:
180: /* Is method in a list of method names?
181: ** -----------------------------------
182: */
183: PUBLIC BOOL HTMethod_inList ARGS2(HTMethod, method,
184: HTList *, list)
185: {
186: char * method_name = HTMethod_name(method);
187: HTList *cur = list;
188: char *item;
189:
190: while (NULL != (item = (char*)HTList_nextObject(cur))) {
191: CTRACE(stderr, " %s", item);
192: if (0==strcasecomp(item, method_name))
193: return YES;
194: }
195: return NO; /* Not found */
196: }
197:
198:
199:
200:
1.20 luotonen 201:
1.1 timbl 202: /* Register a Protocol HTRegisterProtocol
203: ** -------------------
204: */
205:
1.56 ! frystyk 206: PUBLIC BOOL HTRegisterProtocol ARGS1(HTProtocol *, protocol)
1.1 timbl 207: {
208: if (!protocols) protocols = HTList_new();
209: HTList_addObject(protocols, protocol);
210: return YES;
211: }
212:
213:
214: /* Register all known protocols
215: ** ----------------------------
216: **
217: ** Add to or subtract from this list if you add or remove protocol modules.
218: ** This routine is called the first time the protocol list is needed,
1.52 frystyk 219: ** unless any protocols are already registered, in which case it is not
220: ** called. Therefore the application can override this list.
1.1 timbl 221: **
222: ** Compiling with NO_INIT prevents all known protocols from being forced
223: ** in at link time.
224: */
225: #ifndef NO_INIT
226: PRIVATE void HTAccessInit NOARGS /* Call me once */
227: {
1.14 duns 228: GLOBALREF HTProtocol HTTP, HTFile, HTTelnet, HTTn3270, HTRlogin;
1.1 timbl 229: #ifndef DECNET
1.54 frystyk 230: #ifdef NEW_CODE
231: GLOBALREF HTProtocol HTFTP, HTNews, HTNNTP, HTGopher;
232: #endif
1.14 duns 233: GLOBALREF HTProtocol HTFTP, HTNews, HTGopher;
1.42 frystyk 234:
1.3 timbl 235: #ifdef DIRECT_WAIS
1.14 duns 236: GLOBALREF HTProtocol HTWAIS;
1.3 timbl 237: #endif
1.2 timbl 238: HTRegisterProtocol(&HTFTP);
239: HTRegisterProtocol(&HTNews);
1.54 frystyk 240: #ifdef NEW_CODE
241: HTRegisterProtocol(&HTNNTP);
242: #endif
1.2 timbl 243: HTRegisterProtocol(&HTGopher);
1.42 frystyk 244:
1.3 timbl 245: #ifdef DIRECT_WAIS
246: HTRegisterProtocol(&HTWAIS);
247: #endif
1.1 timbl 248:
1.54 frystyk 249: #endif /* DECNET */
1.2 timbl 250: HTRegisterProtocol(&HTTP);
251: HTRegisterProtocol(&HTFile);
252: HTRegisterProtocol(&HTTelnet);
253: HTRegisterProtocol(&HTTn3270);
254: HTRegisterProtocol(&HTRlogin);
1.1 timbl 255: }
256: #endif
257:
258:
1.33 luotonen 259:
260: /* override_proxy()
261: **
262: ** Check the no_proxy environment variable to get the list
263: ** of hosts for which proxy server is not consulted.
264: **
265: ** no_proxy is a comma- or space-separated list of machine
266: ** or domain names, with optional :port part. If no :port
267: ** part is present, it applies to all ports on that domain.
268: **
269: ** Example:
270: ** no_proxy="cern.ch,some.domain:8001"
271: **
272: */
273: PRIVATE BOOL override_proxy ARGS1(CONST char *, addr)
274: {
275: CONST char * no_proxy = getenv("no_proxy");
276: char * p = NULL;
277: char * host = NULL;
278: int port = 0;
279: int h_len = 0;
280:
281: if (!no_proxy || !addr || !(host = HTParse(addr, "", PARSE_HOST)))
282: return NO;
283: if (!*host) { free(host); return NO; }
284:
1.34 frystyk 285: if ((p = strchr(host, ':')) != NULL) { /* Port specified */
1.33 luotonen 286: *p++ = 0; /* Chop off port */
287: port = atoi(p);
288: }
289: else { /* Use default port */
290: char * access = HTParse(addr, "", PARSE_ACCESS);
291: if (access) {
292: if (!strcmp(access,"http")) port = 80;
293: else if (!strcmp(access,"gopher")) port = 70;
294: else if (!strcmp(access,"ftp")) port = 21;
295: free(access);
296: }
297: }
298: if (!port) port = 80; /* Default */
299: h_len = strlen(host);
300:
301: while (*no_proxy) {
302: CONST char * end;
303: CONST char * colon = NULL;
304: int templ_port = 0;
305: int t_len;
306:
307: while (*no_proxy && (WHITE(*no_proxy) || *no_proxy==','))
308: no_proxy++; /* Skip whitespace and separators */
309:
310: end = no_proxy;
311: while (*end && !WHITE(*end) && *end != ',') { /* Find separator */
312: if (*end==':') colon = end; /* Port number given */
313: end++;
314: }
315:
316: if (colon) {
317: templ_port = atoi(colon+1);
318: t_len = colon - no_proxy;
319: }
320: else {
321: t_len = end - no_proxy;
322: }
323:
324: if ((!templ_port || templ_port == port) &&
325: (t_len > 0 && t_len <= h_len &&
326: !strncmp(host + h_len - t_len, no_proxy, t_len))) {
327: free(host);
328: return YES;
329: }
330: if (*end) no_proxy = end+1;
331: else break;
332: }
333:
334: free(host);
335: return NO;
336: }
337:
338:
339:
1.2 timbl 340: /* Find physical name and access protocol
341: ** --------------------------------------
1.1 timbl 342: **
343: **
344: ** On entry,
345: ** addr must point to the fully qualified hypertext reference.
346: ** anchor a pareent anchor with whose address is addr
347: **
348: ** On exit,
1.2 timbl 349: ** returns HT_NO_ACCESS Error has occured.
350: ** HT_OK Success
1.1 timbl 351: **
352: */
1.21 luotonen 353: PRIVATE int get_physical ARGS1(HTRequest *, req)
354: {
1.1 timbl 355: char * access=0; /* Name of access method */
1.21 luotonen 356: char * addr = HTAnchor_address((HTAnchor*)req->anchor); /* free me */
1.27 luotonen 357:
1.35 luotonen 358: /*
359: ** This HACK is here until we have redirection implemented.
360: ** This is used when we are recursively calling HTLoad().
361: ** We then take the physical address, because currently the
362: ** virtual address is kept in a hash table so it can't be
363: ** changed -- otherwise it wouldn't be found anymore.
364: */
1.36 luotonen 365: if (HTAnchor_physical(req->anchor))
366: StrAllocCopy(addr, HTAnchor_physical(req->anchor));
1.35 luotonen 367:
1.2 timbl 368: #ifndef NO_RULES
1.47 luotonen 369: if (HTImServer) { /* cern_httpd has already done its own translations */
1.45 luotonen 370: HTAnchor_setPhysical(req->anchor, HTImServer);
1.47 luotonen 371: StrAllocCopy(addr, HTImServer); /* Oops, queries thru many proxies */
372: /* didn't work without this -- AL */
373: }
1.21 luotonen 374: else {
1.27 luotonen 375: char * physical = HTTranslate(addr);
1.21 luotonen 376: if (!physical) {
1.47 luotonen 377: free(addr);
1.21 luotonen 378: return HT_FORBIDDEN;
379: }
380: HTAnchor_setPhysical(req->anchor, physical);
381: free(physical); /* free our copy */
1.2 timbl 382: }
383: #else
1.21 luotonen 384: HTAnchor_setPhysical(req->anchor, addr);
1.2 timbl 385: #endif
386:
1.21 luotonen 387: access = HTParse(HTAnchor_physical(req->anchor),
1.27 luotonen 388: "file:", PARSE_ACCESS);
1.1 timbl 389:
390: /* Check whether gateway access has been set up for this
1.8 timbl 391: **
392: ** This function can be replaced by the rule system above.
1.1 timbl 393: */
1.8 timbl 394: #define USE_GATEWAYS
1.1 timbl 395: #ifdef USE_GATEWAYS
1.39 luotonen 396:
397: /* make sure the using_proxy variable is false */
398: using_proxy = NO;
399:
1.33 luotonen 400: if (!override_proxy(addr)) {
1.27 luotonen 401: char * gateway_parameter, *gateway, *proxy;
402:
1.2 timbl 403: gateway_parameter = (char *)malloc(strlen(access)+20);
404: if (gateway_parameter == NULL) outofmem(__FILE__, "HTLoad");
1.27 luotonen 405:
406: /* search for proxy gateways */
1.2 timbl 407: strcpy(gateway_parameter, "WWW_");
408: strcat(gateway_parameter, access);
409: strcat(gateway_parameter, "_GATEWAY");
410: gateway = (char *)getenv(gateway_parameter); /* coerce for decstation */
1.27 luotonen 411:
412: /* search for proxy servers */
413: strcpy(gateway_parameter, access);
414: strcat(gateway_parameter, "_proxy");
415: proxy = (char *)getenv(gateway_parameter);
416:
1.2 timbl 417: free(gateway_parameter);
1.27 luotonen 418:
419: if (TRACE && gateway)
420: fprintf(stderr,"Gateway found: %s\n",gateway);
421: if (TRACE && proxy)
422: fprintf(stderr,"Proxy server found: %s\n",proxy);
423:
1.8 timbl 424: #ifndef DIRECT_WAIS
1.9 timbl 425: if (!gateway && 0==strcmp(access, "wais")) {
1.8 timbl 426: gateway = DEFAULT_WAIS_GATEWAY;
427: }
428: #endif
1.27 luotonen 429:
430: /* proxy servers have precedence over gateway servers */
431: if (proxy) {
432: char * gatewayed=0;
433:
434: StrAllocCopy(gatewayed,proxy);
435: StrAllocCat(gatewayed,addr);
436: using_proxy = YES;
437: HTAnchor_setPhysical(req->anchor, gatewayed);
438: free(gatewayed);
439: free(access);
440:
441: access = HTParse(HTAnchor_physical(req->anchor),
442: "http:", PARSE_ACCESS);
443: } else if (gateway) {
1.9 timbl 444: char * path = HTParse(addr, "",
445: PARSE_HOST + PARSE_PATH + PARSE_PUNCTUATION);
446: /* Chop leading / off to make host into part of path */
447: char * gatewayed = HTParse(path+1, gateway, PARSE_ALL);
448: free(path);
1.21 luotonen 449: HTAnchor_setPhysical(req->anchor, gatewayed);
1.9 timbl 450: free(gatewayed);
1.2 timbl 451: free(access);
1.9 timbl 452:
1.21 luotonen 453: access = HTParse(HTAnchor_physical(req->anchor),
1.8 timbl 454: "http:", PARSE_ACCESS);
1.2 timbl 455: }
456: }
1.1 timbl 457: #endif
458:
1.19 timbl 459: free(addr);
1.1 timbl 460:
461:
462: /* Search registered protocols to find suitable one
463: */
464: {
1.20 luotonen 465: HTList *cur;
466: HTProtocol *p;
1.1 timbl 467: #ifndef NO_INIT
1.2 timbl 468: if (!protocols) HTAccessInit();
1.1 timbl 469: #endif
1.20 luotonen 470: cur = protocols;
471: while ((p = (HTProtocol*)HTList_nextObject(cur))) {
1.2 timbl 472: if (strcmp(p->name, access)==0) {
1.21 luotonen 473: HTAnchor_setProtocol(req->anchor, p);
1.2 timbl 474: free(access);
475: return (HT_OK);
1.1 timbl 476: }
477: }
478: }
479:
480: free(access);
1.2 timbl 481: return HT_NO_ACCESS;
1.1 timbl 482: }
483:
484:
485: /* Load a document
486: ** ---------------
487: **
1.2 timbl 488: ** This is an internal routine, which has an address AND a matching
489: ** anchor. (The public routines are called with one OR the other.)
490: **
491: ** On entry,
1.15 timbl 492: ** request->
1.35 luotonen 493: ** anchor a parent anchor with fully qualified
494: ** hypertext reference as its address set
1.15 timbl 495: ** output_format valid
496: ** output_stream valid on NULL
1.2 timbl 497: **
498: ** On exit,
499: ** returns <0 Error has occured.
500: ** HT_LOADED Success
501: ** HT_NO_DATA Success, but no document loaded.
1.8 timbl 502: ** (telnet sesssion started etc)
1.2 timbl 503: **
504: */
1.52 frystyk 505: PUBLIC int HTLoad ARGS2(HTRequest *, request, BOOL, keep_error_stack)
1.2 timbl 506: {
1.25 frystyk 507: char *arg = NULL;
508: HTProtocol *p;
509: int status;
510:
1.22 luotonen 511: if (request->method == METHOD_INVALID)
512: request->method = METHOD_GET;
1.52 frystyk 513: if (!keep_error_stack) {
514: HTErrorFree(request);
515: request->error_block = NO;
516: }
517:
1.21 luotonen 518: status = get_physical(request);
1.2 timbl 519: if (status == HT_FORBIDDEN) {
1.49 frystyk 520: char *url = HTAnchor_address((HTAnchor *) request->anchor);
521: if (url) {
522: HTUnEscape(url);
523: HTErrorAdd(request, ERR_FATAL, NO, HTERR_FORBIDDEN,
524: (void *) url, (int) strlen(url), "HTLoad");
525: free(url);
526: } else {
527: HTErrorAdd(request, ERR_FATAL, NO, HTERR_FORBIDDEN,
528: NULL, 0, "HTLoad");
529: }
530: return -1;
1.2 timbl 531: }
532: if (status < 0) return status; /* Can't resolve or forbidden */
1.25 frystyk 533:
534: if(!(arg = HTAnchor_physical(request->anchor)) || !*arg)
535: return (-1);
1.27 luotonen 536:
1.56 ! frystyk 537: p = (HTProtocol *) HTAnchor_protocol(request->anchor);
1.17 timbl 538: return (*(p->load))(request);
1.2 timbl 539: }
540:
541:
542: /* Get a save stream for a document
543: ** --------------------------------
544: */
1.19 timbl 545: PUBLIC HTStream *HTSaveStream ARGS1(HTRequest *, request)
1.15 timbl 546: {
547: HTProtocol * p;
1.19 timbl 548: int status;
1.22 luotonen 549: request->method = METHOD_PUT;
1.21 luotonen 550: status = get_physical(request);
1.19 timbl 551: if (status == HT_FORBIDDEN) {
1.49 frystyk 552: char *url = HTAnchor_address((HTAnchor *) request->anchor);
553: if (url) {
554: HTUnEscape(url);
555: HTErrorAdd(request, ERR_FATAL, NO, HTERR_FORBIDDEN,
556: (void *) url, (int) strlen(url), "HTLoad");
557: free(url);
558: } else {
559: HTErrorAdd(request, ERR_FATAL, NO, HTERR_FORBIDDEN,
560: NULL, 0, "HTLoad");
561: }
562: return NULL; /* should return error status? */
1.19 timbl 563: }
564: if (status < 0) return NULL; /* @@ error. Can't resolve or forbidden */
565:
1.56 ! frystyk 566: p = (HTProtocol *) HTAnchor_protocol(request->anchor);
1.2 timbl 567: if (!p) return NULL;
568:
1.15 timbl 569: return (*p->saveStream)(request);
1.2 timbl 570:
571: }
572:
573:
574: /* Load a document - with logging etc
575: ** ----------------------------------
576: **
577: ** - Checks or documents already loaded
578: ** - Logs the access
579: ** - Allows stdin filter option
580: ** - Trace ouput and error messages
581: **
1.1 timbl 582: ** On Entry,
1.19 timbl 583: ** request->anchor valid for of the document to be accessed.
584: ** request->childAnchor optional anchor within doc to be selected
585: **
1.2 timbl 586: ** filter if YES, treat stdin as HTML
1.1 timbl 587: **
1.15 timbl 588: ** request->anchor is the node_anchor for the document
589: ** request->output_format is valid
590: **
1.1 timbl 591: ** On Exit,
592: ** returns YES Success in opening document
593: ** NO Failure
594: **
595: */
596:
1.52 frystyk 597: PRIVATE BOOL HTLoadDocument ARGS2(HTRequest *, request,
598: BOOL, keep_error_stack)
1.1 timbl 599:
600: {
601: int status;
602: HText * text;
1.19 timbl 603: char * full_address = HTAnchor_address((HTAnchor*)request->anchor);
1.54 frystyk 604:
1.49 frystyk 605: if (TRACE) fprintf (stderr, "HTAccess.... Loading document %s\n",
606: full_address);
1.1 timbl 607:
1.18 timbl 608: request->using_cache = NULL;
609:
1.15 timbl 610: if (!request->output_format) request->output_format = WWW_PRESENT;
1.25 frystyk 611:
1.31 frystyk 612: if (!HTForceReload && (text=(HText *)HTAnchor_document(request->anchor)))
1.15 timbl 613: { /* Already loaded */
1.1 timbl 614: if (TRACE) fprintf(stderr, "HTAccess: Document already in memory.\n");
1.19 timbl 615: if (request->childAnchor) {
616: HText_selectAnchor(text, request->childAnchor);
617: } else {
618: HText_select(text);
619: }
620: free(full_address);
1.1 timbl 621: return YES;
622: }
1.17 timbl 623:
1.34 frystyk 624: /* Check the Cache */
1.17 timbl 625: /* Bug: for each format, we only check whether it is ok, we
626: don't check them all and chose the best */
1.54 frystyk 627: if (request->anchor->cacheItems) {
1.17 timbl 628: HTList * list = request->anchor->cacheItems;
1.20 luotonen 629: HTList * cur = list;
630: HTCacheItem * item;
631:
632: while ((item = (HTCacheItem*)HTList_nextObject(cur))) {
1.18 timbl 633: HTStream * s;
634:
635: request->using_cache = item;
636:
1.37 luotonen 637: s = HTStreamStack(item->format, request, NO);
1.17 timbl 638: if (s) { /* format was suitable */
639: FILE * fp = fopen(item->filename, "r");
1.18 timbl 640: if (TRACE) fprintf(stderr, "Cache: HIT file %s for %s\n",
1.20 luotonen 641: item->filename,
642: full_address);
1.17 timbl 643: if (fp) {
644: HTFileCopy(fp, s);
1.53 duns 645: (*s->isa->_free)(s); /* close up pipeline */
1.17 timbl 646: fclose(fp);
1.19 timbl 647: free(full_address);
1.17 timbl 648: return YES;
649: } else {
650: fprintf(stderr, "***** Can't read cache file %s !\n",
1.20 luotonen 651: item->filename);
1.17 timbl 652: } /* file open ok */
653: } /* stream ok */
654: } /* next cache item */
655: } /* if cache available for this anchor */
1.1 timbl 656:
1.52 frystyk 657: status = HTLoad(request, keep_error_stack);
1.2 timbl 658:
1.1 timbl 659: /* Log the access if necessary
660: */
1.42 frystyk 661: if (HTlogfile) {
1.1 timbl 662: time_t theTime;
663: time(&theTime);
1.42 frystyk 664: fprintf(HTlogfile, "%24.24s %s %s %s\n",
1.1 timbl 665: ctime(&theTime),
666: HTClientHost ? HTClientHost : "local",
667: status<0 ? "FAIL" : "GET",
668: full_address);
1.42 frystyk 669: fflush(HTlogfile); /* Actually update it on disk */
1.1 timbl 670: if (TRACE) fprintf(stderr, "Log: %24.24s %s %s %s\n",
671: ctime(&theTime),
672: HTClientHost ? HTClientHost : "local",
673: status<0 ? "FAIL" : "GET",
674: full_address);
675: }
676:
1.52 frystyk 677: /* The error stack might contain general information to the client
678: about what has been going on in the library (not only errors) */
679: if (request->error_stack)
680: HTErrorMsg(request);
681:
1.1 timbl 682: if (status == HT_LOADED) {
683: if (TRACE) {
1.54 frystyk 684: fprintf(stderr, "HTAccess.... `%s' has been accessed.\n",
1.1 timbl 685: full_address);
686: }
1.19 timbl 687: free(full_address);
1.1 timbl 688: return YES;
689: }
690:
691: if (status == HT_NO_DATA) {
692: if (TRACE) {
693: fprintf(stderr,
1.54 frystyk 694: "HTAccess.... `%s' has been accessed, No data left.\n",
1.1 timbl 695: full_address);
696: }
1.19 timbl 697: free(full_address);
1.1 timbl 698: return NO;
699: }
700:
1.34 frystyk 701: /* Bug fix thanks to Lou Montulli. Henrik 10/03-94 */
702: if (status<=0) { /* Failure in accessing a document */
1.1 timbl 703: #ifdef CURSES
704: user_message("Can't access `%s'", full_address);
705: #else
1.5 timbl 706: if (TRACE) fprintf(stderr,
707: "HTAccess: Can't access `%s'\n", full_address);
1.1 timbl 708: #endif
1.19 timbl 709: free(full_address);
1.1 timbl 710: return NO;
711: }
1.9 timbl 712:
713: /* If you get this, then please find which routine is returning
714: a positive unrecognised error code! */
1.1 timbl 715: fprintf(stderr,
1.50 frystyk 716: "**** HTAccess: Internal software error in CERN WWWLib version %s ****\n\nPlease mail www-bug@info.cern.ch quoting what software and what version you are using\nand the URL: %s that caused the problem, thanks!\n",
717: HTLibraryVersion,
718: full_address);
1.19 timbl 719: free(full_address);
720:
1.1 timbl 721: exit(-6996);
1.20 luotonen 722: return NO; /* For gcc :-( */
1.2 timbl 723: } /* HTLoadDocument */
1.1 timbl 724:
725:
726:
727: /* Load a document from absolute name
728: ** ---------------
729: **
730: ** On Entry,
731: ** addr The absolute address of the document to be accessed.
732: ** filter if YES, treat document as HTML
733: **
734: ** On Exit,
735: ** returns YES Success in opening document
736: ** NO Failure
737: **
738: **
739: */
740:
1.15 timbl 741: PUBLIC BOOL HTLoadAbsolute ARGS2(CONST char *,addr, HTRequest*, request)
1.2 timbl 742: {
1.19 timbl 743: HTAnchor * anchor = HTAnchor_findAddress(addr);
744: request->anchor = HTAnchor_parent(anchor);
745: request->childAnchor = ((HTAnchor*)request->anchor == anchor) ?
746: NULL : (HTChildAnchor*) anchor;
1.52 frystyk 747: return HTLoadDocument(request, NO);
1.2 timbl 748: }
749:
750:
751: /* Load a document from absolute name to stream
752: ** --------------------------------------------
753: **
754: ** On Entry,
755: ** addr The absolute address of the document to be accessed.
1.15 timbl 756: ** request->output_stream if non-NULL, send data down this stream
1.2 timbl 757: **
758: ** On Exit,
759: ** returns YES Success in opening document
760: ** NO Failure
761: **
762: **
763: */
764:
765: PUBLIC BOOL HTLoadToStream ARGS3(
766: CONST char *, addr,
767: BOOL, filter,
1.15 timbl 768: HTRequest*, request)
1.1 timbl 769: {
1.19 timbl 770: HTAnchor * anchor = HTAnchor_findAddress(addr);
771: request->anchor = HTAnchor_parent(anchor);
772: request->childAnchor = ((HTAnchor*)request->anchor == anchor) ? NULL :
773: (HTChildAnchor*) anchor;
1.15 timbl 774: request->output_stream = request->output_stream;
1.52 frystyk 775: return HTLoadDocument(request, NO);
1.1 timbl 776: }
777:
778:
1.2 timbl 779:
780:
1.1 timbl 781: /* Load a document from relative name
782: ** ---------------
783: **
784: ** On Entry,
1.2 timbl 785: ** relative_name The relative address of the document
786: ** to be accessed.
1.1 timbl 787: **
788: ** On Exit,
789: ** returns YES Success in opening document
790: ** NO Failure
791: **
792: **
793: */
794:
1.15 timbl 795: PUBLIC BOOL HTLoadRelative ARGS3(
1.2 timbl 796: CONST char *, relative_name,
1.15 timbl 797: HTParentAnchor *, here,
1.20 luotonen 798: HTRequest *, request)
1.1 timbl 799: {
800: char * full_address = 0;
801: BOOL result;
802: char * mycopy = 0;
803: char * stripped = 0;
804: char * current_address =
1.2 timbl 805: HTAnchor_address((HTAnchor*)here);
1.1 timbl 806:
807: StrAllocCopy(mycopy, relative_name);
808:
809: stripped = HTStrip(mycopy);
810: full_address = HTParse(stripped,
811: current_address,
812: PARSE_ACCESS|PARSE_HOST|PARSE_PATH|PARSE_PUNCTUATION);
1.15 timbl 813: result = HTLoadAbsolute(full_address, request);
1.1 timbl 814: free(full_address);
815: free(current_address);
816: free(mycopy); /* Memory leak fixed 10/7/92 -- JFG */
817: return result;
818: }
819:
820:
821: /* Load if necessary, and select an anchor
822: ** --------------------------------------
823: **
824: ** On Entry,
825: ** destination The child or parenet anchor to be loaded.
826: **
827: ** On Exit,
828: ** returns YES Success
829: ** NO Failure
830: **
831: */
832:
1.15 timbl 833: PUBLIC BOOL HTLoadAnchor ARGS2(HTAnchor*, anchor, HTRequest *, request)
1.1 timbl 834: {
1.15 timbl 835: if (!anchor) return NO; /* No link */
1.1 timbl 836:
1.15 timbl 837: request->anchor = HTAnchor_parent(anchor);
1.19 timbl 838: request->childAnchor = ((HTAnchor*)request->anchor == anchor) ? NULL
839: : (HTChildAnchor*) anchor;
1.1 timbl 840:
1.52 frystyk 841: return HTLoadDocument(request, NO) ? YES : NO;
842:
843: } /* HTLoadAnchor */
844:
845:
846: /* Load if necessary, and select an anchor
847: ** --------------------------------------
848: **
849: ** This function is almost identical to HTLoadAnchor, but it doesn't
850: ** clear the error stack so that the information in there is kept.
851: **
852: ** On Entry,
853: ** destination The child or parenet anchor to be loaded.
854: **
855: ** On Exit,
856: ** returns YES Success
857: ** NO Failure
858: **
859: */
860:
861: PUBLIC BOOL HTLoadAnchorRecursive ARGS2(HTAnchor*, anchor,
862: HTRequest *, request)
863: {
864: if (!anchor) return NO; /* No link */
865:
866: request->anchor = HTAnchor_parent(anchor);
867: request->childAnchor = ((HTAnchor*)request->anchor == anchor) ? NULL
868: : (HTChildAnchor*) anchor;
869:
870: return HTLoadDocument(request, YES) ? YES : NO;
1.1 timbl 871:
872: } /* HTLoadAnchor */
873:
874:
875: /* Search
876: ** ------
877: ** Performs a keyword search on word given by the user. Adds the keyword to
878: ** the end of the current address and attempts to open the new address.
879: **
880: ** On Entry,
881: ** *keywords space-separated keyword list or similar search list
1.2 timbl 882: ** here is anchor search is to be done on.
1.1 timbl 883: */
884:
/*	Hexadecimal digit
**	-----------------
**	Returns the upper-case hexadecimal digit for the low four bits of i.
**	Masking keeps the table access in bounds even if a caller passes a
**	value outside 0..15; for 0..15 the result is unchanged.
*/
PRIVATE char hex ARGS1(int, i)
{
    char * hexchars = "0123456789ABCDEF";
    return hexchars[i & 0x0F];
}
1.1 timbl 890:
1.15 timbl 891: PUBLIC BOOL HTSearch ARGS3(
1.2 timbl 892: CONST char *, keywords,
1.15 timbl 893: HTParentAnchor *, here,
894: HTRequest *, request)
1.1 timbl 895: {
1.2 timbl 896:
897: #define acceptable \
898: "1234567890abcdefghijlkmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ.-_"
899:
900: char *q, *u;
901: CONST char * p, *s, *e; /* Pointers into keywords */
902: char * address = HTAnchor_address((HTAnchor*)here);
1.1 timbl 903: BOOL result;
1.56 ! frystyk 904: char * escaped = (char *) malloc(strlen(keywords)*3+1);
1.2 timbl 905:
1.29 frystyk 906: /* static CONST BOOL isAcceptable[96] = */
907: /* static AND const is not good for a gnu compiler! Frystyk 25/02-94 */
1.30 luotonen 908: static BOOL isAcceptable[96] =
1.2 timbl 909: /* 0 1 2 3 4 5 6 7 8 9 A B C D E F */
910: { 0,0,0,0,0,0,0,0,0,0,1,0,0,1,1,0, /* 2x !"#$%&'()*+,-./ */
911: 1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0, /* 3x 0123456789:;<=>? */
912: 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 4x @ABCDEFGHIJKLMNO */
913: 1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,1, /* 5X PQRSTUVWXYZ[\]^_ */
914: 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 6x `abcdefghijklmno */
915: 1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0 }; /* 7X pqrstuvwxyz{\}~ DEL */
916:
917: if (escaped == NULL) outofmem(__FILE__, "HTSearch");
918:
1.29 frystyk 919: /* Convert spaces to + and hex escape unacceptable characters */
1.2 timbl 920:
1.29 frystyk 921: for(s=keywords; *s && WHITE(*s); s++); /*scan */ /* Skip white space */
922: for(e = s + strlen(s); e>s && WHITE(*(e-1)) ; e--); /* Skip trailers */
923: for(q=escaped, p=s; p<e; p++) { /* scan stripped field */
1.2 timbl 924: int c = (int)TOASCII(*p);
925: if (WHITE(*p)) {
926: *q++ = '+';
1.29 frystyk 927: } else if (c>=32 && c<=127 && isAcceptable[c-32] != 0) {
1.13 timbl 928: *q++ = *p; /* 930706 TBL for MVS bug */
1.2 timbl 929: } else {
930: *q++ = '%';
931: *q++ = hex(c / 16);
932: *q++ = hex(c % 16);
933: }
934: } /* Loop over string */
1.1 timbl 935:
1.2 timbl 936: *q=0;
937: /* terminate escaped sctring */
938: u=strchr(address, '?'); /* Find old search string */
939: if (u) *u = 0; /* Chop old search off */
1.1 timbl 940:
941: StrAllocCat(address, "?");
1.2 timbl 942: StrAllocCat(address, escaped);
943: free(escaped);
1.15 timbl 944: result = HTLoadRelative(address, here, request);
1.1 timbl 945: free(address);
1.2 timbl 946:
1.1 timbl 947: return result;
1.2 timbl 948: }
949:
950:
951: /* Search Given Indexname
952: ** ------
953: ** Performs a keyword search on word given by the user. Adds the keyword to
954: ** the end of the current address and attempts to open the new address.
955: **
956: ** On Entry,
957: ** *keywords space-separated keyword list or similar search list
958: ** *addres is name of object search is to be done on.
959: */
960:
1.15 timbl 961: PUBLIC BOOL HTSearchAbsolute ARGS3(
1.2 timbl 962: CONST char *, keywords,
1.15 timbl 963: CONST char *, indexname,
964: HTRequest *, request)
1.2 timbl 965: {
966: HTParentAnchor * anchor =
967: (HTParentAnchor*) HTAnchor_findAddress(indexname);
1.15 timbl 968: return HTSearch(keywords, anchor, request);
1.2 timbl 969: }
970:
971:
972: /* Generate the anchor for the home page
973: ** -------------------------------------
974: **
975: ** As it involves file access, this should only be done once
976: ** when the program first runs.
1.10 timbl 977: ** This is a default algorithm -- browser don't HAVE to use this.
978: ** But consistency betwen browsers is STRONGLY recommended!
1.2 timbl 979: **
1.10 timbl 980: ** Priority order is:
981: **
982: ** 1 WWW_HOME environment variable (logical name, etc)
983: ** 2 ~/WWW/default.html
984: ** 3 /usr/local/bin/default.html
985: ** 4 http://info.cern.ch/default.html
986: **
1.2 timbl 987: */
988: PUBLIC HTParentAnchor * HTHomeAnchor NOARGS
989: {
1.12 timbl 990: char * my_home_document = NULL;
991: char * home = (char *)getenv(LOGICAL_DEFAULT);
1.2 timbl 992: char * ref;
993: HTParentAnchor * anchor;
1.1 timbl 994:
1.12 timbl 995: if (home) {
996: StrAllocCopy(my_home_document, home);
997:
998: /* Someone telnets in, they get a special home.
999: */
1000: #define MAX_FILE_NAME 1024 /* @@@ */
1001: } else if (HTClientHost) { /* Telnet server */
1002: FILE * fp = fopen(REMOTE_POINTER, "r");
1003: char * status;
1004: if (fp) {
1005: my_home_document = (char*) malloc(MAX_FILE_NAME);
1006: status = fgets(my_home_document, MAX_FILE_NAME, fp);
1007: if (!status) {
1008: free(my_home_document);
1009: my_home_document = NULL;
1010: }
1011: fclose(fp);
1012: }
1013: if (!my_home_document) StrAllocCopy(my_home_document, REMOTE_ADDRESS);
1014: }
1015:
1016:
1017:
1.2 timbl 1018: #ifdef unix
1.12 timbl 1019:
1.10 timbl 1020: if (!my_home_document) {
1021: FILE * fp = NULL;
1022: CONST char * home = (CONST char*)getenv("HOME");
1023: if (home) {
1024: my_home_document = (char *)malloc(
1025: strlen(home)+1+ strlen(PERSONAL_DEFAULT)+1);
1026: if (my_home_document == NULL) outofmem(__FILE__, "HTLocalName");
1027: sprintf(my_home_document, "%s/%s", home, PERSONAL_DEFAULT);
1028: fp = fopen(my_home_document, "r");
1029: }
1030:
1031: if (!fp) {
1032: StrAllocCopy(my_home_document, LOCAL_DEFAULT_FILE);
1033: fp = fopen(my_home_document, "r");
1034: }
1.2 timbl 1035: if (fp) {
1036: fclose(fp);
1037: } else {
1038: if (TRACE) fprintf(stderr,
1.10 timbl 1039: "HTBrowse: No local home document ~/%s or %s\n",
1040: PERSONAL_DEFAULT, LOCAL_DEFAULT_FILE);
1.11 timbl 1041: free(my_home_document);
1042: my_home_document = NULL;
1.2 timbl 1043: }
1044: }
1045: #endif
1.10 timbl 1046: ref = HTParse( my_home_document ? my_home_document :
1047: HTClientHost ? REMOTE_ADDRESS
1048: : LAST_RESORT,
1049: "file:",
1.2 timbl 1050: PARSE_ACCESS|PARSE_HOST|PARSE_PATH|PARSE_PUNCTUATION);
1.10 timbl 1051: if (my_home_document) {
1.2 timbl 1052: if (TRACE) fprintf(stderr,
1053: "HTAccess: Using custom home page %s i.e. address %s\n",
1.10 timbl 1054: my_home_document, ref);
1055: free(my_home_document);
1.2 timbl 1056: }
1057: anchor = (HTParentAnchor*) HTAnchor_findAddress(ref);
1058: free(ref);
1059: return anchor;
1.1 timbl 1060: }
1.26 frystyk 1061:
1062:
1063: /* Bind an Anchor to the request structure
1064: ** ---------------------------------------
1065: **
1066: ** On Entry,
1067: ** anchor The child or parenet anchor to be binded
1068: ** request The request sturcture
1069: ** On Exit,
1070: ** returns YES Success
1071: ** NO Failure
1072: **
1073: ** Note: Actually the same as HTLoadAnchor() but DOES NOT do the loading
1074: ** Henrik Frystyk 17/02-94
1075: */
1076:
1077: PUBLIC BOOL HTBindAnchor ARGS2(HTAnchor*, anchor, HTRequest *, request)
1078: {
1079: if (!anchor) return NO; /* No link */
1080:
1081: request->anchor = HTAnchor_parent(anchor);
1082: request->childAnchor = ((HTAnchor*)request->anchor == anchor) ? NULL
1083: : (HTChildAnchor*) anchor;
1084:
1.29 frystyk 1085: return YES;
1.26 frystyk 1086: } /* HTBindAnchor */
1087:
Webmaster