Annotation of libwww/Library/src/HTAccess.c, revision 1.53
1.1 timbl 1: /* Access Manager HTAccess.c
2: ** ==============
3: **
4: ** Authors
5: ** TBL Tim Berners-Lee timbl@info.cern.ch
1.4 timbl 6: ** JFG Jean-Francois Groff jfg@dxcern.cern.ch
1.1 timbl 7: ** DD Denis DeLaRoca (310) 825-4580 <CSP1DWD@mvs.oac.ucla.edu>
8: ** History
9: ** 8 Jun 92 Telnet hopping prohibited as telnet is not secure TBL
10: ** 26 Jun 92 When over DECnet, suppressed FTP, Gopher and News. JFG
1.42 frystyk 11: ** 6 Oct 92 Moved HTClientHost and HTlogfile into here. TBL
1.1 timbl 12: ** 17 Dec 92 Tn3270 added, bug fix. DD
1.2 timbl 13: ** 4 Feb 93 Access registration, Search escapes bad chars TBL
1.9 timbl 14: ** PARAMETERS TO HTSEARCH AND HTLOADRELATIVE CHANGED
15: ** 28 May 93 WAIS gateway explicit if no WAIS library linked in.
1.19 timbl 16: ** Dec 93 Bug change around, more reentrant, etc
1.42 frystyk 17: ** 09 May 94 logfile renamed to HTlogfile to avoid clash with WAIS
1.53 ! duns 18: ** 8 Jul 94 Insulate free() from _free structure element.
1.2 timbl 19: ** Bugs
20: ** This module assumes that the graphic object is hypertext, as it
1.9 timbl 21: ** needs to select it when it has been loaded. A superclass needs to be
1.2 timbl 22: ** defined which accepts select and select_anchor.
1.1 timbl 23: */
24:
1.9 timbl 25: #ifndef DEFAULT_WAIS_GATEWAY
1.8 timbl 26: #define DEFAULT_WAIS_GATEWAY "http://info.cern.ch:8001/"
1.9 timbl 27: #endif
1.8 timbl 28:
1.1 timbl 29: /* Implements:
30: */
31: #include "HTAccess.h"
32:
33: /* Uses:
34: */
35:
36: #include "HTParse.h"
37: #include "HTUtils.h"
1.4 timbl 38: #include "HTML.h" /* SCW */
1.2 timbl 39:
40: #ifndef NO_RULES
41: #include "HTRules.h"
42: #endif
43:
1.1 timbl 44: #include <stdio.h>
45:
1.2 timbl 46: #include "HTList.h"
47: #include "HText.h" /* See bugs above */
48: #include "HTAlert.h"
1.17 timbl 49: #include "HTFWriter.h" /* for cache stuff */
50: #include "HTTee.h"
1.46 frystyk 51: #include "HTError.h"
1.2 timbl 52:
1.1 timbl 53: /* These flags may be set to modify the operation of this module
54: */
1.34 frystyk 55: PUBLIC char * HTCacheDir = 0; /* Root for cached files or 0 for no cache */
56: PUBLIC char * HTSaveLocallyDir = SAVE_LOCALLY_HOME_DIR; /* Save & exe files */
1.1 timbl 57: PUBLIC char * HTClientHost = 0; /* Name of remote login host if any */
1.42 frystyk 58: PUBLIC FILE * HTlogfile = 0; /* File to which to output one-liners */
1.41 luotonen 59:
1.34 frystyk 60: PUBLIC BOOL HTForceReload = NO; /* Force reload from cache or net */
1.12 timbl 61: PUBLIC BOOL HTSecure = NO; /* Disable access for telnet users? */
1.27 luotonen 62: PUBLIC BOOL using_proxy = NO; /* are we using a proxy gateway? */
1.43 luotonen 63: PUBLIC char * HTImServer = NULL;/* cern_httpd sets this to the translated URL*/
1.27 luotonen 64: PUBLIC BOOL HTImProxy = NO; /* cern_httpd as a proxy? */
1.1 timbl 65:
1.43 luotonen 66:
1.2 timbl 67: /* To generate other things, play with these:
68: */
69:
1.15 timbl 70: /* PUBLIC HTFormat HTOutputFormat = NULL; use request->output_format */
71: /* PUBLIC HTStream* HTOutputStream = NULL; use request->output_stream */
1.1 timbl 72:
73: PRIVATE HTList * protocols = NULL; /* List of registered protocol descriptors */
74:
1.24 timbl 75: /* Superclass defn */
1.1 timbl 76:
1.24 timbl 77: struct _HTStream {
78: HTStreamClass * isa;
79: /* ... */
80: };
81:
1.15 timbl 82: /* Create a request structure
83: ** ---------------------------
84: */
85:
86: PUBLIC HTRequest * HTRequest_new NOARGS
87: {
1.28 luotonen 88: HTRequest * me = (HTRequest*) calloc(1, sizeof(*me)); /* zero fill */
1.15 timbl 89: if (!me) outofmem(__FILE__, "HTRequest_new()");
90:
1.20 luotonen 91: me->conversions = HTList_new(); /* No conversions registerd yet */
92: me->output_format = WWW_PRESENT; /* default it to present to user */
93:
1.15 timbl 94: return me;
95: }
96:
97:
1.49 frystyk 98: /* Clear a request structure
99: ** ---------------------------
100: ** This function clears the reguest structure so that only the
101: ** conversions remain. Everything else is as if it was created from
102: ** scratch.
103: */
104: PUBLIC void HTRequest_clear ARGS1(HTRequest *, req)
105: {
106: HTList *conversions;
107: if (!req) {
108: if (TRACE)
109: fprintf(stderr, "Clear....... request: Bad argument!\n");
110: return;
111: }
112: conversions = req->conversions; /* Save the conversions */
113: HTErrorFree(req);
114: HTAACleanup(req);
115: FREE(req->from);
116: memset(req, '\0', sizeof(HTRequest));
117:
118: /* Now initialize as from scratch but with the old list of conversions */
119: req->conversions = conversions;
120: req->output_format = WWW_PRESENT; /* default it to present to user */
121: }
122:
123:
1.20 luotonen 124: /* Delete a request structure
125: ** --------------------------
126: */
127: PUBLIC void HTRequest_delete ARGS1(HTRequest *, req)
128: {
129: if (req) {
1.34 frystyk 130: HTFormatDelete(req->conversions);
1.46 frystyk 131: HTErrorFree(req);
1.34 frystyk 132: HTAACleanup(req);
1.37 luotonen 133: FREE(req->from);
1.34 frystyk 134: FREE(req);
1.20 luotonen 135: }
136: }
137:
138:
1.22 luotonen 139: PRIVATE char * method_names[(int)MAX_METHODS + 1] =
140: {
141: "INVALID-METHOD",
142: "GET",
143: "HEAD",
144: "POST",
145: "PUT",
146: "DELETE",
147: "CHECKOUT",
148: "CHECKIN",
149: "SHOWMETHOD",
150: "LINK",
151: "UNLINK",
152: NULL
153: };
154:
155: /* Get method enum value
156: ** ---------------------
157: */
158: PUBLIC HTMethod HTMethod_enum ARGS1(char *, name)
159: {
160: if (name) {
161: int i;
162: for (i=1; i < (int)MAX_METHODS; i++)
163: if (!strcmp(name, method_names[i]))
164: return (HTMethod)i;
165: }
166: return METHOD_INVALID;
167: }
168:
169:
170: /* Get method name
171: ** ---------------
172: */
173: PUBLIC char * HTMethod_name ARGS1(HTMethod, method)
174: {
175: if ((int)method > (int)METHOD_INVALID &&
176: (int)method < (int)MAX_METHODS)
177: return method_names[(int)method];
178: else
179: return method_names[(int)METHOD_INVALID];
180: }
181:
182:
183: /* Is method in a list of method names?
184: ** -----------------------------------
185: */
186: PUBLIC BOOL HTMethod_inList ARGS2(HTMethod, method,
187: HTList *, list)
188: {
189: char * method_name = HTMethod_name(method);
190: HTList *cur = list;
191: char *item;
192:
193: while (NULL != (item = (char*)HTList_nextObject(cur))) {
194: CTRACE(stderr, " %s", item);
195: if (0==strcasecomp(item, method_name))
196: return YES;
197: }
198: return NO; /* Not found */
199: }
200:
201:
202:
203:
1.20 luotonen 204:
1.1 timbl 205: /* Register a Protocol HTRegisterProtocol
206: ** -------------------
207: */
208:
209: PUBLIC BOOL HTRegisterProtocol(protocol)
210: HTProtocol * protocol;
211: {
212: if (!protocols) protocols = HTList_new();
213: HTList_addObject(protocols, protocol);
214: return YES;
215: }
216:
217:
218: /* Register all known protocols
219: ** ----------------------------
220: **
221: ** Add to or subtract from this list if you add or remove protocol modules.
222: ** This routine is called the first time the protocol list is needed,
1.52 frystyk 223: ** unless any protocols are already registered, in which case it is not
224: ** called. Therefore the application can override this list.
1.1 timbl 225: **
226: ** Compiling with NO_INIT prevents all known protocols from being forced
227: ** in at link time.
228: */
229: #ifndef NO_INIT
230: PRIVATE void HTAccessInit NOARGS /* Call me once */
231: {
1.14 duns 232: GLOBALREF HTProtocol HTTP, HTFile, HTTelnet, HTTn3270, HTRlogin;
1.1 timbl 233: #ifndef DECNET
1.14 duns 234: GLOBALREF HTProtocol HTFTP, HTNews, HTGopher;
1.42 frystyk 235:
236: /* This is the replacement when HTWhoIs gets a complete protocol module */
237: /* GLOBALREF HTProtocol HTFTP, HTNews, HTGopher, HTWhoIs; */
238: /* -------------------------------------------------------------------- */
239:
1.3 timbl 240: #ifdef DIRECT_WAIS
1.14 duns 241: GLOBALREF HTProtocol HTWAIS;
1.3 timbl 242: #endif
1.2 timbl 243: HTRegisterProtocol(&HTFTP);
244: HTRegisterProtocol(&HTNews);
245: HTRegisterProtocol(&HTGopher);
1.42 frystyk 246:
247: /* This should be added when HTWhoIs gets a complete protocol module */
248: /* HTRegisterProtocol(&HTWhoIs); */
249: /* ----------------------------------------------------------------- */
1.3 timbl 250: #ifdef DIRECT_WAIS
251: HTRegisterProtocol(&HTWAIS);
252: #endif
1.1 timbl 253: #endif
254:
1.2 timbl 255: HTRegisterProtocol(&HTTP);
256: HTRegisterProtocol(&HTFile);
257: HTRegisterProtocol(&HTTelnet);
258: HTRegisterProtocol(&HTTn3270);
259: HTRegisterProtocol(&HTRlogin);
1.1 timbl 260: }
261: #endif
262:
263:
1.33 luotonen 264:
265: /* override_proxy()
266: **
267: ** Check the no_proxy environment variable to get the list
268: ** of hosts for which proxy server is not consulted.
269: **
270: ** no_proxy is a comma- or space-separated list of machine
271: ** or domain names, with optional :port part. If no :port
272: ** part is present, it applies to all ports on that domain.
273: **
274: ** Example:
275: ** no_proxy="cern.ch,some.domain:8001"
276: **
277: */
278: PRIVATE BOOL override_proxy ARGS1(CONST char *, addr)
279: {
280: CONST char * no_proxy = getenv("no_proxy");
281: char * p = NULL;
282: char * host = NULL;
283: int port = 0;
284: int h_len = 0;
285:
286: if (!no_proxy || !addr || !(host = HTParse(addr, "", PARSE_HOST)))
287: return NO;
288: if (!*host) { free(host); return NO; }
289:
1.34 frystyk 290: if ((p = strchr(host, ':')) != NULL) { /* Port specified */
1.33 luotonen 291: *p++ = 0; /* Chop off port */
292: port = atoi(p);
293: }
294: else { /* Use default port */
295: char * access = HTParse(addr, "", PARSE_ACCESS);
296: if (access) {
297: if (!strcmp(access,"http")) port = 80;
298: else if (!strcmp(access,"gopher")) port = 70;
299: else if (!strcmp(access,"ftp")) port = 21;
300: free(access);
301: }
302: }
303: if (!port) port = 80; /* Default */
304: h_len = strlen(host);
305:
306: while (*no_proxy) {
307: CONST char * end;
308: CONST char * colon = NULL;
309: int templ_port = 0;
310: int t_len;
311:
312: while (*no_proxy && (WHITE(*no_proxy) || *no_proxy==','))
313: no_proxy++; /* Skip whitespace and separators */
314:
315: end = no_proxy;
316: while (*end && !WHITE(*end) && *end != ',') { /* Find separator */
317: if (*end==':') colon = end; /* Port number given */
318: end++;
319: }
320:
321: if (colon) {
322: templ_port = atoi(colon+1);
323: t_len = colon - no_proxy;
324: }
325: else {
326: t_len = end - no_proxy;
327: }
328:
329: if ((!templ_port || templ_port == port) &&
330: (t_len > 0 && t_len <= h_len &&
331: !strncmp(host + h_len - t_len, no_proxy, t_len))) {
332: free(host);
333: return YES;
334: }
335: if (*end) no_proxy = end+1;
336: else break;
337: }
338:
339: free(host);
340: return NO;
341: }
342:
343:
344:
1.2 timbl 345: /* Find physical name and access protocol
346: ** --------------------------------------
1.1 timbl 347: **
348: **
349: ** On entry,
350: ** addr must point to the fully qualified hypertext reference.
351: ** anchor a pareent anchor with whose address is addr
352: **
353: ** On exit,
1.2 timbl 354: ** returns HT_NO_ACCESS Error has occured.
355: ** HT_OK Success
1.1 timbl 356: **
357: */
1.21 luotonen 358: PRIVATE int get_physical ARGS1(HTRequest *, req)
359: {
1.1 timbl 360: char * access=0; /* Name of access method */
1.21 luotonen 361: char * addr = HTAnchor_address((HTAnchor*)req->anchor); /* free me */
1.27 luotonen 362:
1.35 luotonen 363: /*
364: ** This HACK is here until we have redirection implemented.
365: ** This is used when we are recursively calling HTLoad().
366: ** We then take the physical address, because currently the
367: ** virtual address is kept in a hash table so it can't be
368: ** changed -- otherwise it wouldn't be found anymore.
369: */
1.36 luotonen 370: if (HTAnchor_physical(req->anchor))
371: StrAllocCopy(addr, HTAnchor_physical(req->anchor));
1.35 luotonen 372:
1.2 timbl 373: #ifndef NO_RULES
1.47 luotonen 374: if (HTImServer) { /* cern_httpd has already done its own translations */
1.45 luotonen 375: HTAnchor_setPhysical(req->anchor, HTImServer);
1.47 luotonen 376: StrAllocCopy(addr, HTImServer); /* Oops, queries thru many proxies */
377: /* didn't work without this -- AL */
378: }
1.21 luotonen 379: else {
1.27 luotonen 380: char * physical = HTTranslate(addr);
1.21 luotonen 381: if (!physical) {
1.47 luotonen 382: free(addr);
1.21 luotonen 383: return HT_FORBIDDEN;
384: }
385: HTAnchor_setPhysical(req->anchor, physical);
386: free(physical); /* free our copy */
1.2 timbl 387: }
388: #else
1.21 luotonen 389: HTAnchor_setPhysical(req->anchor, addr);
1.2 timbl 390: #endif
391:
1.21 luotonen 392: access = HTParse(HTAnchor_physical(req->anchor),
1.27 luotonen 393: "file:", PARSE_ACCESS);
1.1 timbl 394:
395: /* Check whether gateway access has been set up for this
1.8 timbl 396: **
397: ** This function can be replaced by the rule system above.
1.1 timbl 398: */
1.8 timbl 399: #define USE_GATEWAYS
1.1 timbl 400: #ifdef USE_GATEWAYS
1.39 luotonen 401:
402: /* make sure the using_proxy variable is false */
403: using_proxy = NO;
404:
1.33 luotonen 405: if (!override_proxy(addr)) {
1.27 luotonen 406: char * gateway_parameter, *gateway, *proxy;
407:
1.2 timbl 408: gateway_parameter = (char *)malloc(strlen(access)+20);
409: if (gateway_parameter == NULL) outofmem(__FILE__, "HTLoad");
1.27 luotonen 410:
411: /* search for proxy gateways */
1.2 timbl 412: strcpy(gateway_parameter, "WWW_");
413: strcat(gateway_parameter, access);
414: strcat(gateway_parameter, "_GATEWAY");
415: gateway = (char *)getenv(gateway_parameter); /* coerce for decstation */
1.27 luotonen 416:
417: /* search for proxy servers */
418: strcpy(gateway_parameter, access);
419: strcat(gateway_parameter, "_proxy");
420: proxy = (char *)getenv(gateway_parameter);
421:
1.2 timbl 422: free(gateway_parameter);
1.27 luotonen 423:
424: if (TRACE && gateway)
425: fprintf(stderr,"Gateway found: %s\n",gateway);
426: if (TRACE && proxy)
427: fprintf(stderr,"Proxy server found: %s\n",proxy);
428:
1.8 timbl 429: #ifndef DIRECT_WAIS
1.9 timbl 430: if (!gateway && 0==strcmp(access, "wais")) {
1.8 timbl 431: gateway = DEFAULT_WAIS_GATEWAY;
432: }
433: #endif
1.27 luotonen 434:
435: /* proxy servers have precedence over gateway servers */
436: if (proxy) {
437: char * gatewayed=0;
438:
439: StrAllocCopy(gatewayed,proxy);
440: StrAllocCat(gatewayed,addr);
441: using_proxy = YES;
442: HTAnchor_setPhysical(req->anchor, gatewayed);
443: free(gatewayed);
444: free(access);
445:
446: access = HTParse(HTAnchor_physical(req->anchor),
447: "http:", PARSE_ACCESS);
448: } else if (gateway) {
1.9 timbl 449: char * path = HTParse(addr, "",
450: PARSE_HOST + PARSE_PATH + PARSE_PUNCTUATION);
451: /* Chop leading / off to make host into part of path */
452: char * gatewayed = HTParse(path+1, gateway, PARSE_ALL);
453: free(path);
1.21 luotonen 454: HTAnchor_setPhysical(req->anchor, gatewayed);
1.9 timbl 455: free(gatewayed);
1.2 timbl 456: free(access);
1.9 timbl 457:
1.21 luotonen 458: access = HTParse(HTAnchor_physical(req->anchor),
1.8 timbl 459: "http:", PARSE_ACCESS);
1.2 timbl 460: }
461: }
1.1 timbl 462: #endif
463:
1.19 timbl 464: free(addr);
1.1 timbl 465:
466:
467: /* Search registered protocols to find suitable one
468: */
469: {
1.20 luotonen 470: HTList *cur;
471: HTProtocol *p;
1.1 timbl 472: #ifndef NO_INIT
1.2 timbl 473: if (!protocols) HTAccessInit();
1.1 timbl 474: #endif
1.20 luotonen 475: cur = protocols;
476: while ((p = (HTProtocol*)HTList_nextObject(cur))) {
1.2 timbl 477: if (strcmp(p->name, access)==0) {
1.21 luotonen 478: HTAnchor_setProtocol(req->anchor, p);
1.2 timbl 479: free(access);
480: return (HT_OK);
1.1 timbl 481: }
482: }
483: }
484:
485: free(access);
1.2 timbl 486: return HT_NO_ACCESS;
1.1 timbl 487: }
488:
489:
490: /* Load a document
491: ** ---------------
492: **
1.2 timbl 493: ** This is an internal routine, which has an address AND a matching
494: ** anchor. (The public routines are called with one OR the other.)
495: **
496: ** On entry,
1.15 timbl 497: ** request->
1.35 luotonen 498: ** anchor a parent anchor with fully qualified
499: ** hypertext reference as its address set
1.15 timbl 500: ** output_format valid
501: ** output_stream valid on NULL
1.2 timbl 502: **
503: ** On exit,
504: ** returns <0 Error has occured.
505: ** HT_LOADED Success
506: ** HT_NO_DATA Success, but no document loaded.
1.8 timbl 507: ** (telnet sesssion started etc)
1.2 timbl 508: **
509: */
1.52 frystyk 510: PUBLIC int HTLoad ARGS2(HTRequest *, request, BOOL, keep_error_stack)
1.2 timbl 511: {
1.25 frystyk 512: char *arg = NULL;
513: HTProtocol *p;
514: int status;
515:
1.22 luotonen 516: if (request->method == METHOD_INVALID)
517: request->method = METHOD_GET;
1.52 frystyk 518: if (!keep_error_stack) {
519: HTErrorFree(request);
520: request->error_block = NO;
521: }
522:
1.21 luotonen 523: status = get_physical(request);
1.2 timbl 524: if (status == HT_FORBIDDEN) {
1.49 frystyk 525: char *url = HTAnchor_address((HTAnchor *) request->anchor);
526: if (url) {
527: HTUnEscape(url);
528: HTErrorAdd(request, ERR_FATAL, NO, HTERR_FORBIDDEN,
529: (void *) url, (int) strlen(url), "HTLoad");
530: free(url);
531: } else {
532: HTErrorAdd(request, ERR_FATAL, NO, HTERR_FORBIDDEN,
533: NULL, 0, "HTLoad");
534: }
535: return -1;
1.2 timbl 536: }
537: if (status < 0) return status; /* Can't resolve or forbidden */
1.25 frystyk 538:
539: if(!(arg = HTAnchor_physical(request->anchor)) || !*arg)
540: return (-1);
1.27 luotonen 541:
1.15 timbl 542: p = HTAnchor_protocol(request->anchor);
1.17 timbl 543: return (*(p->load))(request);
1.2 timbl 544: }
545:
546:
547: /* Get a save stream for a document
548: ** --------------------------------
549: */
1.19 timbl 550: PUBLIC HTStream *HTSaveStream ARGS1(HTRequest *, request)
1.15 timbl 551: {
552: HTProtocol * p;
1.19 timbl 553: int status;
1.22 luotonen 554: request->method = METHOD_PUT;
1.21 luotonen 555: status = get_physical(request);
1.19 timbl 556: if (status == HT_FORBIDDEN) {
1.49 frystyk 557: char *url = HTAnchor_address((HTAnchor *) request->anchor);
558: if (url) {
559: HTUnEscape(url);
560: HTErrorAdd(request, ERR_FATAL, NO, HTERR_FORBIDDEN,
561: (void *) url, (int) strlen(url), "HTLoad");
562: free(url);
563: } else {
564: HTErrorAdd(request, ERR_FATAL, NO, HTERR_FORBIDDEN,
565: NULL, 0, "HTLoad");
566: }
567: return NULL; /* should return error status? */
1.19 timbl 568: }
569: if (status < 0) return NULL; /* @@ error. Can't resolve or forbidden */
570:
1.15 timbl 571: p = HTAnchor_protocol(request->anchor);
1.2 timbl 572: if (!p) return NULL;
573:
1.15 timbl 574: return (*p->saveStream)(request);
1.2 timbl 575:
576: }
577:
578:
579: /* Load a document - with logging etc
580: ** ----------------------------------
581: **
582: ** - Checks or documents already loaded
583: ** - Logs the access
584: ** - Allows stdin filter option
585: ** - Trace ouput and error messages
586: **
1.1 timbl 587: ** On Entry,
1.19 timbl 588: ** request->anchor valid for of the document to be accessed.
589: ** request->childAnchor optional anchor within doc to be selected
590: **
1.2 timbl 591: ** filter if YES, treat stdin as HTML
1.1 timbl 592: **
1.15 timbl 593: ** request->anchor is the node_anchor for the document
594: ** request->output_format is valid
595: **
1.1 timbl 596: ** On Exit,
597: ** returns YES Success in opening document
598: ** NO Failure
599: **
600: */
601:
1.52 frystyk 602: PRIVATE BOOL HTLoadDocument ARGS2(HTRequest *, request,
603: BOOL, keep_error_stack)
1.1 timbl 604:
605: {
606: int status;
607: HText * text;
1.19 timbl 608: char * full_address = HTAnchor_address((HTAnchor*)request->anchor);
609:
1.49 frystyk 610: if (TRACE) fprintf (stderr, "HTAccess.... Loading document %s\n",
611: full_address);
1.1 timbl 612:
1.18 timbl 613: request->using_cache = NULL;
614:
1.15 timbl 615: if (!request->output_format) request->output_format = WWW_PRESENT;
1.25 frystyk 616:
1.31 frystyk 617: if (!HTForceReload && (text=(HText *)HTAnchor_document(request->anchor)))
1.15 timbl 618: { /* Already loaded */
1.1 timbl 619: if (TRACE) fprintf(stderr, "HTAccess: Document already in memory.\n");
1.19 timbl 620: if (request->childAnchor) {
621: HText_selectAnchor(text, request->childAnchor);
622: } else {
623: HText_select(text);
624: }
625: free(full_address);
1.1 timbl 626: return YES;
627: }
1.17 timbl 628:
1.34 frystyk 629: /* Check the Cache */
630: /* Caching is ONLY done if (char*) HTCacheDir is set. Henrik 09/03-94 */
1.17 timbl 631: /* Bug: for each format, we only check whether it is ok, we
632: don't check them all and chose the best */
1.38 timbl 633: if (/* HTCacheDir && */ request->anchor->cacheItems) {
1.17 timbl 634: HTList * list = request->anchor->cacheItems;
1.20 luotonen 635: HTList * cur = list;
636: HTCacheItem * item;
637:
638: while ((item = (HTCacheItem*)HTList_nextObject(cur))) {
1.18 timbl 639: HTStream * s;
640:
641: request->using_cache = item;
642:
1.37 luotonen 643: s = HTStreamStack(item->format, request, NO);
1.17 timbl 644: if (s) { /* format was suitable */
645: FILE * fp = fopen(item->filename, "r");
1.18 timbl 646: if (TRACE) fprintf(stderr, "Cache: HIT file %s for %s\n",
1.20 luotonen 647: item->filename,
648: full_address);
1.17 timbl 649: if (fp) {
650: HTFileCopy(fp, s);
1.53 ! duns 651: (*s->isa->_free)(s); /* close up pipeline */
1.17 timbl 652: fclose(fp);
1.19 timbl 653: free(full_address);
1.17 timbl 654: return YES;
655: } else {
656: fprintf(stderr, "***** Can't read cache file %s !\n",
1.20 luotonen 657: item->filename);
1.17 timbl 658: } /* file open ok */
659: } /* stream ok */
660: } /* next cache item */
661: } /* if cache available for this anchor */
1.1 timbl 662:
1.52 frystyk 663: status = HTLoad(request, keep_error_stack);
1.2 timbl 664:
1.1 timbl 665: /* Log the access if necessary
666: */
1.42 frystyk 667: if (HTlogfile) {
1.1 timbl 668: time_t theTime;
669: time(&theTime);
1.42 frystyk 670: fprintf(HTlogfile, "%24.24s %s %s %s\n",
1.1 timbl 671: ctime(&theTime),
672: HTClientHost ? HTClientHost : "local",
673: status<0 ? "FAIL" : "GET",
674: full_address);
1.42 frystyk 675: fflush(HTlogfile); /* Actually update it on disk */
1.1 timbl 676: if (TRACE) fprintf(stderr, "Log: %24.24s %s %s %s\n",
677: ctime(&theTime),
678: HTClientHost ? HTClientHost : "local",
679: status<0 ? "FAIL" : "GET",
680: full_address);
681: }
682:
1.52 frystyk 683: /* The error stack might contain general information to the client
684: about what has been going on in the library (not only errors) */
685: if (request->error_stack)
686: HTErrorMsg(request);
687:
1.1 timbl 688: if (status == HT_LOADED) {
689: if (TRACE) {
690: fprintf(stderr, "HTAccess: `%s' has been accessed.\n",
691: full_address);
692: }
1.19 timbl 693: free(full_address);
1.1 timbl 694: return YES;
695: }
696:
697: if (status == HT_NO_DATA) {
698: if (TRACE) {
699: fprintf(stderr,
700: "HTAccess: `%s' has been accessed, No data left.\n",
701: full_address);
702: }
1.19 timbl 703: free(full_address);
1.1 timbl 704: return NO;
705: }
706:
1.34 frystyk 707: /* Bug fix thanks to Lou Montulli. Henrik 10/03-94 */
708: if (status<=0) { /* Failure in accessing a document */
1.1 timbl 709: #ifdef CURSES
710: user_message("Can't access `%s'", full_address);
711: #else
1.5 timbl 712: if (TRACE) fprintf(stderr,
713: "HTAccess: Can't access `%s'\n", full_address);
1.1 timbl 714: #endif
1.19 timbl 715: free(full_address);
1.1 timbl 716: return NO;
717: }
1.9 timbl 718:
719: /* If you get this, then please find which routine is returning
720: a positive unrecognised error code! */
1.1 timbl 721: fprintf(stderr,
1.50 frystyk 722: "**** HTAccess: Internal software error in CERN WWWLib version %s ****\n\nPlease mail www-bug@info.cern.ch quoting what software and what version you are using\nand the URL: %s that caused the problem, thanks!\n",
723: HTLibraryVersion,
724: full_address);
1.19 timbl 725: free(full_address);
726:
1.1 timbl 727: exit(-6996);
1.20 luotonen 728: return NO; /* For gcc :-( */
1.2 timbl 729: } /* HTLoadDocument */
1.1 timbl 730:
731:
732:
733: /* Load a document from absolute name
734: ** ---------------
735: **
736: ** On Entry,
737: ** addr The absolute address of the document to be accessed.
738: ** filter if YES, treat document as HTML
739: **
740: ** On Exit,
741: ** returns YES Success in opening document
742: ** NO Failure
743: **
744: **
745: */
746:
1.15 timbl 747: PUBLIC BOOL HTLoadAbsolute ARGS2(CONST char *,addr, HTRequest*, request)
1.2 timbl 748: {
1.19 timbl 749: HTAnchor * anchor = HTAnchor_findAddress(addr);
750: request->anchor = HTAnchor_parent(anchor);
751: request->childAnchor = ((HTAnchor*)request->anchor == anchor) ?
752: NULL : (HTChildAnchor*) anchor;
1.52 frystyk 753: return HTLoadDocument(request, NO);
1.2 timbl 754: }
755:
756:
757: /* Load a document from absolute name to stream
758: ** --------------------------------------------
759: **
760: ** On Entry,
761: ** addr The absolute address of the document to be accessed.
1.15 timbl 762: ** request->output_stream if non-NULL, send data down this stream
1.2 timbl 763: **
764: ** On Exit,
765: ** returns YES Success in opening document
766: ** NO Failure
767: **
768: **
769: */
770:
771: PUBLIC BOOL HTLoadToStream ARGS3(
772: CONST char *, addr,
773: BOOL, filter,
1.15 timbl 774: HTRequest*, request)
1.1 timbl 775: {
1.19 timbl 776: HTAnchor * anchor = HTAnchor_findAddress(addr);
777: request->anchor = HTAnchor_parent(anchor);
778: request->childAnchor = ((HTAnchor*)request->anchor == anchor) ? NULL :
779: (HTChildAnchor*) anchor;
1.15 timbl 780: request->output_stream = request->output_stream;
1.52 frystyk 781: return HTLoadDocument(request, NO);
1.1 timbl 782: }
783:
784:
1.2 timbl 785:
786:
1.1 timbl 787: /* Load a document from relative name
788: ** ---------------
789: **
790: ** On Entry,
1.2 timbl 791: ** relative_name The relative address of the document
792: ** to be accessed.
1.1 timbl 793: **
794: ** On Exit,
795: ** returns YES Success in opening document
796: ** NO Failure
797: **
798: **
799: */
800:
1.15 timbl 801: PUBLIC BOOL HTLoadRelative ARGS3(
1.2 timbl 802: CONST char *, relative_name,
1.15 timbl 803: HTParentAnchor *, here,
1.20 luotonen 804: HTRequest *, request)
1.1 timbl 805: {
806: char * full_address = 0;
807: BOOL result;
808: char * mycopy = 0;
809: char * stripped = 0;
810: char * current_address =
1.2 timbl 811: HTAnchor_address((HTAnchor*)here);
1.1 timbl 812:
813: StrAllocCopy(mycopy, relative_name);
814:
815: stripped = HTStrip(mycopy);
816: full_address = HTParse(stripped,
817: current_address,
818: PARSE_ACCESS|PARSE_HOST|PARSE_PATH|PARSE_PUNCTUATION);
1.15 timbl 819: result = HTLoadAbsolute(full_address, request);
1.1 timbl 820: free(full_address);
821: free(current_address);
822: free(mycopy); /* Memory leak fixed 10/7/92 -- JFG */
823: return result;
824: }
825:
826:
827: /* Load if necessary, and select an anchor
828: ** --------------------------------------
829: **
830: ** On Entry,
831: ** destination The child or parenet anchor to be loaded.
832: **
833: ** On Exit,
834: ** returns YES Success
835: ** NO Failure
836: **
837: */
838:
1.15 timbl 839: PUBLIC BOOL HTLoadAnchor ARGS2(HTAnchor*, anchor, HTRequest *, request)
1.1 timbl 840: {
1.15 timbl 841: if (!anchor) return NO; /* No link */
1.1 timbl 842:
1.15 timbl 843: request->anchor = HTAnchor_parent(anchor);
1.19 timbl 844: request->childAnchor = ((HTAnchor*)request->anchor == anchor) ? NULL
845: : (HTChildAnchor*) anchor;
1.1 timbl 846:
1.52 frystyk 847: return HTLoadDocument(request, NO) ? YES : NO;
848:
849: } /* HTLoadAnchor */
850:
851:
852: /* Load if necessary, and select an anchor
853: ** --------------------------------------
854: **
855: ** This function is almost identical to HTLoadAnchor, but it doesn't
856: ** clear the error stack so that the information in there is kept.
857: **
858: ** On Entry,
859: ** destination The child or parenet anchor to be loaded.
860: **
861: ** On Exit,
862: ** returns YES Success
863: ** NO Failure
864: **
865: */
866:
867: PUBLIC BOOL HTLoadAnchorRecursive ARGS2(HTAnchor*, anchor,
868: HTRequest *, request)
869: {
870: if (!anchor) return NO; /* No link */
871:
872: request->anchor = HTAnchor_parent(anchor);
873: request->childAnchor = ((HTAnchor*)request->anchor == anchor) ? NULL
874: : (HTChildAnchor*) anchor;
875:
876: return HTLoadDocument(request, YES) ? YES : NO;
1.1 timbl 877:
878: } /* HTLoadAnchor */
879:
880:
881: /* Search
882: ** ------
883: ** Performs a keyword search on word given by the user. Adds the keyword to
884: ** the end of the current address and attempts to open the new address.
885: **
886: ** On Entry,
887: ** *keywords space-separated keyword list or similar search list
1.2 timbl 888: ** here is anchor search is to be done on.
1.1 timbl 889: */
890:
/*  Return the upper-case hexadecimal digit for a value.
**
**  Only the low four bits of i are used, so a value outside 0..15 can
**  never index beyond the digit table.
*/
PRIVATE char hex ARGS1(int, i)
{
    char * hexchars = "0123456789ABCDEF";
    return hexchars[i & 0x0F];
}
1.1 timbl 897:
1.15 timbl 898: PUBLIC BOOL HTSearch ARGS3(
1.2 timbl 899: CONST char *, keywords,
1.15 timbl 900: HTParentAnchor *, here,
901: HTRequest *, request)
1.1 timbl 902: {
1.2 timbl 903:
904: #define acceptable \
905: "1234567890abcdefghijlkmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ.-_"
906:
907: char *q, *u;
908: CONST char * p, *s, *e; /* Pointers into keywords */
909: char * address = HTAnchor_address((HTAnchor*)here);
1.1 timbl 910: BOOL result;
1.2 timbl 911: char * escaped = malloc(strlen(keywords)*3+1);
912:
1.29 frystyk 913: /* static CONST BOOL isAcceptable[96] = */
914: /* static AND const is not good for a gnu compiler! Frystyk 25/02-94 */
1.30 luotonen 915: static BOOL isAcceptable[96] =
1.2 timbl 916: /* 0 1 2 3 4 5 6 7 8 9 A B C D E F */
917: { 0,0,0,0,0,0,0,0,0,0,1,0,0,1,1,0, /* 2x !"#$%&'()*+,-./ */
918: 1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0, /* 3x 0123456789:;<=>? */
919: 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 4x @ABCDEFGHIJKLMNO */
920: 1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,1, /* 5X PQRSTUVWXYZ[\]^_ */
921: 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 6x `abcdefghijklmno */
922: 1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0 }; /* 7X pqrstuvwxyz{\}~ DEL */
923:
924: if (escaped == NULL) outofmem(__FILE__, "HTSearch");
925:
1.29 frystyk 926: /* Convert spaces to + and hex escape unacceptable characters */
1.2 timbl 927:
1.29 frystyk 928: for(s=keywords; *s && WHITE(*s); s++); /*scan */ /* Skip white space */
929: for(e = s + strlen(s); e>s && WHITE(*(e-1)) ; e--); /* Skip trailers */
930: for(q=escaped, p=s; p<e; p++) { /* scan stripped field */
1.2 timbl 931: int c = (int)TOASCII(*p);
932: if (WHITE(*p)) {
933: *q++ = '+';
1.29 frystyk 934: } else if (c>=32 && c<=127 && isAcceptable[c-32] != 0) {
1.13 timbl 935: *q++ = *p; /* 930706 TBL for MVS bug */
1.2 timbl 936: } else {
937: *q++ = '%';
938: *q++ = hex(c / 16);
939: *q++ = hex(c % 16);
940: }
941: } /* Loop over string */
1.1 timbl 942:
1.2 timbl 943: *q=0;
944: /* terminate escaped sctring */
945: u=strchr(address, '?'); /* Find old search string */
946: if (u) *u = 0; /* Chop old search off */
1.1 timbl 947:
948: StrAllocCat(address, "?");
1.2 timbl 949: StrAllocCat(address, escaped);
950: free(escaped);
1.15 timbl 951: result = HTLoadRelative(address, here, request);
1.1 timbl 952: free(address);
1.2 timbl 953:
1.1 timbl 954: return result;
1.2 timbl 955: }
956:
957:
958: /* Search Given Indexname
959: ** ------
960: ** Performs a keyword search on word given by the user. Adds the keyword to
961: ** the end of the current address and attempts to open the new address.
962: **
963: ** On Entry,
964: ** *keywords space-separated keyword list or similar search list
965: ** *addres is name of object search is to be done on.
966: */
967:
1.15 timbl 968: PUBLIC BOOL HTSearchAbsolute ARGS3(
1.2 timbl 969: CONST char *, keywords,
1.15 timbl 970: CONST char *, indexname,
971: HTRequest *, request)
1.2 timbl 972: {
973: HTParentAnchor * anchor =
974: (HTParentAnchor*) HTAnchor_findAddress(indexname);
1.15 timbl 975: return HTSearch(keywords, anchor, request);
1.2 timbl 976: }
977:
978:
979: /* Generate the anchor for the home page
980: ** -------------------------------------
981: **
982: ** As it involves file access, this should only be done once
983: ** when the program first runs.
1.10 timbl 984: ** This is a default algorithm -- browser don't HAVE to use this.
985: ** But consistency betwen browsers is STRONGLY recommended!
1.2 timbl 986: **
1.10 timbl 987: ** Priority order is:
988: **
989: ** 1 WWW_HOME environment variable (logical name, etc)
990: ** 2 ~/WWW/default.html
991: ** 3 /usr/local/bin/default.html
992: ** 4 http://info.cern.ch/default.html
993: **
1.2 timbl 994: */
995: PUBLIC HTParentAnchor * HTHomeAnchor NOARGS
996: {
1.12 timbl 997: char * my_home_document = NULL;
998: char * home = (char *)getenv(LOGICAL_DEFAULT);
1.2 timbl 999: char * ref;
1000: HTParentAnchor * anchor;
1.1 timbl 1001:
1.12 timbl 1002: if (home) {
1003: StrAllocCopy(my_home_document, home);
1004:
1005: /* Someone telnets in, they get a special home.
1006: */
1007: #define MAX_FILE_NAME 1024 /* @@@ */
1008: } else if (HTClientHost) { /* Telnet server */
1009: FILE * fp = fopen(REMOTE_POINTER, "r");
1010: char * status;
1011: if (fp) {
1012: my_home_document = (char*) malloc(MAX_FILE_NAME);
1013: status = fgets(my_home_document, MAX_FILE_NAME, fp);
1014: if (!status) {
1015: free(my_home_document);
1016: my_home_document = NULL;
1017: }
1018: fclose(fp);
1019: }
1020: if (!my_home_document) StrAllocCopy(my_home_document, REMOTE_ADDRESS);
1021: }
1022:
1023:
1024:
1.2 timbl 1025: #ifdef unix
1.12 timbl 1026:
1.10 timbl 1027: if (!my_home_document) {
1028: FILE * fp = NULL;
1029: CONST char * home = (CONST char*)getenv("HOME");
1030: if (home) {
1031: my_home_document = (char *)malloc(
1032: strlen(home)+1+ strlen(PERSONAL_DEFAULT)+1);
1033: if (my_home_document == NULL) outofmem(__FILE__, "HTLocalName");
1034: sprintf(my_home_document, "%s/%s", home, PERSONAL_DEFAULT);
1035: fp = fopen(my_home_document, "r");
1036: }
1037:
1038: if (!fp) {
1039: StrAllocCopy(my_home_document, LOCAL_DEFAULT_FILE);
1040: fp = fopen(my_home_document, "r");
1041: }
1.2 timbl 1042: if (fp) {
1043: fclose(fp);
1044: } else {
1045: if (TRACE) fprintf(stderr,
1.10 timbl 1046: "HTBrowse: No local home document ~/%s or %s\n",
1047: PERSONAL_DEFAULT, LOCAL_DEFAULT_FILE);
1.11 timbl 1048: free(my_home_document);
1049: my_home_document = NULL;
1.2 timbl 1050: }
1051: }
1052: #endif
1.10 timbl 1053: ref = HTParse( my_home_document ? my_home_document :
1054: HTClientHost ? REMOTE_ADDRESS
1055: : LAST_RESORT,
1056: "file:",
1.2 timbl 1057: PARSE_ACCESS|PARSE_HOST|PARSE_PATH|PARSE_PUNCTUATION);
1.10 timbl 1058: if (my_home_document) {
1.2 timbl 1059: if (TRACE) fprintf(stderr,
1060: "HTAccess: Using custom home page %s i.e. address %s\n",
1.10 timbl 1061: my_home_document, ref);
1062: free(my_home_document);
1.2 timbl 1063: }
1064: anchor = (HTParentAnchor*) HTAnchor_findAddress(ref);
1065: free(ref);
1066: return anchor;
1.1 timbl 1067: }
1.26 frystyk 1068:
1069:
1070: /* Bind an Anchor to the request structure
1071: ** ---------------------------------------
1072: **
1073: ** On Entry,
1074: ** anchor The child or parenet anchor to be binded
1075: ** request The request sturcture
1076: ** On Exit,
1077: ** returns YES Success
1078: ** NO Failure
1079: **
1080: ** Note: Actually the same as HTLoadAnchor() but DOES NOT do the loading
1081: ** Henrik Frystyk 17/02-94
1082: */
1083:
1084: PUBLIC BOOL HTBindAnchor ARGS2(HTAnchor*, anchor, HTRequest *, request)
1085: {
1086: if (!anchor) return NO; /* No link */
1087:
1088: request->anchor = HTAnchor_parent(anchor);
1089: request->childAnchor = ((HTAnchor*)request->anchor == anchor) ? NULL
1090: : (HTChildAnchor*) anchor;
1091:
1.29 frystyk 1092: return YES;
1.26 frystyk 1093: } /* HTBindAnchor */
1094:
Webmaster