Annotation of libwww/Library/src/HTAccess.c, revision 1.55
1.1 timbl 1: /* Access Manager HTAccess.c
2: ** ==============
3: **
4: ** Authors
5: ** TBL Tim Berners-Lee timbl@info.cern.ch
1.4 timbl 6: ** JFG Jean-Francois Groff jfg@dxcern.cern.ch
1.1 timbl 7: ** DD Denis DeLaRoca (310) 825-4580 <CSP1DWD@mvs.oac.ucla.edu>
8: ** History
9: ** 8 Jun 92 Telnet hopping prohibited as telnet is not secure TBL
10: ** 26 Jun 92 When over DECnet, suppressed FTP, Gopher and News. JFG
1.42 frystyk 11: ** 6 Oct 92 Moved HTClientHost and HTlogfile into here. TBL
1.1 timbl 12: ** 17 Dec 92 Tn3270 added, bug fix. DD
1.2 timbl 13: ** 4 Feb 93 Access registration, Search escapes bad chars TBL
1.9 timbl 14: ** PARAMETERS TO HTSEARCH AND HTLOADRELATIVE CHANGED
15: ** 28 May 93 WAIS gateway explicit if no WAIS library linked in.
1.19 timbl 16: ** Dec 93 Bug change around, more reentrant, etc
1.42 frystyk 17: ** 09 May 94 logfile renamed to HTlogfile to avoid clash with WAIS
1.53 duns 18: ** 8 Jul 94 Insulate free() from _free structure element.
1.2 timbl 19: ** Bugs
20: ** This module assumes that the graphic object is hypertext, as it
1.9 timbl 21: ** needs to select it when it has been loaded. A superclass needs to be
1.2 timbl 22: ** defined which accepts select and select_anchor.
1.1 timbl 23: */
24:
1.9 timbl 25: #ifndef DEFAULT_WAIS_GATEWAY
1.8 timbl 26: #define DEFAULT_WAIS_GATEWAY "http://info.cern.ch:8001/"
1.54 frystyk 27: #endif
1.8 timbl 28:
1.1 timbl 29: /* Implements:
30: */
31: #include "HTAccess.h"
32:
33: /* Uses:
34: */
35:
36: #include "HTParse.h"
37: #include "HTUtils.h"
1.4 timbl 38: #include "HTML.h" /* SCW */
1.2 timbl 39:
40: #ifndef NO_RULES
41: #include "HTRules.h"
42: #endif
43:
44: #include "HTList.h"
45: #include "HText.h" /* See bugs above */
46: #include "HTAlert.h"
1.17 timbl 47: #include "HTFWriter.h" /* for cache stuff */
48: #include "HTTee.h"
1.46 frystyk 49: #include "HTError.h"
1.2 timbl 50:
1.54 frystyk 51: /* These flags may be set to modify the operation of this module */
52: PUBLIC char * HTCacheDir = NULL; /* Root for cached files or 0 for no cache */
53: PUBLIC char * HTSaveLocallyDir = SAVE_LOCALLY_HOME_DIR; /* Save & exe files */
54: PUBLIC char * HTClientHost = 0; /* Name of remote login host if any */
55: PUBLIC FILE * HTlogfile = 0; /* File to which to output one-liners */
1.41 luotonen 56:
1.34 frystyk 57: PUBLIC BOOL HTForceReload = NO; /* Force reload from cache or net */
1.12 timbl 58: PUBLIC BOOL HTSecure = NO; /* Disable access for telnet users? */
1.27 luotonen 59: PUBLIC BOOL using_proxy = NO; /* are we using a proxy gateway? */
1.43 luotonen 60: PUBLIC char * HTImServer = NULL;/* cern_httpd sets this to the translated URL*/
1.27 luotonen 61: PUBLIC BOOL HTImProxy = NO; /* cern_httpd as a proxy? */
1.1 timbl 62:
1.43 luotonen 63:
1.2 timbl 64: /* To generate other things, play with these:
65: */
66:
1.15 timbl 67: /* PUBLIC HTFormat HTOutputFormat = NULL; use request->output_format */
68: /* PUBLIC HTStream* HTOutputStream = NULL; use request->output_stream */
1.1 timbl 69:
70: PRIVATE HTList * protocols = NULL; /* List of registered protocol descriptors */
71:
1.24 timbl 72: /* Superclass defn */
1.1 timbl 73:
1.24 timbl 74: struct _HTStream {
75: HTStreamClass * isa;
76: /* ... */
77: };
78:
1.15 timbl 79: /* Create a request structure
80: ** ---------------------------
81: */
82:
83: PUBLIC HTRequest * HTRequest_new NOARGS
84: {
1.28 luotonen 85: HTRequest * me = (HTRequest*) calloc(1, sizeof(*me)); /* zero fill */
1.15 timbl 86: if (!me) outofmem(__FILE__, "HTRequest_new()");
87:
1.20 luotonen 88: me->conversions = HTList_new(); /* No conversions registerd yet */
89: me->output_format = WWW_PRESENT; /* default it to present to user */
90:
1.15 timbl 91: return me;
92: }
93:
94:
1.49 frystyk 95: /* Clear a request structure
96: ** ---------------------------
97: ** This function clears the reguest structure so that only the
98: ** conversions remain. Everything else is as if it was created from
99: ** scratch.
100: */
101: PUBLIC void HTRequest_clear ARGS1(HTRequest *, req)
102: {
103: HTList *conversions;
104: if (!req) {
105: if (TRACE)
106: fprintf(stderr, "Clear....... request: Bad argument!\n");
107: return;
108: }
109: conversions = req->conversions; /* Save the conversions */
110: HTErrorFree(req);
111: HTAACleanup(req);
112: FREE(req->from);
113: memset(req, '\0', sizeof(HTRequest));
114:
115: /* Now initialize as from scratch but with the old list of conversions */
116: req->conversions = conversions;
117: req->output_format = WWW_PRESENT; /* default it to present to user */
118: }
119:
120:
1.20 luotonen 121: /* Delete a request structure
122: ** --------------------------
123: */
124: PUBLIC void HTRequest_delete ARGS1(HTRequest *, req)
125: {
126: if (req) {
1.34 frystyk 127: HTFormatDelete(req->conversions);
1.46 frystyk 128: HTErrorFree(req);
1.34 frystyk 129: HTAACleanup(req);
1.37 luotonen 130: FREE(req->from);
1.34 frystyk 131: FREE(req);
1.20 luotonen 132: }
133: }
134:
135:
1.22 luotonen 136: PRIVATE char * method_names[(int)MAX_METHODS + 1] =
137: {
138: "INVALID-METHOD",
139: "GET",
140: "HEAD",
141: "POST",
142: "PUT",
143: "DELETE",
144: "CHECKOUT",
145: "CHECKIN",
146: "SHOWMETHOD",
147: "LINK",
148: "UNLINK",
149: NULL
150: };
151:
152: /* Get method enum value
153: ** ---------------------
154: */
155: PUBLIC HTMethod HTMethod_enum ARGS1(char *, name)
156: {
157: if (name) {
158: int i;
159: for (i=1; i < (int)MAX_METHODS; i++)
160: if (!strcmp(name, method_names[i]))
161: return (HTMethod)i;
162: }
163: return METHOD_INVALID;
164: }
165:
166:
167: /* Get method name
168: ** ---------------
169: */
170: PUBLIC char * HTMethod_name ARGS1(HTMethod, method)
171: {
172: if ((int)method > (int)METHOD_INVALID &&
173: (int)method < (int)MAX_METHODS)
174: return method_names[(int)method];
175: else
176: return method_names[(int)METHOD_INVALID];
177: }
178:
179:
180: /* Is method in a list of method names?
181: ** -----------------------------------
182: */
183: PUBLIC BOOL HTMethod_inList ARGS2(HTMethod, method,
184: HTList *, list)
185: {
186: char * method_name = HTMethod_name(method);
187: HTList *cur = list;
188: char *item;
189:
190: while (NULL != (item = (char*)HTList_nextObject(cur))) {
191: CTRACE(stderr, " %s", item);
192: if (0==strcasecomp(item, method_name))
193: return YES;
194: }
195: return NO; /* Not found */
196: }
197:
198:
199:
200:
1.20 luotonen 201:
1.1 timbl 202: /* Register a Protocol HTRegisterProtocol
203: ** -------------------
204: */
205:
206: PUBLIC BOOL HTRegisterProtocol(protocol)
207: HTProtocol * protocol;
208: {
209: if (!protocols) protocols = HTList_new();
210: HTList_addObject(protocols, protocol);
211: return YES;
212: }
213:
214:
215: /* Register all known protocols
216: ** ----------------------------
217: **
218: ** Add to or subtract from this list if you add or remove protocol modules.
219: ** This routine is called the first time the protocol list is needed,
1.52 frystyk 220: ** unless any protocols are already registered, in which case it is not
221: ** called. Therefore the application can override this list.
1.1 timbl 222: **
223: ** Compiling with NO_INIT prevents all known protocols from being forced
224: ** in at link time.
225: */
226: #ifndef NO_INIT
227: PRIVATE void HTAccessInit NOARGS /* Call me once */
228: {
1.14 duns 229: GLOBALREF HTProtocol HTTP, HTFile, HTTelnet, HTTn3270, HTRlogin;
1.1 timbl 230: #ifndef DECNET
1.54 frystyk 231: #ifdef NEW_CODE
232: GLOBALREF HTProtocol HTFTP, HTNews, HTNNTP, HTGopher;
233: #endif
1.14 duns 234: GLOBALREF HTProtocol HTFTP, HTNews, HTGopher;
1.42 frystyk 235:
1.3 timbl 236: #ifdef DIRECT_WAIS
1.14 duns 237: GLOBALREF HTProtocol HTWAIS;
1.3 timbl 238: #endif
1.2 timbl 239: HTRegisterProtocol(&HTFTP);
240: HTRegisterProtocol(&HTNews);
1.54 frystyk 241: #ifdef NEW_CODE
242: HTRegisterProtocol(&HTNNTP);
243: #endif
1.2 timbl 244: HTRegisterProtocol(&HTGopher);
1.42 frystyk 245:
1.3 timbl 246: #ifdef DIRECT_WAIS
247: HTRegisterProtocol(&HTWAIS);
248: #endif
1.1 timbl 249:
1.54 frystyk 250: #endif /* DECNET */
1.2 timbl 251: HTRegisterProtocol(&HTTP);
252: HTRegisterProtocol(&HTFile);
253: HTRegisterProtocol(&HTTelnet);
254: HTRegisterProtocol(&HTTn3270);
255: HTRegisterProtocol(&HTRlogin);
1.1 timbl 256: }
257: #endif
258:
259:
1.33 luotonen 260:
261: /* override_proxy()
262: **
263: ** Check the no_proxy environment variable to get the list
264: ** of hosts for which proxy server is not consulted.
265: **
266: ** no_proxy is a comma- or space-separated list of machine
267: ** or domain names, with optional :port part. If no :port
268: ** part is present, it applies to all ports on that domain.
269: **
270: ** Example:
271: ** no_proxy="cern.ch,some.domain:8001"
272: **
273: */
274: PRIVATE BOOL override_proxy ARGS1(CONST char *, addr)
275: {
276: CONST char * no_proxy = getenv("no_proxy");
277: char * p = NULL;
278: char * host = NULL;
279: int port = 0;
280: int h_len = 0;
281:
282: if (!no_proxy || !addr || !(host = HTParse(addr, "", PARSE_HOST)))
283: return NO;
284: if (!*host) { free(host); return NO; }
285:
1.34 frystyk 286: if ((p = strchr(host, ':')) != NULL) { /* Port specified */
1.33 luotonen 287: *p++ = 0; /* Chop off port */
288: port = atoi(p);
289: }
290: else { /* Use default port */
291: char * access = HTParse(addr, "", PARSE_ACCESS);
292: if (access) {
293: if (!strcmp(access,"http")) port = 80;
294: else if (!strcmp(access,"gopher")) port = 70;
295: else if (!strcmp(access,"ftp")) port = 21;
296: free(access);
297: }
298: }
299: if (!port) port = 80; /* Default */
300: h_len = strlen(host);
301:
302: while (*no_proxy) {
303: CONST char * end;
304: CONST char * colon = NULL;
305: int templ_port = 0;
306: int t_len;
307:
308: while (*no_proxy && (WHITE(*no_proxy) || *no_proxy==','))
309: no_proxy++; /* Skip whitespace and separators */
310:
311: end = no_proxy;
312: while (*end && !WHITE(*end) && *end != ',') { /* Find separator */
313: if (*end==':') colon = end; /* Port number given */
314: end++;
315: }
316:
317: if (colon) {
318: templ_port = atoi(colon+1);
319: t_len = colon - no_proxy;
320: }
321: else {
322: t_len = end - no_proxy;
323: }
324:
325: if ((!templ_port || templ_port == port) &&
326: (t_len > 0 && t_len <= h_len &&
327: !strncmp(host + h_len - t_len, no_proxy, t_len))) {
328: free(host);
329: return YES;
330: }
331: if (*end) no_proxy = end+1;
332: else break;
333: }
334:
335: free(host);
336: return NO;
337: }
338:
339:
340:
1.2 timbl 341: /* Find physical name and access protocol
342: ** --------------------------------------
1.1 timbl 343: **
344: **
345: ** On entry,
346: ** addr must point to the fully qualified hypertext reference.
347: ** anchor a pareent anchor with whose address is addr
348: **
349: ** On exit,
1.2 timbl 350: ** returns HT_NO_ACCESS Error has occured.
351: ** HT_OK Success
1.1 timbl 352: **
353: */
1.21 luotonen 354: PRIVATE int get_physical ARGS1(HTRequest *, req)
355: {
1.1 timbl 356: char * access=0; /* Name of access method */
1.21 luotonen 357: char * addr = HTAnchor_address((HTAnchor*)req->anchor); /* free me */
1.27 luotonen 358:
1.35 luotonen 359: /*
360: ** This HACK is here until we have redirection implemented.
361: ** This is used when we are recursively calling HTLoad().
362: ** We then take the physical address, because currently the
363: ** virtual address is kept in a hash table so it can't be
364: ** changed -- otherwise it wouldn't be found anymore.
365: */
1.36 luotonen 366: if (HTAnchor_physical(req->anchor))
367: StrAllocCopy(addr, HTAnchor_physical(req->anchor));
1.35 luotonen 368:
1.2 timbl 369: #ifndef NO_RULES
1.47 luotonen 370: if (HTImServer) { /* cern_httpd has already done its own translations */
1.45 luotonen 371: HTAnchor_setPhysical(req->anchor, HTImServer);
1.47 luotonen 372: StrAllocCopy(addr, HTImServer); /* Oops, queries thru many proxies */
373: /* didn't work without this -- AL */
374: }
1.21 luotonen 375: else {
1.27 luotonen 376: char * physical = HTTranslate(addr);
1.21 luotonen 377: if (!physical) {
1.47 luotonen 378: free(addr);
1.21 luotonen 379: return HT_FORBIDDEN;
380: }
381: HTAnchor_setPhysical(req->anchor, physical);
382: free(physical); /* free our copy */
1.2 timbl 383: }
384: #else
1.21 luotonen 385: HTAnchor_setPhysical(req->anchor, addr);
1.2 timbl 386: #endif
387:
1.21 luotonen 388: access = HTParse(HTAnchor_physical(req->anchor),
1.27 luotonen 389: "file:", PARSE_ACCESS);
1.1 timbl 390:
391: /* Check whether gateway access has been set up for this
1.8 timbl 392: **
393: ** This function can be replaced by the rule system above.
1.1 timbl 394: */
1.8 timbl 395: #define USE_GATEWAYS
1.1 timbl 396: #ifdef USE_GATEWAYS
1.39 luotonen 397:
398: /* make sure the using_proxy variable is false */
399: using_proxy = NO;
400:
1.33 luotonen 401: if (!override_proxy(addr)) {
1.27 luotonen 402: char * gateway_parameter, *gateway, *proxy;
403:
1.2 timbl 404: gateway_parameter = (char *)malloc(strlen(access)+20);
405: if (gateway_parameter == NULL) outofmem(__FILE__, "HTLoad");
1.27 luotonen 406:
407: /* search for proxy gateways */
1.2 timbl 408: strcpy(gateway_parameter, "WWW_");
409: strcat(gateway_parameter, access);
410: strcat(gateway_parameter, "_GATEWAY");
411: gateway = (char *)getenv(gateway_parameter); /* coerce for decstation */
1.27 luotonen 412:
413: /* search for proxy servers */
414: strcpy(gateway_parameter, access);
415: strcat(gateway_parameter, "_proxy");
416: proxy = (char *)getenv(gateway_parameter);
417:
1.2 timbl 418: free(gateway_parameter);
1.27 luotonen 419:
420: if (TRACE && gateway)
421: fprintf(stderr,"Gateway found: %s\n",gateway);
422: if (TRACE && proxy)
423: fprintf(stderr,"Proxy server found: %s\n",proxy);
424:
1.8 timbl 425: #ifndef DIRECT_WAIS
1.9 timbl 426: if (!gateway && 0==strcmp(access, "wais")) {
1.8 timbl 427: gateway = DEFAULT_WAIS_GATEWAY;
428: }
429: #endif
1.27 luotonen 430:
431: /* proxy servers have precedence over gateway servers */
432: if (proxy) {
433: char * gatewayed=0;
434:
435: StrAllocCopy(gatewayed,proxy);
436: StrAllocCat(gatewayed,addr);
437: using_proxy = YES;
438: HTAnchor_setPhysical(req->anchor, gatewayed);
439: free(gatewayed);
440: free(access);
441:
442: access = HTParse(HTAnchor_physical(req->anchor),
443: "http:", PARSE_ACCESS);
444: } else if (gateway) {
1.9 timbl 445: char * path = HTParse(addr, "",
446: PARSE_HOST + PARSE_PATH + PARSE_PUNCTUATION);
447: /* Chop leading / off to make host into part of path */
448: char * gatewayed = HTParse(path+1, gateway, PARSE_ALL);
449: free(path);
1.21 luotonen 450: HTAnchor_setPhysical(req->anchor, gatewayed);
1.9 timbl 451: free(gatewayed);
1.2 timbl 452: free(access);
1.9 timbl 453:
1.21 luotonen 454: access = HTParse(HTAnchor_physical(req->anchor),
1.8 timbl 455: "http:", PARSE_ACCESS);
1.2 timbl 456: }
457: }
1.1 timbl 458: #endif
459:
1.19 timbl 460: free(addr);
1.1 timbl 461:
462:
463: /* Search registered protocols to find suitable one
464: */
465: {
1.20 luotonen 466: HTList *cur;
467: HTProtocol *p;
1.1 timbl 468: #ifndef NO_INIT
1.2 timbl 469: if (!protocols) HTAccessInit();
1.1 timbl 470: #endif
1.20 luotonen 471: cur = protocols;
472: while ((p = (HTProtocol*)HTList_nextObject(cur))) {
1.2 timbl 473: if (strcmp(p->name, access)==0) {
1.21 luotonen 474: HTAnchor_setProtocol(req->anchor, p);
1.2 timbl 475: free(access);
476: return (HT_OK);
1.1 timbl 477: }
478: }
479: }
480:
481: free(access);
1.2 timbl 482: return HT_NO_ACCESS;
1.1 timbl 483: }
484:
485:
486: /* Load a document
487: ** ---------------
488: **
1.2 timbl 489: ** This is an internal routine, which has an address AND a matching
490: ** anchor. (The public routines are called with one OR the other.)
491: **
492: ** On entry,
1.15 timbl 493: ** request->
1.35 luotonen 494: ** anchor a parent anchor with fully qualified
495: ** hypertext reference as its address set
1.15 timbl 496: ** output_format valid
497: ** output_stream valid on NULL
1.2 timbl 498: **
499: ** On exit,
500: ** returns <0 Error has occured.
501: ** HT_LOADED Success
502: ** HT_NO_DATA Success, but no document loaded.
1.8 timbl 503: ** (telnet sesssion started etc)
1.2 timbl 504: **
505: */
1.52 frystyk 506: PUBLIC int HTLoad ARGS2(HTRequest *, request, BOOL, keep_error_stack)
1.2 timbl 507: {
1.25 frystyk 508: char *arg = NULL;
509: HTProtocol *p;
510: int status;
511:
1.22 luotonen 512: if (request->method == METHOD_INVALID)
513: request->method = METHOD_GET;
1.52 frystyk 514: if (!keep_error_stack) {
515: HTErrorFree(request);
516: request->error_block = NO;
517: }
518:
1.21 luotonen 519: status = get_physical(request);
1.2 timbl 520: if (status == HT_FORBIDDEN) {
1.49 frystyk 521: char *url = HTAnchor_address((HTAnchor *) request->anchor);
522: if (url) {
523: HTUnEscape(url);
524: HTErrorAdd(request, ERR_FATAL, NO, HTERR_FORBIDDEN,
525: (void *) url, (int) strlen(url), "HTLoad");
526: free(url);
527: } else {
528: HTErrorAdd(request, ERR_FATAL, NO, HTERR_FORBIDDEN,
529: NULL, 0, "HTLoad");
530: }
531: return -1;
1.2 timbl 532: }
533: if (status < 0) return status; /* Can't resolve or forbidden */
1.25 frystyk 534:
535: if(!(arg = HTAnchor_physical(request->anchor)) || !*arg)
536: return (-1);
1.27 luotonen 537:
1.15 timbl 538: p = HTAnchor_protocol(request->anchor);
1.17 timbl 539: return (*(p->load))(request);
1.2 timbl 540: }
541:
542:
543: /* Get a save stream for a document
544: ** --------------------------------
545: */
1.19 timbl 546: PUBLIC HTStream *HTSaveStream ARGS1(HTRequest *, request)
1.15 timbl 547: {
548: HTProtocol * p;
1.19 timbl 549: int status;
1.22 luotonen 550: request->method = METHOD_PUT;
1.21 luotonen 551: status = get_physical(request);
1.19 timbl 552: if (status == HT_FORBIDDEN) {
1.49 frystyk 553: char *url = HTAnchor_address((HTAnchor *) request->anchor);
554: if (url) {
555: HTUnEscape(url);
556: HTErrorAdd(request, ERR_FATAL, NO, HTERR_FORBIDDEN,
557: (void *) url, (int) strlen(url), "HTLoad");
558: free(url);
559: } else {
560: HTErrorAdd(request, ERR_FATAL, NO, HTERR_FORBIDDEN,
561: NULL, 0, "HTLoad");
562: }
563: return NULL; /* should return error status? */
1.19 timbl 564: }
565: if (status < 0) return NULL; /* @@ error. Can't resolve or forbidden */
566:
1.15 timbl 567: p = HTAnchor_protocol(request->anchor);
1.2 timbl 568: if (!p) return NULL;
569:
1.15 timbl 570: return (*p->saveStream)(request);
1.2 timbl 571:
572: }
573:
574:
575: /* Load a document - with logging etc
576: ** ----------------------------------
577: **
578: ** - Checks or documents already loaded
579: ** - Logs the access
580: ** - Allows stdin filter option
581: ** - Trace ouput and error messages
582: **
1.1 timbl 583: ** On Entry,
1.19 timbl 584: ** request->anchor valid for of the document to be accessed.
585: ** request->childAnchor optional anchor within doc to be selected
586: **
1.2 timbl 587: ** filter if YES, treat stdin as HTML
1.1 timbl 588: **
1.15 timbl 589: ** request->anchor is the node_anchor for the document
590: ** request->output_format is valid
591: **
1.1 timbl 592: ** On Exit,
593: ** returns YES Success in opening document
594: ** NO Failure
595: **
596: */
597:
1.52 frystyk 598: PRIVATE BOOL HTLoadDocument ARGS2(HTRequest *, request,
599: BOOL, keep_error_stack)
1.1 timbl 600:
601: {
602: int status;
603: HText * text;
1.19 timbl 604: char * full_address = HTAnchor_address((HTAnchor*)request->anchor);
1.54 frystyk 605:
1.49 frystyk 606: if (TRACE) fprintf (stderr, "HTAccess.... Loading document %s\n",
607: full_address);
1.1 timbl 608:
1.18 timbl 609: request->using_cache = NULL;
610:
1.15 timbl 611: if (!request->output_format) request->output_format = WWW_PRESENT;
1.25 frystyk 612:
1.31 frystyk 613: if (!HTForceReload && (text=(HText *)HTAnchor_document(request->anchor)))
1.15 timbl 614: { /* Already loaded */
1.1 timbl 615: if (TRACE) fprintf(stderr, "HTAccess: Document already in memory.\n");
1.19 timbl 616: if (request->childAnchor) {
617: HText_selectAnchor(text, request->childAnchor);
618: } else {
619: HText_select(text);
620: }
621: free(full_address);
1.1 timbl 622: return YES;
623: }
1.17 timbl 624:
1.34 frystyk 625: /* Check the Cache */
1.17 timbl 626: /* Bug: for each format, we only check whether it is ok, we
627: don't check them all and chose the best */
1.54 frystyk 628: if (request->anchor->cacheItems) {
1.17 timbl 629: HTList * list = request->anchor->cacheItems;
1.20 luotonen 630: HTList * cur = list;
631: HTCacheItem * item;
632:
633: while ((item = (HTCacheItem*)HTList_nextObject(cur))) {
1.18 timbl 634: HTStream * s;
635:
636: request->using_cache = item;
637:
1.37 luotonen 638: s = HTStreamStack(item->format, request, NO);
1.17 timbl 639: if (s) { /* format was suitable */
640: FILE * fp = fopen(item->filename, "r");
1.18 timbl 641: if (TRACE) fprintf(stderr, "Cache: HIT file %s for %s\n",
1.20 luotonen 642: item->filename,
643: full_address);
1.17 timbl 644: if (fp) {
645: HTFileCopy(fp, s);
1.53 duns 646: (*s->isa->_free)(s); /* close up pipeline */
1.17 timbl 647: fclose(fp);
1.19 timbl 648: free(full_address);
1.17 timbl 649: return YES;
650: } else {
651: fprintf(stderr, "***** Can't read cache file %s !\n",
1.20 luotonen 652: item->filename);
1.17 timbl 653: } /* file open ok */
654: } /* stream ok */
655: } /* next cache item */
656: } /* if cache available for this anchor */
1.1 timbl 657:
1.52 frystyk 658: status = HTLoad(request, keep_error_stack);
1.2 timbl 659:
1.1 timbl 660: /* Log the access if necessary
661: */
1.42 frystyk 662: if (HTlogfile) {
1.1 timbl 663: time_t theTime;
664: time(&theTime);
1.42 frystyk 665: fprintf(HTlogfile, "%24.24s %s %s %s\n",
1.1 timbl 666: ctime(&theTime),
667: HTClientHost ? HTClientHost : "local",
668: status<0 ? "FAIL" : "GET",
669: full_address);
1.42 frystyk 670: fflush(HTlogfile); /* Actually update it on disk */
1.1 timbl 671: if (TRACE) fprintf(stderr, "Log: %24.24s %s %s %s\n",
672: ctime(&theTime),
673: HTClientHost ? HTClientHost : "local",
674: status<0 ? "FAIL" : "GET",
675: full_address);
676: }
677:
1.52 frystyk 678: /* The error stack might contain general information to the client
679: about what has been going on in the library (not only errors) */
680: if (request->error_stack)
681: HTErrorMsg(request);
682:
1.1 timbl 683: if (status == HT_LOADED) {
684: if (TRACE) {
1.54 frystyk 685: fprintf(stderr, "HTAccess.... `%s' has been accessed.\n",
1.1 timbl 686: full_address);
687: }
1.19 timbl 688: free(full_address);
1.1 timbl 689: return YES;
690: }
691:
692: if (status == HT_NO_DATA) {
693: if (TRACE) {
694: fprintf(stderr,
1.54 frystyk 695: "HTAccess.... `%s' has been accessed, No data left.\n",
1.1 timbl 696: full_address);
697: }
1.19 timbl 698: free(full_address);
1.1 timbl 699: return NO;
700: }
701:
1.34 frystyk 702: /* Bug fix thanks to Lou Montulli. Henrik 10/03-94 */
703: if (status<=0) { /* Failure in accessing a document */
1.1 timbl 704: #ifdef CURSES
705: user_message("Can't access `%s'", full_address);
706: #else
1.5 timbl 707: if (TRACE) fprintf(stderr,
708: "HTAccess: Can't access `%s'\n", full_address);
1.1 timbl 709: #endif
1.19 timbl 710: free(full_address);
1.1 timbl 711: return NO;
712: }
1.9 timbl 713:
714: /* If you get this, then please find which routine is returning
715: a positive unrecognised error code! */
1.1 timbl 716: fprintf(stderr,
1.50 frystyk 717: "**** HTAccess: Internal software error in CERN WWWLib version %s ****\n\nPlease mail www-bug@info.cern.ch quoting what software and what version you are using\nand the URL: %s that caused the problem, thanks!\n",
718: HTLibraryVersion,
719: full_address);
1.19 timbl 720: free(full_address);
721:
1.1 timbl 722: exit(-6996);
1.20 luotonen 723: return NO; /* For gcc :-( */
1.2 timbl 724: } /* HTLoadDocument */
1.1 timbl 725:
726:
727:
728: /* Load a document from absolute name
729: ** ---------------
730: **
731: ** On Entry,
732: ** addr The absolute address of the document to be accessed.
733: ** filter if YES, treat document as HTML
734: **
735: ** On Exit,
736: ** returns YES Success in opening document
737: ** NO Failure
738: **
739: **
740: */
741:
1.15 timbl 742: PUBLIC BOOL HTLoadAbsolute ARGS2(CONST char *,addr, HTRequest*, request)
1.2 timbl 743: {
1.19 timbl 744: HTAnchor * anchor = HTAnchor_findAddress(addr);
745: request->anchor = HTAnchor_parent(anchor);
746: request->childAnchor = ((HTAnchor*)request->anchor == anchor) ?
747: NULL : (HTChildAnchor*) anchor;
1.52 frystyk 748: return HTLoadDocument(request, NO);
1.2 timbl 749: }
750:
751:
752: /* Load a document from absolute name to stream
753: ** --------------------------------------------
754: **
755: ** On Entry,
756: ** addr The absolute address of the document to be accessed.
1.15 timbl 757: ** request->output_stream if non-NULL, send data down this stream
1.2 timbl 758: **
759: ** On Exit,
760: ** returns YES Success in opening document
761: ** NO Failure
762: **
763: **
764: */
765:
766: PUBLIC BOOL HTLoadToStream ARGS3(
767: CONST char *, addr,
768: BOOL, filter,
1.15 timbl 769: HTRequest*, request)
1.1 timbl 770: {
1.19 timbl 771: HTAnchor * anchor = HTAnchor_findAddress(addr);
772: request->anchor = HTAnchor_parent(anchor);
773: request->childAnchor = ((HTAnchor*)request->anchor == anchor) ? NULL :
774: (HTChildAnchor*) anchor;
1.15 timbl 775: request->output_stream = request->output_stream;
1.52 frystyk 776: return HTLoadDocument(request, NO);
1.1 timbl 777: }
778:
779:
1.2 timbl 780:
781:
1.1 timbl 782: /* Load a document from relative name
783: ** ---------------
784: **
785: ** On Entry,
1.2 timbl 786: ** relative_name The relative address of the document
787: ** to be accessed.
1.1 timbl 788: **
789: ** On Exit,
790: ** returns YES Success in opening document
791: ** NO Failure
792: **
793: **
794: */
795:
1.15 timbl 796: PUBLIC BOOL HTLoadRelative ARGS3(
1.2 timbl 797: CONST char *, relative_name,
1.15 timbl 798: HTParentAnchor *, here,
1.20 luotonen 799: HTRequest *, request)
1.1 timbl 800: {
801: char * full_address = 0;
802: BOOL result;
803: char * mycopy = 0;
804: char * stripped = 0;
805: char * current_address =
1.2 timbl 806: HTAnchor_address((HTAnchor*)here);
1.1 timbl 807:
808: StrAllocCopy(mycopy, relative_name);
809:
810: stripped = HTStrip(mycopy);
811: full_address = HTParse(stripped,
812: current_address,
813: PARSE_ACCESS|PARSE_HOST|PARSE_PATH|PARSE_PUNCTUATION);
1.15 timbl 814: result = HTLoadAbsolute(full_address, request);
1.1 timbl 815: free(full_address);
816: free(current_address);
817: free(mycopy); /* Memory leak fixed 10/7/92 -- JFG */
818: return result;
819: }
820:
821:
822: /* Load if necessary, and select an anchor
823: ** --------------------------------------
824: **
825: ** On Entry,
826: ** destination The child or parenet anchor to be loaded.
827: **
828: ** On Exit,
829: ** returns YES Success
830: ** NO Failure
831: **
832: */
833:
1.15 timbl 834: PUBLIC BOOL HTLoadAnchor ARGS2(HTAnchor*, anchor, HTRequest *, request)
1.1 timbl 835: {
1.15 timbl 836: if (!anchor) return NO; /* No link */
1.1 timbl 837:
1.15 timbl 838: request->anchor = HTAnchor_parent(anchor);
1.19 timbl 839: request->childAnchor = ((HTAnchor*)request->anchor == anchor) ? NULL
840: : (HTChildAnchor*) anchor;
1.1 timbl 841:
1.52 frystyk 842: return HTLoadDocument(request, NO) ? YES : NO;
843:
844: } /* HTLoadAnchor */
845:
846:
847: /* Load if necessary, and select an anchor
848: ** --------------------------------------
849: **
850: ** This function is almost identical to HTLoadAnchor, but it doesn't
851: ** clear the error stack so that the information in there is kept.
852: **
853: ** On Entry,
854: ** destination The child or parenet anchor to be loaded.
855: **
856: ** On Exit,
857: ** returns YES Success
858: ** NO Failure
859: **
860: */
861:
862: PUBLIC BOOL HTLoadAnchorRecursive ARGS2(HTAnchor*, anchor,
863: HTRequest *, request)
864: {
865: if (!anchor) return NO; /* No link */
866:
867: request->anchor = HTAnchor_parent(anchor);
868: request->childAnchor = ((HTAnchor*)request->anchor == anchor) ? NULL
869: : (HTChildAnchor*) anchor;
870:
871: return HTLoadDocument(request, YES) ? YES : NO;
1.1 timbl 872:
873: } /* HTLoadAnchor */
874:
875:
876: /* Search
877: ** ------
878: ** Performs a keyword search on word given by the user. Adds the keyword to
879: ** the end of the current address and attempts to open the new address.
880: **
881: ** On Entry,
882: ** *keywords space-separated keyword list or similar search list
1.2 timbl 883: ** here is anchor search is to be done on.
1.1 timbl 884: */
885:
1.2 timbl 886: PRIVATE char hex(i)
887: int i;
888: {
1.13 timbl 889: char * hexchars = "0123456789ABCDEF";
890: return hexchars[i];
1.2 timbl 891: }
1.1 timbl 892:
1.15 timbl 893: PUBLIC BOOL HTSearch ARGS3(
1.2 timbl 894: CONST char *, keywords,
1.15 timbl 895: HTParentAnchor *, here,
896: HTRequest *, request)
1.1 timbl 897: {
1.2 timbl 898:
899: #define acceptable \
900: "1234567890abcdefghijlkmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ.-_"
901:
902: char *q, *u;
903: CONST char * p, *s, *e; /* Pointers into keywords */
904: char * address = HTAnchor_address((HTAnchor*)here);
1.1 timbl 905: BOOL result;
1.2 timbl 906: char * escaped = malloc(strlen(keywords)*3+1);
907:
1.29 frystyk 908: /* static CONST BOOL isAcceptable[96] = */
909: /* static AND const is not good for a gnu compiler! Frystyk 25/02-94 */
1.30 luotonen 910: static BOOL isAcceptable[96] =
1.2 timbl 911: /* 0 1 2 3 4 5 6 7 8 9 A B C D E F */
912: { 0,0,0,0,0,0,0,0,0,0,1,0,0,1,1,0, /* 2x !"#$%&'()*+,-./ */
913: 1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0, /* 3x 0123456789:;<=>? */
914: 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 4x @ABCDEFGHIJKLMNO */
915: 1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,1, /* 5X PQRSTUVWXYZ[\]^_ */
916: 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 6x `abcdefghijklmno */
917: 1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0 }; /* 7X pqrstuvwxyz{\}~ DEL */
918:
919: if (escaped == NULL) outofmem(__FILE__, "HTSearch");
920:
1.29 frystyk 921: /* Convert spaces to + and hex escape unacceptable characters */
1.2 timbl 922:
1.29 frystyk 923: for(s=keywords; *s && WHITE(*s); s++); /*scan */ /* Skip white space */
924: for(e = s + strlen(s); e>s && WHITE(*(e-1)) ; e--); /* Skip trailers */
925: for(q=escaped, p=s; p<e; p++) { /* scan stripped field */
1.2 timbl 926: int c = (int)TOASCII(*p);
927: if (WHITE(*p)) {
928: *q++ = '+';
1.29 frystyk 929: } else if (c>=32 && c<=127 && isAcceptable[c-32] != 0) {
1.13 timbl 930: *q++ = *p; /* 930706 TBL for MVS bug */
1.2 timbl 931: } else {
932: *q++ = '%';
933: *q++ = hex(c / 16);
934: *q++ = hex(c % 16);
935: }
936: } /* Loop over string */
1.1 timbl 937:
1.2 timbl 938: *q=0;
939: /* terminate escaped sctring */
940: u=strchr(address, '?'); /* Find old search string */
941: if (u) *u = 0; /* Chop old search off */
1.1 timbl 942:
943: StrAllocCat(address, "?");
1.2 timbl 944: StrAllocCat(address, escaped);
945: free(escaped);
1.15 timbl 946: result = HTLoadRelative(address, here, request);
1.1 timbl 947: free(address);
1.2 timbl 948:
1.1 timbl 949: return result;
1.2 timbl 950: }
951:
952:
953: /* Search Given Indexname
954: ** ------
955: ** Performs a keyword search on word given by the user. Adds the keyword to
956: ** the end of the current address and attempts to open the new address.
957: **
958: ** On Entry,
959: ** *keywords space-separated keyword list or similar search list
960: ** *addres is name of object search is to be done on.
961: */
962:
1.15 timbl 963: PUBLIC BOOL HTSearchAbsolute ARGS3(
1.2 timbl 964: CONST char *, keywords,
1.15 timbl 965: CONST char *, indexname,
966: HTRequest *, request)
1.2 timbl 967: {
968: HTParentAnchor * anchor =
969: (HTParentAnchor*) HTAnchor_findAddress(indexname);
1.15 timbl 970: return HTSearch(keywords, anchor, request);
1.2 timbl 971: }
972:
973:
974: /* Generate the anchor for the home page
975: ** -------------------------------------
976: **
977: ** As it involves file access, this should only be done once
978: ** when the program first runs.
1.10 timbl 979: ** This is a default algorithm -- browser don't HAVE to use this.
980: ** But consistency betwen browsers is STRONGLY recommended!
1.2 timbl 981: **
1.10 timbl 982: ** Priority order is:
983: **
984: ** 1 WWW_HOME environment variable (logical name, etc)
985: ** 2 ~/WWW/default.html
986: ** 3 /usr/local/bin/default.html
987: ** 4 http://info.cern.ch/default.html
988: **
1.2 timbl 989: */
990: PUBLIC HTParentAnchor * HTHomeAnchor NOARGS
991: {
1.12 timbl 992: char * my_home_document = NULL;
993: char * home = (char *)getenv(LOGICAL_DEFAULT);
1.2 timbl 994: char * ref;
995: HTParentAnchor * anchor;
1.1 timbl 996:
1.12 timbl 997: if (home) {
998: StrAllocCopy(my_home_document, home);
999:
1000: /* Someone telnets in, they get a special home.
1001: */
1002: #define MAX_FILE_NAME 1024 /* @@@ */
1003: } else if (HTClientHost) { /* Telnet server */
1004: FILE * fp = fopen(REMOTE_POINTER, "r");
1005: char * status;
1006: if (fp) {
1007: my_home_document = (char*) malloc(MAX_FILE_NAME);
1008: status = fgets(my_home_document, MAX_FILE_NAME, fp);
1009: if (!status) {
1010: free(my_home_document);
1011: my_home_document = NULL;
1012: }
1013: fclose(fp);
1014: }
1015: if (!my_home_document) StrAllocCopy(my_home_document, REMOTE_ADDRESS);
1016: }
1017:
1018:
1019:
1.2 timbl 1020: #ifdef unix
1.12 timbl 1021:
1.10 timbl 1022: if (!my_home_document) {
1023: FILE * fp = NULL;
1024: CONST char * home = (CONST char*)getenv("HOME");
1025: if (home) {
1026: my_home_document = (char *)malloc(
1027: strlen(home)+1+ strlen(PERSONAL_DEFAULT)+1);
1028: if (my_home_document == NULL) outofmem(__FILE__, "HTLocalName");
1029: sprintf(my_home_document, "%s/%s", home, PERSONAL_DEFAULT);
1030: fp = fopen(my_home_document, "r");
1031: }
1032:
1033: if (!fp) {
1034: StrAllocCopy(my_home_document, LOCAL_DEFAULT_FILE);
1035: fp = fopen(my_home_document, "r");
1036: }
1.2 timbl 1037: if (fp) {
1038: fclose(fp);
1039: } else {
1040: if (TRACE) fprintf(stderr,
1.10 timbl 1041: "HTBrowse: No local home document ~/%s or %s\n",
1042: PERSONAL_DEFAULT, LOCAL_DEFAULT_FILE);
1.11 timbl 1043: free(my_home_document);
1044: my_home_document = NULL;
1.2 timbl 1045: }
1046: }
1047: #endif
1.10 timbl 1048: ref = HTParse( my_home_document ? my_home_document :
1049: HTClientHost ? REMOTE_ADDRESS
1050: : LAST_RESORT,
1051: "file:",
1.2 timbl 1052: PARSE_ACCESS|PARSE_HOST|PARSE_PATH|PARSE_PUNCTUATION);
1.10 timbl 1053: if (my_home_document) {
1.2 timbl 1054: if (TRACE) fprintf(stderr,
1055: "HTAccess: Using custom home page %s i.e. address %s\n",
1.10 timbl 1056: my_home_document, ref);
1057: free(my_home_document);
1.2 timbl 1058: }
1059: anchor = (HTParentAnchor*) HTAnchor_findAddress(ref);
1060: free(ref);
1061: return anchor;
1.1 timbl 1062: }
1.26 frystyk 1063:
1064:
1065: /* Bind an Anchor to the request structure
1066: ** ---------------------------------------
1067: **
1068: ** On Entry,
1069: ** anchor The child or parenet anchor to be binded
1070: ** request The request sturcture
1071: ** On Exit,
1072: ** returns YES Success
1073: ** NO Failure
1074: **
1075: ** Note: Actually the same as HTLoadAnchor() but DOES NOT do the loading
1076: ** Henrik Frystyk 17/02-94
1077: */
1078:
1079: PUBLIC BOOL HTBindAnchor ARGS2(HTAnchor*, anchor, HTRequest *, request)
1080: {
1081: if (!anchor) return NO; /* No link */
1082:
1083: request->anchor = HTAnchor_parent(anchor);
1084: request->childAnchor = ((HTAnchor*)request->anchor == anchor) ? NULL
1085: : (HTChildAnchor*) anchor;
1086:
1.29 frystyk 1087: return YES;
1.26 frystyk 1088: } /* HTBindAnchor */
1089:
Webmaster