Annotation of libwww/Library/src/HTAccess.c, revision 1.58
1.1 timbl 1: /* Access Manager HTAccess.c
2: ** ==============
3: **
4: ** Authors
5: ** TBL Tim Berners-Lee timbl@info.cern.ch
1.4 timbl 6: ** JFG Jean-Francois Groff jfg@dxcern.cern.ch
1.1 timbl 7: ** DD Denis DeLaRoca (310) 825-4580 <CSP1DWD@mvs.oac.ucla.edu>
8: ** History
9: ** 8 Jun 92 Telnet hopping prohibited as telnet is not secure TBL
10: ** 26 Jun 92 When over DECnet, suppressed FTP, Gopher and News. JFG
1.42 frystyk 11: ** 6 Oct 92 Moved HTClientHost and HTlogfile into here. TBL
1.1 timbl 12: ** 17 Dec 92 Tn3270 added, bug fix. DD
1.2 timbl 13: ** 4 Feb 93 Access registration, Search escapes bad chars TBL
1.9 timbl 14: ** PARAMETERS TO HTSEARCH AND HTLOADRELATIVE CHANGED
15: ** 28 May 93 WAIS gateway explicit if no WAIS library linked in.
1.19 timbl 16: ** Dec 93 Bug change around, more reentrant, etc
1.42 frystyk 17: ** 09 May 94 logfile renamed to HTlogfile to avoid clash with WAIS
1.53 duns 18: ** 8 Jul 94 Insulate free() from _free structure element.
1.2 timbl 19: ** Bugs
20: ** This module assumes that the graphic object is hypertext, as it
1.9 timbl 21: ** needs to select it when it has been loaded. A superclass needs to be
1.2 timbl 22: ** defined which accepts select and select_anchor.
1.1 timbl 23: */
24:
1.9 timbl 25: #ifndef DEFAULT_WAIS_GATEWAY
1.8 timbl 26: #define DEFAULT_WAIS_GATEWAY "http://info.cern.ch:8001/"
1.54 frystyk 27: #endif
1.8 timbl 28:
1.1 timbl 29: /* Implements:
30: */
31: #include "HTAccess.h"
32:
33: /* Uses:
34: */
35:
36: #include "HTParse.h"
37: #include "HTUtils.h"
1.4 timbl 38: #include "HTML.h" /* SCW */
1.2 timbl 39:
40: #ifndef NO_RULES
41: #include "HTRules.h"
42: #endif
43:
44: #include "HTList.h"
45: #include "HText.h" /* See bugs above */
46: #include "HTAlert.h"
1.17 timbl 47: #include "HTFWriter.h" /* for cache stuff */
48: #include "HTTee.h"
1.46 frystyk 49: #include "HTError.h"
1.57 howcome 50: #include "HTTCP.h" /* HWL: for HTFindRelatedName */
1.2 timbl 51:
1.54 frystyk 52: /* These flags may be set to modify the operation of this module */
53: PUBLIC char * HTCacheDir = NULL; /* Root for cached files or 0 for no cache */
54: PUBLIC char * HTSaveLocallyDir = SAVE_LOCALLY_HOME_DIR; /* Save & exe files */
55: PUBLIC char * HTClientHost = 0; /* Name of remote login host if any */
56: PUBLIC FILE * HTlogfile = 0; /* File to which to output one-liners */
1.41 luotonen 57:
1.34 frystyk 58: PUBLIC BOOL HTForceReload = NO; /* Force reload from cache or net */
1.12 timbl 59: PUBLIC BOOL HTSecure = NO; /* Disable access for telnet users? */
1.27 luotonen 60: PUBLIC BOOL using_proxy = NO; /* are we using a proxy gateway? */
1.43 luotonen 61: PUBLIC char * HTImServer = NULL;/* cern_httpd sets this to the translated URL*/
1.27 luotonen 62: PUBLIC BOOL HTImProxy = NO; /* cern_httpd as a proxy? */
1.1 timbl 63:
1.43 luotonen 64:
1.2 timbl 65: /* To generate other things, play with these:
66: */
67:
1.15 timbl 68: /* PUBLIC HTFormat HTOutputFormat = NULL; use request->output_format */
69: /* PUBLIC HTStream* HTOutputStream = NULL; use request->output_stream */
1.1 timbl 70:
71: PRIVATE HTList * protocols = NULL; /* List of registered protocol descriptors */
72:
1.24 timbl 73: /* Superclass defn */
1.1 timbl 74:
1.24 timbl 75: struct _HTStream {
76: HTStreamClass * isa;
77: /* ... */
78: };
79:
1.15 timbl 80: /* Create a request structure
81: ** ---------------------------
82: */
83:
84: PUBLIC HTRequest * HTRequest_new NOARGS
85: {
1.28 luotonen 86: HTRequest * me = (HTRequest*) calloc(1, sizeof(*me)); /* zero fill */
1.15 timbl 87: if (!me) outofmem(__FILE__, "HTRequest_new()");
88:
1.20 luotonen 89: me->conversions = HTList_new(); /* No conversions registerd yet */
90: me->output_format = WWW_PRESENT; /* default it to present to user */
91:
1.15 timbl 92: return me;
93: }
94:
95:
1.49 frystyk 96: /* Clear a request structure
97: ** ---------------------------
98: ** This function clears the reguest structure so that only the
99: ** conversions remain. Everything else is as if it was created from
100: ** scratch.
101: */
102: PUBLIC void HTRequest_clear ARGS1(HTRequest *, req)
103: {
104: HTList *conversions;
105: if (!req) {
106: if (TRACE)
107: fprintf(stderr, "Clear....... request: Bad argument!\n");
108: return;
109: }
110: conversions = req->conversions; /* Save the conversions */
111: HTErrorFree(req);
112: HTAACleanup(req);
113: FREE(req->from);
114: memset(req, '\0', sizeof(HTRequest));
115:
116: /* Now initialize as from scratch but with the old list of conversions */
117: req->conversions = conversions;
118: req->output_format = WWW_PRESENT; /* default it to present to user */
119: }
120:
121:
1.20 luotonen 122: /* Delete a request structure
123: ** --------------------------
124: */
125: PUBLIC void HTRequest_delete ARGS1(HTRequest *, req)
126: {
127: if (req) {
1.34 frystyk 128: HTFormatDelete(req->conversions);
1.46 frystyk 129: HTErrorFree(req);
1.34 frystyk 130: HTAACleanup(req);
1.37 luotonen 131: FREE(req->from);
1.34 frystyk 132: FREE(req);
1.20 luotonen 133: }
134: }
135:
136:
1.22 luotonen 137: PRIVATE char * method_names[(int)MAX_METHODS + 1] =
138: {
139: "INVALID-METHOD",
140: "GET",
141: "HEAD",
142: "POST",
143: "PUT",
144: "DELETE",
145: "CHECKOUT",
146: "CHECKIN",
147: "SHOWMETHOD",
148: "LINK",
149: "UNLINK",
150: NULL
151: };
152:
153: /* Get method enum value
154: ** ---------------------
155: */
156: PUBLIC HTMethod HTMethod_enum ARGS1(char *, name)
157: {
158: if (name) {
159: int i;
160: for (i=1; i < (int)MAX_METHODS; i++)
161: if (!strcmp(name, method_names[i]))
162: return (HTMethod)i;
163: }
164: return METHOD_INVALID;
165: }
166:
167:
168: /* Get method name
169: ** ---------------
170: */
171: PUBLIC char * HTMethod_name ARGS1(HTMethod, method)
172: {
173: if ((int)method > (int)METHOD_INVALID &&
174: (int)method < (int)MAX_METHODS)
175: return method_names[(int)method];
176: else
177: return method_names[(int)METHOD_INVALID];
178: }
179:
180:
181: /* Is method in a list of method names?
182: ** -----------------------------------
183: */
184: PUBLIC BOOL HTMethod_inList ARGS2(HTMethod, method,
185: HTList *, list)
186: {
187: char * method_name = HTMethod_name(method);
188: HTList *cur = list;
189: char *item;
190:
191: while (NULL != (item = (char*)HTList_nextObject(cur))) {
192: CTRACE(stderr, " %s", item);
193: if (0==strcasecomp(item, method_name))
194: return YES;
195: }
196: return NO; /* Not found */
197: }
198:
199:
200:
1.1 timbl 201: /* Register a Protocol HTRegisterProtocol
202: ** -------------------
203: */
204:
1.56 frystyk 205: PUBLIC BOOL HTRegisterProtocol ARGS1(HTProtocol *, protocol)
1.1 timbl 206: {
207: if (!protocols) protocols = HTList_new();
208: HTList_addObject(protocols, protocol);
209: return YES;
210: }
211:
212:
213: /* Register all known protocols
214: ** ----------------------------
215: **
216: ** Add to or subtract from this list if you add or remove protocol modules.
217: ** This routine is called the first time the protocol list is needed,
1.52 frystyk 218: ** unless any protocols are already registered, in which case it is not
219: ** called. Therefore the application can override this list.
1.1 timbl 220: **
221: ** Compiling with NO_INIT prevents all known protocols from being forced
222: ** in at link time.
223: */
224: #ifndef NO_INIT
225: PRIVATE void HTAccessInit NOARGS /* Call me once */
226: {
1.14 duns 227: GLOBALREF HTProtocol HTTP, HTFile, HTTelnet, HTTn3270, HTRlogin;
1.1 timbl 228: #ifndef DECNET
1.54 frystyk 229: #ifdef NEW_CODE
230: GLOBALREF HTProtocol HTFTP, HTNews, HTNNTP, HTGopher;
231: #endif
1.14 duns 232: GLOBALREF HTProtocol HTFTP, HTNews, HTGopher;
1.42 frystyk 233:
1.3 timbl 234: #ifdef DIRECT_WAIS
1.14 duns 235: GLOBALREF HTProtocol HTWAIS;
1.3 timbl 236: #endif
1.2 timbl 237: HTRegisterProtocol(&HTFTP);
238: HTRegisterProtocol(&HTNews);
1.54 frystyk 239: #ifdef NEW_CODE
240: HTRegisterProtocol(&HTNNTP);
241: #endif
1.2 timbl 242: HTRegisterProtocol(&HTGopher);
1.42 frystyk 243:
1.3 timbl 244: #ifdef DIRECT_WAIS
245: HTRegisterProtocol(&HTWAIS);
246: #endif
1.1 timbl 247:
1.54 frystyk 248: #endif /* DECNET */
1.2 timbl 249: HTRegisterProtocol(&HTTP);
250: HTRegisterProtocol(&HTFile);
251: HTRegisterProtocol(&HTTelnet);
252: HTRegisterProtocol(&HTTn3270);
253: HTRegisterProtocol(&HTRlogin);
1.1 timbl 254: }
255: #endif
256:
257:
1.33 luotonen 258:
259: /* override_proxy()
260: **
261: ** Check the no_proxy environment variable to get the list
262: ** of hosts for which proxy server is not consulted.
263: **
264: ** no_proxy is a comma- or space-separated list of machine
265: ** or domain names, with optional :port part. If no :port
266: ** part is present, it applies to all ports on that domain.
267: **
268: ** Example:
269: ** no_proxy="cern.ch,some.domain:8001"
270: **
271: */
272: PRIVATE BOOL override_proxy ARGS1(CONST char *, addr)
273: {
274: CONST char * no_proxy = getenv("no_proxy");
275: char * p = NULL;
276: char * host = NULL;
277: int port = 0;
278: int h_len = 0;
279:
280: if (!no_proxy || !addr || !(host = HTParse(addr, "", PARSE_HOST)))
281: return NO;
282: if (!*host) { free(host); return NO; }
283:
1.34 frystyk 284: if ((p = strchr(host, ':')) != NULL) { /* Port specified */
1.33 luotonen 285: *p++ = 0; /* Chop off port */
286: port = atoi(p);
287: }
288: else { /* Use default port */
289: char * access = HTParse(addr, "", PARSE_ACCESS);
290: if (access) {
291: if (!strcmp(access,"http")) port = 80;
292: else if (!strcmp(access,"gopher")) port = 70;
293: else if (!strcmp(access,"ftp")) port = 21;
294: free(access);
295: }
296: }
297: if (!port) port = 80; /* Default */
298: h_len = strlen(host);
299:
300: while (*no_proxy) {
301: CONST char * end;
302: CONST char * colon = NULL;
303: int templ_port = 0;
304: int t_len;
305:
306: while (*no_proxy && (WHITE(*no_proxy) || *no_proxy==','))
307: no_proxy++; /* Skip whitespace and separators */
308:
309: end = no_proxy;
310: while (*end && !WHITE(*end) && *end != ',') { /* Find separator */
311: if (*end==':') colon = end; /* Port number given */
312: end++;
313: }
314:
315: if (colon) {
316: templ_port = atoi(colon+1);
317: t_len = colon - no_proxy;
318: }
319: else {
320: t_len = end - no_proxy;
321: }
322:
323: if ((!templ_port || templ_port == port) &&
324: (t_len > 0 && t_len <= h_len &&
325: !strncmp(host + h_len - t_len, no_proxy, t_len))) {
326: free(host);
327: return YES;
328: }
329: if (*end) no_proxy = end+1;
330: else break;
331: }
332:
333: free(host);
334: return NO;
335: }
336:
337:
338:
1.2 timbl 339: /* Find physical name and access protocol
340: ** --------------------------------------
1.1 timbl 341: **
342: **
343: ** On entry,
344: ** addr must point to the fully qualified hypertext reference.
345: ** anchor a pareent anchor with whose address is addr
346: **
347: ** On exit,
1.2 timbl 348: ** returns HT_NO_ACCESS Error has occured.
349: ** HT_OK Success
1.1 timbl 350: **
351: */
1.21 luotonen 352: PRIVATE int get_physical ARGS1(HTRequest *, req)
353: {
1.1 timbl 354: char * access=0; /* Name of access method */
1.21 luotonen 355: char * addr = HTAnchor_address((HTAnchor*)req->anchor); /* free me */
1.27 luotonen 356:
1.35 luotonen 357: /*
358: ** This HACK is here until we have redirection implemented.
359: ** This is used when we are recursively calling HTLoad().
360: ** We then take the physical address, because currently the
361: ** virtual address is kept in a hash table so it can't be
362: ** changed -- otherwise it wouldn't be found anymore.
363: */
1.36 luotonen 364: if (HTAnchor_physical(req->anchor))
365: StrAllocCopy(addr, HTAnchor_physical(req->anchor));
1.35 luotonen 366:
1.2 timbl 367: #ifndef NO_RULES
1.47 luotonen 368: if (HTImServer) { /* cern_httpd has already done its own translations */
1.45 luotonen 369: HTAnchor_setPhysical(req->anchor, HTImServer);
1.47 luotonen 370: StrAllocCopy(addr, HTImServer); /* Oops, queries thru many proxies */
371: /* didn't work without this -- AL */
372: }
1.21 luotonen 373: else {
1.27 luotonen 374: char * physical = HTTranslate(addr);
1.21 luotonen 375: if (!physical) {
1.47 luotonen 376: free(addr);
1.21 luotonen 377: return HT_FORBIDDEN;
378: }
379: HTAnchor_setPhysical(req->anchor, physical);
380: free(physical); /* free our copy */
1.2 timbl 381: }
382: #else
1.21 luotonen 383: HTAnchor_setPhysical(req->anchor, addr);
1.2 timbl 384: #endif
385:
1.21 luotonen 386: access = HTParse(HTAnchor_physical(req->anchor),
1.27 luotonen 387: "file:", PARSE_ACCESS);
1.1 timbl 388:
389: /* Check whether gateway access has been set up for this
1.8 timbl 390: **
391: ** This function can be replaced by the rule system above.
1.1 timbl 392: */
1.8 timbl 393: #define USE_GATEWAYS
1.1 timbl 394: #ifdef USE_GATEWAYS
1.39 luotonen 395:
396: /* make sure the using_proxy variable is false */
397: using_proxy = NO;
398:
1.33 luotonen 399: if (!override_proxy(addr)) {
1.27 luotonen 400: char * gateway_parameter, *gateway, *proxy;
401:
1.2 timbl 402: gateway_parameter = (char *)malloc(strlen(access)+20);
403: if (gateway_parameter == NULL) outofmem(__FILE__, "HTLoad");
1.27 luotonen 404:
405: /* search for proxy gateways */
1.2 timbl 406: strcpy(gateway_parameter, "WWW_");
407: strcat(gateway_parameter, access);
408: strcat(gateway_parameter, "_GATEWAY");
409: gateway = (char *)getenv(gateway_parameter); /* coerce for decstation */
1.27 luotonen 410:
411: /* search for proxy servers */
412: strcpy(gateway_parameter, access);
413: strcat(gateway_parameter, "_proxy");
414: proxy = (char *)getenv(gateway_parameter);
415:
1.2 timbl 416: free(gateway_parameter);
1.27 luotonen 417:
418: if (TRACE && gateway)
419: fprintf(stderr,"Gateway found: %s\n",gateway);
420: if (TRACE && proxy)
421: fprintf(stderr,"Proxy server found: %s\n",proxy);
422:
1.8 timbl 423: #ifndef DIRECT_WAIS
1.9 timbl 424: if (!gateway && 0==strcmp(access, "wais")) {
1.8 timbl 425: gateway = DEFAULT_WAIS_GATEWAY;
426: }
427: #endif
1.27 luotonen 428:
429: /* proxy servers have precedence over gateway servers */
430: if (proxy) {
431: char * gatewayed=0;
432:
433: StrAllocCopy(gatewayed,proxy);
434: StrAllocCat(gatewayed,addr);
435: using_proxy = YES;
436: HTAnchor_setPhysical(req->anchor, gatewayed);
437: free(gatewayed);
438: free(access);
439:
440: access = HTParse(HTAnchor_physical(req->anchor),
441: "http:", PARSE_ACCESS);
442: } else if (gateway) {
1.9 timbl 443: char * path = HTParse(addr, "",
444: PARSE_HOST + PARSE_PATH + PARSE_PUNCTUATION);
445: /* Chop leading / off to make host into part of path */
446: char * gatewayed = HTParse(path+1, gateway, PARSE_ALL);
447: free(path);
1.21 luotonen 448: HTAnchor_setPhysical(req->anchor, gatewayed);
1.9 timbl 449: free(gatewayed);
1.2 timbl 450: free(access);
1.9 timbl 451:
1.21 luotonen 452: access = HTParse(HTAnchor_physical(req->anchor),
1.8 timbl 453: "http:", PARSE_ACCESS);
1.2 timbl 454: }
455: }
1.1 timbl 456: #endif
457:
1.19 timbl 458: free(addr);
1.1 timbl 459:
460:
461: /* Search registered protocols to find suitable one
462: */
463: {
1.20 luotonen 464: HTList *cur;
465: HTProtocol *p;
1.1 timbl 466: #ifndef NO_INIT
1.2 timbl 467: if (!protocols) HTAccessInit();
1.1 timbl 468: #endif
1.20 luotonen 469: cur = protocols;
470: while ((p = (HTProtocol*)HTList_nextObject(cur))) {
1.2 timbl 471: if (strcmp(p->name, access)==0) {
1.21 luotonen 472: HTAnchor_setProtocol(req->anchor, p);
1.2 timbl 473: free(access);
474: return (HT_OK);
1.1 timbl 475: }
476: }
477: }
478:
479: free(access);
1.2 timbl 480: return HT_NO_ACCESS;
1.1 timbl 481: }
482:
483:
484: /* Load a document
485: ** ---------------
486: **
1.2 timbl 487: ** This is an internal routine, which has an address AND a matching
488: ** anchor. (The public routines are called with one OR the other.)
489: **
490: ** On entry,
1.15 timbl 491: ** request->
1.35 luotonen 492: ** anchor a parent anchor with fully qualified
493: ** hypertext reference as its address set
1.15 timbl 494: ** output_format valid
495: ** output_stream valid on NULL
1.2 timbl 496: **
497: ** On exit,
498: ** returns <0 Error has occured.
499: ** HT_LOADED Success
500: ** HT_NO_DATA Success, but no document loaded.
1.8 timbl 501: ** (telnet sesssion started etc)
1.2 timbl 502: **
503: */
1.52 frystyk 504: PUBLIC int HTLoad ARGS2(HTRequest *, request, BOOL, keep_error_stack)
1.2 timbl 505: {
1.25 frystyk 506: char *arg = NULL;
507: HTProtocol *p;
508: int status;
509:
1.22 luotonen 510: if (request->method == METHOD_INVALID)
511: request->method = METHOD_GET;
1.52 frystyk 512: if (!keep_error_stack) {
513: HTErrorFree(request);
514: request->error_block = NO;
515: }
516:
1.21 luotonen 517: status = get_physical(request);
1.2 timbl 518: if (status == HT_FORBIDDEN) {
1.49 frystyk 519: char *url = HTAnchor_address((HTAnchor *) request->anchor);
520: if (url) {
521: HTUnEscape(url);
522: HTErrorAdd(request, ERR_FATAL, NO, HTERR_FORBIDDEN,
523: (void *) url, (int) strlen(url), "HTLoad");
524: free(url);
525: } else {
526: HTErrorAdd(request, ERR_FATAL, NO, HTERR_FORBIDDEN,
527: NULL, 0, "HTLoad");
528: }
529: return -1;
1.2 timbl 530: }
531: if (status < 0) return status; /* Can't resolve or forbidden */
1.25 frystyk 532:
533: if(!(arg = HTAnchor_physical(request->anchor)) || !*arg)
534: return (-1);
1.27 luotonen 535:
1.56 frystyk 536: p = (HTProtocol *) HTAnchor_protocol(request->anchor);
1.17 timbl 537: return (*(p->load))(request);
1.2 timbl 538: }
539:
540:
541: /* Get a save stream for a document
542: ** --------------------------------
543: */
1.19 timbl 544: PUBLIC HTStream *HTSaveStream ARGS1(HTRequest *, request)
1.15 timbl 545: {
546: HTProtocol * p;
1.19 timbl 547: int status;
1.22 luotonen 548: request->method = METHOD_PUT;
1.21 luotonen 549: status = get_physical(request);
1.19 timbl 550: if (status == HT_FORBIDDEN) {
1.49 frystyk 551: char *url = HTAnchor_address((HTAnchor *) request->anchor);
552: if (url) {
553: HTUnEscape(url);
554: HTErrorAdd(request, ERR_FATAL, NO, HTERR_FORBIDDEN,
555: (void *) url, (int) strlen(url), "HTLoad");
556: free(url);
557: } else {
558: HTErrorAdd(request, ERR_FATAL, NO, HTERR_FORBIDDEN,
559: NULL, 0, "HTLoad");
560: }
561: return NULL; /* should return error status? */
1.19 timbl 562: }
563: if (status < 0) return NULL; /* @@ error. Can't resolve or forbidden */
564:
1.56 frystyk 565: p = (HTProtocol *) HTAnchor_protocol(request->anchor);
1.2 timbl 566: if (!p) return NULL;
567:
1.15 timbl 568: return (*p->saveStream)(request);
1.2 timbl 569:
570: }
571:
572:
573: /* Load a document - with logging etc
574: ** ----------------------------------
575: **
576: ** - Checks or documents already loaded
577: ** - Logs the access
578: ** - Allows stdin filter option
579: ** - Trace ouput and error messages
580: **
1.1 timbl 581: ** On Entry,
1.19 timbl 582: ** request->anchor valid for of the document to be accessed.
583: ** request->childAnchor optional anchor within doc to be selected
584: **
1.2 timbl 585: ** filter if YES, treat stdin as HTML
1.1 timbl 586: **
1.15 timbl 587: ** request->anchor is the node_anchor for the document
588: ** request->output_format is valid
589: **
1.1 timbl 590: ** On Exit,
591: ** returns YES Success in opening document
592: ** NO Failure
593: **
594: */
595:
1.52 frystyk 596: PRIVATE BOOL HTLoadDocument ARGS2(HTRequest *, request,
597: BOOL, keep_error_stack)
1.1 timbl 598:
599: {
600: int status;
601: HText * text;
1.19 timbl 602: char * full_address = HTAnchor_address((HTAnchor*)request->anchor);
1.54 frystyk 603:
1.49 frystyk 604: if (TRACE) fprintf (stderr, "HTAccess.... Loading document %s\n",
605: full_address);
1.1 timbl 606:
1.18 timbl 607: request->using_cache = NULL;
608:
1.15 timbl 609: if (!request->output_format) request->output_format = WWW_PRESENT;
1.25 frystyk 610:
1.31 frystyk 611: if (!HTForceReload && (text=(HText *)HTAnchor_document(request->anchor)))
1.15 timbl 612: { /* Already loaded */
1.1 timbl 613: if (TRACE) fprintf(stderr, "HTAccess: Document already in memory.\n");
1.19 timbl 614: if (request->childAnchor) {
615: HText_selectAnchor(text, request->childAnchor);
616: } else {
617: HText_select(text);
618: }
619: free(full_address);
1.1 timbl 620: return YES;
621: }
1.17 timbl 622:
1.34 frystyk 623: /* Check the Cache */
1.17 timbl 624: /* Bug: for each format, we only check whether it is ok, we
625: don't check them all and chose the best */
1.54 frystyk 626: if (request->anchor->cacheItems) {
1.17 timbl 627: HTList * list = request->anchor->cacheItems;
1.20 luotonen 628: HTList * cur = list;
629: HTCacheItem * item;
630:
631: while ((item = (HTCacheItem*)HTList_nextObject(cur))) {
1.18 timbl 632: HTStream * s;
633:
634: request->using_cache = item;
635:
1.37 luotonen 636: s = HTStreamStack(item->format, request, NO);
1.17 timbl 637: if (s) { /* format was suitable */
638: FILE * fp = fopen(item->filename, "r");
1.57 howcome 639: if (TRACE)
640: fprintf(stderr, "Cache: HIT file %s for %s\n",
1.20 luotonen 641: item->filename,
642: full_address);
1.17 timbl 643: if (fp) {
644: HTFileCopy(fp, s);
1.53 duns 645: (*s->isa->_free)(s); /* close up pipeline */
1.17 timbl 646: fclose(fp);
1.19 timbl 647: free(full_address);
1.17 timbl 648: return YES;
649: } else {
650: fprintf(stderr, "***** Can't read cache file %s !\n",
1.20 luotonen 651: item->filename);
1.17 timbl 652: } /* file open ok */
653: } /* stream ok */
654: } /* next cache item */
655: } /* if cache available for this anchor */
1.1 timbl 656:
1.52 frystyk 657: status = HTLoad(request, keep_error_stack);
1.2 timbl 658:
1.1 timbl 659: /* Log the access if necessary
660: */
1.42 frystyk 661: if (HTlogfile) {
1.1 timbl 662: time_t theTime;
663: time(&theTime);
1.42 frystyk 664: fprintf(HTlogfile, "%24.24s %s %s %s\n",
1.1 timbl 665: ctime(&theTime),
666: HTClientHost ? HTClientHost : "local",
667: status<0 ? "FAIL" : "GET",
668: full_address);
1.42 frystyk 669: fflush(HTlogfile); /* Actually update it on disk */
1.1 timbl 670: if (TRACE) fprintf(stderr, "Log: %24.24s %s %s %s\n",
671: ctime(&theTime),
672: HTClientHost ? HTClientHost : "local",
673: status<0 ? "FAIL" : "GET",
674: full_address);
675: }
676:
1.52 frystyk 677: /* The error stack might contain general information to the client
678: about what has been going on in the library (not only errors) */
1.58 ! frystyk 679: if (!HTImProxy && request->error_stack)
1.52 frystyk 680: HTErrorMsg(request);
681:
1.1 timbl 682: if (status == HT_LOADED) {
683: if (TRACE) {
1.54 frystyk 684: fprintf(stderr, "HTAccess.... `%s' has been accessed.\n",
1.1 timbl 685: full_address);
686: }
1.19 timbl 687: free(full_address);
1.1 timbl 688: return YES;
689: }
690:
691: if (status == HT_NO_DATA) {
692: if (TRACE) {
693: fprintf(stderr,
1.54 frystyk 694: "HTAccess.... `%s' has been accessed, No data left.\n",
1.1 timbl 695: full_address);
696: }
1.19 timbl 697: free(full_address);
1.1 timbl 698: return NO;
699: }
700:
1.34 frystyk 701: /* Bug fix thanks to Lou Montulli. Henrik 10/03-94 */
1.58 ! frystyk 702: if (status <= 0) { /* Failure in accessing a document */
! 703: if (HTImProxy)
! 704: HTErrorMsg(request); /* Only on a real error */
! 705: if (PROT_TRACE)
! 706: fprintf(stderr, "HTAccess.... Can't access `%s'\n", full_address);
1.19 timbl 707: free(full_address);
1.1 timbl 708: return NO;
709: }
1.9 timbl 710:
711: /* If you get this, then please find which routine is returning
712: a positive unrecognised error code! */
1.1 timbl 713: fprintf(stderr,
1.50 frystyk 714: "**** HTAccess: Internal software error in CERN WWWLib version %s ****\n\nPlease mail www-bug@info.cern.ch quoting what software and what version you are using\nand the URL: %s that caused the problem, thanks!\n",
715: HTLibraryVersion,
716: full_address);
1.19 timbl 717: free(full_address);
718:
1.1 timbl 719: exit(-6996);
1.58 ! frystyk 720: return NO; /* For gcc :-( */
! 721: }
1.1 timbl 722:
723:
724: /* Load a document from absolute name
725: ** ---------------
726: **
727: ** On Entry,
728: ** addr The absolute address of the document to be accessed.
729: ** filter if YES, treat document as HTML
730: **
731: ** On Exit,
732: ** returns YES Success in opening document
733: ** NO Failure
734: **
735: **
736: */
737:
1.15 timbl 738: PUBLIC BOOL HTLoadAbsolute ARGS2(CONST char *,addr, HTRequest*, request)
1.2 timbl 739: {
1.19 timbl 740: HTAnchor * anchor = HTAnchor_findAddress(addr);
741: request->anchor = HTAnchor_parent(anchor);
742: request->childAnchor = ((HTAnchor*)request->anchor == anchor) ?
743: NULL : (HTChildAnchor*) anchor;
1.52 frystyk 744: return HTLoadDocument(request, NO);
1.2 timbl 745: }
746:
747:
748: /* Load a document from absolute name to stream
749: ** --------------------------------------------
750: **
751: ** On Entry,
752: ** addr The absolute address of the document to be accessed.
1.15 timbl 753: ** request->output_stream if non-NULL, send data down this stream
1.2 timbl 754: **
755: ** On Exit,
756: ** returns YES Success in opening document
757: ** NO Failure
758: **
759: **
760: */
761:
762: PUBLIC BOOL HTLoadToStream ARGS3(
763: CONST char *, addr,
764: BOOL, filter,
1.15 timbl 765: HTRequest*, request)
1.1 timbl 766: {
1.19 timbl 767: HTAnchor * anchor = HTAnchor_findAddress(addr);
768: request->anchor = HTAnchor_parent(anchor);
769: request->childAnchor = ((HTAnchor*)request->anchor == anchor) ? NULL :
770: (HTChildAnchor*) anchor;
1.15 timbl 771: request->output_stream = request->output_stream;
1.52 frystyk 772: return HTLoadDocument(request, NO);
1.1 timbl 773: }
774:
775:
1.2 timbl 776:
777:
1.1 timbl 778: /* Load a document from relative name
779: ** ---------------
780: **
781: ** On Entry,
1.2 timbl 782: ** relative_name The relative address of the document
783: ** to be accessed.
1.1 timbl 784: **
785: ** On Exit,
786: ** returns YES Success in opening document
787: ** NO Failure
788: **
789: **
790: */
791:
1.15 timbl 792: PUBLIC BOOL HTLoadRelative ARGS3(
1.2 timbl 793: CONST char *, relative_name,
1.15 timbl 794: HTParentAnchor *, here,
1.20 luotonen 795: HTRequest *, request)
1.1 timbl 796: {
797: char * full_address = 0;
798: BOOL result;
799: char * mycopy = 0;
800: char * stripped = 0;
801: char * current_address =
1.2 timbl 802: HTAnchor_address((HTAnchor*)here);
1.1 timbl 803:
804: StrAllocCopy(mycopy, relative_name);
805:
806: stripped = HTStrip(mycopy);
807: full_address = HTParse(stripped,
808: current_address,
809: PARSE_ACCESS|PARSE_HOST|PARSE_PATH|PARSE_PUNCTUATION);
1.15 timbl 810: result = HTLoadAbsolute(full_address, request);
1.1 timbl 811: free(full_address);
812: free(current_address);
813: free(mycopy); /* Memory leak fixed 10/7/92 -- JFG */
814: return result;
815: }
816:
817:
818: /* Load if necessary, and select an anchor
819: ** --------------------------------------
820: **
821: ** On Entry,
822: ** destination The child or parenet anchor to be loaded.
823: **
824: ** On Exit,
825: ** returns YES Success
826: ** NO Failure
827: **
828: */
829:
1.15 timbl 830: PUBLIC BOOL HTLoadAnchor ARGS2(HTAnchor*, anchor, HTRequest *, request)
1.1 timbl 831: {
1.15 timbl 832: if (!anchor) return NO; /* No link */
1.1 timbl 833:
1.15 timbl 834: request->anchor = HTAnchor_parent(anchor);
1.19 timbl 835: request->childAnchor = ((HTAnchor*)request->anchor == anchor) ? NULL
836: : (HTChildAnchor*) anchor;
1.1 timbl 837:
1.52 frystyk 838: return HTLoadDocument(request, NO) ? YES : NO;
839:
840: } /* HTLoadAnchor */
841:
842:
843: /* Load if necessary, and select an anchor
844: ** --------------------------------------
845: **
846: ** This function is almost identical to HTLoadAnchor, but it doesn't
847: ** clear the error stack so that the information in there is kept.
848: **
849: ** On Entry,
850: ** destination The child or parenet anchor to be loaded.
851: **
852: ** On Exit,
853: ** returns YES Success
854: ** NO Failure
855: **
856: */
857:
858: PUBLIC BOOL HTLoadAnchorRecursive ARGS2(HTAnchor*, anchor,
859: HTRequest *, request)
860: {
861: if (!anchor) return NO; /* No link */
862:
863: request->anchor = HTAnchor_parent(anchor);
864: request->childAnchor = ((HTAnchor*)request->anchor == anchor) ? NULL
865: : (HTChildAnchor*) anchor;
866:
867: return HTLoadDocument(request, YES) ? YES : NO;
1.1 timbl 868:
869: } /* HTLoadAnchor */
870:
871:
872: /* Search
873: ** ------
874: ** Performs a keyword search on word given by the user. Adds the keyword to
875: ** the end of the current address and attempts to open the new address.
876: **
877: ** On Entry,
878: ** *keywords space-separated keyword list or similar search list
1.2 timbl 879: ** here is the anchor the search is to be done on.
1.1 timbl 880: */
881:
/*
** Hexadecimal digit for a value.
**
** On entry,
**	i	the value; only its low four bits are used, so a negative
**		or too large argument can no longer index outside the
**		digit table.
**
** On exit,
**	returns	one of '0'..'9', 'A'..'F'.
*/
PRIVATE char hex ARGS1(int, i)
{
    char * hexchars = "0123456789ABCDEF";
    return hexchars[i & 0xF];
}
1.1 timbl 887:
1.15 timbl 888: PUBLIC BOOL HTSearch ARGS3(
1.2 timbl 889: CONST char *, keywords,
1.15 timbl 890: HTParentAnchor *, here,
891: HTRequest *, request)
1.1 timbl 892: {
1.2 timbl 893:
894: #define acceptable \
895: "1234567890abcdefghijlkmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ.-_"
896:
897: char *q, *u;
898: CONST char * p, *s, *e; /* Pointers into keywords */
899: char * address = HTAnchor_address((HTAnchor*)here);
1.1 timbl 900: BOOL result;
1.56 frystyk 901: char * escaped = (char *) malloc(strlen(keywords)*3+1);
1.2 timbl 902:
1.29 frystyk 903: /* static CONST BOOL isAcceptable[96] = */
904: /* static AND const is not good for a gnu compiler! Frystyk 25/02-94 */
1.30 luotonen 905: static BOOL isAcceptable[96] =
1.2 timbl 906: /* 0 1 2 3 4 5 6 7 8 9 A B C D E F */
907: { 0,0,0,0,0,0,0,0,0,0,1,0,0,1,1,0, /* 2x !"#$%&'()*+,-./ */
908: 1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0, /* 3x 0123456789:;<=>? */
909: 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 4x @ABCDEFGHIJKLMNO */
910: 1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,1, /* 5X PQRSTUVWXYZ[\]^_ */
911: 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 6x `abcdefghijklmno */
912: 1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0 }; /* 7X pqrstuvwxyz{\}~ DEL */
913:
914: if (escaped == NULL) outofmem(__FILE__, "HTSearch");
915:
1.29 frystyk 916: /* Convert spaces to + and hex escape unacceptable characters */
1.2 timbl 917:
1.29 frystyk 918: for(s=keywords; *s && WHITE(*s); s++); /*scan */ /* Skip white space */
919: for(e = s + strlen(s); e>s && WHITE(*(e-1)) ; e--); /* Skip trailers */
920: for(q=escaped, p=s; p<e; p++) { /* scan stripped field */
1.2 timbl 921: int c = (int)TOASCII(*p);
922: if (WHITE(*p)) {
923: *q++ = '+';
1.29 frystyk 924: } else if (c>=32 && c<=127 && isAcceptable[c-32] != 0) {
1.13 timbl 925: *q++ = *p; /* 930706 TBL for MVS bug */
1.2 timbl 926: } else {
927: *q++ = '%';
928: *q++ = hex(c / 16);
929: *q++ = hex(c % 16);
930: }
931: } /* Loop over string */
1.1 timbl 932:
1.2 timbl 933: *q=0;
934: /* terminate escaped sctring */
935: u=strchr(address, '?'); /* Find old search string */
936: if (u) *u = 0; /* Chop old search off */
1.1 timbl 937:
938: StrAllocCat(address, "?");
1.2 timbl 939: StrAllocCat(address, escaped);
940: free(escaped);
1.15 timbl 941: result = HTLoadRelative(address, here, request);
1.1 timbl 942: free(address);
1.2 timbl 943:
1.1 timbl 944: return result;
1.2 timbl 945: }
946:
947:
948: /* Search Given Indexname
949: ** ------
950: ** Performs a keyword search on word given by the user. Adds the keyword to
951: ** the end of the current address and attempts to open the new address.
952: **
953: ** On Entry,
954: ** *keywords space-separated keyword list or similar search list
955: ** *addres is name of object search is to be done on.
956: */
957:
1.15 timbl 958: PUBLIC BOOL HTSearchAbsolute ARGS3(
1.2 timbl 959: CONST char *, keywords,
1.15 timbl 960: CONST char *, indexname,
961: HTRequest *, request)
1.2 timbl 962: {
963: HTParentAnchor * anchor =
964: (HTParentAnchor*) HTAnchor_findAddress(indexname);
1.15 timbl 965: return HTSearch(keywords, anchor, request);
1.57 howcome 966: }
967:
968:
969: /*
970: ** Find Related Name
971: **
972: ** Creates a string that can be used as a related name when
973: ** calling HTParse initially.
974: **
975: ** The code for this routine originates from the Linemode
976: ** browser and was moved here by howcome@dxcern.cern.ch
977: ** in order for all clients to take advantage.
978: **
979: */
980:
981:
982: PUBLIC char * HTFindRelatedName NOARGS
983: {
984: char* default_default=0; /* Parse home relative to this */
985:
986: StrAllocCopy(default_default, "file://");
987: StrAllocCat(default_default, HTGetHostName()); /*eg file://dxcern.cern.ch*/
988:
989: #ifndef MAXPATHLEN
990: #define NO_GETWD /* Assume no getwd() if no MAXPATHLEN */
991: #endif
992:
993: #ifdef NO_GETWD /* No getwd() on this machine */
994: #ifdef HAS_GETCWD /* System V variant SIGN CHANGED TBL 921006 !! */
995:
996: {
997: char wd[1024]; /*!! Arbitrary*/
998: char * result = getcwd(wd, sizeof(wd));
999: if (result) {
1000:
1001: #ifdef VMS
1002: /* convert directory name to Unix-style syntax */
1003: char * disk = strchr (wd, ':');
1004: char * dir = strchr (wd, '[');
1005: if (disk) {
1006: *disk = '\0';
1007: StrAllocCat (default_default, "/"); /* needs delimiter */
1008: StrAllocCat (default_default, wd);
1009: }
1010: if (dir) {
1011: char *p;
1012: *dir = '/'; /* Convert leading '[' */
1013: for (p = dir ; *p != ']'; ++p)
1014: if (*p == '.') *p = '/';
1015: *p = '\0'; /* Cut on final ']' */
1016: StrAllocCat (default_default, dir);
1017: }
1018: #else /* not VMS */
1019: StrAllocCat (default_default, wd);
1020: #endif /* not VMS */
1021: } else {
1022: fprintf(stderr,"Can't read working directory (getcwd)", NULL);
1023: }
1024: } /* end if good getcwd result */
1025:
1026: #else /* has NO getcwd */
1027:
1028: fprintf(stderr,"This platform does not support getwd() or getcwd()", NULL);
1029:
1030: #endif /* has no getcwd */
1031:
1032: #else /* has getwd */
1033: {
1034: char wd[MAXPATHLEN];
1035: char * result = (char *) getwd(wd);
1036: if (result) {
1037: StrAllocCat(default_default, wd);
1038: } else {
1039: fprintf(stderr,"Can't read working directory.");
1040: }
1041: }
1042: #endif
1043:
1044: StrAllocCat(default_default, "/default.html");
1045: return default_default;
1.2 timbl 1046: }
1047:
1048:
1049: /* Generate the anchor for the home page
1050: ** -------------------------------------
1051: **
1052: ** As it involves file access, this should only be done once
1053: ** when the program first runs.
1.10 timbl 1054: ** This is a default algorithm -- browser don't HAVE to use this.
1055: ** But consistency betwen browsers is STRONGLY recommended!
1.2 timbl 1056: **
1.10 timbl 1057: ** Priority order is:
1058: **
1059: ** 1 WWW_HOME environment variable (logical name, etc)
1060: ** 2 ~/WWW/default.html
1061: ** 3 /usr/local/bin/default.html
1062: ** 4 http://info.cern.ch/default.html
1063: **
1.2 timbl 1064: */
1065: PUBLIC HTParentAnchor * HTHomeAnchor NOARGS
1066: {
1.12 timbl 1067: char * my_home_document = NULL;
1068: char * home = (char *)getenv(LOGICAL_DEFAULT);
1.2 timbl 1069: char * ref;
1070: HTParentAnchor * anchor;
1.1 timbl 1071:
1.12 timbl 1072: if (home) {
1073: StrAllocCopy(my_home_document, home);
1074:
1075: /* Someone telnets in, they get a special home.
1076: */
1077: #define MAX_FILE_NAME 1024 /* @@@ */
1078: } else if (HTClientHost) { /* Telnet server */
1079: FILE * fp = fopen(REMOTE_POINTER, "r");
1080: char * status;
1081: if (fp) {
1082: my_home_document = (char*) malloc(MAX_FILE_NAME);
1083: status = fgets(my_home_document, MAX_FILE_NAME, fp);
1084: if (!status) {
1085: free(my_home_document);
1086: my_home_document = NULL;
1087: }
1088: fclose(fp);
1089: }
1090: if (!my_home_document) StrAllocCopy(my_home_document, REMOTE_ADDRESS);
1091: }
1092:
1093:
1094:
1.2 timbl 1095: #ifdef unix
1.12 timbl 1096:
1.10 timbl 1097: if (!my_home_document) {
1098: FILE * fp = NULL;
1099: CONST char * home = (CONST char*)getenv("HOME");
1100: if (home) {
1101: my_home_document = (char *)malloc(
1102: strlen(home)+1+ strlen(PERSONAL_DEFAULT)+1);
1103: if (my_home_document == NULL) outofmem(__FILE__, "HTLocalName");
1104: sprintf(my_home_document, "%s/%s", home, PERSONAL_DEFAULT);
1105: fp = fopen(my_home_document, "r");
1106: }
1107:
1108: if (!fp) {
1109: StrAllocCopy(my_home_document, LOCAL_DEFAULT_FILE);
1110: fp = fopen(my_home_document, "r");
1111: }
1.2 timbl 1112: if (fp) {
1113: fclose(fp);
1114: } else {
1115: if (TRACE) fprintf(stderr,
1.10 timbl 1116: "HTBrowse: No local home document ~/%s or %s\n",
1117: PERSONAL_DEFAULT, LOCAL_DEFAULT_FILE);
1.11 timbl 1118: free(my_home_document);
1119: my_home_document = NULL;
1.2 timbl 1120: }
1121: }
1122: #endif
1.10 timbl 1123: ref = HTParse( my_home_document ? my_home_document :
1124: HTClientHost ? REMOTE_ADDRESS
1125: : LAST_RESORT,
1126: "file:",
1.2 timbl 1127: PARSE_ACCESS|PARSE_HOST|PARSE_PATH|PARSE_PUNCTUATION);
1.10 timbl 1128: if (my_home_document) {
1.2 timbl 1129: if (TRACE) fprintf(stderr,
1130: "HTAccess: Using custom home page %s i.e. address %s\n",
1.10 timbl 1131: my_home_document, ref);
1132: free(my_home_document);
1.2 timbl 1133: }
1134: anchor = (HTParentAnchor*) HTAnchor_findAddress(ref);
1135: free(ref);
1136: return anchor;
1.1 timbl 1137: }
1.26 frystyk 1138:
1139:
1140: /* Bind an Anchor to the request structure
1141: ** ---------------------------------------
1142: **
1143: ** On Entry,
1144: ** anchor The child or parenet anchor to be binded
1145: ** request The request sturcture
1146: ** On Exit,
1147: ** returns YES Success
1148: ** NO Failure
1149: **
1150: ** Note: Actually the same as HTLoadAnchor() but DOES NOT do the loading
1151: ** Henrik Frystyk 17/02-94
1152: */
1153:
1154: PUBLIC BOOL HTBindAnchor ARGS2(HTAnchor*, anchor, HTRequest *, request)
1155: {
1156: if (!anchor) return NO; /* No link */
1157:
1158: request->anchor = HTAnchor_parent(anchor);
1159: request->childAnchor = ((HTAnchor*)request->anchor == anchor) ? NULL
1160: : (HTChildAnchor*) anchor;
1161:
1.29 frystyk 1162: return YES;
1.26 frystyk 1163: } /* HTBindAnchor */
1164:
Webmaster