Annotation of libwww/Library/src/HTAccess.c, revision 1.57
1.1 timbl 1: /* Access Manager HTAccess.c
2: ** ==============
3: **
4: ** Authors
5: ** TBL Tim Berners-Lee timbl@info.cern.ch
1.4 timbl 6: ** JFG Jean-Francois Groff jfg@dxcern.cern.ch
1.1 timbl 7: ** DD Denis DeLaRoca (310) 825-4580 <CSP1DWD@mvs.oac.ucla.edu>
8: ** History
9: ** 8 Jun 92 Telnet hopping prohibited as telnet is not secure TBL
10: ** 26 Jun 92 When over DECnet, suppressed FTP, Gopher and News. JFG
1.42 frystyk 11: ** 6 Oct 92 Moved HTClientHost and HTlogfile into here. TBL
1.1 timbl 12: ** 17 Dec 92 Tn3270 added, bug fix. DD
1.2 timbl 13: ** 4 Feb 93 Access registration, Search escapes bad chars TBL
1.9 timbl 14: ** PARAMETERS TO HTSEARCH AND HTLOADRELATIVE CHANGED
15: ** 28 May 93 WAIS gateway explicit if no WAIS library linked in.
1.19 timbl 16: ** Dec 93 Bug change around, more reentrant, etc
1.42 frystyk 17: ** 09 May 94 logfile renamed to HTlogfile to avoid clash with WAIS
1.53 duns 18: ** 8 Jul 94 Insulate free() from _free structure element.
1.2 timbl 19: ** Bugs
20: ** This module assumes that the graphic object is hypertext, as it
1.9 timbl 21: ** needs to select it when it has been loaded. A superclass needs to be
1.2 timbl 22: ** defined which accepts select and select_anchor.
1.1 timbl 23: */
24:
/* Gateway used for "wais" addresses when the library is built without
** DIRECT_WAIS and no WWW_wais_GATEWAY environment variable is set.
** A build may predefine DEFAULT_WAIS_GATEWAY to override this value.
*/
#if !defined(DEFAULT_WAIS_GATEWAY)
#define DEFAULT_WAIS_GATEWAY "http://info.cern.ch:8001/"
#endif
1.8 timbl 28:
1.1 timbl 29: /* Implements:
30: */
31: #include "HTAccess.h"
32:
33: /* Uses:
34: */
35:
36: #include "HTParse.h"
37: #include "HTUtils.h"
1.4 timbl 38: #include "HTML.h" /* SCW */
1.2 timbl 39:
40: #ifndef NO_RULES
41: #include "HTRules.h"
42: #endif
43:
44: #include "HTList.h"
45: #include "HText.h" /* See bugs above */
46: #include "HTAlert.h"
1.17 timbl 47: #include "HTFWriter.h" /* for cache stuff */
48: #include "HTTee.h"
1.46 frystyk 49: #include "HTError.h"
1.57 ! howcome 50: #include "HTTCP.h" /* HWL: for HTFindRelatedName */
1.2 timbl 51:
1.54 frystyk 52: /* These flags may be set to modify the operation of this module */
53: PUBLIC char * HTCacheDir = NULL; /* Root for cached files or 0 for no cache */
54: PUBLIC char * HTSaveLocallyDir = SAVE_LOCALLY_HOME_DIR; /* Save & exe files */
55: PUBLIC char * HTClientHost = 0; /* Name of remote login host if any */
56: PUBLIC FILE * HTlogfile = 0; /* File to which to output one-liners */
1.41 luotonen 57:
1.34 frystyk 58: PUBLIC BOOL HTForceReload = NO; /* Force reload from cache or net */
1.12 timbl 59: PUBLIC BOOL HTSecure = NO; /* Disable access for telnet users? */
1.27 luotonen 60: PUBLIC BOOL using_proxy = NO; /* are we using a proxy gateway? */
1.43 luotonen 61: PUBLIC char * HTImServer = NULL;/* cern_httpd sets this to the translated URL*/
1.27 luotonen 62: PUBLIC BOOL HTImProxy = NO; /* cern_httpd as a proxy? */
1.1 timbl 63:
1.43 luotonen 64:
1.2 timbl 65: /* To generate other things, play with these:
66: */
67:
1.15 timbl 68: /* PUBLIC HTFormat HTOutputFormat = NULL; use request->output_format */
69: /* PUBLIC HTStream* HTOutputStream = NULL; use request->output_stream */
1.1 timbl 70:
71: PRIVATE HTList * protocols = NULL; /* List of registered protocol descriptors */
72:
1.24 timbl 73: /* Superclass defn */
1.1 timbl 74:
1.24 timbl 75: struct _HTStream {
76: HTStreamClass * isa;
77: /* ... */
78: };
79:
1.15 timbl 80: /* Create a request structure
81: ** ---------------------------
82: */
83:
84: PUBLIC HTRequest * HTRequest_new NOARGS
85: {
1.28 luotonen 86: HTRequest * me = (HTRequest*) calloc(1, sizeof(*me)); /* zero fill */
1.15 timbl 87: if (!me) outofmem(__FILE__, "HTRequest_new()");
88:
1.20 luotonen 89: me->conversions = HTList_new(); /* No conversions registerd yet */
90: me->output_format = WWW_PRESENT; /* default it to present to user */
91:
1.15 timbl 92: return me;
93: }
94:
95:
1.49 frystyk 96: /* Clear a request structure
97: ** ---------------------------
98: ** This function clears the reguest structure so that only the
99: ** conversions remain. Everything else is as if it was created from
100: ** scratch.
101: */
102: PUBLIC void HTRequest_clear ARGS1(HTRequest *, req)
103: {
104: HTList *conversions;
105: if (!req) {
106: if (TRACE)
107: fprintf(stderr, "Clear....... request: Bad argument!\n");
108: return;
109: }
110: conversions = req->conversions; /* Save the conversions */
111: HTErrorFree(req);
112: HTAACleanup(req);
113: FREE(req->from);
114: memset(req, '\0', sizeof(HTRequest));
115:
116: /* Now initialize as from scratch but with the old list of conversions */
117: req->conversions = conversions;
118: req->output_format = WWW_PRESENT; /* default it to present to user */
119: }
120:
121:
1.20 luotonen 122: /* Delete a request structure
123: ** --------------------------
124: */
125: PUBLIC void HTRequest_delete ARGS1(HTRequest *, req)
126: {
127: if (req) {
1.34 frystyk 128: HTFormatDelete(req->conversions);
1.46 frystyk 129: HTErrorFree(req);
1.34 frystyk 130: HTAACleanup(req);
1.37 luotonen 131: FREE(req->from);
1.34 frystyk 132: FREE(req);
1.20 luotonen 133: }
134: }
135:
136:
1.22 luotonen 137: PRIVATE char * method_names[(int)MAX_METHODS + 1] =
138: {
139: "INVALID-METHOD",
140: "GET",
141: "HEAD",
142: "POST",
143: "PUT",
144: "DELETE",
145: "CHECKOUT",
146: "CHECKIN",
147: "SHOWMETHOD",
148: "LINK",
149: "UNLINK",
150: NULL
151: };
152:
153: /* Get method enum value
154: ** ---------------------
155: */
156: PUBLIC HTMethod HTMethod_enum ARGS1(char *, name)
157: {
158: if (name) {
159: int i;
160: for (i=1; i < (int)MAX_METHODS; i++)
161: if (!strcmp(name, method_names[i]))
162: return (HTMethod)i;
163: }
164: return METHOD_INVALID;
165: }
166:
167:
168: /* Get method name
169: ** ---------------
170: */
171: PUBLIC char * HTMethod_name ARGS1(HTMethod, method)
172: {
173: if ((int)method > (int)METHOD_INVALID &&
174: (int)method < (int)MAX_METHODS)
175: return method_names[(int)method];
176: else
177: return method_names[(int)METHOD_INVALID];
178: }
179:
180:
181: /* Is method in a list of method names?
182: ** -----------------------------------
183: */
184: PUBLIC BOOL HTMethod_inList ARGS2(HTMethod, method,
185: HTList *, list)
186: {
187: char * method_name = HTMethod_name(method);
188: HTList *cur = list;
189: char *item;
190:
191: while (NULL != (item = (char*)HTList_nextObject(cur))) {
192: CTRACE(stderr, " %s", item);
193: if (0==strcasecomp(item, method_name))
194: return YES;
195: }
196: return NO; /* Not found */
197: }
198:
199:
200:
1.1 timbl 201: /* Register a Protocol HTRegisterProtocol
202: ** -------------------
203: */
204:
1.56 frystyk 205: PUBLIC BOOL HTRegisterProtocol ARGS1(HTProtocol *, protocol)
1.1 timbl 206: {
207: if (!protocols) protocols = HTList_new();
208: HTList_addObject(protocols, protocol);
209: return YES;
210: }
211:
212:
213: /* Register all known protocols
214: ** ----------------------------
215: **
216: ** Add to or subtract from this list if you add or remove protocol modules.
217: ** This routine is called the first time the protocol list is needed,
1.52 frystyk 218: ** unless any protocols are already registered, in which case it is not
219: ** called. Therefore the application can override this list.
1.1 timbl 220: **
221: ** Compiling with NO_INIT prevents all known protocols from being forced
222: ** in at link time.
223: */
224: #ifndef NO_INIT
225: PRIVATE void HTAccessInit NOARGS /* Call me once */
226: {
1.14 duns 227: GLOBALREF HTProtocol HTTP, HTFile, HTTelnet, HTTn3270, HTRlogin;
1.1 timbl 228: #ifndef DECNET
1.54 frystyk 229: #ifdef NEW_CODE
230: GLOBALREF HTProtocol HTFTP, HTNews, HTNNTP, HTGopher;
231: #endif
1.14 duns 232: GLOBALREF HTProtocol HTFTP, HTNews, HTGopher;
1.42 frystyk 233:
1.3 timbl 234: #ifdef DIRECT_WAIS
1.14 duns 235: GLOBALREF HTProtocol HTWAIS;
1.3 timbl 236: #endif
1.2 timbl 237: HTRegisterProtocol(&HTFTP);
238: HTRegisterProtocol(&HTNews);
1.54 frystyk 239: #ifdef NEW_CODE
240: HTRegisterProtocol(&HTNNTP);
241: #endif
1.2 timbl 242: HTRegisterProtocol(&HTGopher);
1.42 frystyk 243:
1.3 timbl 244: #ifdef DIRECT_WAIS
245: HTRegisterProtocol(&HTWAIS);
246: #endif
1.1 timbl 247:
1.54 frystyk 248: #endif /* DECNET */
1.2 timbl 249: HTRegisterProtocol(&HTTP);
250: HTRegisterProtocol(&HTFile);
251: HTRegisterProtocol(&HTTelnet);
252: HTRegisterProtocol(&HTTn3270);
253: HTRegisterProtocol(&HTRlogin);
1.1 timbl 254: }
255: #endif
256:
257:
1.33 luotonen 258:
259: /* override_proxy()
260: **
261: ** Check the no_proxy environment variable to get the list
262: ** of hosts for which proxy server is not consulted.
263: **
264: ** no_proxy is a comma- or space-separated list of machine
265: ** or domain names, with optional :port part. If no :port
266: ** part is present, it applies to all ports on that domain.
267: **
268: ** Example:
269: ** no_proxy="cern.ch,some.domain:8001"
270: **
271: */
272: PRIVATE BOOL override_proxy ARGS1(CONST char *, addr)
273: {
274: CONST char * no_proxy = getenv("no_proxy");
275: char * p = NULL;
276: char * host = NULL;
277: int port = 0;
278: int h_len = 0;
279:
280: if (!no_proxy || !addr || !(host = HTParse(addr, "", PARSE_HOST)))
281: return NO;
282: if (!*host) { free(host); return NO; }
283:
1.34 frystyk 284: if ((p = strchr(host, ':')) != NULL) { /* Port specified */
1.33 luotonen 285: *p++ = 0; /* Chop off port */
286: port = atoi(p);
287: }
288: else { /* Use default port */
289: char * access = HTParse(addr, "", PARSE_ACCESS);
290: if (access) {
291: if (!strcmp(access,"http")) port = 80;
292: else if (!strcmp(access,"gopher")) port = 70;
293: else if (!strcmp(access,"ftp")) port = 21;
294: free(access);
295: }
296: }
297: if (!port) port = 80; /* Default */
298: h_len = strlen(host);
299:
300: while (*no_proxy) {
301: CONST char * end;
302: CONST char * colon = NULL;
303: int templ_port = 0;
304: int t_len;
305:
306: while (*no_proxy && (WHITE(*no_proxy) || *no_proxy==','))
307: no_proxy++; /* Skip whitespace and separators */
308:
309: end = no_proxy;
310: while (*end && !WHITE(*end) && *end != ',') { /* Find separator */
311: if (*end==':') colon = end; /* Port number given */
312: end++;
313: }
314:
315: if (colon) {
316: templ_port = atoi(colon+1);
317: t_len = colon - no_proxy;
318: }
319: else {
320: t_len = end - no_proxy;
321: }
322:
323: if ((!templ_port || templ_port == port) &&
324: (t_len > 0 && t_len <= h_len &&
325: !strncmp(host + h_len - t_len, no_proxy, t_len))) {
326: free(host);
327: return YES;
328: }
329: if (*end) no_proxy = end+1;
330: else break;
331: }
332:
333: free(host);
334: return NO;
335: }
336:
337:
338:
1.2 timbl 339: /* Find physical name and access protocol
340: ** --------------------------------------
1.1 timbl 341: **
342: **
343: ** On entry,
344: ** addr must point to the fully qualified hypertext reference.
345: ** anchor a pareent anchor with whose address is addr
346: **
347: ** On exit,
1.2 timbl 348: ** returns HT_NO_ACCESS Error has occured.
349: ** HT_OK Success
1.1 timbl 350: **
351: */
1.21 luotonen 352: PRIVATE int get_physical ARGS1(HTRequest *, req)
353: {
1.1 timbl 354: char * access=0; /* Name of access method */
1.21 luotonen 355: char * addr = HTAnchor_address((HTAnchor*)req->anchor); /* free me */
1.27 luotonen 356:
1.35 luotonen 357: /*
358: ** This HACK is here until we have redirection implemented.
359: ** This is used when we are recursively calling HTLoad().
360: ** We then take the physical address, because currently the
361: ** virtual address is kept in a hash table so it can't be
362: ** changed -- otherwise it wouldn't be found anymore.
363: */
1.36 luotonen 364: if (HTAnchor_physical(req->anchor))
365: StrAllocCopy(addr, HTAnchor_physical(req->anchor));
1.35 luotonen 366:
1.2 timbl 367: #ifndef NO_RULES
1.47 luotonen 368: if (HTImServer) { /* cern_httpd has already done its own translations */
1.45 luotonen 369: HTAnchor_setPhysical(req->anchor, HTImServer);
1.47 luotonen 370: StrAllocCopy(addr, HTImServer); /* Oops, queries thru many proxies */
371: /* didn't work without this -- AL */
372: }
1.21 luotonen 373: else {
1.27 luotonen 374: char * physical = HTTranslate(addr);
1.21 luotonen 375: if (!physical) {
1.47 luotonen 376: free(addr);
1.21 luotonen 377: return HT_FORBIDDEN;
378: }
379: HTAnchor_setPhysical(req->anchor, physical);
380: free(physical); /* free our copy */
1.2 timbl 381: }
382: #else
1.21 luotonen 383: HTAnchor_setPhysical(req->anchor, addr);
1.2 timbl 384: #endif
385:
1.21 luotonen 386: access = HTParse(HTAnchor_physical(req->anchor),
1.27 luotonen 387: "file:", PARSE_ACCESS);
1.1 timbl 388:
389: /* Check whether gateway access has been set up for this
1.8 timbl 390: **
391: ** This function can be replaced by the rule system above.
1.1 timbl 392: */
1.8 timbl 393: #define USE_GATEWAYS
1.1 timbl 394: #ifdef USE_GATEWAYS
1.39 luotonen 395:
396: /* make sure the using_proxy variable is false */
397: using_proxy = NO;
398:
1.33 luotonen 399: if (!override_proxy(addr)) {
1.27 luotonen 400: char * gateway_parameter, *gateway, *proxy;
401:
1.2 timbl 402: gateway_parameter = (char *)malloc(strlen(access)+20);
403: if (gateway_parameter == NULL) outofmem(__FILE__, "HTLoad");
1.27 luotonen 404:
405: /* search for proxy gateways */
1.2 timbl 406: strcpy(gateway_parameter, "WWW_");
407: strcat(gateway_parameter, access);
408: strcat(gateway_parameter, "_GATEWAY");
409: gateway = (char *)getenv(gateway_parameter); /* coerce for decstation */
1.27 luotonen 410:
411: /* search for proxy servers */
412: strcpy(gateway_parameter, access);
413: strcat(gateway_parameter, "_proxy");
414: proxy = (char *)getenv(gateway_parameter);
415:
1.2 timbl 416: free(gateway_parameter);
1.27 luotonen 417:
418: if (TRACE && gateway)
419: fprintf(stderr,"Gateway found: %s\n",gateway);
420: if (TRACE && proxy)
421: fprintf(stderr,"Proxy server found: %s\n",proxy);
422:
1.8 timbl 423: #ifndef DIRECT_WAIS
1.9 timbl 424: if (!gateway && 0==strcmp(access, "wais")) {
1.8 timbl 425: gateway = DEFAULT_WAIS_GATEWAY;
426: }
427: #endif
1.27 luotonen 428:
429: /* proxy servers have precedence over gateway servers */
430: if (proxy) {
431: char * gatewayed=0;
432:
433: StrAllocCopy(gatewayed,proxy);
434: StrAllocCat(gatewayed,addr);
435: using_proxy = YES;
436: HTAnchor_setPhysical(req->anchor, gatewayed);
437: free(gatewayed);
438: free(access);
439:
440: access = HTParse(HTAnchor_physical(req->anchor),
441: "http:", PARSE_ACCESS);
442: } else if (gateway) {
1.9 timbl 443: char * path = HTParse(addr, "",
444: PARSE_HOST + PARSE_PATH + PARSE_PUNCTUATION);
445: /* Chop leading / off to make host into part of path */
446: char * gatewayed = HTParse(path+1, gateway, PARSE_ALL);
447: free(path);
1.21 luotonen 448: HTAnchor_setPhysical(req->anchor, gatewayed);
1.9 timbl 449: free(gatewayed);
1.2 timbl 450: free(access);
1.9 timbl 451:
1.21 luotonen 452: access = HTParse(HTAnchor_physical(req->anchor),
1.8 timbl 453: "http:", PARSE_ACCESS);
1.2 timbl 454: }
455: }
1.1 timbl 456: #endif
457:
1.19 timbl 458: free(addr);
1.1 timbl 459:
460:
461: /* Search registered protocols to find suitable one
462: */
463: {
1.20 luotonen 464: HTList *cur;
465: HTProtocol *p;
1.1 timbl 466: #ifndef NO_INIT
1.2 timbl 467: if (!protocols) HTAccessInit();
1.1 timbl 468: #endif
1.20 luotonen 469: cur = protocols;
470: while ((p = (HTProtocol*)HTList_nextObject(cur))) {
1.2 timbl 471: if (strcmp(p->name, access)==0) {
1.21 luotonen 472: HTAnchor_setProtocol(req->anchor, p);
1.2 timbl 473: free(access);
474: return (HT_OK);
1.1 timbl 475: }
476: }
477: }
478:
479: free(access);
1.2 timbl 480: return HT_NO_ACCESS;
1.1 timbl 481: }
482:
483:
484: /* Load a document
485: ** ---------------
486: **
1.2 timbl 487: ** This is an internal routine, which has an address AND a matching
488: ** anchor. (The public routines are called with one OR the other.)
489: **
490: ** On entry,
1.15 timbl 491: ** request->
1.35 luotonen 492: ** anchor a parent anchor with fully qualified
493: ** hypertext reference as its address set
1.15 timbl 494: ** output_format valid
495: ** output_stream valid on NULL
1.2 timbl 496: **
497: ** On exit,
498: ** returns <0 Error has occured.
499: ** HT_LOADED Success
500: ** HT_NO_DATA Success, but no document loaded.
1.8 timbl 501: ** (telnet sesssion started etc)
1.2 timbl 502: **
503: */
1.52 frystyk 504: PUBLIC int HTLoad ARGS2(HTRequest *, request, BOOL, keep_error_stack)
1.2 timbl 505: {
1.25 frystyk 506: char *arg = NULL;
507: HTProtocol *p;
508: int status;
509:
1.22 luotonen 510: if (request->method == METHOD_INVALID)
511: request->method = METHOD_GET;
1.52 frystyk 512: if (!keep_error_stack) {
513: HTErrorFree(request);
514: request->error_block = NO;
515: }
516:
1.21 luotonen 517: status = get_physical(request);
1.2 timbl 518: if (status == HT_FORBIDDEN) {
1.49 frystyk 519: char *url = HTAnchor_address((HTAnchor *) request->anchor);
520: if (url) {
521: HTUnEscape(url);
522: HTErrorAdd(request, ERR_FATAL, NO, HTERR_FORBIDDEN,
523: (void *) url, (int) strlen(url), "HTLoad");
524: free(url);
525: } else {
526: HTErrorAdd(request, ERR_FATAL, NO, HTERR_FORBIDDEN,
527: NULL, 0, "HTLoad");
528: }
529: return -1;
1.2 timbl 530: }
531: if (status < 0) return status; /* Can't resolve or forbidden */
1.25 frystyk 532:
533: if(!(arg = HTAnchor_physical(request->anchor)) || !*arg)
534: return (-1);
1.27 luotonen 535:
1.56 frystyk 536: p = (HTProtocol *) HTAnchor_protocol(request->anchor);
1.17 timbl 537: return (*(p->load))(request);
1.2 timbl 538: }
539:
540:
541: /* Get a save stream for a document
542: ** --------------------------------
543: */
1.19 timbl 544: PUBLIC HTStream *HTSaveStream ARGS1(HTRequest *, request)
1.15 timbl 545: {
546: HTProtocol * p;
1.19 timbl 547: int status;
1.22 luotonen 548: request->method = METHOD_PUT;
1.21 luotonen 549: status = get_physical(request);
1.19 timbl 550: if (status == HT_FORBIDDEN) {
1.49 frystyk 551: char *url = HTAnchor_address((HTAnchor *) request->anchor);
552: if (url) {
553: HTUnEscape(url);
554: HTErrorAdd(request, ERR_FATAL, NO, HTERR_FORBIDDEN,
555: (void *) url, (int) strlen(url), "HTLoad");
556: free(url);
557: } else {
558: HTErrorAdd(request, ERR_FATAL, NO, HTERR_FORBIDDEN,
559: NULL, 0, "HTLoad");
560: }
561: return NULL; /* should return error status? */
1.19 timbl 562: }
563: if (status < 0) return NULL; /* @@ error. Can't resolve or forbidden */
564:
1.56 frystyk 565: p = (HTProtocol *) HTAnchor_protocol(request->anchor);
1.2 timbl 566: if (!p) return NULL;
567:
1.15 timbl 568: return (*p->saveStream)(request);
1.2 timbl 569:
570: }
571:
572:
573: /* Load a document - with logging etc
574: ** ----------------------------------
575: **
576: ** - Checks or documents already loaded
577: ** - Logs the access
578: ** - Allows stdin filter option
579: ** - Trace ouput and error messages
580: **
1.1 timbl 581: ** On Entry,
1.19 timbl 582: ** request->anchor valid for of the document to be accessed.
583: ** request->childAnchor optional anchor within doc to be selected
584: **
1.2 timbl 585: ** filter if YES, treat stdin as HTML
1.1 timbl 586: **
1.15 timbl 587: ** request->anchor is the node_anchor for the document
588: ** request->output_format is valid
589: **
1.1 timbl 590: ** On Exit,
591: ** returns YES Success in opening document
592: ** NO Failure
593: **
594: */
595:
1.52 frystyk 596: PRIVATE BOOL HTLoadDocument ARGS2(HTRequest *, request,
597: BOOL, keep_error_stack)
1.1 timbl 598:
599: {
600: int status;
601: HText * text;
1.19 timbl 602: char * full_address = HTAnchor_address((HTAnchor*)request->anchor);
1.54 frystyk 603:
1.49 frystyk 604: if (TRACE) fprintf (stderr, "HTAccess.... Loading document %s\n",
605: full_address);
1.1 timbl 606:
1.18 timbl 607: request->using_cache = NULL;
608:
1.15 timbl 609: if (!request->output_format) request->output_format = WWW_PRESENT;
1.25 frystyk 610:
1.31 frystyk 611: if (!HTForceReload && (text=(HText *)HTAnchor_document(request->anchor)))
1.15 timbl 612: { /* Already loaded */
1.1 timbl 613: if (TRACE) fprintf(stderr, "HTAccess: Document already in memory.\n");
1.19 timbl 614: if (request->childAnchor) {
615: HText_selectAnchor(text, request->childAnchor);
616: } else {
617: HText_select(text);
618: }
619: free(full_address);
1.1 timbl 620: return YES;
621: }
1.17 timbl 622:
1.34 frystyk 623: /* Check the Cache */
1.17 timbl 624: /* Bug: for each format, we only check whether it is ok, we
625: don't check them all and chose the best */
1.54 frystyk 626: if (request->anchor->cacheItems) {
1.17 timbl 627: HTList * list = request->anchor->cacheItems;
1.20 luotonen 628: HTList * cur = list;
629: HTCacheItem * item;
630:
631: while ((item = (HTCacheItem*)HTList_nextObject(cur))) {
1.18 timbl 632: HTStream * s;
633:
634: request->using_cache = item;
635:
1.37 luotonen 636: s = HTStreamStack(item->format, request, NO);
1.17 timbl 637: if (s) { /* format was suitable */
638: FILE * fp = fopen(item->filename, "r");
1.57 ! howcome 639: if (TRACE)
! 640: fprintf(stderr, "Cache: HIT file %s for %s\n",
1.20 luotonen 641: item->filename,
642: full_address);
1.17 timbl 643: if (fp) {
644: HTFileCopy(fp, s);
1.53 duns 645: (*s->isa->_free)(s); /* close up pipeline */
1.17 timbl 646: fclose(fp);
1.19 timbl 647: free(full_address);
1.17 timbl 648: return YES;
649: } else {
650: fprintf(stderr, "***** Can't read cache file %s !\n",
1.20 luotonen 651: item->filename);
1.17 timbl 652: } /* file open ok */
653: } /* stream ok */
654: } /* next cache item */
655: } /* if cache available for this anchor */
1.1 timbl 656:
1.52 frystyk 657: status = HTLoad(request, keep_error_stack);
1.2 timbl 658:
1.1 timbl 659: /* Log the access if necessary
660: */
1.42 frystyk 661: if (HTlogfile) {
1.1 timbl 662: time_t theTime;
663: time(&theTime);
1.42 frystyk 664: fprintf(HTlogfile, "%24.24s %s %s %s\n",
1.1 timbl 665: ctime(&theTime),
666: HTClientHost ? HTClientHost : "local",
667: status<0 ? "FAIL" : "GET",
668: full_address);
1.42 frystyk 669: fflush(HTlogfile); /* Actually update it on disk */
1.1 timbl 670: if (TRACE) fprintf(stderr, "Log: %24.24s %s %s %s\n",
671: ctime(&theTime),
672: HTClientHost ? HTClientHost : "local",
673: status<0 ? "FAIL" : "GET",
674: full_address);
675: }
676:
1.52 frystyk 677: /* The error stack might contain general information to the client
678: about what has been going on in the library (not only errors) */
679: if (request->error_stack)
680: HTErrorMsg(request);
681:
1.1 timbl 682: if (status == HT_LOADED) {
683: if (TRACE) {
1.54 frystyk 684: fprintf(stderr, "HTAccess.... `%s' has been accessed.\n",
1.1 timbl 685: full_address);
686: }
1.19 timbl 687: free(full_address);
1.1 timbl 688: return YES;
689: }
690:
691: if (status == HT_NO_DATA) {
692: if (TRACE) {
693: fprintf(stderr,
1.54 frystyk 694: "HTAccess.... `%s' has been accessed, No data left.\n",
1.1 timbl 695: full_address);
696: }
1.19 timbl 697: free(full_address);
1.1 timbl 698: return NO;
699: }
700:
1.34 frystyk 701: /* Bug fix thanks to Lou Montulli. Henrik 10/03-94 */
702: if (status<=0) { /* Failure in accessing a document */
1.1 timbl 703: #ifdef CURSES
704: user_message("Can't access `%s'", full_address);
705: #else
1.5 timbl 706: if (TRACE) fprintf(stderr,
707: "HTAccess: Can't access `%s'\n", full_address);
1.1 timbl 708: #endif
1.19 timbl 709: free(full_address);
1.1 timbl 710: return NO;
711: }
1.9 timbl 712:
713: /* If you get this, then please find which routine is returning
714: a positive unrecognised error code! */
1.1 timbl 715: fprintf(stderr,
1.50 frystyk 716: "**** HTAccess: Internal software error in CERN WWWLib version %s ****\n\nPlease mail www-bug@info.cern.ch quoting what software and what version you are using\nand the URL: %s that caused the problem, thanks!\n",
717: HTLibraryVersion,
718: full_address);
1.19 timbl 719: free(full_address);
720:
1.1 timbl 721: exit(-6996);
1.20 luotonen 722: return NO; /* For gcc :-( */
1.2 timbl 723: } /* HTLoadDocument */
1.1 timbl 724:
725:
726:
727: /* Load a document from absolute name
728: ** ---------------
729: **
730: ** On Entry,
731: ** addr The absolute address of the document to be accessed.
732: ** filter if YES, treat document as HTML
733: **
734: ** On Exit,
735: ** returns YES Success in opening document
736: ** NO Failure
737: **
738: **
739: */
740:
1.15 timbl 741: PUBLIC BOOL HTLoadAbsolute ARGS2(CONST char *,addr, HTRequest*, request)
1.2 timbl 742: {
1.19 timbl 743: HTAnchor * anchor = HTAnchor_findAddress(addr);
744: request->anchor = HTAnchor_parent(anchor);
745: request->childAnchor = ((HTAnchor*)request->anchor == anchor) ?
746: NULL : (HTChildAnchor*) anchor;
1.52 frystyk 747: return HTLoadDocument(request, NO);
1.2 timbl 748: }
749:
750:
751: /* Load a document from absolute name to stream
752: ** --------------------------------------------
753: **
754: ** On Entry,
755: ** addr The absolute address of the document to be accessed.
1.15 timbl 756: ** request->output_stream if non-NULL, send data down this stream
1.2 timbl 757: **
758: ** On Exit,
759: ** returns YES Success in opening document
760: ** NO Failure
761: **
762: **
763: */
764:
765: PUBLIC BOOL HTLoadToStream ARGS3(
766: CONST char *, addr,
767: BOOL, filter,
1.15 timbl 768: HTRequest*, request)
1.1 timbl 769: {
1.19 timbl 770: HTAnchor * anchor = HTAnchor_findAddress(addr);
771: request->anchor = HTAnchor_parent(anchor);
772: request->childAnchor = ((HTAnchor*)request->anchor == anchor) ? NULL :
773: (HTChildAnchor*) anchor;
1.15 timbl 774: request->output_stream = request->output_stream;
1.52 frystyk 775: return HTLoadDocument(request, NO);
1.1 timbl 776: }
777:
778:
1.2 timbl 779:
780:
1.1 timbl 781: /* Load a document from relative name
782: ** ---------------
783: **
784: ** On Entry,
1.2 timbl 785: ** relative_name The relative address of the document
786: ** to be accessed.
1.1 timbl 787: **
788: ** On Exit,
789: ** returns YES Success in opening document
790: ** NO Failure
791: **
792: **
793: */
794:
1.15 timbl 795: PUBLIC BOOL HTLoadRelative ARGS3(
1.2 timbl 796: CONST char *, relative_name,
1.15 timbl 797: HTParentAnchor *, here,
1.20 luotonen 798: HTRequest *, request)
1.1 timbl 799: {
800: char * full_address = 0;
801: BOOL result;
802: char * mycopy = 0;
803: char * stripped = 0;
804: char * current_address =
1.2 timbl 805: HTAnchor_address((HTAnchor*)here);
1.1 timbl 806:
807: StrAllocCopy(mycopy, relative_name);
808:
809: stripped = HTStrip(mycopy);
810: full_address = HTParse(stripped,
811: current_address,
812: PARSE_ACCESS|PARSE_HOST|PARSE_PATH|PARSE_PUNCTUATION);
1.15 timbl 813: result = HTLoadAbsolute(full_address, request);
1.1 timbl 814: free(full_address);
815: free(current_address);
816: free(mycopy); /* Memory leak fixed 10/7/92 -- JFG */
817: return result;
818: }
819:
820:
821: /* Load if necessary, and select an anchor
822: ** --------------------------------------
823: **
824: ** On Entry,
825: ** destination The child or parenet anchor to be loaded.
826: **
827: ** On Exit,
828: ** returns YES Success
829: ** NO Failure
830: **
831: */
832:
1.15 timbl 833: PUBLIC BOOL HTLoadAnchor ARGS2(HTAnchor*, anchor, HTRequest *, request)
1.1 timbl 834: {
1.15 timbl 835: if (!anchor) return NO; /* No link */
1.1 timbl 836:
1.15 timbl 837: request->anchor = HTAnchor_parent(anchor);
1.19 timbl 838: request->childAnchor = ((HTAnchor*)request->anchor == anchor) ? NULL
839: : (HTChildAnchor*) anchor;
1.1 timbl 840:
1.52 frystyk 841: return HTLoadDocument(request, NO) ? YES : NO;
842:
843: } /* HTLoadAnchor */
844:
845:
846: /* Load if necessary, and select an anchor
847: ** --------------------------------------
848: **
849: ** This function is almost identical to HTLoadAnchor, but it doesn't
850: ** clear the error stack so that the information in there is kept.
851: **
852: ** On Entry,
853: ** destination The child or parenet anchor to be loaded.
854: **
855: ** On Exit,
856: ** returns YES Success
857: ** NO Failure
858: **
859: */
860:
861: PUBLIC BOOL HTLoadAnchorRecursive ARGS2(HTAnchor*, anchor,
862: HTRequest *, request)
863: {
864: if (!anchor) return NO; /* No link */
865:
866: request->anchor = HTAnchor_parent(anchor);
867: request->childAnchor = ((HTAnchor*)request->anchor == anchor) ? NULL
868: : (HTChildAnchor*) anchor;
869:
870: return HTLoadDocument(request, YES) ? YES : NO;
1.1 timbl 871:
872: } /* HTLoadAnchor */
873:
874:
875: /* Search
876: ** ------
877: ** Performs a keyword search on word given by the user. Adds the keyword to
878: ** the end of the current address and attempts to open the new address.
879: **
880: ** On Entry,
881: ** *keywords space-separated keyword list or similar search list
1.2 timbl 882: ** here is anchor search is to be done on.
1.1 timbl 883: */
884:
1.56 frystyk 885: PRIVATE char hex ARGS1(int, i)
1.2 timbl 886: {
1.13 timbl 887: char * hexchars = "0123456789ABCDEF";
888: return hexchars[i];
1.2 timbl 889: }
1.1 timbl 890:
1.15 timbl 891: PUBLIC BOOL HTSearch ARGS3(
1.2 timbl 892: CONST char *, keywords,
1.15 timbl 893: HTParentAnchor *, here,
894: HTRequest *, request)
1.1 timbl 895: {
1.2 timbl 896:
897: #define acceptable \
898: "1234567890abcdefghijlkmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ.-_"
899:
900: char *q, *u;
901: CONST char * p, *s, *e; /* Pointers into keywords */
902: char * address = HTAnchor_address((HTAnchor*)here);
1.1 timbl 903: BOOL result;
1.56 frystyk 904: char * escaped = (char *) malloc(strlen(keywords)*3+1);
1.2 timbl 905:
1.29 frystyk 906: /* static CONST BOOL isAcceptable[96] = */
907: /* static AND const is not good for a gnu compiler! Frystyk 25/02-94 */
1.30 luotonen 908: static BOOL isAcceptable[96] =
1.2 timbl 909: /* 0 1 2 3 4 5 6 7 8 9 A B C D E F */
910: { 0,0,0,0,0,0,0,0,0,0,1,0,0,1,1,0, /* 2x !"#$%&'()*+,-./ */
911: 1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0, /* 3x 0123456789:;<=>? */
912: 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 4x @ABCDEFGHIJKLMNO */
913: 1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,1, /* 5X PQRSTUVWXYZ[\]^_ */
914: 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 6x `abcdefghijklmno */
915: 1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0 }; /* 7X pqrstuvwxyz{\}~ DEL */
916:
917: if (escaped == NULL) outofmem(__FILE__, "HTSearch");
918:
1.29 frystyk 919: /* Convert spaces to + and hex escape unacceptable characters */
1.2 timbl 920:
1.29 frystyk 921: for(s=keywords; *s && WHITE(*s); s++); /*scan */ /* Skip white space */
922: for(e = s + strlen(s); e>s && WHITE(*(e-1)) ; e--); /* Skip trailers */
923: for(q=escaped, p=s; p<e; p++) { /* scan stripped field */
1.2 timbl 924: int c = (int)TOASCII(*p);
925: if (WHITE(*p)) {
926: *q++ = '+';
1.29 frystyk 927: } else if (c>=32 && c<=127 && isAcceptable[c-32] != 0) {
1.13 timbl 928: *q++ = *p; /* 930706 TBL for MVS bug */
1.2 timbl 929: } else {
930: *q++ = '%';
931: *q++ = hex(c / 16);
932: *q++ = hex(c % 16);
933: }
934: } /* Loop over string */
1.1 timbl 935:
1.2 timbl 936: *q=0;
937: /* terminate escaped sctring */
938: u=strchr(address, '?'); /* Find old search string */
939: if (u) *u = 0; /* Chop old search off */
1.1 timbl 940:
941: StrAllocCat(address, "?");
1.2 timbl 942: StrAllocCat(address, escaped);
943: free(escaped);
1.15 timbl 944: result = HTLoadRelative(address, here, request);
1.1 timbl 945: free(address);
1.2 timbl 946:
1.1 timbl 947: return result;
1.2 timbl 948: }
949:
950:
951: /* Search Given Indexname
952: ** ------
953: ** Performs a keyword search on word given by the user. Adds the keyword to
954: ** the end of the current address and attempts to open the new address.
955: **
956: ** On Entry,
957: ** *keywords space-separated keyword list or similar search list
958: ** *addres is name of object search is to be done on.
959: */
960:
1.15 timbl 961: PUBLIC BOOL HTSearchAbsolute ARGS3(
1.2 timbl 962: CONST char *, keywords,
1.15 timbl 963: CONST char *, indexname,
964: HTRequest *, request)
1.2 timbl 965: {
966: HTParentAnchor * anchor =
967: (HTParentAnchor*) HTAnchor_findAddress(indexname);
1.15 timbl 968: return HTSearch(keywords, anchor, request);
1.57 ! howcome 969: }
! 970:
! 971:
! 972: /*
! 973: ** Find Related Name
! 974: **
! 975: ** Creates a string that can be used as a related name when
! 976: ** calling HTParse initially.
! 977: **
! 978: ** The code for this routine originates from the Linemode
! 979: ** browser and was moved here by howcome@dxcern.cern.ch
! 980: ** in order for all clients to take advantage.
! 981: **
! 982: */
! 983:
! 984:
! 985: PUBLIC char * HTFindRelatedName NOARGS
! 986: {
! 987: char* default_default=0; /* Parse home relative to this */
! 988:
! 989: StrAllocCopy(default_default, "file://");
! 990: StrAllocCat(default_default, HTGetHostName()); /*eg file://dxcern.cern.ch*/
! 991:
! 992: #ifndef MAXPATHLEN
! 993: #define NO_GETWD /* Assume no getwd() if no MAXPATHLEN */
! 994: #endif
! 995:
! 996: #ifdef NO_GETWD /* No getwd() on this machine */
! 997: #ifdef HAS_GETCWD /* System V variant SIGN CHANGED TBL 921006 !! */
! 998:
! 999: {
! 1000: char wd[1024]; /*!! Arbitrary*/
! 1001: char * result = getcwd(wd, sizeof(wd));
! 1002: if (result) {
! 1003:
! 1004: #ifdef VMS
! 1005: /* convert directory name to Unix-style syntax */
! 1006: char * disk = strchr (wd, ':');
! 1007: char * dir = strchr (wd, '[');
! 1008: if (disk) {
! 1009: *disk = '\0';
! 1010: StrAllocCat (default_default, "/"); /* needs delimiter */
! 1011: StrAllocCat (default_default, wd);
! 1012: }
! 1013: if (dir) {
! 1014: char *p;
! 1015: *dir = '/'; /* Convert leading '[' */
! 1016: for (p = dir ; *p != ']'; ++p)
! 1017: if (*p == '.') *p = '/';
! 1018: *p = '\0'; /* Cut on final ']' */
! 1019: StrAllocCat (default_default, dir);
! 1020: }
! 1021: #else /* not VMS */
! 1022: StrAllocCat (default_default, wd);
! 1023: #endif /* not VMS */
! 1024: } else {
! 1025: fprintf(stderr,"Can't read working directory (getcwd)", NULL);
! 1026: }
! 1027: } /* end if good getcwd result */
! 1028:
! 1029: #else /* has NO getcwd */
! 1030:
! 1031: fprintf(stderr,"This platform does not support getwd() or getcwd()", NULL);
! 1032:
! 1033: #endif /* has no getcwd */
! 1034:
! 1035: #else /* has getwd */
! 1036: {
! 1037: char wd[MAXPATHLEN];
! 1038: char * result = (char *) getwd(wd);
! 1039: if (result) {
! 1040: StrAllocCat(default_default, wd);
! 1041: } else {
! 1042: fprintf(stderr,"Can't read working directory.");
! 1043: }
! 1044: }
! 1045: #endif
! 1046:
! 1047: StrAllocCat(default_default, "/default.html");
! 1048: return default_default;
1.2 timbl 1049: }
1050:
1051:
1052: /* Generate the anchor for the home page
1053: ** -------------------------------------
1054: **
1055: ** As it involves file access, this should only be done once
1056: ** when the program first runs.
1.10 timbl 1057: ** This is a default algorithm -- browser don't HAVE to use this.
1058: ** But consistency betwen browsers is STRONGLY recommended!
1.2 timbl 1059: **
1.10 timbl 1060: ** Priority order is:
1061: **
1062: ** 1 WWW_HOME environment variable (logical name, etc)
1063: ** 2 ~/WWW/default.html
1064: ** 3 /usr/local/bin/default.html
1065: ** 4 http://info.cern.ch/default.html
1066: **
1.2 timbl 1067: */
1068: PUBLIC HTParentAnchor * HTHomeAnchor NOARGS
1069: {
1.12 timbl 1070: char * my_home_document = NULL;
1071: char * home = (char *)getenv(LOGICAL_DEFAULT);
1.2 timbl 1072: char * ref;
1073: HTParentAnchor * anchor;
1.1 timbl 1074:
1.12 timbl 1075: if (home) {
1076: StrAllocCopy(my_home_document, home);
1077:
1078: /* Someone telnets in, they get a special home.
1079: */
1080: #define MAX_FILE_NAME 1024 /* @@@ */
1081: } else if (HTClientHost) { /* Telnet server */
1082: FILE * fp = fopen(REMOTE_POINTER, "r");
1083: char * status;
1084: if (fp) {
1085: my_home_document = (char*) malloc(MAX_FILE_NAME);
1086: status = fgets(my_home_document, MAX_FILE_NAME, fp);
1087: if (!status) {
1088: free(my_home_document);
1089: my_home_document = NULL;
1090: }
1091: fclose(fp);
1092: }
1093: if (!my_home_document) StrAllocCopy(my_home_document, REMOTE_ADDRESS);
1094: }
1095:
1096:
1097:
1.2 timbl 1098: #ifdef unix
1.12 timbl 1099:
1.10 timbl 1100: if (!my_home_document) {
1101: FILE * fp = NULL;
1102: CONST char * home = (CONST char*)getenv("HOME");
1103: if (home) {
1104: my_home_document = (char *)malloc(
1105: strlen(home)+1+ strlen(PERSONAL_DEFAULT)+1);
1106: if (my_home_document == NULL) outofmem(__FILE__, "HTLocalName");
1107: sprintf(my_home_document, "%s/%s", home, PERSONAL_DEFAULT);
1108: fp = fopen(my_home_document, "r");
1109: }
1110:
1111: if (!fp) {
1112: StrAllocCopy(my_home_document, LOCAL_DEFAULT_FILE);
1113: fp = fopen(my_home_document, "r");
1114: }
1.2 timbl 1115: if (fp) {
1116: fclose(fp);
1117: } else {
1118: if (TRACE) fprintf(stderr,
1.10 timbl 1119: "HTBrowse: No local home document ~/%s or %s\n",
1120: PERSONAL_DEFAULT, LOCAL_DEFAULT_FILE);
1.11 timbl 1121: free(my_home_document);
1122: my_home_document = NULL;
1.2 timbl 1123: }
1124: }
1125: #endif
1.10 timbl 1126: ref = HTParse( my_home_document ? my_home_document :
1127: HTClientHost ? REMOTE_ADDRESS
1128: : LAST_RESORT,
1129: "file:",
1.2 timbl 1130: PARSE_ACCESS|PARSE_HOST|PARSE_PATH|PARSE_PUNCTUATION);
1.10 timbl 1131: if (my_home_document) {
1.2 timbl 1132: if (TRACE) fprintf(stderr,
1133: "HTAccess: Using custom home page %s i.e. address %s\n",
1.10 timbl 1134: my_home_document, ref);
1135: free(my_home_document);
1.2 timbl 1136: }
1137: anchor = (HTParentAnchor*) HTAnchor_findAddress(ref);
1138: free(ref);
1139: return anchor;
1.1 timbl 1140: }
1.26 frystyk 1141:
1142:
1143: /* Bind an Anchor to the request structure
1144: ** ---------------------------------------
1145: **
1146: ** On Entry,
1147: ** anchor The child or parenet anchor to be binded
1148: ** request The request sturcture
1149: ** On Exit,
1150: ** returns YES Success
1151: ** NO Failure
1152: **
1153: ** Note: Actually the same as HTLoadAnchor() but DOES NOT do the loading
1154: ** Henrik Frystyk 17/02-94
1155: */
1156:
1157: PUBLIC BOOL HTBindAnchor ARGS2(HTAnchor*, anchor, HTRequest *, request)
1158: {
1159: if (!anchor) return NO; /* No link */
1160:
1161: request->anchor = HTAnchor_parent(anchor);
1162: request->childAnchor = ((HTAnchor*)request->anchor == anchor) ? NULL
1163: : (HTChildAnchor*) anchor;
1164:
1.29 frystyk 1165: return YES;
1.26 frystyk 1166: } /* HTBindAnchor */
1167:
Webmaster