Annotation of libwww/Library/src/HTAccess.c, revision 1.33

1.1       timbl       1: /*             Access Manager                                  HTAccess.c
                      2: **             ==============
                      3: **
                      4: ** Authors
                      5: **     TBL     Tim Berners-Lee timbl@info.cern.ch
1.4       timbl       6: **     JFG     Jean-Francois Groff jfg@dxcern.cern.ch
1.1       timbl       7: **     DD      Denis DeLaRoca (310) 825-4580  <CSP1DWD@mvs.oac.ucla.edu>
                      8: ** History
                      9: **       8 Jun 92 Telnet hopping prohibited as telnet is not secure TBL
                     10: **     26 Jun 92 When over DECnet, suppressed FTP, Gopher and News. JFG
                     11: **      6 Oct 92 Moved HTClientHost and logfile into here. TBL
                     12: **     17 Dec 92 Tn3270 added, bug fix. DD
1.2       timbl      13: **      4 Feb 93 Access registration, Search escapes bad chars TBL
1.9       timbl      14: **               PARAMETERS TO HTSEARCH AND HTLOADRELATIVE CHANGED
                     15: **     28 May 93 WAIS gateway explicit if no WAIS library linked in.
1.19      timbl      16: **        Dec 93 Bug change around, more reentrant, etc
1.2       timbl      17: ** Bugs
                     18: **     This module assumes that the graphic object is hypertext, as it
1.9       timbl      19: **     needs to select it when it has been loaded.  A superclass needs to be
1.2       timbl      20: **     defined which accepts select and select_anchor.
1.1       timbl      21: */
                     22: 
1.9       timbl      23: #ifndef DEFAULT_WAIS_GATEWAY
1.8       timbl      24: #define DEFAULT_WAIS_GATEWAY "http://info.cern.ch:8001/"
1.9       timbl      25: #endif
1.8       timbl      26: 
1.1       timbl      27: /* Implements:
                     28: */
                     29: #include "HTAccess.h"
                     30: 
                     31: /* Uses:
                     32: */
                     33: 
                     34: #include "HTParse.h"
                     35: #include "HTUtils.h"
1.4       timbl      36: #include "HTML.h"              /* SCW */
1.2       timbl      37: 
                     38: #ifndef NO_RULES
                     39: #include "HTRules.h"
                     40: #endif
                     41: 
1.1       timbl      42: #include <stdio.h>
                     43: 
1.2       timbl      44: #include "HTList.h"
                     45: #include "HText.h"     /* See bugs above */
                     46: #include "HTAlert.h"
1.17      timbl      47: #include "HTFWriter.h" /* for cache stuff */
                     48: #include "HTTee.h"
1.2       timbl      49: 
1.1       timbl      50: /*     These flags may be set to modify the operation of this module
                     51: */
                     52: PUBLIC char * HTClientHost = 0;        /* Name of remote login host if any */
                     53: PUBLIC FILE * logfile = 0;     /* File to which to output one-liners */
1.12      timbl      54: PUBLIC BOOL HTSecure = NO;     /* Disable access for telnet users? */
1.27      luotonen   55: PUBLIC BOOL using_proxy = NO;  /* are we using a proxy gateway? */
                     56: PUBLIC BOOL HTImServer = NO;   /* cern_httpd sets this */
                     57: PUBLIC BOOL HTImProxy = NO;    /* cern_httpd as a proxy? */
1.1       timbl      58: 
1.2       timbl      59: /*     To generate other things, play with these:
                     60: */
                     61: 
1.15      timbl      62: /* PUBLIC HTFormat HTOutputFormat = NULL;      use request->output_format */
                     63: /* PUBLIC HTStream* HTOutputStream = NULL;     use request->output_stream */ 
1.1       timbl      64: 
                     65: PRIVATE HTList * protocols = NULL;   /* List of registered protocol descriptors */
                     66: 
1.24      timbl      67: /*     Superclass defn */
1.1       timbl      68: 
1.24      timbl      69: struct _HTStream {
                     70:        HTStreamClass * isa;
                     71:        /* ... */
                     72: };
                     73: 
1.15      timbl      74: /*     Create  a request structure
                     75: **     ---------------------------
                     76: */
                     77: 
                     78: PUBLIC HTRequest * HTRequest_new NOARGS
                     79: {
1.28      luotonen   80:     HTRequest * me = (HTRequest*) calloc(1, sizeof(*me));  /* zero fill */
1.15      timbl      81:     if (!me) outofmem(__FILE__, "HTRequest_new()");
                     82:     
1.20      luotonen   83:     me->conversions    = HTList_new(); /* No conversions registerd yet */
                     84:     me->output_format  = WWW_PRESENT;  /* default it to present to user */
                     85: 
1.15      timbl      86:     return me;
                     87: }
                     88: 
                     89: 
1.20      luotonen   90: /*     Delete a request structure
                     91: **     --------------------------
                     92: */
                     93: PUBLIC void HTRequest_delete ARGS1(HTRequest *, req)
                     94: {
                     95:     if (req) {
1.23      luotonen   96:        if (req->conversions) {
                     97:            HTList *cur = req->conversions;
                     98:            HTPresentation *pres;
1.20      luotonen   99: 
1.23      luotonen  100:            while ((pres = (HTPresentation*)HTList_nextObject(cur))) {
                    101:                FREE(pres->command);            /* Leak fixed AL 6 Feb 1994 */
                    102:                free(pres);
                    103:            }
                    104:            HTList_delete(req->conversions);    /* Leak fixed AL 6 Feb 1994 */
                    105:        }
1.28      luotonen  106:        FREE(req->authorization);
1.20      luotonen  107:        free(req);
                    108:     }
                    109: }
                    110: 
                    111: 
1.22      luotonen  112: PRIVATE char * method_names[(int)MAX_METHODS + 1] =
                    113: {
                    114:     "INVALID-METHOD",
                    115:     "GET",
                    116:     "HEAD",
                    117:     "POST",
                    118:     "PUT",
                    119:     "DELETE",
                    120:     "CHECKOUT",
                    121:     "CHECKIN",
                    122:     "SHOWMETHOD",
                    123:     "LINK",
                    124:     "UNLINK",
                    125:     NULL
                    126: };
                    127: 
                    128: /*     Get method enum value
                    129: **     ---------------------
                    130: */
                    131: PUBLIC HTMethod HTMethod_enum ARGS1(char *, name)
                    132: {
                    133:     if (name) {
                    134:        int i;
                    135:        for (i=1; i < (int)MAX_METHODS; i++)
                    136:            if (!strcmp(name, method_names[i]))
                    137:                return (HTMethod)i;
                    138:     }
                    139:     return METHOD_INVALID;
                    140: }
                    141: 
                    142: 
                    143: /*     Get method name
                    144: **     ---------------
                    145: */
                    146: PUBLIC char * HTMethod_name ARGS1(HTMethod, method)
                    147: {
                    148:     if ((int)method > (int)METHOD_INVALID  && 
                    149:        (int)method < (int)MAX_METHODS)
                    150:        return method_names[(int)method];
                    151:     else
                    152:        return method_names[(int)METHOD_INVALID];
                    153: }
                    154: 
                    155: 
                    156: /*     Is method in a list of method names?
                    157: **     -----------------------------------
                    158: */
                    159: PUBLIC BOOL HTMethod_inList ARGS2(HTMethod,    method,
                    160:                                  HTList *,     list)
                    161: {
                    162:     char * method_name = HTMethod_name(method);
                    163:     HTList *cur = list;
                    164:     char *item;
                    165: 
                    166:     while (NULL != (item = (char*)HTList_nextObject(cur))) {
                    167:        CTRACE(stderr, " %s", item);
                    168:        if (0==strcasecomp(item, method_name))
                    169:            return YES;
                    170:     }
                    171:     return NO; /* Not found */
                    172: }
                    173: 
                    174: 
                    175: 
                    176: 
1.20      luotonen  177: 
1.1       timbl     178: /*     Register a Protocol                             HTRegisterProtocol
                    179: **     -------------------
                    180: */
                    181: 
                    182: PUBLIC BOOL HTRegisterProtocol(protocol)
                    183:        HTProtocol * protocol;
                    184: {
                    185:     if (!protocols) protocols = HTList_new();
                    186:     HTList_addObject(protocols, protocol);
                    187:     return YES;
                    188: }
                    189: 
                    190: 
                    191: /*     Register all known protocols
                    192: **     ----------------------------
                    193: **
                    194: **     Add to or subtract from this list if you add or remove protocol modules.
                    195: **     This routine is called the first time the protocol list is needed,
                    196: **     unless any protocols are already registered, in which case it is not called.
                    197: **     Therefore the application can override this list.
                    198: **
                    199: **     Compiling with NO_INIT prevents all known protocols from being forced
                    200: **     in at link time.
                    201: */
                    202: #ifndef NO_INIT
                    203: PRIVATE void HTAccessInit NOARGS                       /* Call me once */
                    204: {
1.14      duns      205: GLOBALREF HTProtocol HTTP, HTFile, HTTelnet, HTTn3270, HTRlogin;
1.1       timbl     206: #ifndef DECNET
1.14      duns      207: GLOBALREF  HTProtocol HTFTP, HTNews, HTGopher;
1.3       timbl     208: #ifdef DIRECT_WAIS
1.14      duns      209: GLOBALREF  HTProtocol HTWAIS;
1.3       timbl     210: #endif
1.2       timbl     211:     HTRegisterProtocol(&HTFTP);
                    212:     HTRegisterProtocol(&HTNews);
                    213:     HTRegisterProtocol(&HTGopher);
1.3       timbl     214: #ifdef DIRECT_WAIS
                    215:     HTRegisterProtocol(&HTWAIS);
                    216: #endif
1.1       timbl     217: #endif
                    218: 
1.2       timbl     219:     HTRegisterProtocol(&HTTP);
                    220:     HTRegisterProtocol(&HTFile);
                    221:     HTRegisterProtocol(&HTTelnet);
                    222:     HTRegisterProtocol(&HTTn3270);
                    223:     HTRegisterProtocol(&HTRlogin);
1.1       timbl     224: }
                    225: #endif
                    226: 
                    227: 
1.33    ! luotonen  228: 
        !           229: /*                                                     override_proxy()
        !           230: **
        !           231: **     Check the no_proxy environment variable to get the list
        !           232: **     of hosts for which proxy server is not consulted.
        !           233: **
        !           234: **     no_proxy is a comma- or space-separated list of machine
        !           235: **     or domain names, with optional :port part.  If no :port
        !           236: **     part is present, it applies to all ports on that domain.
        !           237: **
        !           238: **     Example:
        !           239: **             no_proxy="cern.ch,some.domain:8001"
        !           240: **
        !           241: */
        !           242: PRIVATE BOOL override_proxy ARGS1(CONST char *, addr)
        !           243: {
        !           244:     CONST char * no_proxy = getenv("no_proxy");
        !           245:     char * p = NULL;
        !           246:     char * host = NULL;
        !           247:     int port = 0;
        !           248:     int h_len = 0;
        !           249: 
        !           250:     if (!no_proxy || !addr || !(host = HTParse(addr, "", PARSE_HOST)))
        !           251:        return NO;
        !           252:     if (!*host) { free(host); return NO; }
        !           253: 
        !           254:     if (p = strchr(host, ':')) {       /* Port specified */
        !           255:        *p++ = 0;                       /* Chop off port */
        !           256:        port = atoi(p);
        !           257:     }
        !           258:     else {                             /* Use default port */
        !           259:        char * access = HTParse(addr, "", PARSE_ACCESS);
        !           260:        if (access) {
        !           261:            if      (!strcmp(access,"http"))    port = 80;
        !           262:            else if (!strcmp(access,"gopher"))  port = 70;
        !           263:            else if (!strcmp(access,"ftp"))     port = 21;
        !           264:            free(access);
        !           265:        }
        !           266:     }
        !           267:     if (!port) port = 80;              /* Default */
        !           268:     h_len = strlen(host);
        !           269: 
        !           270:     while (*no_proxy) {
        !           271:        CONST char * end;
        !           272:        CONST char * colon = NULL;
        !           273:        int templ_port = 0;
        !           274:        int t_len;
        !           275: 
        !           276:        while (*no_proxy && (WHITE(*no_proxy) || *no_proxy==','))
        !           277:            no_proxy++;                 /* Skip whitespace and separators */
        !           278: 
        !           279:        end = no_proxy;
        !           280:        while (*end && !WHITE(*end) && *end != ',') {   /* Find separator */
        !           281:            if (*end==':') colon = end;                 /* Port number given */
        !           282:            end++;
        !           283:        }
        !           284: 
        !           285:        if (colon) {
        !           286:            templ_port = atoi(colon+1);
        !           287:            t_len = colon - no_proxy;
        !           288:        }
        !           289:        else {
        !           290:            t_len = end - no_proxy;
        !           291:        }
        !           292: 
        !           293:        if ((!templ_port || templ_port == port)  &&
        !           294:            (t_len > 0  &&  t_len <= h_len  &&
        !           295:             !strncmp(host + h_len - t_len, no_proxy, t_len))) {
        !           296:            free(host);
        !           297:            return YES;
        !           298:        }
        !           299:        if (*end) no_proxy = end+1;
        !           300:        else break;
        !           301:     }
        !           302: 
        !           303:     free(host);
        !           304:     return NO;
        !           305: }
        !           306: 
        !           307: 
        !           308: 
1.2       timbl     309: /*             Find physical name and access protocol
                    310: **             --------------------------------------
1.1       timbl     311: **
                    312: **
                    313: ** On entry,
                    314: **     addr            must point to the fully qualified hypertext reference.
                    315: **     anchor          a pareent anchor with whose address is addr
                    316: **
                    317: ** On exit,
1.2       timbl     318: **     returns         HT_NO_ACCESS            Error has occured.
                    319: **                     HT_OK                   Success
1.1       timbl     320: **
                    321: */
1.21      luotonen  322: PRIVATE int get_physical ARGS1(HTRequest *, req)
                    323: {    
1.1       timbl     324:     char * access=0;   /* Name of access method */
1.21      luotonen  325:     char * addr = HTAnchor_address((HTAnchor*)req->anchor);    /* free me */
1.27      luotonen  326: 
1.2       timbl     327: #ifndef NO_RULES
1.27      luotonen  328:     if (HTImServer)    /* cern_httpd has already done its own translations */
                    329:        HTAnchor_setPhysical(req->anchor, addr);
1.21      luotonen  330:     else {
1.27      luotonen  331:        char * physical = HTTranslate(addr);
1.21      luotonen  332:        if (!physical) {
                    333:            free(addr);
                    334:            return HT_FORBIDDEN;
                    335:        }
                    336:        HTAnchor_setPhysical(req->anchor, physical);
                    337:        free(physical);                 /* free our copy */
1.2       timbl     338:     }
                    339: #else
1.21      luotonen  340:     HTAnchor_setPhysical(req->anchor, addr);
1.2       timbl     341: #endif
                    342: 
1.21      luotonen  343:     access =  HTParse(HTAnchor_physical(req->anchor),
1.27      luotonen  344:                      "file:", PARSE_ACCESS);
1.1       timbl     345: 
                    346: /*     Check whether gateway access has been set up for this
1.8       timbl     347: **
                    348: **     This function can be replaced by the rule system above.
1.1       timbl     349: */
1.8       timbl     350: #define USE_GATEWAYS
1.1       timbl     351: #ifdef USE_GATEWAYS
1.33    ! luotonen  352:     if (!override_proxy(addr)) {
1.27      luotonen  353:        char * gateway_parameter, *gateway, *proxy;
                    354: 
1.2       timbl     355:        gateway_parameter = (char *)malloc(strlen(access)+20);
                    356:        if (gateway_parameter == NULL) outofmem(__FILE__, "HTLoad");
1.27      luotonen  357: 
                    358:        /* search for proxy gateways */
1.2       timbl     359:        strcpy(gateway_parameter, "WWW_");
                    360:        strcat(gateway_parameter, access);
                    361:        strcat(gateway_parameter, "_GATEWAY");
                    362:        gateway = (char *)getenv(gateway_parameter); /* coerce for decstation */
1.27      luotonen  363: 
                    364:        /* search for proxy servers */
                    365:        strcpy(gateway_parameter, access);
                    366:        strcat(gateway_parameter, "_proxy");
                    367:        proxy = (char *)getenv(gateway_parameter);
                    368: 
1.2       timbl     369:        free(gateway_parameter);
1.27      luotonen  370: 
                    371:        if (TRACE && gateway)
                    372:            fprintf(stderr,"Gateway found: %s\n",gateway);
                    373:        if (TRACE && proxy)
                    374:            fprintf(stderr,"Proxy server found: %s\n",proxy);
                    375: 
1.8       timbl     376: #ifndef DIRECT_WAIS
1.9       timbl     377:        if (!gateway && 0==strcmp(access, "wais")) {
1.8       timbl     378:            gateway = DEFAULT_WAIS_GATEWAY;
                    379:        }
                    380: #endif
1.27      luotonen  381:        /* make sure the using_proxy variable is false */
                    382:        using_proxy = NO;
                    383: 
                    384:        /* proxy servers have precedence over gateway servers */
                    385:        if (proxy) {
                    386:            char * gatewayed=0;
                    387: 
                    388:             StrAllocCopy(gatewayed,proxy);
                    389:            StrAllocCat(gatewayed,addr);
                    390:            using_proxy = YES;
                    391:            HTAnchor_setPhysical(req->anchor, gatewayed);
                    392:            free(gatewayed);
                    393:            free(access);
                    394: 
                    395:            access =  HTParse(HTAnchor_physical(req->anchor),
                    396:                              "http:", PARSE_ACCESS);
                    397:        } else if (gateway) {
1.9       timbl     398:            char * path = HTParse(addr, "",
                    399:                PARSE_HOST + PARSE_PATH + PARSE_PUNCTUATION);
                    400:                /* Chop leading / off to make host into part of path */
                    401:            char * gatewayed = HTParse(path+1, gateway, PARSE_ALL);
                    402:            free(path);
1.21      luotonen  403:             HTAnchor_setPhysical(req->anchor, gatewayed);
1.9       timbl     404:            free(gatewayed);
1.2       timbl     405:            free(access);
1.9       timbl     406:            
1.21      luotonen  407:            access =  HTParse(HTAnchor_physical(req->anchor),
1.8       timbl     408:                "http:", PARSE_ACCESS);
1.2       timbl     409:        }
                    410:     }
1.1       timbl     411: #endif
                    412: 
1.19      timbl     413:     free(addr);
1.1       timbl     414: 
                    415: 
                    416: /*     Search registered protocols to find suitable one
                    417: */
                    418:     {
1.20      luotonen  419:        HTList *cur;
                    420:        HTProtocol *p;
1.1       timbl     421: #ifndef NO_INIT
1.2       timbl     422:         if (!protocols) HTAccessInit();
1.1       timbl     423: #endif
1.20      luotonen  424:        cur = protocols;
                    425:        while ((p = (HTProtocol*)HTList_nextObject(cur))) {
1.2       timbl     426:            if (strcmp(p->name, access)==0) {
1.21      luotonen  427:                HTAnchor_setProtocol(req->anchor, p);
1.2       timbl     428:                free(access);
                    429:                return (HT_OK);
1.1       timbl     430:            }
                    431:        }
                    432:     }
                    433: 
                    434:     free(access);
1.2       timbl     435:     return HT_NO_ACCESS;
1.1       timbl     436: }
                    437: 
                    438: 
                    439: /*             Load a document
                    440: **             ---------------
                    441: **
1.2       timbl     442: **     This is an internal routine, which has an address AND a matching
                    443: **     anchor.  (The public routines are called with one OR the other.)
                    444: **
                    445: ** On entry,
                    446: **     addr            must point to the fully qualified hypertext reference.
1.15      timbl     447: **     request->
                    448: **         anchor              a parent anchor with whose address is addr
                    449: **         output_format       valid
                    450: **         output_stream       valid on NULL
1.2       timbl     451: **
                    452: ** On exit,
                    453: **     returns         <0              Error has occured.
                    454: **                     HT_LOADED       Success
                    455: **                     HT_NO_DATA      Success, but no document loaded.
1.8       timbl     456: **                                     (telnet sesssion started etc)
1.2       timbl     457: **
                    458: */
1.15      timbl     459: PRIVATE int HTLoad ARGS2(
1.19      timbl     460:        CONST char *,           addr,   /* not used */
1.15      timbl     461:        HTRequest *,            request)
1.2       timbl     462: {
1.25      frystyk   463:     char       *arg = NULL;
                    464:     HTProtocol *p;
                    465:     int        status;
                    466: 
1.22      luotonen  467:     if (request->method == METHOD_INVALID)
                    468:        request->method = METHOD_GET;
1.21      luotonen  469:     status = get_physical(request);
1.2       timbl     470:     if (status == HT_FORBIDDEN) {
1.21      luotonen  471:         return HTLoadError(request, 500,
                    472:                           "Access forbidden by rule");
1.2       timbl     473:     }
                    474:     if (status < 0) return status;     /* Can't resolve or forbidden */
1.25      frystyk   475: 
                    476:     if(!(arg = HTAnchor_physical(request->anchor)) || !*arg) 
                    477:        return (-1);
1.27      luotonen  478: 
1.15      timbl     479:     p = HTAnchor_protocol(request->anchor);
1.17      timbl     480:     return (*(p->load))(request);
1.2       timbl     481: }
                    482: 
                    483: 
                    484: /*             Get a save stream for a document
                    485: **             --------------------------------
                    486: */
1.19      timbl     487: PUBLIC HTStream *HTSaveStream ARGS1(HTRequest *, request)
1.15      timbl     488: {
                    489:     HTProtocol * p;
1.19      timbl     490:     int status;
1.22      luotonen  491:     request->method = METHOD_PUT;
1.21      luotonen  492:     status = get_physical(request);
1.19      timbl     493:     if (status == HT_FORBIDDEN) {
1.21      luotonen  494:         HTLoadError(request, 500,
                    495:                    "Access forbidden by rule");
1.19      timbl     496:        return NULL;    /* should return error status? */
                    497:     }
                    498:     if (status < 0) return NULL; /* @@ error. Can't resolve or forbidden */
                    499:     
1.15      timbl     500:     p = HTAnchor_protocol(request->anchor);
1.2       timbl     501:     if (!p) return NULL;
                    502:     
1.15      timbl     503:     return (*p->saveStream)(request);
1.2       timbl     504:     
                    505: }
                    506: 
                    507: 
                    508: /*             Load a document - with logging etc
                    509: **             ----------------------------------
                    510: **
                    511: **     - Checks for documents already loaded
                    512: **     - Logs the access
                    513: **     - Allows stdin filter option
                    514: **     - Trace output and error messages
                    515: **
1.1       timbl     516: **    On Entry,
1.19      timbl     517: **        request->anchor      valid anchor of the document to be accessed.
                    518: **      request->childAnchor   optional anchor within doc to be selected
                    519: **
1.2       timbl     520: **        filter            if YES, treat stdin as HTML
1.1       timbl     521: **
1.15      timbl     522: **       request->anchor   is the node_anchor for the document
                    523: **       request->output_format is valid
                    524: **
1.1       timbl     525: **    On Exit,
                    526: **        returns    YES     Success in opening document
                    527: **                   NO      Failure 
                    528: **
                    529: */
                    530: 
1.19      timbl     531: PRIVATE BOOL HTLoadDocument ARGS1(HTRequest *,         request)
1.1       timbl     532: 
                    533: {
                    534:     int                status;
                    535:     HText *    text;
1.19      timbl     536:     char * full_address = HTAnchor_address((HTAnchor*)request->anchor);
                    537:     
1.1       timbl     538:     if (TRACE) fprintf (stderr,
                    539:       "HTAccess: loading document %s\n", full_address);
                    540: 
1.18      timbl     541:     request->using_cache = NULL;
                    542:     
1.15      timbl     543:     if (!request->output_format) request->output_format = WWW_PRESENT;
1.25      frystyk   544: 
1.31      frystyk   545:     if (!HTForceReload && (text=(HText *)HTAnchor_document(request->anchor)))
1.15      timbl     546:     {  /* Already loaded */
1.1       timbl     547:         if (TRACE) fprintf(stderr, "HTAccess: Document already in memory.\n");
1.19      timbl     548:        if (request->childAnchor) {
                    549:            HText_selectAnchor(text, request->childAnchor);
                    550:        } else {
                    551:            HText_select(text); 
                    552:        }
                    553:        free(full_address);
1.1       timbl     554:        return YES;
                    555:     }
1.17      timbl     556:     
                    557:     /* Check the Cache
                    558:     */
                    559:     /* Bug: for each format, we only check whether it is ok, we
                    560:        don't check them all and chose the best */
                    561:     if (request->anchor->cacheItems) {
                    562:         HTList * list = request->anchor->cacheItems;
1.20      luotonen  563:        HTList * cur = list;
                    564:        HTCacheItem * item;
                    565: 
                    566:        while ((item = (HTCacheItem*)HTList_nextObject(cur))) {
1.18      timbl     567:            HTStream * s;
                    568:            
                    569:            request->using_cache = item;
                    570:            
                    571:            s = HTStreamStack(item->format, request);
1.17      timbl     572:            if (s) {            /* format was suitable */
                    573:                FILE * fp = fopen(item->filename, "r");
1.18      timbl     574:                if (TRACE) fprintf(stderr, "Cache: HIT file %s for %s\n",
1.20      luotonen  575:                                   item->filename, 
                    576:                                   full_address);
1.17      timbl     577:                if (fp) {
                    578:                    HTFileCopy(fp, s);
1.24      timbl     579:                    (*s->isa->free)(s); /* close up pipeline */
1.17      timbl     580:                    fclose(fp);
1.19      timbl     581:                    free(full_address);
1.17      timbl     582:                    return YES;
                    583:                } else {
                    584:                    fprintf(stderr, "***** Can't read cache file %s !\n",
1.20      luotonen  585:                            item->filename);
1.17      timbl     586:                } /* file open ok */
                    587:            } /* stream ok */
                    588:        } /* next cache item */
                    589:     } /* if cache available for this anchor */
1.1       timbl     590:     
1.15      timbl     591:     status = HTLoad(full_address, request);
1.2       timbl     592: 
                    593:     
1.1       timbl     594: /*     Log the access if necessary
                    595: */
                    596:     if (logfile) {
                    597:        time_t theTime;
                    598:        time(&theTime);
                    599:        fprintf(logfile, "%24.24s %s %s %s\n",
                    600:            ctime(&theTime),
                    601:            HTClientHost ? HTClientHost : "local",
                    602:            status<0 ? "FAIL" : "GET",
                    603:            full_address);
                    604:        fflush(logfile);        /* Actually update it on disk */
                    605:        if (TRACE) fprintf(stderr, "Log: %24.24s %s %s %s\n",
                    606:            ctime(&theTime),
                    607:            HTClientHost ? HTClientHost : "local",
                    608:            status<0 ? "FAIL" : "GET",
                    609:            full_address);
                    610:     }
                    611:     
                    612: 
                    613:     if (status == HT_LOADED) {
                    614:        if (TRACE) {
                    615:            fprintf(stderr, "HTAccess: `%s' has been accessed.\n",
                    616:            full_address);
                    617:        }
1.19      timbl     618:        free(full_address);
1.1       timbl     619:        return YES;
                    620:     }
                    621:     
                    622:     if (status == HT_NO_DATA) {
                    623:        if (TRACE) {
                    624:            fprintf(stderr, 
                    625:            "HTAccess: `%s' has been accessed, No data left.\n",
                    626:            full_address);
                    627:        }
1.19      timbl     628:        free(full_address);
1.1       timbl     629:        return NO;
                    630:     }
                    631:     
1.2       timbl     632:     if (status<0) {                  /* Failure in accessing a document */
1.1       timbl     633: #ifdef CURSES
                    634:         user_message("Can't access `%s'", full_address);
                    635: #else
1.5       timbl     636:        if (TRACE) fprintf(stderr, 
                    637:                "HTAccess: Can't access `%s'\n", full_address);
1.1       timbl     638: #endif
1.32      frystyk   639:        /* This is done in the specific load procedures... Henrik 07/03-94 */
                    640:        /* HTLoadError(request, 500, "Unable to access document."); */
1.19      timbl     641:        free(full_address);
1.1       timbl     642:        return NO;
                    643:     }
1.9       timbl     644:  
                    645:     /* If you get this, then please find which routine is returning
                    646:        a positive unrecognised error code! */
                    647:  
1.1       timbl     648:     fprintf(stderr,
1.2       timbl     649:     "**** HTAccess: socket or file number returned by obsolete load routine!\n");
1.9       timbl     650:     fprintf(stderr,
1.19      timbl     651:     "**** HTAccess: Internal software error. Please mail www-bug@info.cern.ch quoting the version number of this software and the URL: %s!\n",
                    652:        full_address);
                    653:     free(full_address);
                    654:    
1.1       timbl     655:     exit(-6996);
1.20      luotonen  656:     return NO; /* For gcc :-( */
1.2       timbl     657: } /* HTLoadDocument */
1.1       timbl     658: 
                    659: 
                    660: 
                    661: /*             Load a document from absolute name
                    662: **             ---------------
                    663: **
                    664: **    On Entry,
                    665: **        addr     The absolute address of the document to be accessed.
                    666: **        filter   if YES, treat document as HTML
                    667: **
                    668: **    On Exit,
                    669: **        returns    YES     Success in opening document
                    670: **                   NO      Failure 
                    671: **
                    672: **
                    673: */
                    674: 
1.15      timbl     675: PUBLIC BOOL HTLoadAbsolute ARGS2(CONST char *,addr, HTRequest*, request)
1.2       timbl     676: {
1.19      timbl     677:    HTAnchor * anchor = HTAnchor_findAddress(addr);
                    678:    request->anchor = HTAnchor_parent(anchor);
                    679:    request->childAnchor = ((HTAnchor*)request->anchor == anchor) ?
                    680:                        NULL : (HTChildAnchor*) anchor;
                    681:    return HTLoadDocument(request);
1.2       timbl     682: }
                    683: 
                    684: 
                    685: /*             Load a document from absolute name to stream
                    686: **             --------------------------------------------
                    687: **
                    688: **    On Entry,
                    689: **        addr     The absolute address of the document to be accessed.
1.15      timbl     690: **        request->output_stream     if non-NULL, send data down this stream
1.2       timbl     691: **
                    692: **    On Exit,
                    693: **        returns    YES     Success in opening document
                    694: **                   NO      Failure 
                    695: **
                    696: **
                    697: */
                    698: 
                    699: PUBLIC BOOL HTLoadToStream ARGS3(
                    700:                CONST char *,   addr,
                    701:                BOOL,           filter,
1.15      timbl     702:                HTRequest*,     request)
1.1       timbl     703: {
1.19      timbl     704:    HTAnchor * anchor = HTAnchor_findAddress(addr);
                    705:    request->anchor = HTAnchor_parent(anchor);
                    706:    request->childAnchor = ((HTAnchor*)request->anchor == anchor) ? NULL :
                    707:        (HTChildAnchor*) anchor;
1.15      timbl     708:     request->output_stream = request->output_stream;
1.19      timbl     709:     return HTLoadDocument(request);
1.1       timbl     710: }
                    711: 
                    712: 
1.2       timbl     713: 
                    714: 
1.1       timbl     715: /*             Load a document from relative name
                    716: **             ---------------
                    717: **
                    718: **    On Entry,
1.2       timbl     719: **        relative_name     The relative address of the document
                    720: **                         to be accessed.
1.1       timbl     721: **
                    722: **    On Exit,
                    723: **        returns    YES     Success in opening document
                    724: **                   NO      Failure 
                    725: **
                    726: **
                    727: */
                    728: 
1.15      timbl     729: PUBLIC BOOL HTLoadRelative ARGS3(
1.2       timbl     730:                CONST char *,           relative_name,
1.15      timbl     731:                HTParentAnchor *,       here,
1.20      luotonen  732:                HTRequest *,            request)
1.1       timbl     733: {
                    734:     char *             full_address = 0;
                    735:     BOOL                       result;
                    736:     char *             mycopy = 0;
                    737:     char *             stripped = 0;
                    738:     char *             current_address =
1.2       timbl     739:                                HTAnchor_address((HTAnchor*)here);
1.1       timbl     740: 
                    741:     StrAllocCopy(mycopy, relative_name);
                    742: 
                    743:     stripped = HTStrip(mycopy);
                    744:     full_address = HTParse(stripped,
                    745:                   current_address,
                    746:                   PARSE_ACCESS|PARSE_HOST|PARSE_PATH|PARSE_PUNCTUATION);
1.15      timbl     747:     result = HTLoadAbsolute(full_address, request);
1.1       timbl     748:     free(full_address);
                    749:     free(current_address);
                    750:     free(mycopy);  /* Memory leak fixed 10/7/92 -- JFG */
                    751:     return result;
                    752: }
                    753: 
                    754: 
                    755: /*             Load if necessary, and select an anchor
                    756: **             --------------------------------------
                    757: **
                    758: **    On Entry,
                    759: **        destination              The child or parenet anchor to be loaded.
                    760: **
                    761: **    On Exit,
                    762: **        returns    YES     Success
                    763: **                   NO      Failure 
                    764: **
                    765: */
                    766: 
1.15      timbl     767: PUBLIC BOOL HTLoadAnchor ARGS2(HTAnchor*, anchor, HTRequest *, request)
1.1       timbl     768: {
1.15      timbl     769:     if (!anchor) return NO;    /* No link */
1.1       timbl     770:     
1.15      timbl     771:     request->anchor  = HTAnchor_parent(anchor);
1.19      timbl     772:     request->childAnchor = ((HTAnchor*)request->anchor == anchor) ? NULL
                    773:                                        : (HTChildAnchor*) anchor;
1.1       timbl     774:     
1.19      timbl     775:     return HTLoadDocument(request) ? YES : NO;
1.1       timbl     776:        
                    777: } /* HTLoadAnchor */
                    778: 
                    779: 
                    780: /*             Search
                    781: **             ------
                    782: **  Performs a keyword search on word given by the user. Adds the keyword to 
                    783: **  the end of the current address and attempts to open the new address.
                    784: **
                    785: **  On Entry,
                    786: **       *keywords     space-separated keyword list or similar search list
1.2       timbl     787: **     here            is anchor search is to be done on.
1.1       timbl     788: */
                    789: 
1.2       timbl     790: PRIVATE char hex(i)
                    791:     int i;
                    792: {
1.13      timbl     793:     char * hexchars = "0123456789ABCDEF";
                    794:     return hexchars[i];
1.2       timbl     795: }
1.1       timbl     796: 
1.15      timbl     797: PUBLIC BOOL HTSearch ARGS3(
1.2       timbl     798:        CONST char *,           keywords,
1.15      timbl     799:        HTParentAnchor *,       here,
                    800:        HTRequest *,            request)
1.1       timbl     801: {
1.2       timbl     802: 
                    803: #define acceptable \
                    804: "1234567890abcdefghijlkmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ.-_"
                    805: 
                    806:     char *q, *u;
                    807:     CONST char * p, *s, *e;            /* Pointers into keywords */
                    808:     char * address = HTAnchor_address((HTAnchor*)here);
1.1       timbl     809:     BOOL result;
1.2       timbl     810:     char * escaped = malloc(strlen(keywords)*3+1);
                    811: 
1.29      frystyk   812:     /* static CONST BOOL isAcceptable[96] = */
                    813:     /* static AND const is not good for a gnu compiler! Frystyk 25/02-94 */
1.30      luotonen  814:     static BOOL isAcceptable[96] =
1.2       timbl     815:     /*   0 1 2 3 4 5 6 7 8 9 A B C D E F */
                    816:     {    0,0,0,0,0,0,0,0,0,0,1,0,0,1,1,0,      /* 2x   !"#$%&'()*+,-./  */
                    817:          1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,      /* 3x  0123456789:;<=>?  */
                    818:         1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,       /* 4x  @ABCDEFGHIJKLMNO  */
                    819:         1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,1,       /* 5X  PQRSTUVWXYZ[\]^_  */
                    820:         0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,       /* 6x  `abcdefghijklmno  */
                    821:         1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0 };     /* 7X  pqrstuvwxyz{\}~  DEL */
                    822: 
                    823:     if (escaped == NULL) outofmem(__FILE__, "HTSearch");
                    824:     
1.29      frystyk   825: /* Convert spaces to + and hex escape unacceptable characters */
1.2       timbl     826: 
1.29      frystyk   827:     for(s=keywords; *s && WHITE(*s); s++); /*scan */    /* Skip white space */
                    828:     for(e = s + strlen(s); e>s && WHITE(*(e-1)) ; e--);     /* Skip trailers */
                    829:     for(q=escaped, p=s; p<e; p++) {                  /* scan stripped field */
1.2       timbl     830:         int c = (int)TOASCII(*p);
                    831:         if (WHITE(*p)) {
                    832:            *q++ = '+';
1.29      frystyk   833:        } else if (c>=32 && c<=127 && isAcceptable[c-32] != 0) {
1.13      timbl     834:            *q++ = *p;                  /* 930706 TBL for MVS bug */
1.2       timbl     835:        } else {
                    836:            *q++ = '%';
                    837:            *q++ = hex(c / 16);
                    838:            *q++ = hex(c % 16);
                    839:        }
                    840:     } /* Loop over string */
1.1       timbl     841:     
1.2       timbl     842:     *q=0;
                    843:                                /* terminate escaped sctring */
                    844:     u=strchr(address, '?');            /* Find old search string */
                    845:     if (u) *u = 0;                             /* Chop old search off */
1.1       timbl     846: 
                    847:     StrAllocCat(address, "?");
1.2       timbl     848:     StrAllocCat(address, escaped);
                    849:     free(escaped);
1.15      timbl     850:     result = HTLoadRelative(address, here, request);
1.1       timbl     851:     free(address);
1.2       timbl     852:     
1.1       timbl     853:     return result;
1.2       timbl     854: }
                    855: 
                    856: 
                    857: /*             Search Given Indexname
                    858: **             ------
                    859: **  Performs a keyword search on word given by the user. Adds the keyword to 
                    860: **  the end of the current address and attempts to open the new address.
                    861: **
                    862: **  On Entry,
                    863: **       *keywords     space-separated keyword list or similar search list
                    864: **     *addres         is name of object search is to be done on.
                    865: */
                    866: 
1.15      timbl     867: PUBLIC BOOL HTSearchAbsolute ARGS3(
1.2       timbl     868:        CONST char *,   keywords,
1.15      timbl     869:        CONST char *,   indexname,
                    870:        HTRequest *,    request)
1.2       timbl     871: {
                    872:     HTParentAnchor * anchor =
                    873:        (HTParentAnchor*) HTAnchor_findAddress(indexname);
1.15      timbl     874:     return HTSearch(keywords, anchor, request);
1.2       timbl     875: }
                    876: 
                    877: 
                    878: /*             Generate the anchor for the home page
                    879: **             -------------------------------------
                    880: **
                    881: **     As it involves file access, this should only be done once
                    882: **     when the program first runs.
1.10      timbl     883: **     This is a default algorithm -- browser don't HAVE to use this.
                    884: **     But consistency betwen browsers is STRONGLY recommended!
1.2       timbl     885: **
1.10      timbl     886: **     Priority order is:
                    887: **
                    888: **             1       WWW_HOME environment variable (logical name, etc)
                    889: **             2       ~/WWW/default.html
                    890: **             3       /usr/local/bin/default.html
                    891: **             4       http://info.cern.ch/default.html
                    892: **
1.2       timbl     893: */
                    894: PUBLIC HTParentAnchor * HTHomeAnchor NOARGS
                    895: {
1.12      timbl     896:     char * my_home_document = NULL;
                    897:     char * home = (char *)getenv(LOGICAL_DEFAULT);
1.2       timbl     898:     char * ref;
                    899:     HTParentAnchor * anchor;
1.1       timbl     900:     
1.12      timbl     901:     if (home) {
                    902:         StrAllocCopy(my_home_document, home);
                    903:     
                    904: /*     Someone telnets in, they get a special home.
                    905: */
                    906: #define MAX_FILE_NAME 1024                                     /* @@@ */
                    907:     } else  if (HTClientHost) {                        /* Telnet server */
                    908:        FILE * fp = fopen(REMOTE_POINTER, "r");
                    909:        char * status;
                    910:        if (fp) {
                    911:            my_home_document = (char*) malloc(MAX_FILE_NAME);
                    912:            status = fgets(my_home_document, MAX_FILE_NAME, fp);
                    913:            if (!status) {
                    914:                free(my_home_document);
                    915:                my_home_document = NULL;
                    916:            }
                    917:            fclose(fp);
                    918:        }
                    919:        if (!my_home_document) StrAllocCopy(my_home_document, REMOTE_ADDRESS);
                    920:     }
                    921: 
                    922:     
                    923: 
1.2       timbl     924: #ifdef unix
1.12      timbl     925: 
1.10      timbl     926:     if (!my_home_document) {
                    927:        FILE * fp = NULL;
                    928:        CONST char * home =  (CONST char*)getenv("HOME");
                    929:        if (home) { 
                    930:            my_home_document = (char *)malloc(
                    931:                strlen(home)+1+ strlen(PERSONAL_DEFAULT)+1);
                    932:            if (my_home_document == NULL) outofmem(__FILE__, "HTLocalName");
                    933:            sprintf(my_home_document, "%s/%s", home, PERSONAL_DEFAULT);
                    934:            fp = fopen(my_home_document, "r");
                    935:        }
                    936:        
                    937:        if (!fp) {
                    938:            StrAllocCopy(my_home_document, LOCAL_DEFAULT_FILE);
                    939:            fp = fopen(my_home_document, "r");
                    940:        }
1.2       timbl     941:        if (fp) {
                    942:            fclose(fp);
                    943:        } else {
                    944:        if (TRACE) fprintf(stderr,
1.10      timbl     945:            "HTBrowse: No local home document ~/%s or %s\n",
                    946:            PERSONAL_DEFAULT, LOCAL_DEFAULT_FILE);
1.11      timbl     947:            free(my_home_document);
                    948:            my_home_document = NULL;
1.2       timbl     949:        }
                    950:     }
                    951: #endif
1.10      timbl     952:     ref = HTParse( my_home_document ?  my_home_document :
                    953:                                HTClientHost ? REMOTE_ADDRESS
                    954:                                : LAST_RESORT,
                    955:                    "file:",
1.2       timbl     956:                    PARSE_ACCESS|PARSE_HOST|PARSE_PATH|PARSE_PUNCTUATION);
1.10      timbl     957:     if (my_home_document) {
1.2       timbl     958:        if (TRACE) fprintf(stderr,
                    959:            "HTAccess: Using custom home page %s i.e. address %s\n",
1.10      timbl     960:            my_home_document, ref);
                    961:        free(my_home_document);
1.2       timbl     962:     }
                    963:     anchor = (HTParentAnchor*) HTAnchor_findAddress(ref);
                    964:     free(ref);
                    965:     return anchor;
1.1       timbl     966: }
1.26      frystyk   967: 
                    968: 
                    969: /*             Bind an Anchor to the request structure
                    970: **             ---------------------------------------
                    971: **
                    972: **    On Entry,
                    973: **     anchor          The child or parenet anchor to be binded
                    974: **     request         The request sturcture
                    975: **    On Exit,
                    976: **        returns    YES     Success
                    977: **                   NO      Failure 
                    978: **
                    979: **  Note: Actually the same as HTLoadAnchor() but DOES NOT do the loading
                    980: **                                             Henrik Frystyk 17/02-94
                    981: */
                    982: 
                    983: PUBLIC BOOL HTBindAnchor ARGS2(HTAnchor*, anchor, HTRequest *, request)
                    984: {
                    985:     if (!anchor) return NO;    /* No link */
                    986:     
                    987:     request->anchor  = HTAnchor_parent(anchor);
                    988:     request->childAnchor = ((HTAnchor*)request->anchor == anchor) ? NULL
                    989:                                        : (HTChildAnchor*) anchor;
                    990:        
1.29      frystyk   991:     return YES;
1.26      frystyk   992: } /* HTBindAnchor */
                    993: 

Webmaster