Annotation of libwww/Library/src/HTAccess.c, revision 1.34

1.1       timbl       1: /*             Access Manager                                  HTAccess.c
                      2: **             ==============
                      3: **
                      4: ** Authors
                      5: **     TBL     Tim Berners-Lee timbl@info.cern.ch
1.4       timbl       6: **     JFG     Jean-Francois Groff jfg@dxcern.cern.ch
1.1       timbl       7: **     DD      Denis DeLaRoca (310) 825-4580  <CSP1DWD@mvs.oac.ucla.edu>
                      8: ** History
                      9: **       8 Jun 92 Telnet hopping prohibited as telnet is not secure TBL
                     10: **     26 Jun 92 When over DECnet, suppressed FTP, Gopher and News. JFG
                     11: **      6 Oct 92 Moved HTClientHost and logfile into here. TBL
                     12: **     17 Dec 92 Tn3270 added, bug fix. DD
1.2       timbl      13: **      4 Feb 93 Access registration, Search escapes bad chars TBL
1.9       timbl      14: **               PARAMETERS TO HTSEARCH AND HTLOADRELATIVE CHANGED
                     15: **     28 May 93 WAIS gateway explicit if no WAIS library linked in.
1.19      timbl      16: **        Dec 93 Bug change around, more reentrant, etc
1.2       timbl      17: ** Bugs
                     18: **     This module assumes that the graphic object is hypertext, as it
1.9       timbl      19: **     needs to select it when it has been loaded.  A superclass needs to be
1.2       timbl      20: **     defined which accepts select and select_anchor.
1.1       timbl      21: */
                     22: 
1.9       timbl      23: #ifndef DEFAULT_WAIS_GATEWAY
1.8       timbl      24: #define DEFAULT_WAIS_GATEWAY "http://info.cern.ch:8001/"
1.9       timbl      25: #endif
1.8       timbl      26: 
1.1       timbl      27: /* Implements:
                     28: */
                     29: #include "HTAccess.h"
                     30: 
                     31: /* Uses:
                     32: */
                     33: 
                     34: #include "HTParse.h"
                     35: #include "HTUtils.h"
1.4       timbl      36: #include "HTML.h"              /* SCW */
1.2       timbl      37: 
                     38: #ifndef NO_RULES
                     39: #include "HTRules.h"
                     40: #endif
                     41: 
1.1       timbl      42: #include <stdio.h>
                     43: 
1.2       timbl      44: #include "HTList.h"
                     45: #include "HText.h"     /* See bugs above */
                     46: #include "HTAlert.h"
1.17      timbl      47: #include "HTFWriter.h" /* for cache stuff */
                     48: #include "HTTee.h"
1.2       timbl      49: 
1.1       timbl      50: /*     These flags may be set to modify the operation of this module
                     51: */
1.34    ! frystyk    52: PUBLIC char * HTCacheDir = 0;  /* Root for cached files or 0 for no cache */
        !            53: PUBLIC char * HTSaveLocallyDir = SAVE_LOCALLY_HOME_DIR;        /* Save & exe files */
1.1       timbl      54: PUBLIC char * HTClientHost = 0;        /* Name of remote login host if any */
                     55: PUBLIC FILE * logfile = 0;     /* File to which to output one-liners */
1.34    ! frystyk    56: PUBLIC BOOL HTForceReload = NO;        /* Force reload from cache or net */
1.12      timbl      57: PUBLIC BOOL HTSecure = NO;     /* Disable access for telnet users? */
1.27      luotonen   58: PUBLIC BOOL using_proxy = NO;  /* are we using a proxy gateway? */
                     59: PUBLIC BOOL HTImServer = NO;   /* cern_httpd sets this */
                     60: PUBLIC BOOL HTImProxy = NO;    /* cern_httpd as a proxy? */
1.1       timbl      61: 
1.2       timbl      62: /*     To generate other things, play with these:
                     63: */
                     64: 
1.15      timbl      65: /* PUBLIC HTFormat HTOutputFormat = NULL;      use request->output_format */
                     66: /* PUBLIC HTStream* HTOutputStream = NULL;     use request->output_stream */ 
1.1       timbl      67: 
                     68: PRIVATE HTList * protocols = NULL;   /* List of registered protocol descriptors */
                     69: 
1.24      timbl      70: /*     Superclass defn */
1.1       timbl      71: 
1.24      timbl      72: struct _HTStream {
                     73:        HTStreamClass * isa;
                     74:        /* ... */
                     75: };
                     76: 
1.15      timbl      77: /*     Create  a request structure
                     78: **     ---------------------------
                     79: */
                     80: 
                     81: PUBLIC HTRequest * HTRequest_new NOARGS
                     82: {
1.28      luotonen   83:     HTRequest * me = (HTRequest*) calloc(1, sizeof(*me));  /* zero fill */
1.15      timbl      84:     if (!me) outofmem(__FILE__, "HTRequest_new()");
                     85:     
1.20      luotonen   86:     me->conversions    = HTList_new(); /* No conversions registerd yet */
                     87:     me->output_format  = WWW_PRESENT;  /* default it to present to user */
                     88: 
1.15      timbl      89:     return me;
                     90: }
                     91: 
                     92: 
1.20      luotonen   93: /*     Delete a request structure
                     94: **     --------------------------
                     95: */
                     96: PUBLIC void HTRequest_delete ARGS1(HTRequest *, req)
                     97: {
                     98:     if (req) {
1.34    ! frystyk    99:        HTFormatDelete(req->conversions);
        !           100:        HTAACleanup(req);
        !           101:        FREE(req);
1.20      luotonen  102:     }
                    103: }
                    104: 
                    105: 
1.22      luotonen  106: PRIVATE char * method_names[(int)MAX_METHODS + 1] =
                    107: {
                    108:     "INVALID-METHOD",
                    109:     "GET",
                    110:     "HEAD",
                    111:     "POST",
                    112:     "PUT",
                    113:     "DELETE",
                    114:     "CHECKOUT",
                    115:     "CHECKIN",
                    116:     "SHOWMETHOD",
                    117:     "LINK",
                    118:     "UNLINK",
                    119:     NULL
                    120: };
                    121: 
                    122: /*     Get method enum value
                    123: **     ---------------------
                    124: */
                    125: PUBLIC HTMethod HTMethod_enum ARGS1(char *, name)
                    126: {
                    127:     if (name) {
                    128:        int i;
                    129:        for (i=1; i < (int)MAX_METHODS; i++)
                    130:            if (!strcmp(name, method_names[i]))
                    131:                return (HTMethod)i;
                    132:     }
                    133:     return METHOD_INVALID;
                    134: }
                    135: 
                    136: 
                    137: /*     Get method name
                    138: **     ---------------
                    139: */
                    140: PUBLIC char * HTMethod_name ARGS1(HTMethod, method)
                    141: {
                    142:     if ((int)method > (int)METHOD_INVALID  && 
                    143:        (int)method < (int)MAX_METHODS)
                    144:        return method_names[(int)method];
                    145:     else
                    146:        return method_names[(int)METHOD_INVALID];
                    147: }
                    148: 
                    149: 
                    150: /*     Is method in a list of method names?
                    151: **     -----------------------------------
                    152: */
                    153: PUBLIC BOOL HTMethod_inList ARGS2(HTMethod,    method,
                    154:                                  HTList *,     list)
                    155: {
                    156:     char * method_name = HTMethod_name(method);
                    157:     HTList *cur = list;
                    158:     char *item;
                    159: 
                    160:     while (NULL != (item = (char*)HTList_nextObject(cur))) {
                    161:        CTRACE(stderr, " %s", item);
                    162:        if (0==strcasecomp(item, method_name))
                    163:            return YES;
                    164:     }
                    165:     return NO; /* Not found */
                    166: }
                    167: 
                    168: 
                    169: 
                    170: 
1.20      luotonen  171: 
1.1       timbl     172: /*     Register a Protocol                             HTRegisterProtocol
                    173: **     -------------------
                    174: */
                    175: 
                    176: PUBLIC BOOL HTRegisterProtocol(protocol)
                    177:        HTProtocol * protocol;
                    178: {
                    179:     if (!protocols) protocols = HTList_new();
                    180:     HTList_addObject(protocols, protocol);
                    181:     return YES;
                    182: }
                    183: 
                    184: 
                    185: /*     Register all known protocols
                    186: **     ----------------------------
                    187: **
                    188: **     Add to or subtract from this list if you add or remove protocol modules.
                    189: **     This routine is called the first time the protocol list is needed,
                    190: **     unless any protocols are already registered, in which case it is not called.
                    191: **     Therefore the application can override this list.
                    192: **
                    193: **     Compiling with NO_INIT prevents all known protocols from being forced
                    194: **     in at link time.
                    195: */
                    196: #ifndef NO_INIT
                    197: PRIVATE void HTAccessInit NOARGS                       /* Call me once */
                    198: {
1.14      duns      199: GLOBALREF HTProtocol HTTP, HTFile, HTTelnet, HTTn3270, HTRlogin;
1.1       timbl     200: #ifndef DECNET
1.14      duns      201: GLOBALREF  HTProtocol HTFTP, HTNews, HTGopher;
1.3       timbl     202: #ifdef DIRECT_WAIS
1.14      duns      203: GLOBALREF  HTProtocol HTWAIS;
1.3       timbl     204: #endif
1.2       timbl     205:     HTRegisterProtocol(&HTFTP);
                    206:     HTRegisterProtocol(&HTNews);
                    207:     HTRegisterProtocol(&HTGopher);
1.3       timbl     208: #ifdef DIRECT_WAIS
                    209:     HTRegisterProtocol(&HTWAIS);
                    210: #endif
1.1       timbl     211: #endif
                    212: 
1.2       timbl     213:     HTRegisterProtocol(&HTTP);
                    214:     HTRegisterProtocol(&HTFile);
                    215:     HTRegisterProtocol(&HTTelnet);
                    216:     HTRegisterProtocol(&HTTn3270);
                    217:     HTRegisterProtocol(&HTRlogin);
1.1       timbl     218: }
                    219: #endif
                    220: 
                    221: 
1.33      luotonen  222: 
                    223: /*                                                     override_proxy()
                    224: **
                    225: **     Check the no_proxy environment variable to get the list
                    226: **     of hosts for which proxy server is not consulted.
                    227: **
                    228: **     no_proxy is a comma- or space-separated list of machine
                    229: **     or domain names, with optional :port part.  If no :port
                    230: **     part is present, it applies to all ports on that domain.
                    231: **
                    232: **     Example:
                    233: **             no_proxy="cern.ch,some.domain:8001"
                    234: **
                    235: */
                    236: PRIVATE BOOL override_proxy ARGS1(CONST char *, addr)
                    237: {
                    238:     CONST char * no_proxy = getenv("no_proxy");
                    239:     char * p = NULL;
                    240:     char * host = NULL;
                    241:     int port = 0;
                    242:     int h_len = 0;
                    243: 
                    244:     if (!no_proxy || !addr || !(host = HTParse(addr, "", PARSE_HOST)))
                    245:        return NO;
                    246:     if (!*host) { free(host); return NO; }
                    247: 
1.34    ! frystyk   248:     if ((p = strchr(host, ':')) != NULL) {     /* Port specified */
1.33      luotonen  249:        *p++ = 0;                       /* Chop off port */
                    250:        port = atoi(p);
                    251:     }
                    252:     else {                             /* Use default port */
                    253:        char * access = HTParse(addr, "", PARSE_ACCESS);
                    254:        if (access) {
                    255:            if      (!strcmp(access,"http"))    port = 80;
                    256:            else if (!strcmp(access,"gopher"))  port = 70;
                    257:            else if (!strcmp(access,"ftp"))     port = 21;
                    258:            free(access);
                    259:        }
                    260:     }
                    261:     if (!port) port = 80;              /* Default */
                    262:     h_len = strlen(host);
                    263: 
                    264:     while (*no_proxy) {
                    265:        CONST char * end;
                    266:        CONST char * colon = NULL;
                    267:        int templ_port = 0;
                    268:        int t_len;
                    269: 
                    270:        while (*no_proxy && (WHITE(*no_proxy) || *no_proxy==','))
                    271:            no_proxy++;                 /* Skip whitespace and separators */
                    272: 
                    273:        end = no_proxy;
                    274:        while (*end && !WHITE(*end) && *end != ',') {   /* Find separator */
                    275:            if (*end==':') colon = end;                 /* Port number given */
                    276:            end++;
                    277:        }
                    278: 
                    279:        if (colon) {
                    280:            templ_port = atoi(colon+1);
                    281:            t_len = colon - no_proxy;
                    282:        }
                    283:        else {
                    284:            t_len = end - no_proxy;
                    285:        }
                    286: 
                    287:        if ((!templ_port || templ_port == port)  &&
                    288:            (t_len > 0  &&  t_len <= h_len  &&
                    289:             !strncmp(host + h_len - t_len, no_proxy, t_len))) {
                    290:            free(host);
                    291:            return YES;
                    292:        }
                    293:        if (*end) no_proxy = end+1;
                    294:        else break;
                    295:     }
                    296: 
                    297:     free(host);
                    298:     return NO;
                    299: }
                    300: 
                    301: 
                    302: 
1.2       timbl     303: /*             Find physical name and access protocol
                    304: **             --------------------------------------
1.1       timbl     305: **
                    306: **
                    307: ** On entry,
                    308: **     addr            must point to the fully qualified hypertext reference.
                    309: **     anchor          a pareent anchor with whose address is addr
                    310: **
                    311: ** On exit,
1.2       timbl     312: **     returns         HT_NO_ACCESS            Error has occured.
                    313: **                     HT_OK                   Success
1.1       timbl     314: **
                    315: */
1.21      luotonen  316: PRIVATE int get_physical ARGS1(HTRequest *, req)
                    317: {    
1.1       timbl     318:     char * access=0;   /* Name of access method */
1.21      luotonen  319:     char * addr = HTAnchor_address((HTAnchor*)req->anchor);    /* free me */
1.27      luotonen  320: 
1.2       timbl     321: #ifndef NO_RULES
1.27      luotonen  322:     if (HTImServer)    /* cern_httpd has already done its own translations */
                    323:        HTAnchor_setPhysical(req->anchor, addr);
1.21      luotonen  324:     else {
1.27      luotonen  325:        char * physical = HTTranslate(addr);
1.21      luotonen  326:        if (!physical) {
                    327:            free(addr);
                    328:            return HT_FORBIDDEN;
                    329:        }
                    330:        HTAnchor_setPhysical(req->anchor, physical);
                    331:        free(physical);                 /* free our copy */
1.2       timbl     332:     }
                    333: #else
1.21      luotonen  334:     HTAnchor_setPhysical(req->anchor, addr);
1.2       timbl     335: #endif
                    336: 
1.21      luotonen  337:     access =  HTParse(HTAnchor_physical(req->anchor),
1.27      luotonen  338:                      "file:", PARSE_ACCESS);
1.1       timbl     339: 
                    340: /*     Check whether gateway access has been set up for this
1.8       timbl     341: **
                    342: **     This function can be replaced by the rule system above.
1.1       timbl     343: */
1.8       timbl     344: #define USE_GATEWAYS
1.1       timbl     345: #ifdef USE_GATEWAYS
1.33      luotonen  346:     if (!override_proxy(addr)) {
1.27      luotonen  347:        char * gateway_parameter, *gateway, *proxy;
                    348: 
1.2       timbl     349:        gateway_parameter = (char *)malloc(strlen(access)+20);
                    350:        if (gateway_parameter == NULL) outofmem(__FILE__, "HTLoad");
1.27      luotonen  351: 
                    352:        /* search for proxy gateways */
1.2       timbl     353:        strcpy(gateway_parameter, "WWW_");
                    354:        strcat(gateway_parameter, access);
                    355:        strcat(gateway_parameter, "_GATEWAY");
                    356:        gateway = (char *)getenv(gateway_parameter); /* coerce for decstation */
1.27      luotonen  357: 
                    358:        /* search for proxy servers */
                    359:        strcpy(gateway_parameter, access);
                    360:        strcat(gateway_parameter, "_proxy");
                    361:        proxy = (char *)getenv(gateway_parameter);
                    362: 
1.2       timbl     363:        free(gateway_parameter);
1.27      luotonen  364: 
                    365:        if (TRACE && gateway)
                    366:            fprintf(stderr,"Gateway found: %s\n",gateway);
                    367:        if (TRACE && proxy)
                    368:            fprintf(stderr,"Proxy server found: %s\n",proxy);
                    369: 
1.8       timbl     370: #ifndef DIRECT_WAIS
1.9       timbl     371:        if (!gateway && 0==strcmp(access, "wais")) {
1.8       timbl     372:            gateway = DEFAULT_WAIS_GATEWAY;
                    373:        }
                    374: #endif
1.27      luotonen  375:        /* make sure the using_proxy variable is false */
                    376:        using_proxy = NO;
                    377: 
                    378:        /* proxy servers have precedence over gateway servers */
                    379:        if (proxy) {
                    380:            char * gatewayed=0;
                    381: 
                    382:             StrAllocCopy(gatewayed,proxy);
                    383:            StrAllocCat(gatewayed,addr);
                    384:            using_proxy = YES;
                    385:            HTAnchor_setPhysical(req->anchor, gatewayed);
                    386:            free(gatewayed);
                    387:            free(access);
                    388: 
                    389:            access =  HTParse(HTAnchor_physical(req->anchor),
                    390:                              "http:", PARSE_ACCESS);
                    391:        } else if (gateway) {
1.9       timbl     392:            char * path = HTParse(addr, "",
                    393:                PARSE_HOST + PARSE_PATH + PARSE_PUNCTUATION);
                    394:                /* Chop leading / off to make host into part of path */
                    395:            char * gatewayed = HTParse(path+1, gateway, PARSE_ALL);
                    396:            free(path);
1.21      luotonen  397:             HTAnchor_setPhysical(req->anchor, gatewayed);
1.9       timbl     398:            free(gatewayed);
1.2       timbl     399:            free(access);
1.9       timbl     400:            
1.21      luotonen  401:            access =  HTParse(HTAnchor_physical(req->anchor),
1.8       timbl     402:                "http:", PARSE_ACCESS);
1.2       timbl     403:        }
                    404:     }
1.1       timbl     405: #endif
                    406: 
1.19      timbl     407:     free(addr);
1.1       timbl     408: 
                    409: 
                    410: /*     Search registered protocols to find suitable one
                    411: */
                    412:     {
1.20      luotonen  413:        HTList *cur;
                    414:        HTProtocol *p;
1.1       timbl     415: #ifndef NO_INIT
1.2       timbl     416:         if (!protocols) HTAccessInit();
1.1       timbl     417: #endif
1.20      luotonen  418:        cur = protocols;
                    419:        while ((p = (HTProtocol*)HTList_nextObject(cur))) {
1.2       timbl     420:            if (strcmp(p->name, access)==0) {
1.21      luotonen  421:                HTAnchor_setProtocol(req->anchor, p);
1.2       timbl     422:                free(access);
                    423:                return (HT_OK);
1.1       timbl     424:            }
                    425:        }
                    426:     }
                    427: 
                    428:     free(access);
1.2       timbl     429:     return HT_NO_ACCESS;
1.1       timbl     430: }
                    431: 
                    432: 
                    433: /*             Load a document
                    434: **             ---------------
                    435: **
1.2       timbl     436: **     This is an internal routine, which has an address AND a matching
                    437: **     anchor.  (The public routines are called with one OR the other.)
                    438: **
                    439: ** On entry,
                    440: **     addr            must point to the fully qualified hypertext reference.
1.15      timbl     441: **     request->
                    442: **         anchor              a parent anchor with whose address is addr
                    443: **         output_format       valid
                    444: **         output_stream       valid on NULL
1.2       timbl     445: **
                    446: ** On exit,
                    447: **     returns         <0              Error has occured.
                    448: **                     HT_LOADED       Success
                    449: **                     HT_NO_DATA      Success, but no document loaded.
1.8       timbl     450: **                                     (telnet sesssion started etc)
1.2       timbl     451: **
                    452: */
1.15      timbl     453: PRIVATE int HTLoad ARGS2(
1.19      timbl     454:        CONST char *,           addr,   /* not used */
1.15      timbl     455:        HTRequest *,            request)
1.2       timbl     456: {
1.25      frystyk   457:     char       *arg = NULL;
                    458:     HTProtocol *p;
                    459:     int        status;
                    460: 
1.22      luotonen  461:     if (request->method == METHOD_INVALID)
                    462:        request->method = METHOD_GET;
1.21      luotonen  463:     status = get_physical(request);
1.2       timbl     464:     if (status == HT_FORBIDDEN) {
1.21      luotonen  465:         return HTLoadError(request, 500,
                    466:                           "Access forbidden by rule");
1.2       timbl     467:     }
                    468:     if (status < 0) return status;     /* Can't resolve or forbidden */
1.25      frystyk   469: 
                    470:     if(!(arg = HTAnchor_physical(request->anchor)) || !*arg) 
                    471:        return (-1);
1.27      luotonen  472: 
1.15      timbl     473:     p = HTAnchor_protocol(request->anchor);
1.17      timbl     474:     return (*(p->load))(request);
1.2       timbl     475: }
                    476: 
                    477: 
                    478: /*             Get a save stream for a document
                    479: **             --------------------------------
                    480: */
1.19      timbl     481: PUBLIC HTStream *HTSaveStream ARGS1(HTRequest *, request)
1.15      timbl     482: {
                    483:     HTProtocol * p;
1.19      timbl     484:     int status;
1.22      luotonen  485:     request->method = METHOD_PUT;
1.21      luotonen  486:     status = get_physical(request);
1.19      timbl     487:     if (status == HT_FORBIDDEN) {
1.21      luotonen  488:         HTLoadError(request, 500,
                    489:                    "Access forbidden by rule");
1.19      timbl     490:        return NULL;    /* should return error status? */
                    491:     }
                    492:     if (status < 0) return NULL; /* @@ error. Can't resolve or forbidden */
                    493:     
1.15      timbl     494:     p = HTAnchor_protocol(request->anchor);
1.2       timbl     495:     if (!p) return NULL;
                    496:     
1.15      timbl     497:     return (*p->saveStream)(request);
1.2       timbl     498:     
                    499: }
                    500: 
                    501: 
                    502: /*             Load a document - with logging etc
                    503: **             ----------------------------------
                    504: **
                    505: **     - Checks for documents already loaded
                    506: **     - Logs the access
                    507: **     - Allows stdin filter option
                    508: **     - Trace output and error messages
                    509: **
1.1       timbl     510: **    On Entry,
1.19      timbl     511: **        request->anchor      valid anchor for the document to be accessed.
                    512: **      request->childAnchor   optional anchor within doc to be selected
                    513: **
1.2       timbl     514: **        filter            if YES, treat stdin as HTML
1.1       timbl     515: **
1.15      timbl     516: **       request->anchor   is the node_anchor for the document
                    517: **       request->output_format is valid
                    518: **
1.1       timbl     519: **    On Exit,
                    520: **        returns    YES     Success in opening document
                    521: **                   NO      Failure 
                    522: **
                    523: */
                    524: 
1.19      timbl     525: PRIVATE BOOL HTLoadDocument ARGS1(HTRequest *,         request)
1.1       timbl     526: 
                    527: {
                    528:     int                status;
                    529:     HText *    text;
1.19      timbl     530:     char * full_address = HTAnchor_address((HTAnchor*)request->anchor);
                    531:     
1.1       timbl     532:     if (TRACE) fprintf (stderr,
                    533:       "HTAccess: loading document %s\n", full_address);
                    534: 
1.18      timbl     535:     request->using_cache = NULL;
                    536:     
1.15      timbl     537:     if (!request->output_format) request->output_format = WWW_PRESENT;
1.25      frystyk   538: 
1.31      frystyk   539:     if (!HTForceReload && (text=(HText *)HTAnchor_document(request->anchor)))
1.15      timbl     540:     {  /* Already loaded */
1.1       timbl     541:         if (TRACE) fprintf(stderr, "HTAccess: Document already in memory.\n");
1.19      timbl     542:        if (request->childAnchor) {
                    543:            HText_selectAnchor(text, request->childAnchor);
                    544:        } else {
                    545:            HText_select(text); 
                    546:        }
                    547:        free(full_address);
1.1       timbl     548:        return YES;
                    549:     }
1.17      timbl     550:     
1.34    ! frystyk   551:     /* Check the Cache */
        !           552:     /* Caching is ONLY done if (char*) HTCacheDir is set. Henrik 09/03-94 */
1.17      timbl     553:     /* Bug: for each format, we only check whether it is ok, we
                    554:        don't check them all and chose the best */
1.34    ! frystyk   555:     if (HTCacheDir && request->anchor->cacheItems) {
1.17      timbl     556:         HTList * list = request->anchor->cacheItems;
1.20      luotonen  557:        HTList * cur = list;
                    558:        HTCacheItem * item;
                    559: 
                    560:        while ((item = (HTCacheItem*)HTList_nextObject(cur))) {
1.18      timbl     561:            HTStream * s;
                    562:            
                    563:            request->using_cache = item;
                    564:            
                    565:            s = HTStreamStack(item->format, request);
1.17      timbl     566:            if (s) {            /* format was suitable */
                    567:                FILE * fp = fopen(item->filename, "r");
1.18      timbl     568:                if (TRACE) fprintf(stderr, "Cache: HIT file %s for %s\n",
1.20      luotonen  569:                                   item->filename, 
                    570:                                   full_address);
1.17      timbl     571:                if (fp) {
                    572:                    HTFileCopy(fp, s);
1.24      timbl     573:                    (*s->isa->free)(s); /* close up pipeline */
1.17      timbl     574:                    fclose(fp);
1.19      timbl     575:                    free(full_address);
1.17      timbl     576:                    return YES;
                    577:                } else {
                    578:                    fprintf(stderr, "***** Can't read cache file %s !\n",
1.20      luotonen  579:                            item->filename);
1.17      timbl     580:                } /* file open ok */
                    581:            } /* stream ok */
                    582:        } /* next cache item */
                    583:     } /* if cache available for this anchor */
1.1       timbl     584:     
1.15      timbl     585:     status = HTLoad(full_address, request);
1.2       timbl     586: 
                    587:     
1.1       timbl     588: /*     Log the access if necessary
                    589: */
                    590:     if (logfile) {
                    591:        time_t theTime;
                    592:        time(&theTime);
                    593:        fprintf(logfile, "%24.24s %s %s %s\n",
                    594:            ctime(&theTime),
                    595:            HTClientHost ? HTClientHost : "local",
                    596:            status<0 ? "FAIL" : "GET",
                    597:            full_address);
                    598:        fflush(logfile);        /* Actually update it on disk */
                    599:        if (TRACE) fprintf(stderr, "Log: %24.24s %s %s %s\n",
                    600:            ctime(&theTime),
                    601:            HTClientHost ? HTClientHost : "local",
                    602:            status<0 ? "FAIL" : "GET",
                    603:            full_address);
                    604:     }
                    605:     
                    606: 
                    607:     if (status == HT_LOADED) {
                    608:        if (TRACE) {
                    609:            fprintf(stderr, "HTAccess: `%s' has been accessed.\n",
                    610:            full_address);
                    611:        }
1.19      timbl     612:        free(full_address);
1.1       timbl     613:        return YES;
                    614:     }
                    615:     
                    616:     if (status == HT_NO_DATA) {
                    617:        if (TRACE) {
                    618:            fprintf(stderr, 
                    619:            "HTAccess: `%s' has been accessed, No data left.\n",
                    620:            full_address);
                    621:        }
1.19      timbl     622:        free(full_address);
1.1       timbl     623:        return NO;
                    624:     }
                    625:     
1.34    ! frystyk   626:     /* Bug fix thanks to Lou Montulli. Henrik 10/03-94 */
        !           627:     if (status<=0) {                 /* Failure in accessing a document */
1.1       timbl     628: #ifdef CURSES
                    629:         user_message("Can't access `%s'", full_address);
                    630: #else
1.5       timbl     631:        if (TRACE) fprintf(stderr, 
                    632:                "HTAccess: Can't access `%s'\n", full_address);
1.1       timbl     633: #endif
1.32      frystyk   634:        /* This is done in the specific load procedures... Henrik 07/03-94 */
                    635:        /* HTLoadError(request, 500, "Unable to access document."); */
1.19      timbl     636:        free(full_address);
1.1       timbl     637:        return NO;
                    638:     }
1.9       timbl     639:  
                    640:     /* If you get this, then please find which routine is returning
                    641:        a positive unrecognised error code! */
                    642:  
1.1       timbl     643:     fprintf(stderr,
1.2       timbl     644:     "**** HTAccess: socket or file number returned by obsolete load routine!\n");
1.9       timbl     645:     fprintf(stderr,
1.19      timbl     646:     "**** HTAccess: Internal software error. Please mail www-bug@info.cern.ch quoting the version number of this software and the URL: %s!\n",
                    647:        full_address);
                    648:     free(full_address);
                    649:    
1.1       timbl     650:     exit(-6996);
1.20      luotonen  651:     return NO; /* For gcc :-( */
1.2       timbl     652: } /* HTLoadDocument */
1.1       timbl     653: 
                    654: 
                    655: 
                    656: /*             Load a document from absolute name
                    657: **             ---------------
                    658: **
                    659: **    On Entry,
                    660: **        addr     The absolute address of the document to be accessed.
                    661: **        request  The request structure handed to HTLoadDocument()
                    662: **
                    663: **    On Exit,
                    664: **        returns    YES     Success in opening document
                    665: **                   NO      Failure 
                    666: **
                    667: **
                    668: */
                    669: 
1.15      timbl     670: PUBLIC BOOL HTLoadAbsolute ARGS2(CONST char *,addr, HTRequest*, request)
1.2       timbl     671: {
1.19      timbl     672:    HTAnchor * anchor = HTAnchor_findAddress(addr);
                    673:    request->anchor = HTAnchor_parent(anchor);
                    674:    request->childAnchor = ((HTAnchor*)request->anchor == anchor) ?
                    675:                        NULL : (HTChildAnchor*) anchor;
                    676:    return HTLoadDocument(request);
1.2       timbl     677: }
                    678: 
                    679: 
                    680: /*             Load a document from absolute name to stream
                    681: **             --------------------------------------------
                    682: **
                    683: **    On Entry,
                    684: **        addr     The absolute address of the document to be accessed.
1.15      timbl     685: **        request->output_stream     if non-NULL, send data down this stream
1.2       timbl     686: **
                    687: **    On Exit,
                    688: **        returns    YES     Success in opening document
                    689: **                   NO      Failure 
                    690: **
                    691: **
                    692: */
                    693: 
                    694: PUBLIC BOOL HTLoadToStream ARGS3(
                    695:                CONST char *,   addr,
                    696:                BOOL,           filter,
1.15      timbl     697:                HTRequest*,     request)
1.1       timbl     698: {
1.19      timbl     699:    HTAnchor * anchor = HTAnchor_findAddress(addr);
                    700:    request->anchor = HTAnchor_parent(anchor);
                    701:    request->childAnchor = ((HTAnchor*)request->anchor == anchor) ? NULL :
                    702:        (HTChildAnchor*) anchor;
1.15      timbl     703:     request->output_stream = request->output_stream;
1.19      timbl     704:     return HTLoadDocument(request);
1.1       timbl     705: }
                    706: 
                    707: 
1.2       timbl     708: 
                    709: 
1.1       timbl     710: /*             Load a document from relative name
                    711: **             ---------------
                    712: **
                    713: **    On Entry,
1.2       timbl     714: **        relative_name     The relative address of the document
                    715: **                         to be accessed.
1.1       timbl     716: **
                    717: **    On Exit,
                    718: **        returns    YES     Success in opening document
                    719: **                   NO      Failure 
                    720: **
                    721: **
                    722: */
                    723: 
1.15      timbl     724: PUBLIC BOOL HTLoadRelative ARGS3(
1.2       timbl     725:                CONST char *,           relative_name,
1.15      timbl     726:                HTParentAnchor *,       here,
1.20      luotonen  727:                HTRequest *,            request)
1.1       timbl     728: {
                    729:     char *             full_address = 0;
                    730:     BOOL                       result;
                    731:     char *             mycopy = 0;
                    732:     char *             stripped = 0;
                    733:     char *             current_address =
1.2       timbl     734:                                HTAnchor_address((HTAnchor*)here);
1.1       timbl     735: 
                    736:     StrAllocCopy(mycopy, relative_name);
                    737: 
                    738:     stripped = HTStrip(mycopy);
                    739:     full_address = HTParse(stripped,
                    740:                   current_address,
                    741:                   PARSE_ACCESS|PARSE_HOST|PARSE_PATH|PARSE_PUNCTUATION);
1.15      timbl     742:     result = HTLoadAbsolute(full_address, request);
1.1       timbl     743:     free(full_address);
                    744:     free(current_address);
                    745:     free(mycopy);  /* Memory leak fixed 10/7/92 -- JFG */
                    746:     return result;
                    747: }
                    748: 
                    749: 
                    750: /*             Load if necessary, and select an anchor
                    751: **             --------------------------------------
                    752: **
                    753: **    On Entry,
                    754: **        anchor                   The child or parent anchor to be loaded.
                    755: **
                    756: **    On Exit,
                    757: **        returns    YES     Success
                    758: **                   NO      Failure 
                    759: **
                    760: */
                    761: 
1.15      timbl     762: PUBLIC BOOL HTLoadAnchor ARGS2(HTAnchor*, anchor, HTRequest *, request)
1.1       timbl     763: {
1.15      timbl     764:     if (!anchor) return NO;    /* No link */
1.1       timbl     765:     
1.15      timbl     766:     request->anchor  = HTAnchor_parent(anchor);
1.19      timbl     767:     request->childAnchor = ((HTAnchor*)request->anchor == anchor) ? NULL
                    768:                                        : (HTChildAnchor*) anchor;
1.1       timbl     769:     
1.19      timbl     770:     return HTLoadDocument(request) ? YES : NO;
1.1       timbl     771:        
                    772: } /* HTLoadAnchor */
                    773: 
                    774: 
                    775: /*             Search
                    776: **             ------
                    777: **  Performs a keyword search on word given by the user. Adds the keyword to 
                    778: **  the end of the current address and attempts to open the new address.
                    779: **
                    780: **  On Entry,
                    781: **       *keywords     space-separated keyword list or similar search list
1.2       timbl     782: **     here            is the anchor the search is to be done on.
1.1       timbl     783: */
                    784: 
1.2       timbl     785: PRIVATE char hex(i)
                    786:     int i;
                    787: {
1.13      timbl     788:     char * hexchars = "0123456789ABCDEF";
                    789:     return hexchars[i];
1.2       timbl     790: }
1.1       timbl     791: 
1.15      timbl     792: PUBLIC BOOL HTSearch ARGS3(
1.2       timbl     793:        CONST char *,           keywords,
1.15      timbl     794:        HTParentAnchor *,       here,
                    795:        HTRequest *,            request)
1.1       timbl     796: {
1.2       timbl     797: 
                    798: #define acceptable \
                    799: "1234567890abcdefghijlkmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ.-_"
                    800: 
                    801:     char *q, *u;
                    802:     CONST char * p, *s, *e;            /* Pointers into keywords */
                    803:     char * address = HTAnchor_address((HTAnchor*)here);
1.1       timbl     804:     BOOL result;
1.2       timbl     805:     char * escaped = malloc(strlen(keywords)*3+1);
                    806: 
1.29      frystyk   807:     /* static CONST BOOL isAcceptable[96] = */
                    808:     /* static AND const is not good for a gnu compiler! Frystyk 25/02-94 */
1.30      luotonen  809:     static BOOL isAcceptable[96] =
1.2       timbl     810:     /*   0 1 2 3 4 5 6 7 8 9 A B C D E F */
                    811:     {    0,0,0,0,0,0,0,0,0,0,1,0,0,1,1,0,      /* 2x   !"#$%&'()*+,-./  */
                    812:          1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,      /* 3x  0123456789:;<=>?  */
                    813:         1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,       /* 4x  @ABCDEFGHIJKLMNO  */
                    814:         1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,1,       /* 5X  PQRSTUVWXYZ[\]^_  */
                    815:         0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,       /* 6x  `abcdefghijklmno  */
                    816:         1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0 };     /* 7X  pqrstuvwxyz{\}~  DEL */
                    817: 
                    818:     if (escaped == NULL) outofmem(__FILE__, "HTSearch");
                    819:     
1.29      frystyk   820: /* Convert spaces to + and hex escape unacceptable characters */
1.2       timbl     821: 
1.29      frystyk   822:     for(s=keywords; *s && WHITE(*s); s++); /*scan */    /* Skip white space */
                    823:     for(e = s + strlen(s); e>s && WHITE(*(e-1)) ; e--);     /* Skip trailers */
                    824:     for(q=escaped, p=s; p<e; p++) {                  /* scan stripped field */
1.2       timbl     825:         int c = (int)TOASCII(*p);
                    826:         if (WHITE(*p)) {
                    827:            *q++ = '+';
1.29      frystyk   828:        } else if (c>=32 && c<=127 && isAcceptable[c-32] != 0) {
1.13      timbl     829:            *q++ = *p;                  /* 930706 TBL for MVS bug */
1.2       timbl     830:        } else {
                    831:            *q++ = '%';
                    832:            *q++ = hex(c / 16);
                    833:            *q++ = hex(c % 16);
                    834:        }
                    835:     } /* Loop over string */
1.1       timbl     836:     
1.2       timbl     837:     *q=0;
                    838:                                /* terminate escaped sctring */
                    839:     u=strchr(address, '?');            /* Find old search string */
                    840:     if (u) *u = 0;                             /* Chop old search off */
1.1       timbl     841: 
                    842:     StrAllocCat(address, "?");
1.2       timbl     843:     StrAllocCat(address, escaped);
                    844:     free(escaped);
1.15      timbl     845:     result = HTLoadRelative(address, here, request);
1.1       timbl     846:     free(address);
1.2       timbl     847:     
1.1       timbl     848:     return result;
1.2       timbl     849: }
                    850: 
                    851: 
                    852: /*             Search Given Indexname
                    853: **             ------
                    854: **  Performs a keyword search on word given by the user. Adds the keyword to 
                    855: **  the end of the current address and attempts to open the new address.
                    856: **
                    857: **  On Entry,
                    858: **       *keywords     space-separated keyword list or similar search list
                    859: **     indexname       is the name of the object the search is to be done on.
                    860: */
                    861: 
1.15      timbl     862: PUBLIC BOOL HTSearchAbsolute ARGS3(
1.2       timbl     863:        CONST char *,   keywords,
1.15      timbl     864:        CONST char *,   indexname,
                    865:        HTRequest *,    request)
1.2       timbl     866: {
                    867:     HTParentAnchor * anchor =
                    868:        (HTParentAnchor*) HTAnchor_findAddress(indexname);
1.15      timbl     869:     return HTSearch(keywords, anchor, request);
1.2       timbl     870: }
                    871: 
                    872: 
                    873: /*             Generate the anchor for the home page
                    874: **             -------------------------------------
                    875: **
                    876: **     As it involves file access, this should only be done once
                    877: **     when the program first runs.
1.10      timbl     878: **     This is a default algorithm -- browsers don't HAVE to use this.
                    879: **     But consistency between browsers is STRONGLY recommended!
1.2       timbl     880: **
1.10      timbl     881: **     Priority order is:
                    882: **
                    883: **             1       WWW_HOME environment variable (logical name, etc)
                    884: **             2       ~/WWW/default.html
                    885: **             3       /usr/local/bin/default.html
                    886: **             4       http://info.cern.ch/default.html
                    887: **
1.2       timbl     888: */
                    889: PUBLIC HTParentAnchor * HTHomeAnchor NOARGS
                    890: {
1.12      timbl     891:     char * my_home_document = NULL;
                    892:     char * home = (char *)getenv(LOGICAL_DEFAULT);
1.2       timbl     893:     char * ref;
                    894:     HTParentAnchor * anchor;
1.1       timbl     895:     
1.12      timbl     896:     if (home) {
                    897:         StrAllocCopy(my_home_document, home);
                    898:     
                    899: /*     Someone telnets in, they get a special home.
                    900: */
                    901: #define MAX_FILE_NAME 1024                                     /* @@@ */
                    902:     } else  if (HTClientHost) {                        /* Telnet server */
                    903:        FILE * fp = fopen(REMOTE_POINTER, "r");
                    904:        char * status;
                    905:        if (fp) {
                    906:            my_home_document = (char*) malloc(MAX_FILE_NAME);
                    907:            status = fgets(my_home_document, MAX_FILE_NAME, fp);
                    908:            if (!status) {
                    909:                free(my_home_document);
                    910:                my_home_document = NULL;
                    911:            }
                    912:            fclose(fp);
                    913:        }
                    914:        if (!my_home_document) StrAllocCopy(my_home_document, REMOTE_ADDRESS);
                    915:     }
                    916: 
                    917:     
                    918: 
1.2       timbl     919: #ifdef unix
1.12      timbl     920: 
1.10      timbl     921:     if (!my_home_document) {
                    922:        FILE * fp = NULL;
                    923:        CONST char * home =  (CONST char*)getenv("HOME");
                    924:        if (home) { 
                    925:            my_home_document = (char *)malloc(
                    926:                strlen(home)+1+ strlen(PERSONAL_DEFAULT)+1);
                    927:            if (my_home_document == NULL) outofmem(__FILE__, "HTLocalName");
                    928:            sprintf(my_home_document, "%s/%s", home, PERSONAL_DEFAULT);
                    929:            fp = fopen(my_home_document, "r");
                    930:        }
                    931:        
                    932:        if (!fp) {
                    933:            StrAllocCopy(my_home_document, LOCAL_DEFAULT_FILE);
                    934:            fp = fopen(my_home_document, "r");
                    935:        }
1.2       timbl     936:        if (fp) {
                    937:            fclose(fp);
                    938:        } else {
                    939:        if (TRACE) fprintf(stderr,
1.10      timbl     940:            "HTBrowse: No local home document ~/%s or %s\n",
                    941:            PERSONAL_DEFAULT, LOCAL_DEFAULT_FILE);
1.11      timbl     942:            free(my_home_document);
                    943:            my_home_document = NULL;
1.2       timbl     944:        }
                    945:     }
                    946: #endif
1.10      timbl     947:     ref = HTParse( my_home_document ?  my_home_document :
                    948:                                HTClientHost ? REMOTE_ADDRESS
                    949:                                : LAST_RESORT,
                    950:                    "file:",
1.2       timbl     951:                    PARSE_ACCESS|PARSE_HOST|PARSE_PATH|PARSE_PUNCTUATION);
1.10      timbl     952:     if (my_home_document) {
1.2       timbl     953:        if (TRACE) fprintf(stderr,
                    954:            "HTAccess: Using custom home page %s i.e. address %s\n",
1.10      timbl     955:            my_home_document, ref);
                    956:        free(my_home_document);
1.2       timbl     957:     }
                    958:     anchor = (HTParentAnchor*) HTAnchor_findAddress(ref);
                    959:     free(ref);
                    960:     return anchor;
1.1       timbl     961: }
1.26      frystyk   962: 
                    963: 
                    964: /*             Bind an Anchor to the request structure
                    965: **             ---------------------------------------
                    966: **
                    967: **    On Entry,
                    968: **     anchor          The child or parent anchor to be bound
                    969: **     request         The request structure
                    970: **    On Exit,
                    971: **        returns    YES     Success
                    972: **                   NO      Failure 
                    973: **
                    974: **  Note: Actually the same as HTLoadAnchor() but DOES NOT do the loading
                    975: **                                             Henrik Frystyk 17/02-94
                    976: */
                    977: 
                    978: PUBLIC BOOL HTBindAnchor ARGS2(HTAnchor*, anchor, HTRequest *, request)
                    979: {
                    980:     if (!anchor) return NO;    /* No link */
                    981:     
                    982:     request->anchor  = HTAnchor_parent(anchor);
                    983:     request->childAnchor = ((HTAnchor*)request->anchor == anchor) ? NULL
                    984:                                        : (HTChildAnchor*) anchor;
                    985:        
1.29      frystyk   986:     return YES;
1.26      frystyk   987: } /* HTBindAnchor */
                    988: 

Webmaster