Annotation of libwww/Library/src/HTAccess.c, revision 1.43

1.1       timbl       1: /*             Access Manager                                  HTAccess.c
                      2: **             ==============
                      3: **
                      4: ** Authors
                      5: **     TBL     Tim Berners-Lee timbl@info.cern.ch
1.4       timbl       6: **     JFG     Jean-Francois Groff jfg@dxcern.cern.ch
1.1       timbl       7: **     DD      Denis DeLaRoca (310) 825-4580  <CSP1DWD@mvs.oac.ucla.edu>
                      8: ** History
                      9: **       8 Jun 92 Telnet hopping prohibited as telnet is not secure TBL
                     10: **     26 Jun 92 When over DECnet, suppressed FTP, Gopher and News. JFG
1.42      frystyk    11: **      6 Oct 92 Moved HTClientHost and HTlogfile into here. TBL
1.1       timbl      12: **     17 Dec 92 Tn3270 added, bug fix. DD
1.2       timbl      13: **      4 Feb 93 Access registration, Search escapes bad chars TBL
1.9       timbl      14: **               PARAMETERS TO HTSEARCH AND HTLOADRELATIVE CHANGED
                     15: **     28 May 93 WAIS gateway explicit if no WAIS library linked in.
1.19      timbl      16: **        Dec 93 Bug change around, more reentrant, etc
1.42      frystyk    17: **     09 May 94 logfile renamed to HTlogfile to avoid clash with WAIS
1.2       timbl      18: ** Bugs
                     19: **     This module assumes that the graphic object is hypertext, as it
1.9       timbl      20: **     needs to select it when it has been loaded.  A superclass needs to be
1.2       timbl      21: **     defined which accepts select and select_anchor.
1.1       timbl      22: */
                     23: 
1.9       timbl      24: #ifndef DEFAULT_WAIS_GATEWAY   /* a build may predefine this to override the default */
1.8       timbl      25: #define DEFAULT_WAIS_GATEWAY "http://info.cern.ch:8001/"
1.9       timbl      26: #endif /* DEFAULT_WAIS_GATEWAY: used by get_physical() for "wais" when DIRECT_WAIS is off and no WWW_wais_GATEWAY is set */
1.8       timbl      27: 
1.1       timbl      28: /* Implements:
                     29: */
                     30: #include "HTAccess.h"
                     31: 
                     32: /* Uses:
                     33: */
                     34: 
                     35: #include "HTParse.h"
                     36: #include "HTUtils.h"
1.4       timbl      37: #include "HTML.h"              /* SCW */
1.2       timbl      38: 
                     39: #ifndef NO_RULES
                     40: #include "HTRules.h"
                     41: #endif
                     42: 
1.1       timbl      43: #include <stdio.h>
                     44: 
1.2       timbl      45: #include "HTList.h"
                     46: #include "HText.h"     /* See bugs above */
                     47: #include "HTAlert.h"
1.17      timbl      48: #include "HTFWriter.h" /* for cache stuff */
                     49: #include "HTTee.h"
1.2       timbl      50: 
1.1       timbl      51: /*     These flags may be set by the application to modify the operation of
                         **     this module.  Only the uses visible in this file are described; other
                         **     modules may read them too.
                     52: */
1.34      frystyk    53: PUBLIC char * HTCacheDir = 0;  /* Root for cached files or 0 for no cache */
                     54: PUBLIC char * HTSaveLocallyDir = SAVE_LOCALLY_HOME_DIR;        /* Save & exe files */
1.1       timbl      55: PUBLIC char * HTClientHost = 0;        /* Name of remote login host if any; logged as "local" when 0 */
1.42      frystyk    56: PUBLIC FILE * HTlogfile = 0;   /* File to which to output one-liners; 0 disables access logging */
1.41      luotonen   57: 
1.34      frystyk    58: PUBLIC BOOL HTForceReload = NO;        /* Force reload from cache or net; YES makes HTLoadDocument() ignore a document already in memory */
1.12      timbl      59: PUBLIC BOOL HTSecure = NO;     /* Disable access for telnet users? */
1.27      luotonen   60: PUBLIC BOOL using_proxy = NO;  /* are we using a proxy gateway? Reset, then set, by get_physical() */
1.43    ! luotonen   61: PUBLIC char * HTImServer = NULL;/* cern_httpd sets this to the translated URL; when set, get_physical() uses it instead of calling HTTranslate() */
1.27      luotonen   62: PUBLIC BOOL HTImProxy = NO;    /* cern_httpd as a proxy? */
1.1       timbl      63: 
1.43    ! luotonen   64: 
1.2       timbl      65: /*     To generate other things, play with these:
                     66: */
                     67: 
1.15      timbl      68: /* PUBLIC HTFormat HTOutputFormat = NULL;      use request->output_format */
                     69: /* PUBLIC HTStream* HTOutputStream = NULL;     use request->output_stream */ 
1.1       timbl      70: 
                     71: PRIVATE HTList * protocols = NULL;   /* List of registered protocol descriptors; created on the first HTRegisterProtocol() call */
                     72: 
1.24      timbl      73: /*     Superclass definition: the only part of a stream this module needs to see */
1.1       timbl      74: 
1.24      timbl      75: struct _HTStream {
                     76:        HTStreamClass * isa;    /* class record; HTLoadDocument() calls isa->free through it */
                     77:        /* ... (presumably subclass-specific fields follow -- not visible here) */
                     78: };
                     79: 
1.15      timbl      80: /*     Create a request structure
                     81: **     --------------------------
                         **
                         **     Allocates a zero-filled HTRequest, gives it an empty conversions
                         **     list and defaults its output format to WWW_PRESENT.
                         **
                         ** On exit,
                         **     returns         the new request; release it with HTRequest_delete().
                         **
                         **     NOTE(review): after a failed calloc() the code calls outofmem() and
                         **     then dereferences the pointer, so outofmem() presumably does not
                         **     return -- confirm.
                     82: */
                     83: 
                     84: PUBLIC HTRequest * HTRequest_new NOARGS
                     85: {
1.28      luotonen   86:     HTRequest * me = (HTRequest*) calloc(1, sizeof(*me));  /* zero fill */
1.15      timbl      87:     if (!me) outofmem(__FILE__, "HTRequest_new()");
                     88:     
1.20      luotonen   89:     me->conversions    = HTList_new(); /* No conversions registered yet */
                     90:     me->output_format  = WWW_PRESENT;  /* default it to present to user */
                     91: 
1.15      timbl      92:     return me;
                     93: }
                     94: 
                     95: 
1.20      luotonen   96: /*     Delete a request structure
                     97: **     --------------------------
                         **
                         **     Releases the conversions list, runs HTAACleanup() on the request,
                         **     frees the `from' string and finally the request itself.
                         **
                         ** On entry,
                         **     req             request to destroy; NULL is accepted and ignored.
                         **
                         **     The caller's own pointer is not cleared (req is passed by value),
                         **     so it must not be used after this call.
                     98: */
                     99: PUBLIC void HTRequest_delete ARGS1(HTRequest *, req)
                    100: {
                    101:     if (req) {
1.34      frystyk   102:        HTFormatDelete(req->conversions);       /* list created in HTRequest_new() */
                    103:        HTAACleanup(req);
1.37      luotonen  104:        FREE(req->from);
1.34      frystyk   105:        FREE(req);
1.20      luotonen  106:     }
                    107: }
                    108: 
                    109: 
1.22      luotonen  110: PRIVATE char * method_names[(int)MAX_METHODS + 1] =    /* method name table, indexed by (int)HTMethod value */
                    111: {
                    112:     "INVALID-METHOD",  /* returned by HTMethod_name() for out-of-range values; presumably index METHOD_INVALID -- confirm in HTAccess.h */
                    113:     "GET",
                    114:     "HEAD",
                    115:     "POST",
                    116:     "PUT",
                    117:     "DELETE",
                    118:     "CHECKOUT",
                    119:     "CHECKIN",
                    120:     "SHOWMETHOD",
                    121:     "LINK",
                    122:     "UNLINK",
                    123:     NULL               /* terminator.  NOTE(review): 11 names are listed, so MAX_METHODS is presumably 11; were it larger, the extra slots would be NULL and HTMethod_enum() would pass NULL to strcmp() */
                    124: };
                    125: 
                    126: /*     Get method enum value
                    127: **     ---------------------
                         **
                         ** On entry,
                         **     name            method name to look up, or NULL.
                         **
                         ** On exit,
                         **     returns         the HTMethod whose entry in method_names equals name
                         **                     exactly (strcmp, so case matters), or METHOD_INVALID
                         **                     if name is NULL or not found.
                         **
                         **     NOTE(review): HTMethod_inList() compares with strcasecomp() while
                         **     this lookup uses strcmp() -- confirm the difference is intended.
                    128: */
                    129: PUBLIC HTMethod HTMethod_enum ARGS1(char *, name)
                    130: {
                    131:     if (name) {
                    132:        int i;
                    133:        for (i=1; i < (int)MAX_METHODS; i++)    /* starts at 1: slot 0 ("INVALID-METHOD") is never matched */
                    134:            if (!strcmp(name, method_names[i]))
                    135:                return (HTMethod)i;
                    136:     }
                    137:     return METHOD_INVALID;
                    138: }
                    139: 
                    140: 
                    141: /*     Get method name
                    142: **     ---------------
                         **
                         ** On entry,
                         **     method          any HTMethod value.
                         **
                         ** On exit,
                         **     returns         pointer into the static method_names table (do not
                         **                     free): the method's name when method lies strictly
                         **                     between METHOD_INVALID and MAX_METHODS, otherwise the
                         **                     METHOD_INVALID entry.
                    143: */
                    144: PUBLIC char * HTMethod_name ARGS1(HTMethod, method)
                    145: {
                    146:     if ((int)method > (int)METHOD_INVALID  && 
                    147:        (int)method < (int)MAX_METHODS)
                    148:        return method_names[(int)method];
                    149:     else
                    150:        return method_names[(int)METHOD_INVALID];
                    151: }
                    152: 
                    153: 
                    154: /*     Is method in a list of method names?
                    155: **     -----------------------------------
                         **
                         ** On entry,
                         **     method          method to look for; converted with HTMethod_name().
                         **     list            HTList whose objects are read as (char *) names.
                         **
                         ** On exit,
                         **     returns         YES if some list item compares equal to the method's
                         **                     name under strcasecomp(), NO otherwise.
                         **
                         **     Each item examined is also written to the trace via CTRACE.
                    156: */
                    157: PUBLIC BOOL HTMethod_inList ARGS2(HTMethod,    method,
                    158:                                  HTList *,     list)
                    159: {
                    160:     char * method_name = HTMethod_name(method);
                    161:     HTList *cur = list;
                    162:     char *item;
                    163: 
                    164:     while (NULL != (item = (char*)HTList_nextObject(cur))) {
                    165:        CTRACE(stderr, " %s", item);
                    166:        if (0==strcasecomp(item, method_name))
                    167:            return YES;
                    168:     }
                    169:     return NO; /* Not found */
                    170: }
                    171: 
                    172: 
                    173: 
                    174: 
1.20      luotonen  175: 
1.1       timbl     176: /*     Register a Protocol                             HTRegisterProtocol
                    177: **     -------------------
                         **
                         **     Adds a protocol descriptor to the module's protocol list, creating
                         **     the list on first use.  No check is made for duplicates.
                         **
                         ** On entry,
                         **     protocol        descriptor to add; the pointer itself is stored.
                         **
                         ** On exit,
                         **     returns         YES, always.
                    178: */
                    179: 
                    180: PUBLIC BOOL HTRegisterProtocol(protocol)
                    181:        HTProtocol * protocol;
                    182: {
                    183:     if (!protocols) protocols = HTList_new();
                    184:     HTList_addObject(protocols, protocol);
                    185:     return YES;
                    186: }
                    187: 
                    188: 
                    189: /*     Register all known protocols
                    190: **     ----------------------------
                    191: **
                    192: **     Add to or subtract from this list if you add or remove protocol modules.
                    193: **     This routine is called the first time the protocol list is needed,
                    194: **     unless any protocols are already registered, in which case it is not called.
                    195: **     Therefore the application can override this list.
                    196: **
                    197: **     Compiling with NO_INIT prevents all known protocols from being forced
                    198: **     in at link time.
                         **
                         **     Registration order is FTP, News, Gopher, (WAIS), HTTP, File, Telnet,
                         **     Tn3270, Rlogin.  With DECNET defined the first group is omitted; this
                         **     includes WAIS, because the DIRECT_WAIS sections are nested inside
                         **     the #ifndef DECNET section.
                    199: */
                    200: #ifndef NO_INIT
                    201: PRIVATE void HTAccessInit NOARGS                       /* Call me once */
                    202: {
1.14      duns      203: GLOBALREF HTProtocol HTTP, HTFile, HTTelnet, HTTn3270, HTRlogin;
1.1       timbl     204: #ifndef DECNET
1.14      duns      205: GLOBALREF  HTProtocol HTFTP, HTNews, HTGopher;
1.42      frystyk   206: 
                    207: /* This is the replacement when HTWhoIs gets a complete protocol module */
                    208: /* GLOBALREF  HTProtocol HTFTP, HTNews, HTGopher, HTWhoIs;             */
                    209: /* -------------------------------------------------------------------- */
                    210: 
1.3       timbl     211: #ifdef DIRECT_WAIS
1.14      duns      212: GLOBALREF  HTProtocol HTWAIS;
1.3       timbl     213: #endif /* DIRECT_WAIS */
1.2       timbl     214:     HTRegisterProtocol(&HTFTP);
                    215:     HTRegisterProtocol(&HTNews);
                    216:     HTRegisterProtocol(&HTGopher);
1.42      frystyk   217: 
                    218: /* This should be added when HTWhoIs gets a complete protocol module */
                    219: /*  HTRegisterProtocol(&HTWhoIs);                                   */
                    220: /* ----------------------------------------------------------------- */
1.3       timbl     221: #ifdef DIRECT_WAIS
                    222:     HTRegisterProtocol(&HTWAIS);
                    223: #endif /* DIRECT_WAIS */
1.1       timbl     224: #endif /* !DECNET */
                    225: 
1.2       timbl     226:     HTRegisterProtocol(&HTTP);
                    227:     HTRegisterProtocol(&HTFile);
                    228:     HTRegisterProtocol(&HTTelnet);
                    229:     HTRegisterProtocol(&HTTn3270);
                    230:     HTRegisterProtocol(&HTRlogin);
1.1       timbl     231: }
                    232: #endif /* !NO_INIT */
                    233: 
                    234: 
1.33      luotonen  235: 
                    236: /*                                                     override_proxy()
                    237: **
                    238: **     Check the no_proxy environment variable to get the list
                    239: **     of hosts for which proxy server is not consulted.
                    240: **
                    241: **     no_proxy is a comma- or space-separated list of machine
                    242: **     or domain names, with optional :port part.  If no :port
                    243: **     part is present, it applies to all ports on that domain.
                    244: **
                    245: **     Example:
                    246: **             no_proxy="cern.ch,some.domain:8001"
                    247: **
                         ** On entry,
                         **     addr            URL to test, or NULL.
                         **
                         ** On exit,
                         **     returns         YES if an entry matches the host (and port) of addr;
                         **                     NO if there is no match, no_proxy is unset, addr is
                         **                     NULL, or addr has no host part.
                         **
                         **     An entry matches when it equals the tail of the host name
                         **     (strncmp, so case matters) and either carries no port (or a port
                         **     that atoi() reads as 0) or carries the port of addr.  If addr has
                         **     no explicit port, 80/70/21 is assumed for http/gopher/ftp and 80
                         **     for anything else.
                         **
                         **     NOTE(review): the tail comparison is not anchored at a '.', so the
                         **     entry "cern.ch" also matches a host such as "xcern.ch" -- confirm
                         **     whether that is acceptable.
                    248: */
                    249: PRIVATE BOOL override_proxy ARGS1(CONST char *, addr)
                    250: {
                    251:     CONST char * no_proxy = getenv("no_proxy");
                    252:     char * p = NULL;
                    253:     char * host = NULL;                /* allocated by HTParse(); freed on every exit path below */
                    254:     int port = 0;
                    255:     int h_len = 0;
                    256: 
                    257:     if (!no_proxy || !addr || !(host = HTParse(addr, "", PARSE_HOST)))
                    258:        return NO;
                    259:     if (!*host) { free(host); return NO; }
                    260: 
1.34      frystyk   261:     if ((p = strchr(host, ':')) != NULL) {     /* Port specified */
1.33      luotonen  262:        *p++ = 0;                       /* Chop off port */
                    263:        port = atoi(p);
                    264:     }
                    265:     else {                             /* Use default port */
                    266:        char * access = HTParse(addr, "", PARSE_ACCESS);
                    267:        if (access) {
                    268:            if      (!strcmp(access,"http"))    port = 80;
                    269:            else if (!strcmp(access,"gopher"))  port = 70;
                    270:            else if (!strcmp(access,"ftp"))     port = 21;
                    271:            free(access);
                    272:        }
                    273:     }
                    274:     if (!port) port = 80;              /* Default */
                    275:     h_len = strlen(host);
                    276: 
                    277:     while (*no_proxy) {
                    278:        CONST char * end;               /* one past the current entry */
                    279:        CONST char * colon = NULL;      /* last ':' inside the current entry, if any */
                    280:        int templ_port = 0;             /* 0 means the entry applies to all ports */
                    281:        int t_len;                      /* length of the entry's host part */
                    282: 
                    283:        while (*no_proxy && (WHITE(*no_proxy) || *no_proxy==','))
                    284:            no_proxy++;                 /* Skip whitespace and separators */
                    285: 
                    286:        end = no_proxy;
                    287:        while (*end && !WHITE(*end) && *end != ',') {   /* Find separator */
                    288:            if (*end==':') colon = end;                 /* Port number given */
                    289:            end++;
                    290:        }
                    291: 
                    292:        if (colon) {
                    293:            templ_port = atoi(colon+1);
                    294:            t_len = colon - no_proxy;
                    295:        }
                    296:        else {
                    297:            t_len = end - no_proxy;
                    298:        }
                    299: 
                    300:        if ((!templ_port || templ_port == port)  &&
                    301:            (t_len > 0  &&  t_len <= h_len  &&
                    302:             !strncmp(host + h_len - t_len, no_proxy, t_len))) {        /* entry equals the last t_len chars of host */
                    303:            free(host);
                    304:            return YES;
                    305:        }
                    306:        if (*end) no_proxy = end+1;     /* step over the separator to the next entry */
                    307:        else break;
                    308:     }
                    309: 
                    310:     free(host);
                    311:     return NO;
                    312: }
                    313: 
                    314: 
                    315: 
1.2       timbl     316: /*             Find physical name and access protocol
                    317: **             --------------------------------------
1.1       timbl     318: **
                    319: **     Works out the physical address for the request's anchor (rule
                         **     translation, then proxy or gateway rewriting), stores it on the
                         **     anchor, and attaches the registered protocol whose name equals the
                         **     access scheme of that physical address.
                         **
                    320: ** On entry,
                    321: **     req->anchor     a parent anchor whose address is the fully qualified
                    322: **                     hypertext reference.
                    323: **
                    324: ** On exit,
1.2       timbl     325: **     returns         HT_FORBIDDEN            HTTranslate() gave no physical address.
                         **                     HT_NO_ACCESS            No registered protocol matches.
                    326: **                     HT_OK                   Success
                         **     using_proxy     YES if an <access>_proxy environment variable was
                         **                     applied, otherwise NO.
1.1       timbl     327: **
                    328: */
1.21      luotonen  329: PRIVATE int get_physical ARGS1(HTRequest *, req)
                    330: {    
1.1       timbl     331:     char * access=0;   /* Name of access method */
1.21      luotonen  332:     char * addr = HTAnchor_address((HTAnchor*)req->anchor);    /* free me */
1.27      luotonen  333: 
1.35      luotonen  334:     /*
                    335:     ** This HACK is here until we have redirection implemented.
                    336:     ** This is used when we are recursively calling HTLoad().
                    337:     ** We then take the physical address, because currently the
                    338:     ** virtual address is kept in a hash table so it can't be
                    339:     ** changed -- otherwise it wouldn't be found anymore.
                    340:     */
1.36      luotonen  341:     if (HTAnchor_physical(req->anchor))
                    342:        StrAllocCopy(addr, HTAnchor_physical(req->anchor));     /* presumably releases the previous addr -- confirm in HTUtils/HTString */
1.35      luotonen  343: 
1.2       timbl     344: #ifndef NO_RULES
1.27      luotonen  345:     if (HTImServer)    /* cern_httpd has already done its own translations */
1.43    ! luotonen  346:        HTAnchor_setPhysical(req->anchor, HTImServer);
        !           347: #ifdef OLD_CODE
1.27      luotonen  348:        HTAnchor_setPhysical(req->anchor, addr);
1.43    ! luotonen  349: #endif /* OLD_CODE.  NOTE(review): if OLD_CODE were defined, two statements would sit between the if and the else below, which would not compile */
1.21      luotonen  350:     else {
1.27      luotonen  351:        char * physical = HTTranslate(addr);
1.21      luotonen  352:        if (!physical) {
                    353:            free(addr);
                    354:            return HT_FORBIDDEN;
                    355:        }
                    356:        HTAnchor_setPhysical(req->anchor, physical);
                    357:        free(physical);                 /* free our copy */
1.2       timbl     358:     }
                    359: #else
1.21      luotonen  360:     HTAnchor_setPhysical(req->anchor, addr);
1.2       timbl     361: #endif /* !NO_RULES */
                    362: 
1.21      luotonen  363:     access =  HTParse(HTAnchor_physical(req->anchor),
1.27      luotonen  364:                      "file:", PARSE_ACCESS);
1.1       timbl     365: 
                    366: /*     Check whether gateway access has been set up for this
1.8       timbl     367: **
                    368: **     This function can be replaced by the rule system above.
1.1       timbl     369: */
1.8       timbl     370: #define USE_GATEWAYS
1.1       timbl     371: #ifdef USE_GATEWAYS
1.39      luotonen  372: 
                    373:     /* make sure the using_proxy variable is false */
                    374:     using_proxy = NO;
                    375: 
1.33      luotonen  376:     if (!override_proxy(addr)) {       /* the no_proxy test uses addr, not the translated physical address */
1.27      luotonen  377:        char * gateway_parameter, *gateway, *proxy;
                    378: 
1.2       timbl     379:        gateway_parameter = (char *)malloc(strlen(access)+20);  /* needs strlen(access)+13 for "WWW_" + access + "_GATEWAY" + NUL */
                    380:        if (gateway_parameter == NULL) outofmem(__FILE__, "HTLoad");
1.27      luotonen  381: 
                    382:        /* search for proxy gateways */
1.2       timbl     383:        strcpy(gateway_parameter, "WWW_");
                    384:        strcat(gateway_parameter, access);
                    385:        strcat(gateway_parameter, "_GATEWAY");
                    386:        gateway = (char *)getenv(gateway_parameter); /* coerce for decstation */
1.27      luotonen  387: 
                    388:        /* search for proxy servers */
                    389:        strcpy(gateway_parameter, access);
                    390:        strcat(gateway_parameter, "_proxy");
                    391:        proxy = (char *)getenv(gateway_parameter);
                    392: 
1.2       timbl     393:        free(gateway_parameter);
1.27      luotonen  394: 
                    395:        if (TRACE && gateway)
                    396:            fprintf(stderr,"Gateway found: %s\n",gateway);
                    397:        if (TRACE && proxy)
                    398:            fprintf(stderr,"Proxy server found: %s\n",proxy);
                    399: 
1.8       timbl     400: #ifndef DIRECT_WAIS
1.9       timbl     401:        if (!gateway && 0==strcmp(access, "wais")) {
1.8       timbl     402:            gateway = DEFAULT_WAIS_GATEWAY;
                    403:        }
                    404: #endif /* !DIRECT_WAIS */
1.27      luotonen  405: 
                    406:        /* proxy servers have precedence over gateway servers */
                    407:        if (proxy) {
                    408:            char * gatewayed=0;
                    409: 
                    410:             StrAllocCopy(gatewayed,proxy);
                    411:            StrAllocCat(gatewayed,addr);        /* physical address becomes the proxy URL followed by the whole of addr */
                    412:            using_proxy = YES;
                    413:            HTAnchor_setPhysical(req->anchor, gatewayed);
                    414:            free(gatewayed);
                    415:            free(access);
                    416: 
                    417:            access =  HTParse(HTAnchor_physical(req->anchor),
                    418:                              "http:", PARSE_ACCESS);
                    419:        } else if (gateway) {
1.9       timbl     420:            char * path = HTParse(addr, "",
                    421:                PARSE_HOST + PARSE_PATH + PARSE_PUNCTUATION);
                    422:                /* Chop leading / off to make host into part of path */
                    423:            char * gatewayed = HTParse(path+1, gateway, PARSE_ALL);
                    424:            free(path);
1.21      luotonen  425:             HTAnchor_setPhysical(req->anchor, gatewayed);
1.9       timbl     426:            free(gatewayed);
1.2       timbl     427:            free(access);
1.9       timbl     428:            
1.21      luotonen  429:            access =  HTParse(HTAnchor_physical(req->anchor),
1.8       timbl     430:                "http:", PARSE_ACCESS);
1.2       timbl     431:        }
                    432:     }
1.1       timbl     433: #endif /* USE_GATEWAYS */
                    434: 
1.19      timbl     435:     free(addr);
1.1       timbl     436: 
                    437: 
                    438: /*     Search registered protocols to find suitable one
                         **     (exact strcmp() match of the protocol name against access)
                    439: */
                    440:     {
1.20      luotonen  441:        HTList *cur;
                    442:        HTProtocol *p;
1.1       timbl     443: #ifndef NO_INIT
1.2       timbl     444:         if (!protocols) HTAccessInit();        /* default set is registered only if the application registered none */
1.1       timbl     445: #endif
1.20      luotonen  446:        cur = protocols;
                    447:        while ((p = (HTProtocol*)HTList_nextObject(cur))) {
1.2       timbl     448:            if (strcmp(p->name, access)==0) {
1.21      luotonen  449:                HTAnchor_setProtocol(req->anchor, p);
1.2       timbl     450:                free(access);
                    451:                return (HT_OK);
1.1       timbl     452:            }
                    453:        }
                    454:     }
                    455: 
                    456:     free(access);
1.2       timbl     457:     return HT_NO_ACCESS;
1.1       timbl     458: }
                    459: 
                    460: 
                    461: /*             Load a document
                    462: **             ---------------
                    463: **
1.2       timbl     464: **     Resolves the request's physical address and protocol with
                    465: **     get_physical(), then hands the request to that protocol's load routine.
                         **     A request whose method is METHOD_INVALID is treated as METHOD_GET.
                    466: **
                    467: ** On entry,
1.15      timbl     468: **     request->
1.35      luotonen  469: **         anchor              a parent anchor with fully qualified
                    470: **                             hypertext reference as its address set
1.15      timbl     471: **         output_format       valid
                    472: **         output_stream       valid or NULL
1.2       timbl     473: **
                    474: ** On exit,
                    475: **     returns         <0              Error has occurred.
                    476: **                     HT_LOADED       Success
                    477: **                     HT_NO_DATA      Success, but no document loaded.
1.8       timbl     478: **                                     (telnet session started etc)
                         **                     The value is whatever the protocol's load routine or
                         **                     HTLoadError() returns; the codes above are as
                         **                     originally documented.
1.2       timbl     479: **
                         **     NOTE(review): the protocol pointer is used without a NULL check,
                         **     whereas HTSaveStream() checks it -- confirm it cannot be NULL here.
                    480: */
1.35      luotonen  481: PUBLIC int HTLoad ARGS1(HTRequest *, request)
1.2       timbl     482: {
1.25      frystyk   483:     char       *arg = NULL;
                    484:     HTProtocol *p;
                    485:     int        status;
                    486: 
1.22      luotonen  487:     if (request->method == METHOD_INVALID)
                    488:        request->method = METHOD_GET;
1.21      luotonen  489:     status = get_physical(request);
1.2       timbl     490:     if (status == HT_FORBIDDEN) {
1.21      luotonen  491:         return HTLoadError(request, 500,       /* NOTE(review): HTTP defines 403 for "forbidden"; 500 is kept as written -- confirm intent */
                    492:                           "Access forbidden by rule");
1.2       timbl     493:     }
                    494:     if (status < 0) return status;     /* Can't resolve or forbidden */
1.25      frystyk   495: 
                    496:     if(!(arg = HTAnchor_physical(request->anchor)) || !*arg) 
                    497:        return (-1);                    /* missing or empty physical address */
1.27      luotonen  498: 
1.15      timbl     499:     p = HTAnchor_protocol(request->anchor);
1.17      timbl     500:     return (*(p->load))(request);
1.2       timbl     501: }
                    502: 
                    503: 
                    504: /*             Get a save stream for a document
                    505: **             --------------------------------
                         **
                         **     Sets the request's method to METHOD_PUT, resolves the physical
                         **     address and protocol with get_physical(), and asks the protocol
                         **     for a save stream.
                         **
                         ** On entry,
                         **     request->anchor         anchor of the document to be saved
                         **
                         ** On exit,
                         **     returns         the stream from the protocol's saveStream routine,
                         **                     or NULL if access is forbidden by rule (reported via
                         **                     HTLoadError), get_physical() failed, or the anchor
                         **                     has no protocol.
                         **
                         **     NOTE(review): p->saveStream itself is not checked for NULL --
                         **     confirm every registered protocol supplies one.
                    506: */
1.19      timbl     507: PUBLIC HTStream *HTSaveStream ARGS1(HTRequest *, request)
1.15      timbl     508: {
                    509:     HTProtocol * p;
1.19      timbl     510:     int status;
1.22      luotonen  511:     request->method = METHOD_PUT;
1.21      luotonen  512:     status = get_physical(request);
1.19      timbl     513:     if (status == HT_FORBIDDEN) {
1.21      luotonen  514:         HTLoadError(request, 500,
                    515:                    "Access forbidden by rule");
1.19      timbl     516:        return NULL;    /* should return error status? */
                    517:     }
                    518:     if (status < 0) return NULL; /* @@ error. Can't resolve or forbidden */
                    519:     
1.15      timbl     520:     p = HTAnchor_protocol(request->anchor);
1.2       timbl     521:     if (!p) return NULL;
                    522:     
1.15      timbl     523:     return (*p->saveStream)(request);
1.2       timbl     524:     
                    525: }
                    526: 
                    527: 
                    528: /*             Load a document - with logging etc
                    529: **             ----------------------------------
                    530: **
                    531: **     - Checks for documents already loaded
                    532: **     - Logs the access
                    533: **     - Allows stdin filter option
                    534: **     - Trace output and error messages
                    535: **
1.1       timbl     536: **    On Entry,
1.19      timbl     537: **        request->anchor      valid for of the document to be accessed.
                    538: **      request->childAnchor   optional anchor within doc to be selected
                    539: **
1.2       timbl     540: **        filter            if YES, treat stdin as HTML
1.1       timbl     541: **
1.15      timbl     542: **       request->anchor   is the node_anchor for the document
                    543: **       request->output_format is valid
                    544: **
1.1       timbl     545: **    On Exit,
                    546: **        returns    YES     Success in opening document
                    547: **                   NO      Failure 
                    548: **
                    549: */
                    550: 
1.19      timbl     551: PRIVATE BOOL HTLoadDocument ARGS1(HTRequest *,         request)
1.1       timbl     552: 
                    553: {
                    554:     int                status;
                    555:     HText *    text;
1.19      timbl     556:     char * full_address = HTAnchor_address((HTAnchor*)request->anchor);
                    557:     
1.1       timbl     558:     if (TRACE) fprintf (stderr,
                    559:       "HTAccess: loading document %s\n", full_address);
                    560: 
1.18      timbl     561:     request->using_cache = NULL;
                    562:     
1.15      timbl     563:     if (!request->output_format) request->output_format = WWW_PRESENT;
1.25      frystyk   564: 
1.31      frystyk   565:     if (!HTForceReload && (text=(HText *)HTAnchor_document(request->anchor)))
1.15      timbl     566:     {  /* Already loaded */
1.1       timbl     567:         if (TRACE) fprintf(stderr, "HTAccess: Document already in memory.\n");
1.19      timbl     568:        if (request->childAnchor) {
                    569:            HText_selectAnchor(text, request->childAnchor);
                    570:        } else {
                    571:            HText_select(text); 
                    572:        }
                    573:        free(full_address);
1.1       timbl     574:        return YES;
                    575:     }
1.17      timbl     576:     
1.34      frystyk   577:     /* Check the Cache */
                    578:     /* Caching is ONLY done if (char*) HTCacheDir is set. Henrik 09/03-94 */
1.17      timbl     579:     /* Bug: for each format, we only check whether it is ok, we
                    580:        don't check them all and chose the best */
1.38      timbl     581:     if (/* HTCacheDir && */ request->anchor->cacheItems) {
1.17      timbl     582:         HTList * list = request->anchor->cacheItems;
1.20      luotonen  583:        HTList * cur = list;
                    584:        HTCacheItem * item;
                    585: 
                    586:        while ((item = (HTCacheItem*)HTList_nextObject(cur))) {
1.18      timbl     587:            HTStream * s;
                    588:            
                    589:            request->using_cache = item;
                    590:            
1.37      luotonen  591:            s = HTStreamStack(item->format, request, NO);
1.17      timbl     592:            if (s) {            /* format was suitable */
                    593:                FILE * fp = fopen(item->filename, "r");
1.18      timbl     594:                if (TRACE) fprintf(stderr, "Cache: HIT file %s for %s\n",
1.20      luotonen  595:                                   item->filename, 
                    596:                                   full_address);
1.17      timbl     597:                if (fp) {
                    598:                    HTFileCopy(fp, s);
1.24      timbl     599:                    (*s->isa->free)(s); /* close up pipeline */
1.17      timbl     600:                    fclose(fp);
1.19      timbl     601:                    free(full_address);
1.17      timbl     602:                    return YES;
                    603:                } else {
                    604:                    fprintf(stderr, "***** Can't read cache file %s !\n",
1.20      luotonen  605:                            item->filename);
1.17      timbl     606:                } /* file open ok */
                    607:            } /* stream ok */
                    608:        } /* next cache item */
                    609:     } /* if cache available for this anchor */
1.1       timbl     610:     
1.35      luotonen  611:     status = HTLoad(request);
1.2       timbl     612: 
                    613:     
1.1       timbl     614: /*     Log the access if necessary
                    615: */
1.42      frystyk   616:     if (HTlogfile) {
1.1       timbl     617:        time_t theTime;
                    618:        time(&theTime);
1.42      frystyk   619:        fprintf(HTlogfile, "%24.24s %s %s %s\n",
1.1       timbl     620:            ctime(&theTime),
                    621:            HTClientHost ? HTClientHost : "local",
                    622:            status<0 ? "FAIL" : "GET",
                    623:            full_address);
1.42      frystyk   624:        fflush(HTlogfile);      /* Actually update it on disk */
1.1       timbl     625:        if (TRACE) fprintf(stderr, "Log: %24.24s %s %s %s\n",
                    626:            ctime(&theTime),
                    627:            HTClientHost ? HTClientHost : "local",
                    628:            status<0 ? "FAIL" : "GET",
                    629:            full_address);
                    630:     }
                    631: 
                    632:     if (status == HT_LOADED) {
                    633:        if (TRACE) {
                    634:            fprintf(stderr, "HTAccess: `%s' has been accessed.\n",
                    635:            full_address);
                    636:        }
1.19      timbl     637:        free(full_address);
1.1       timbl     638:        return YES;
                    639:     }
                    640:     
                    641:     if (status == HT_NO_DATA) {
                    642:        if (TRACE) {
                    643:            fprintf(stderr, 
                    644:            "HTAccess: `%s' has been accessed, No data left.\n",
                    645:            full_address);
                    646:        }
1.19      timbl     647:        free(full_address);
1.1       timbl     648:        return NO;
                    649:     }
                    650:     
1.34      frystyk   651:     /* Bug fix thanks to Lou Montulli. Henrik 10/03-94 */
                    652:     if (status<=0) {                 /* Failure in accessing a document */
1.1       timbl     653: #ifdef CURSES
                    654:         user_message("Can't access `%s'", full_address);
                    655: #else
1.5       timbl     656:        if (TRACE) fprintf(stderr, 
                    657:                "HTAccess: Can't access `%s'\n", full_address);
1.1       timbl     658: #endif
1.32      frystyk   659:        /* This is done in the specific load procedures... Henrik 07/03-94 */
1.39      luotonen  660:        if (request->error_stack)
                    661:            HTLoadError(request, 500, "Unable to access document.");
1.19      timbl     662:        free(full_address);
1.1       timbl     663:        return NO;
                    664:     }
1.9       timbl     665:  
                    666:     /* If you get this, then please find which routine is returning
                    667:        a positive unrecognised error code! */
                    668:  
1.1       timbl     669:     fprintf(stderr,
1.2       timbl     670:     "**** HTAccess: socket or file number returned by obsolete load routine!\n");
1.9       timbl     671:     fprintf(stderr,
1.19      timbl     672:     "**** HTAccess: Internal software error. Please mail www-bug@info.cern.ch quoting the version number of this software and the URL: %s!\n",
                    673:        full_address);
                    674:     free(full_address);
                    675:    
1.1       timbl     676:     exit(-6996);
1.20      luotonen  677:     return NO; /* For gcc :-( */
1.2       timbl     678: } /* HTLoadDocument */
1.1       timbl     679: 
                    680: 
                    681: 
                    682: /*             Load a document from absolute name
                    683: **             ---------------
                    684: **
                    685: **    On Entry,
                    686: **        addr     The absolute address of the document to be accessed.
                    687: **        filter   if YES, treat document as HTML
                    688: **
                    689: **    On Exit,
                    690: **        returns    YES     Success in opening document
                    691: **                   NO      Failure 
                    692: **
                    693: **
                    694: */
                    695: 
1.15      timbl     696: PUBLIC BOOL HTLoadAbsolute ARGS2(CONST char *,addr, HTRequest*, request)
1.2       timbl     697: {
1.19      timbl     698:    HTAnchor * anchor = HTAnchor_findAddress(addr);
                    699:    request->anchor = HTAnchor_parent(anchor);
                    700:    request->childAnchor = ((HTAnchor*)request->anchor == anchor) ?
                    701:                        NULL : (HTChildAnchor*) anchor;
                    702:    return HTLoadDocument(request);
1.2       timbl     703: }
                    704: 
                    705: 
                    706: /*             Load a document from absolute name to stream
                    707: **             --------------------------------------------
                    708: **
                    709: **    On Entry,
                    710: **        addr     The absolute address of the document to be accessed.
1.15      timbl     711: **        request->output_stream     if non-NULL, send data down this stream
1.2       timbl     712: **
                    713: **    On Exit,
                    714: **        returns    YES     Success in opening document
                    715: **                   NO      Failure 
                    716: **
                    717: **
                    718: */
                    719: 
                    720: PUBLIC BOOL HTLoadToStream ARGS3(
                    721:                CONST char *,   addr,
                    722:                BOOL,           filter,
1.15      timbl     723:                HTRequest*,     request)
1.1       timbl     724: {
1.19      timbl     725:    HTAnchor * anchor = HTAnchor_findAddress(addr);
                    726:    request->anchor = HTAnchor_parent(anchor);
                    727:    request->childAnchor = ((HTAnchor*)request->anchor == anchor) ? NULL :
                    728:        (HTChildAnchor*) anchor;
1.15      timbl     729:     request->output_stream = request->output_stream;
1.19      timbl     730:     return HTLoadDocument(request);
1.1       timbl     731: }
                    732: 
                    733: 
1.2       timbl     734: 
                    735: 
1.1       timbl     736: /*             Load a document from relative name
                    737: **             ---------------
                    738: **
                    739: **    On Entry,
1.2       timbl     740: **        relative_name     The relative address of the document
                    741: **                         to be accessed.
1.1       timbl     742: **
                    743: **    On Exit,
                    744: **        returns    YES     Success in opening document
                    745: **                   NO      Failure 
                    746: **
                    747: **
                    748: */
                    749: 
1.15      timbl     750: PUBLIC BOOL HTLoadRelative ARGS3(
1.2       timbl     751:                CONST char *,           relative_name,
1.15      timbl     752:                HTParentAnchor *,       here,
1.20      luotonen  753:                HTRequest *,            request)
1.1       timbl     754: {
                    755:     char *             full_address = 0;
                    756:     BOOL                       result;
                    757:     char *             mycopy = 0;
                    758:     char *             stripped = 0;
                    759:     char *             current_address =
1.2       timbl     760:                                HTAnchor_address((HTAnchor*)here);
1.1       timbl     761: 
                    762:     StrAllocCopy(mycopy, relative_name);
                    763: 
                    764:     stripped = HTStrip(mycopy);
                    765:     full_address = HTParse(stripped,
                    766:                   current_address,
                    767:                   PARSE_ACCESS|PARSE_HOST|PARSE_PATH|PARSE_PUNCTUATION);
1.15      timbl     768:     result = HTLoadAbsolute(full_address, request);
1.1       timbl     769:     free(full_address);
                    770:     free(current_address);
                    771:     free(mycopy);  /* Memory leak fixed 10/7/92 -- JFG */
                    772:     return result;
                    773: }
                    774: 
                    775: 
                    776: /*             Load if necessary, and select an anchor
                    777: **             --------------------------------------
                    778: **
                    779: **    On Entry,
                    780: **        destination              The child or parenet anchor to be loaded.
                    781: **
                    782: **    On Exit,
                    783: **        returns    YES     Success
                    784: **                   NO      Failure 
                    785: **
                    786: */
                    787: 
1.15      timbl     788: PUBLIC BOOL HTLoadAnchor ARGS2(HTAnchor*, anchor, HTRequest *, request)
1.1       timbl     789: {
1.15      timbl     790:     if (!anchor) return NO;    /* No link */
1.1       timbl     791:     
1.15      timbl     792:     request->anchor  = HTAnchor_parent(anchor);
1.19      timbl     793:     request->childAnchor = ((HTAnchor*)request->anchor == anchor) ? NULL
                    794:                                        : (HTChildAnchor*) anchor;
1.1       timbl     795:     
1.19      timbl     796:     return HTLoadDocument(request) ? YES : NO;
1.1       timbl     797:        
                    798: } /* HTLoadAnchor */
                    799: 
                    800: 
                    801: /*             Search
                    802: **             ------
                    803: **  Performs a keyword search on word given by the user. Adds the keyword to 
                    804: **  the end of the current address and attempts to open the new address.
                    805: **
                    806: **  On Entry,
                    807: **       *keywords     space-separated keyword list or similar search list
1.2       timbl     808: **     here            is anchor search is to be done on.
1.1       timbl     809: */
                    810: 
1.2       timbl     811: PRIVATE char hex(i)
                    812:     int i;
                    813: {
1.13      timbl     814:     char * hexchars = "0123456789ABCDEF";
                    815:     return hexchars[i];
1.2       timbl     816: }
1.1       timbl     817: 
1.15      timbl     818: PUBLIC BOOL HTSearch ARGS3(
1.2       timbl     819:        CONST char *,           keywords,
1.15      timbl     820:        HTParentAnchor *,       here,
                    821:        HTRequest *,            request)
1.1       timbl     822: {
1.2       timbl     823: 
                    824: #define acceptable \
                    825: "1234567890abcdefghijlkmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ.-_"
                    826: 
                    827:     char *q, *u;
                    828:     CONST char * p, *s, *e;            /* Pointers into keywords */
                    829:     char * address = HTAnchor_address((HTAnchor*)here);
1.1       timbl     830:     BOOL result;
1.2       timbl     831:     char * escaped = malloc(strlen(keywords)*3+1);
                    832: 
1.29      frystyk   833:     /* static CONST BOOL isAcceptable[96] = */
                    834:     /* static AND const is not good for a gnu compiler! Frystyk 25/02-94 */
1.30      luotonen  835:     static BOOL isAcceptable[96] =
1.2       timbl     836:     /*   0 1 2 3 4 5 6 7 8 9 A B C D E F */
                    837:     {    0,0,0,0,0,0,0,0,0,0,1,0,0,1,1,0,      /* 2x   !"#$%&'()*+,-./  */
                    838:          1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,      /* 3x  0123456789:;<=>?  */
                    839:         1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,       /* 4x  @ABCDEFGHIJKLMNO  */
                    840:         1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,1,       /* 5X  PQRSTUVWXYZ[\]^_  */
                    841:         0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,       /* 6x  `abcdefghijklmno  */
                    842:         1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0 };     /* 7X  pqrstuvwxyz{\}~  DEL */
                    843: 
                    844:     if (escaped == NULL) outofmem(__FILE__, "HTSearch");
                    845:     
1.29      frystyk   846: /* Convert spaces to + and hex escape unacceptable characters */
1.2       timbl     847: 
1.29      frystyk   848:     for(s=keywords; *s && WHITE(*s); s++); /*scan */    /* Skip white space */
                    849:     for(e = s + strlen(s); e>s && WHITE(*(e-1)) ; e--);     /* Skip trailers */
                    850:     for(q=escaped, p=s; p<e; p++) {                  /* scan stripped field */
1.2       timbl     851:         int c = (int)TOASCII(*p);
                    852:         if (WHITE(*p)) {
                    853:            *q++ = '+';
1.29      frystyk   854:        } else if (c>=32 && c<=127 && isAcceptable[c-32] != 0) {
1.13      timbl     855:            *q++ = *p;                  /* 930706 TBL for MVS bug */
1.2       timbl     856:        } else {
                    857:            *q++ = '%';
                    858:            *q++ = hex(c / 16);
                    859:            *q++ = hex(c % 16);
                    860:        }
                    861:     } /* Loop over string */
1.1       timbl     862:     
1.2       timbl     863:     *q=0;
                    864:                                /* terminate escaped sctring */
                    865:     u=strchr(address, '?');            /* Find old search string */
                    866:     if (u) *u = 0;                             /* Chop old search off */
1.1       timbl     867: 
                    868:     StrAllocCat(address, "?");
1.2       timbl     869:     StrAllocCat(address, escaped);
                    870:     free(escaped);
1.15      timbl     871:     result = HTLoadRelative(address, here, request);
1.1       timbl     872:     free(address);
1.2       timbl     873:     
1.1       timbl     874:     return result;
1.2       timbl     875: }
                    876: 
                    877: 
                    878: /*             Search Given Indexname
                    879: **             ------
                    880: **  Performs a keyword search on word given by the user. Adds the keyword to 
                    881: **  the end of the current address and attempts to open the new address.
                    882: **
                    883: **  On Entry,
                    884: **       *keywords     space-separated keyword list or similar search list
                    885: **     *addres         is name of object search is to be done on.
                    886: */
                    887: 
1.15      timbl     888: PUBLIC BOOL HTSearchAbsolute ARGS3(
1.2       timbl     889:        CONST char *,   keywords,
1.15      timbl     890:        CONST char *,   indexname,
                    891:        HTRequest *,    request)
1.2       timbl     892: {
                    893:     HTParentAnchor * anchor =
                    894:        (HTParentAnchor*) HTAnchor_findAddress(indexname);
1.15      timbl     895:     return HTSearch(keywords, anchor, request);
1.2       timbl     896: }
                    897: 
                    898: 
                    899: /*             Generate the anchor for the home page
                    900: **             -------------------------------------
                    901: **
                    902: **     As it involves file access, this should only be done once
                    903: **     when the program first runs.
1.10      timbl     904: **     This is a default algorithm -- browser don't HAVE to use this.
                    905: **     But consistency betwen browsers is STRONGLY recommended!
1.2       timbl     906: **
1.10      timbl     907: **     Priority order is:
                    908: **
                    909: **             1       WWW_HOME environment variable (logical name, etc)
                    910: **             2       ~/WWW/default.html
                    911: **             3       /usr/local/bin/default.html
                    912: **             4       http://info.cern.ch/default.html
                    913: **
1.2       timbl     914: */
                    915: PUBLIC HTParentAnchor * HTHomeAnchor NOARGS
                    916: {
1.12      timbl     917:     char * my_home_document = NULL;
                    918:     char * home = (char *)getenv(LOGICAL_DEFAULT);
1.2       timbl     919:     char * ref;
                    920:     HTParentAnchor * anchor;
1.1       timbl     921:     
1.12      timbl     922:     if (home) {
                    923:         StrAllocCopy(my_home_document, home);
                    924:     
                    925: /*     Someone telnets in, they get a special home.
                    926: */
                    927: #define MAX_FILE_NAME 1024                                     /* @@@ */
                    928:     } else  if (HTClientHost) {                        /* Telnet server */
                    929:        FILE * fp = fopen(REMOTE_POINTER, "r");
                    930:        char * status;
                    931:        if (fp) {
                    932:            my_home_document = (char*) malloc(MAX_FILE_NAME);
                    933:            status = fgets(my_home_document, MAX_FILE_NAME, fp);
                    934:            if (!status) {
                    935:                free(my_home_document);
                    936:                my_home_document = NULL;
                    937:            }
                    938:            fclose(fp);
                    939:        }
                    940:        if (!my_home_document) StrAllocCopy(my_home_document, REMOTE_ADDRESS);
                    941:     }
                    942: 
                    943:     
                    944: 
1.2       timbl     945: #ifdef unix
1.12      timbl     946: 
1.10      timbl     947:     if (!my_home_document) {
                    948:        FILE * fp = NULL;
                    949:        CONST char * home =  (CONST char*)getenv("HOME");
                    950:        if (home) { 
                    951:            my_home_document = (char *)malloc(
                    952:                strlen(home)+1+ strlen(PERSONAL_DEFAULT)+1);
                    953:            if (my_home_document == NULL) outofmem(__FILE__, "HTLocalName");
                    954:            sprintf(my_home_document, "%s/%s", home, PERSONAL_DEFAULT);
                    955:            fp = fopen(my_home_document, "r");
                    956:        }
                    957:        
                    958:        if (!fp) {
                    959:            StrAllocCopy(my_home_document, LOCAL_DEFAULT_FILE);
                    960:            fp = fopen(my_home_document, "r");
                    961:        }
1.2       timbl     962:        if (fp) {
                    963:            fclose(fp);
                    964:        } else {
                    965:        if (TRACE) fprintf(stderr,
1.10      timbl     966:            "HTBrowse: No local home document ~/%s or %s\n",
                    967:            PERSONAL_DEFAULT, LOCAL_DEFAULT_FILE);
1.11      timbl     968:            free(my_home_document);
                    969:            my_home_document = NULL;
1.2       timbl     970:        }
                    971:     }
                    972: #endif
1.10      timbl     973:     ref = HTParse( my_home_document ?  my_home_document :
                    974:                                HTClientHost ? REMOTE_ADDRESS
                    975:                                : LAST_RESORT,
                    976:                    "file:",
1.2       timbl     977:                    PARSE_ACCESS|PARSE_HOST|PARSE_PATH|PARSE_PUNCTUATION);
1.10      timbl     978:     if (my_home_document) {
1.2       timbl     979:        if (TRACE) fprintf(stderr,
                    980:            "HTAccess: Using custom home page %s i.e. address %s\n",
1.10      timbl     981:            my_home_document, ref);
                    982:        free(my_home_document);
1.2       timbl     983:     }
                    984:     anchor = (HTParentAnchor*) HTAnchor_findAddress(ref);
                    985:     free(ref);
                    986:     return anchor;
1.1       timbl     987: }
1.26      frystyk   988: 
                    989: 
                    990: /*             Bind an Anchor to the request structure
                    991: **             ---------------------------------------
                    992: **
                    993: **    On Entry,
                    994: **     anchor          The child or parenet anchor to be binded
                    995: **     request         The request sturcture
                    996: **    On Exit,
                    997: **        returns    YES     Success
                    998: **                   NO      Failure 
                    999: **
                   1000: **  Note: Actually the same as HTLoadAnchor() but DOES NOT do the loading
                   1001: **                                             Henrik Frystyk 17/02-94
                   1002: */
                   1003: 
                   1004: PUBLIC BOOL HTBindAnchor ARGS2(HTAnchor*, anchor, HTRequest *, request)
                   1005: {
                   1006:     if (!anchor) return NO;    /* No link */
                   1007:     
                   1008:     request->anchor  = HTAnchor_parent(anchor);
                   1009:     request->childAnchor = ((HTAnchor*)request->anchor == anchor) ? NULL
                   1010:                                        : (HTChildAnchor*) anchor;
                   1011:        
1.29      frystyk  1012:     return YES;
1.26      frystyk  1013: } /* HTBindAnchor */
1.39      luotonen 1014: 
                   1015: 
                   1016: 
                   1017: /*
                   1018:  *     Error diagnostics
                   1019:  */
                   1020: PUBLIC void HTAddError ARGS2(HTRequest *,      req,
1.40      luotonen 1021:                             CONST char *,      msg)
1.39      luotonen 1022: {
                   1023:     HTAddError2(req,msg,NULL);
                   1024: }
                   1025: 
                   1026: PUBLIC void HTAddError2 ARGS3(HTRequest *,     req,
1.40      luotonen 1027:                              CONST char *,     msg,
                   1028:                              CONST char *,     param)
1.39      luotonen 1029: {
                   1030:     int mlen = msg ? strlen(msg) : 0;
                   1031:     int plen = param ? strlen(param) : 0;
                   1032:     char * str;
                   1033: 
                   1034:     if (!req) return;
                   1035:     if (!req->error_stack) req->error_stack = HTList_new();
                   1036: 
                   1037:     str = (char*)malloc(mlen + plen + 2);
                   1038:     if (!str) outofmem(__FILE__,"HTAddError2");
                   1039: 
                   1040:     if (msg) strcpy(str,msg);
                   1041:     strcpy(str+mlen," ");
                   1042:     if (param) strcpy(str+mlen+1,param);
                   1043: 
                   1044:     HTList_addObject(req->error_stack, (void*)str);
                   1045:     CTRACE(stderr, "libwww error: %s\n", str);
                   1046: }
                   1047: 
                   1048: PUBLIC void HTAddErrorN ARGS3(HTRequest *,     req,
1.40      luotonen 1049:                              CONST char *,     msg,
1.39      luotonen 1050:                              int,              num)
                   1051: {
                   1052:     char buf[20];
                   1053:     sprintf(buf,"%d",num);
                   1054:     HTAddError2(req,msg,buf);
                   1055: }
                   1056: 
                   1057: PUBLIC void HTClearErrors ARGS1(HTRequest *,   req)
                   1058: {
                   1059:     if (req && req->error_stack) {
                   1060:        HTList * cur = req->error_stack;
                   1061:        char * str;
                   1062:        while ((str = (char*)HTList_nextObject(cur)))
                   1063:            free(str);
                   1064:        HTList_delete(req->error_stack);
                   1065:        req->error_stack = NULL;
                   1066:     }
                   1067: }
1.26      frystyk  1068: 

Webmaster