Annotation of libwww/Library/src/HTAccess.c, revision 1.51

1.1       timbl       1: /*             Access Manager                                  HTAccess.c
                      2: **             ==============
                      3: **
                      4: ** Authors
                      5: **     TBL     Tim Berners-Lee timbl@info.cern.ch
1.4       timbl       6: **     JFG     Jean-Francois Groff jfg@dxcern.cern.ch
1.1       timbl       7: **     DD      Denis DeLaRoca (310) 825-4580  <CSP1DWD@mvs.oac.ucla.edu>
                      8: ** History
                      9: **       8 Jun 92 Telnet hopping prohibited as telnet is not secure TBL
                     10: **     26 Jun 92 When over DECnet, suppressed FTP, Gopher and News. JFG
1.42      frystyk    11: **      6 Oct 92 Moved HTClientHost and HTlogfile into here. TBL
1.1       timbl      12: **     17 Dec 92 Tn3270 added, bug fix. DD
1.2       timbl      13: **      4 Feb 93 Access registration, Search escapes bad chars TBL
1.9       timbl      14: **               PARAMETERS TO HTSEARCH AND HTLOADRELATIVE CHANGED
                     15: **     28 May 93 WAIS gateway explicit if no WAIS library linked in.
1.19      timbl      16: **        Dec 93 Bug change around, more reentrant, etc
1.42      frystyk    17: **     09 May 94 logfile renamed to HTlogfile to avoid clash with WAIS
1.2       timbl      18: ** Bugs
                     19: **     This module assumes that the graphic object is hypertext, as it
1.9       timbl      20: **     needs to select it when it has been loaded.  A superclass needs to be
1.2       timbl      21: **     defined which accepts select and select_anchor.
1.1       timbl      22: */
                     23: 
1.9       timbl      24: #ifndef DEFAULT_WAIS_GATEWAY
1.8       timbl      25: #define DEFAULT_WAIS_GATEWAY "http://info.cern.ch:8001/"
1.9       timbl      26: #endif
1.8       timbl      27: 
1.1       timbl      28: /* Implements:
                     29: */
                     30: #include "HTAccess.h"
                     31: 
                     32: /* Uses:
                     33: */
                     34: 
                     35: #include "HTParse.h"
                     36: #include "HTUtils.h"
1.4       timbl      37: #include "HTML.h"              /* SCW */
1.2       timbl      38: 
                     39: #ifndef NO_RULES
                     40: #include "HTRules.h"
                     41: #endif
                     42: 
1.1       timbl      43: #include <stdio.h>
                     44: 
1.2       timbl      45: #include "HTList.h"
                     46: #include "HText.h"     /* See bugs above */
                     47: #include "HTAlert.h"
1.17      timbl      48: #include "HTFWriter.h" /* for cache stuff */
                     49: #include "HTTee.h"
1.46      frystyk    50: #include "HTError.h"
1.2       timbl      51: 
1.1       timbl      52: /*     These flags may be set to modify the operation of this module
                     53: */
1.34      frystyk    54: PUBLIC char * HTCacheDir = 0;  /* Root for cached files or 0 for no cache */
                     55: PUBLIC char * HTSaveLocallyDir = SAVE_LOCALLY_HOME_DIR;        /* Save & exe files */
1.1       timbl      56: PUBLIC char * HTClientHost = 0;        /* Name of remote login host if any */
1.42      frystyk    57: PUBLIC FILE * HTlogfile = 0;   /* File to which to output one-liners */
1.41      luotonen   58: 
1.34      frystyk    59: PUBLIC BOOL HTForceReload = NO;        /* Force reload from cache or net */
1.12      timbl      60: PUBLIC BOOL HTSecure = NO;     /* Disable access for telnet users? */
1.27      luotonen   61: PUBLIC BOOL using_proxy = NO;  /* are we using a proxy gateway? */
1.43      luotonen   62: PUBLIC char * HTImServer = NULL;/* cern_httpd sets this to the translated URL*/
1.27      luotonen   63: PUBLIC BOOL HTImProxy = NO;    /* cern_httpd as a proxy? */
1.1       timbl      64: 
1.43      luotonen   65: 
1.2       timbl      66: /*     To generate other things, play with these:
                     67: */
                     68: 
1.15      timbl      69: /* PUBLIC HTFormat HTOutputFormat = NULL;      use request->output_format */
                     70: /* PUBLIC HTStream* HTOutputStream = NULL;     use request->output_stream */ 
1.1       timbl      71: 
                     72: PRIVATE HTList * protocols = NULL;   /* List of registered protocol descriptors */
                     73: 
1.24      timbl      74: /*     Superclass defn */
1.1       timbl      75: 
1.24      timbl      76: struct _HTStream {
                     77:        HTStreamClass * isa;
                     78:        /* ... */
                     79: };
                     80: 
1.15      timbl      81: /*     Create  a request structure
                     82: **     ---------------------------
                     83: */
                     84: 
                     85: PUBLIC HTRequest * HTRequest_new NOARGS
                     86: {
1.28      luotonen   87:     HTRequest * me = (HTRequest*) calloc(1, sizeof(*me));  /* zero fill */
1.15      timbl      88:     if (!me) outofmem(__FILE__, "HTRequest_new()");
                     89:     
1.20      luotonen   90:     me->conversions    = HTList_new(); /* No conversions registerd yet */
                     91:     me->output_format  = WWW_PRESENT;  /* default it to present to user */
                     92: 
1.15      timbl      93:     return me;
                     94: }
                     95: 
                     96: 
1.49      frystyk    97: /*     Clear  a request structure
                     98: **     ---------------------------
                     99: **     This function clears the reguest structure so that only the
                    100: **     conversions remain. Everything else is as if it was created from
                    101: **     scratch.
                    102: */
                    103: PUBLIC void HTRequest_clear ARGS1(HTRequest *, req)
                    104: {
                    105:     HTList *conversions;
                    106:     if (!req) {
                    107:        if (TRACE)
                    108:            fprintf(stderr, "Clear....... request: Bad argument!\n");
                    109:        return;
                    110:     }
                    111:     conversions = req->conversions;                 /* Save the conversions */
                    112:     HTErrorFree(req);
                    113:     HTAACleanup(req);
                    114:     FREE(req->from);
                    115:     memset(req, '\0', sizeof(HTRequest));
                    116: 
                    117:     /* Now initialize as from scratch but with the old list of conversions */
                    118:     req->conversions = conversions;
                    119:     req->output_format = WWW_PRESENT;      /* default it to present to user */
                    120: }
                    121: 
                    122: 
1.20      luotonen  123: /*     Delete a request structure
                    124: **     --------------------------
                    125: */
                    126: PUBLIC void HTRequest_delete ARGS1(HTRequest *, req)
                    127: {
                    128:     if (req) {
1.34      frystyk   129:        HTFormatDelete(req->conversions);
1.46      frystyk   130:        HTErrorFree(req);
1.34      frystyk   131:        HTAACleanup(req);
1.37      luotonen  132:        FREE(req->from);
1.34      frystyk   133:        FREE(req);
1.20      luotonen  134:     }
                    135: }
                    136: 
                    137: 
1.22      luotonen  138: PRIVATE char * method_names[(int)MAX_METHODS + 1] =
                    139: {
                    140:     "INVALID-METHOD",
                    141:     "GET",
                    142:     "HEAD",
                    143:     "POST",
                    144:     "PUT",
                    145:     "DELETE",
                    146:     "CHECKOUT",
                    147:     "CHECKIN",
                    148:     "SHOWMETHOD",
                    149:     "LINK",
                    150:     "UNLINK",
                    151:     NULL
                    152: };
                    153: 
                    154: /*     Get method enum value
                    155: **     ---------------------
                    156: */
                    157: PUBLIC HTMethod HTMethod_enum ARGS1(char *, name)
                    158: {
                    159:     if (name) {
                    160:        int i;
                    161:        for (i=1; i < (int)MAX_METHODS; i++)
                    162:            if (!strcmp(name, method_names[i]))
                    163:                return (HTMethod)i;
                    164:     }
                    165:     return METHOD_INVALID;
                    166: }
                    167: 
                    168: 
                    169: /*     Get method name
                    170: **     ---------------
                    171: */
                    172: PUBLIC char * HTMethod_name ARGS1(HTMethod, method)
                    173: {
                    174:     if ((int)method > (int)METHOD_INVALID  && 
                    175:        (int)method < (int)MAX_METHODS)
                    176:        return method_names[(int)method];
                    177:     else
                    178:        return method_names[(int)METHOD_INVALID];
                    179: }
                    180: 
                    181: 
                    182: /*     Is method in a list of method names?
                    183: **     -----------------------------------
                    184: */
                    185: PUBLIC BOOL HTMethod_inList ARGS2(HTMethod,    method,
                    186:                                  HTList *,     list)
                    187: {
                    188:     char * method_name = HTMethod_name(method);
                    189:     HTList *cur = list;
                    190:     char *item;
                    191: 
                    192:     while (NULL != (item = (char*)HTList_nextObject(cur))) {
                    193:        CTRACE(stderr, " %s", item);
                    194:        if (0==strcasecomp(item, method_name))
                    195:            return YES;
                    196:     }
                    197:     return NO; /* Not found */
                    198: }
                    199: 
                    200: 
                    201: 
                    202: 
1.20      luotonen  203: 
1.1       timbl     204: /*     Register a Protocol                             HTRegisterProtocol
                    205: **     -------------------
                    206: */
                    207: 
                    208: PUBLIC BOOL HTRegisterProtocol(protocol)
                    209:        HTProtocol * protocol;
                    210: {
                    211:     if (!protocols) protocols = HTList_new();
                    212:     HTList_addObject(protocols, protocol);
                    213:     return YES;
                    214: }
                    215: 
                    216: 
                    217: /*     Register all known protocols
                    218: **     ----------------------------
                    219: **
                    220: **     Add to or subtract from this list if you add or remove protocol modules.
                    221: **     This routine is called the first time the protocol list is needed,
                    222: **     unless any protocols are already registered, in which case it is not called.
                    223: **     Therefore the application can override this list.
                    224: **
                    225: **     Compiling with NO_INIT prevents all known protocols from being forced
                    226: **     in at link time.
                    227: */
                    228: #ifndef NO_INIT
                    229: PRIVATE void HTAccessInit NOARGS                       /* Call me once */
                    230: {
1.14      duns      231: GLOBALREF HTProtocol HTTP, HTFile, HTTelnet, HTTn3270, HTRlogin;
1.1       timbl     232: #ifndef DECNET
1.14      duns      233: GLOBALREF  HTProtocol HTFTP, HTNews, HTGopher;
1.42      frystyk   234: 
                    235: /* This is the replacement when HTWhoIs gets a complete protocol module */
                    236: /* GLOBALREF  HTProtocol HTFTP, HTNews, HTGopher, HTWhoIs;             */
                    237: /* -------------------------------------------------------------------- */
                    238: 
1.3       timbl     239: #ifdef DIRECT_WAIS
1.14      duns      240: GLOBALREF  HTProtocol HTWAIS;
1.3       timbl     241: #endif
1.2       timbl     242:     HTRegisterProtocol(&HTFTP);
                    243:     HTRegisterProtocol(&HTNews);
                    244:     HTRegisterProtocol(&HTGopher);
1.42      frystyk   245: 
                    246: /* This should be added when HTWhoIs gets a complete protocol module */
                    247: /*  HTRegisterProtocol(&HTWhoIs);                                   */
                    248: /* ----------------------------------------------------------------- */
1.3       timbl     249: #ifdef DIRECT_WAIS
                    250:     HTRegisterProtocol(&HTWAIS);
                    251: #endif
1.1       timbl     252: #endif
                    253: 
1.2       timbl     254:     HTRegisterProtocol(&HTTP);
                    255:     HTRegisterProtocol(&HTFile);
                    256:     HTRegisterProtocol(&HTTelnet);
                    257:     HTRegisterProtocol(&HTTn3270);
                    258:     HTRegisterProtocol(&HTRlogin);
1.1       timbl     259: }
                    260: #endif
                    261: 
                    262: 
1.33      luotonen  263: 
                    264: /*                                                     override_proxy()
                    265: **
                    266: **     Check the no_proxy environment variable to get the list
                    267: **     of hosts for which proxy server is not consulted.
                    268: **
                    269: **     no_proxy is a comma- or space-separated list of machine
                    270: **     or domain names, with optional :port part.  If no :port
                    271: **     part is present, it applies to all ports on that domain.
                    272: **
                    273: **     Example:
                    274: **             no_proxy="cern.ch,some.domain:8001"
                    275: **
                    276: */
                    277: PRIVATE BOOL override_proxy ARGS1(CONST char *, addr)
                    278: {
                    279:     CONST char * no_proxy = getenv("no_proxy");
                    280:     char * p = NULL;
                    281:     char * host = NULL;
                    282:     int port = 0;
                    283:     int h_len = 0;
                    284: 
                    285:     if (!no_proxy || !addr || !(host = HTParse(addr, "", PARSE_HOST)))
                    286:        return NO;
                    287:     if (!*host) { free(host); return NO; }
                    288: 
1.34      frystyk   289:     if ((p = strchr(host, ':')) != NULL) {     /* Port specified */
1.33      luotonen  290:        *p++ = 0;                       /* Chop off port */
                    291:        port = atoi(p);
                    292:     }
                    293:     else {                             /* Use default port */
                    294:        char * access = HTParse(addr, "", PARSE_ACCESS);
                    295:        if (access) {
                    296:            if      (!strcmp(access,"http"))    port = 80;
                    297:            else if (!strcmp(access,"gopher"))  port = 70;
                    298:            else if (!strcmp(access,"ftp"))     port = 21;
                    299:            free(access);
                    300:        }
                    301:     }
                    302:     if (!port) port = 80;              /* Default */
                    303:     h_len = strlen(host);
                    304: 
                    305:     while (*no_proxy) {
                    306:        CONST char * end;
                    307:        CONST char * colon = NULL;
                    308:        int templ_port = 0;
                    309:        int t_len;
                    310: 
                    311:        while (*no_proxy && (WHITE(*no_proxy) || *no_proxy==','))
                    312:            no_proxy++;                 /* Skip whitespace and separators */
                    313: 
                    314:        end = no_proxy;
                    315:        while (*end && !WHITE(*end) && *end != ',') {   /* Find separator */
                    316:            if (*end==':') colon = end;                 /* Port number given */
                    317:            end++;
                    318:        }
                    319: 
                    320:        if (colon) {
                    321:            templ_port = atoi(colon+1);
                    322:            t_len = colon - no_proxy;
                    323:        }
                    324:        else {
                    325:            t_len = end - no_proxy;
                    326:        }
                    327: 
                    328:        if ((!templ_port || templ_port == port)  &&
                    329:            (t_len > 0  &&  t_len <= h_len  &&
                    330:             !strncmp(host + h_len - t_len, no_proxy, t_len))) {
                    331:            free(host);
                    332:            return YES;
                    333:        }
                    334:        if (*end) no_proxy = end+1;
                    335:        else break;
                    336:     }
                    337: 
                    338:     free(host);
                    339:     return NO;
                    340: }
                    341: 
                    342: 
                    343: 
1.2       timbl     344: /*             Find physical name and access protocol
                    345: **             --------------------------------------
1.1       timbl     346: **
                    347: **
                    348: ** On entry,
                    349: **     addr            must point to the fully qualified hypertext reference.
                    350: **     anchor          a pareent anchor with whose address is addr
                    351: **
                    352: ** On exit,
1.2       timbl     353: **     returns         HT_NO_ACCESS            Error has occured.
                    354: **                     HT_OK                   Success
1.1       timbl     355: **
                    356: */
1.21      luotonen  357: PRIVATE int get_physical ARGS1(HTRequest *, req)
                    358: {    
1.1       timbl     359:     char * access=0;   /* Name of access method */
1.21      luotonen  360:     char * addr = HTAnchor_address((HTAnchor*)req->anchor);    /* free me */
1.27      luotonen  361: 
1.35      luotonen  362:     /*
                    363:     ** This HACK is here until we have redirection implemented.
                    364:     ** This is used when we are recursively calling HTLoad().
                    365:     ** We then take the physical address, because currently the
                    366:     ** virtual address is kept in a hash table so it can't be
                    367:     ** changed -- otherwise it wouldn't be found anymore.
                    368:     */
1.36      luotonen  369:     if (HTAnchor_physical(req->anchor))
                    370:        StrAllocCopy(addr, HTAnchor_physical(req->anchor));
1.35      luotonen  371: 
1.2       timbl     372: #ifndef NO_RULES
1.47      luotonen  373:     if (HTImServer) {  /* cern_httpd has already done its own translations */
1.45      luotonen  374:        HTAnchor_setPhysical(req->anchor, HTImServer);
1.47      luotonen  375:        StrAllocCopy(addr, HTImServer); /* Oops, queries thru many proxies */
                    376:                                        /* didn't work without this -- AL  */
                    377:     }
1.21      luotonen  378:     else {
1.27      luotonen  379:        char * physical = HTTranslate(addr);
1.21      luotonen  380:        if (!physical) {
1.47      luotonen  381:            free(addr);
1.21      luotonen  382:            return HT_FORBIDDEN;
                    383:        }
                    384:        HTAnchor_setPhysical(req->anchor, physical);
                    385:        free(physical);                 /* free our copy */
1.2       timbl     386:     }
                    387: #else
1.21      luotonen  388:     HTAnchor_setPhysical(req->anchor, addr);
1.2       timbl     389: #endif
                    390: 
1.21      luotonen  391:     access =  HTParse(HTAnchor_physical(req->anchor),
1.27      luotonen  392:                      "file:", PARSE_ACCESS);
1.1       timbl     393: 
                    394: /*     Check whether gateway access has been set up for this
1.8       timbl     395: **
                    396: **     This function can be replaced by the rule system above.
1.1       timbl     397: */
1.8       timbl     398: #define USE_GATEWAYS
1.1       timbl     399: #ifdef USE_GATEWAYS
1.39      luotonen  400: 
                    401:     /* make sure the using_proxy variable is false */
                    402:     using_proxy = NO;
                    403: 
1.33      luotonen  404:     if (!override_proxy(addr)) {
1.27      luotonen  405:        char * gateway_parameter, *gateway, *proxy;
                    406: 
1.2       timbl     407:        gateway_parameter = (char *)malloc(strlen(access)+20);
                    408:        if (gateway_parameter == NULL) outofmem(__FILE__, "HTLoad");
1.27      luotonen  409: 
                    410:        /* search for proxy gateways */
1.2       timbl     411:        strcpy(gateway_parameter, "WWW_");
                    412:        strcat(gateway_parameter, access);
                    413:        strcat(gateway_parameter, "_GATEWAY");
                    414:        gateway = (char *)getenv(gateway_parameter); /* coerce for decstation */
1.27      luotonen  415: 
                    416:        /* search for proxy servers */
                    417:        strcpy(gateway_parameter, access);
                    418:        strcat(gateway_parameter, "_proxy");
                    419:        proxy = (char *)getenv(gateway_parameter);
                    420: 
1.2       timbl     421:        free(gateway_parameter);
1.27      luotonen  422: 
                    423:        if (TRACE && gateway)
                    424:            fprintf(stderr,"Gateway found: %s\n",gateway);
                    425:        if (TRACE && proxy)
                    426:            fprintf(stderr,"Proxy server found: %s\n",proxy);
                    427: 
1.8       timbl     428: #ifndef DIRECT_WAIS
1.9       timbl     429:        if (!gateway && 0==strcmp(access, "wais")) {
1.8       timbl     430:            gateway = DEFAULT_WAIS_GATEWAY;
                    431:        }
                    432: #endif
1.27      luotonen  433: 
                    434:        /* proxy servers have precedence over gateway servers */
                    435:        if (proxy) {
                    436:            char * gatewayed=0;
                    437: 
                    438:             StrAllocCopy(gatewayed,proxy);
                    439:            StrAllocCat(gatewayed,addr);
                    440:            using_proxy = YES;
                    441:            HTAnchor_setPhysical(req->anchor, gatewayed);
                    442:            free(gatewayed);
                    443:            free(access);
                    444: 
                    445:            access =  HTParse(HTAnchor_physical(req->anchor),
                    446:                              "http:", PARSE_ACCESS);
                    447:        } else if (gateway) {
1.9       timbl     448:            char * path = HTParse(addr, "",
                    449:                PARSE_HOST + PARSE_PATH + PARSE_PUNCTUATION);
                    450:                /* Chop leading / off to make host into part of path */
                    451:            char * gatewayed = HTParse(path+1, gateway, PARSE_ALL);
                    452:            free(path);
1.21      luotonen  453:             HTAnchor_setPhysical(req->anchor, gatewayed);
1.9       timbl     454:            free(gatewayed);
1.2       timbl     455:            free(access);
1.9       timbl     456:            
1.21      luotonen  457:            access =  HTParse(HTAnchor_physical(req->anchor),
1.8       timbl     458:                "http:", PARSE_ACCESS);
1.2       timbl     459:        }
                    460:     }
1.1       timbl     461: #endif
                    462: 
1.19      timbl     463:     free(addr);
1.1       timbl     464: 
                    465: 
                    466: /*     Search registered protocols to find suitable one
                    467: */
                    468:     {
1.20      luotonen  469:        HTList *cur;
                    470:        HTProtocol *p;
1.1       timbl     471: #ifndef NO_INIT
1.2       timbl     472:         if (!protocols) HTAccessInit();
1.1       timbl     473: #endif
1.20      luotonen  474:        cur = protocols;
                    475:        while ((p = (HTProtocol*)HTList_nextObject(cur))) {
1.2       timbl     476:            if (strcmp(p->name, access)==0) {
1.21      luotonen  477:                HTAnchor_setProtocol(req->anchor, p);
1.2       timbl     478:                free(access);
                    479:                return (HT_OK);
1.1       timbl     480:            }
                    481:        }
                    482:     }
                    483: 
                    484:     free(access);
1.2       timbl     485:     return HT_NO_ACCESS;
1.1       timbl     486: }
                    487: 
                    488: 
                    489: /*             Load a document
                    490: **             ---------------
                    491: **
1.2       timbl     492: **     This is an internal routine, which has an address AND a matching
                    493: **     anchor.  (The public routines are called with one OR the other.)
                    494: **
                    495: ** On entry,
1.15      timbl     496: **     request->
1.35      luotonen  497: **         anchor              a parent anchor with fully qualified
                    498: **                             hypertext reference as its address set
1.15      timbl     499: **         output_format       valid
                    500: **         output_stream       valid on NULL
1.2       timbl     501: **
                    502: ** On exit,
                    503: **     returns         <0              Error has occured.
                    504: **                     HT_LOADED       Success
                    505: **                     HT_NO_DATA      Success, but no document loaded.
1.8       timbl     506: **                                     (telnet sesssion started etc)
1.2       timbl     507: **
                    508: */
1.35      luotonen  509: PUBLIC int HTLoad ARGS1(HTRequest *, request)
1.2       timbl     510: {
1.25      frystyk   511:     char       *arg = NULL;
                    512:     HTProtocol *p;
                    513:     int        status;
                    514: 
1.22      luotonen  515:     if (request->method == METHOD_INVALID)
                    516:        request->method = METHOD_GET;
1.21      luotonen  517:     status = get_physical(request);
1.2       timbl     518:     if (status == HT_FORBIDDEN) {
1.49      frystyk   519: #ifdef OLD_CODE
1.21      luotonen  520:         return HTLoadError(request, 500,
1.49      frystyk   521:                            "Access forbidden by rule");
                    522: #endif /* OLD_CODE */
                    523:        char *url = HTAnchor_address((HTAnchor *) request->anchor);
                    524:        if (url) {
                    525:            HTUnEscape(url);
                    526:            HTErrorAdd(request, ERR_FATAL, NO, HTERR_FORBIDDEN,
                    527:                       (void *) url, (int) strlen(url), "HTLoad");
                    528:            free(url);
                    529:        } else {
                    530:            HTErrorAdd(request, ERR_FATAL, NO, HTERR_FORBIDDEN,
                    531:                       NULL, 0, "HTLoad");
                    532:        }
                    533:        return -1;
1.2       timbl     534:     }
                    535:     if (status < 0) return status;     /* Can't resolve or forbidden */
1.25      frystyk   536: 
                    537:     if(!(arg = HTAnchor_physical(request->anchor)) || !*arg) 
                    538:        return (-1);
1.27      luotonen  539: 
1.15      timbl     540:     p = HTAnchor_protocol(request->anchor);
1.17      timbl     541:     return (*(p->load))(request);
1.2       timbl     542: }
                    543: 
                    544: 
                    545: /*             Get a save stream for a document
                    546: **             --------------------------------
                    547: */
1.19      timbl     548: PUBLIC HTStream *HTSaveStream ARGS1(HTRequest *, request)
1.15      timbl     549: {
                    550:     HTProtocol * p;
1.19      timbl     551:     int status;
1.22      luotonen  552:     request->method = METHOD_PUT;
1.21      luotonen  553:     status = get_physical(request);
1.19      timbl     554:     if (status == HT_FORBIDDEN) {
1.49      frystyk   555: #ifdef OLD_CODE
1.21      luotonen  556:         HTLoadError(request, 500,
                    557:                    "Access forbidden by rule");
1.19      timbl     558:        return NULL;    /* should return error status? */
1.49      frystyk   559: #endif /* OLD_CODE */
                    560:        char *url = HTAnchor_address((HTAnchor *) request->anchor);
                    561:        if (url) {
                    562:            HTUnEscape(url);
                    563:            HTErrorAdd(request, ERR_FATAL, NO, HTERR_FORBIDDEN,
                    564:                       (void *) url, (int) strlen(url), "HTLoad");
                    565:            free(url);
                    566:        } else {
                    567:            HTErrorAdd(request, ERR_FATAL, NO, HTERR_FORBIDDEN,
                    568:                       NULL, 0, "HTLoad");
                    569:        }
                    570:        return NULL;    /* should return error status? */
1.19      timbl     571:     }
                    572:     if (status < 0) return NULL; /* @@ error. Can't resolve or forbidden */
                    573:     
1.15      timbl     574:     p = HTAnchor_protocol(request->anchor);
1.2       timbl     575:     if (!p) return NULL;
                    576:     
1.15      timbl     577:     return (*p->saveStream)(request);
1.2       timbl     578:     
                    579: }
                    580: 
                    581: 
                    582: /*             Load a document - with logging etc
                    583: **             ----------------------------------
                    584: **
                    585: **     - Checks for documents already loaded
                    586: **     - Logs the access
                    587: **     - Allows stdin filter option
                    588: **     - Trace output and error messages
                    589: **
1.1       timbl     590: **    On Entry,
1.19      timbl     591: **        request->anchor      valid for of the document to be accessed.
                    592: **      request->childAnchor   optional anchor within doc to be selected
                    593: **
1.2       timbl     594: **        filter            if YES, treat stdin as HTML
1.1       timbl     595: **
1.15      timbl     596: **       request->anchor   is the node_anchor for the document
                    597: **       request->output_format is valid
                    598: **
1.1       timbl     599: **    On Exit,
                    600: **        returns    YES     Success in opening document
                    601: **                   NO      Failure 
                    602: **
                    603: */
                    604: 
1.19      timbl     605: PRIVATE BOOL HTLoadDocument ARGS1(HTRequest *,         request)
1.1       timbl     606: 
                    607: {
                    608:     int                status;
                    609:     HText *    text;
1.19      timbl     610:     char * full_address = HTAnchor_address((HTAnchor*)request->anchor);
                    611:     
1.49      frystyk   612:     if (TRACE) fprintf (stderr, "HTAccess.... Loading document %s\n",
                    613:                        full_address);
1.1       timbl     614: 
1.18      timbl     615:     request->using_cache = NULL;
                    616:     
1.15      timbl     617:     if (!request->output_format) request->output_format = WWW_PRESENT;
1.25      frystyk   618: 
1.31      frystyk   619:     if (!HTForceReload && (text=(HText *)HTAnchor_document(request->anchor)))
1.15      timbl     620:     {  /* Already loaded */
1.1       timbl     621:         if (TRACE) fprintf(stderr, "HTAccess: Document already in memory.\n");
1.19      timbl     622:        if (request->childAnchor) {
                    623:            HText_selectAnchor(text, request->childAnchor);
                    624:        } else {
                    625:            HText_select(text); 
                    626:        }
                    627:        free(full_address);
1.1       timbl     628:        return YES;
                    629:     }
1.17      timbl     630:     
1.34      frystyk   631:     /* Check the Cache */
                    632:     /* Caching is ONLY done if (char*) HTCacheDir is set. Henrik 09/03-94 */
1.17      timbl     633:     /* Bug: for each format, we only check whether it is ok, we
                    634:        don't check them all and chose the best */
1.38      timbl     635:     if (/* HTCacheDir && */ request->anchor->cacheItems) {
1.17      timbl     636:         HTList * list = request->anchor->cacheItems;
1.20      luotonen  637:        HTList * cur = list;
                    638:        HTCacheItem * item;
                    639: 
                    640:        while ((item = (HTCacheItem*)HTList_nextObject(cur))) {
1.18      timbl     641:            HTStream * s;
                    642:            
                    643:            request->using_cache = item;
                    644:            
1.37      luotonen  645:            s = HTStreamStack(item->format, request, NO);
1.17      timbl     646:            if (s) {            /* format was suitable */
                    647:                FILE * fp = fopen(item->filename, "r");
1.18      timbl     648:                if (TRACE) fprintf(stderr, "Cache: HIT file %s for %s\n",
1.20      luotonen  649:                                   item->filename, 
                    650:                                   full_address);
1.17      timbl     651:                if (fp) {
                    652:                    HTFileCopy(fp, s);
1.24      timbl     653:                    (*s->isa->free)(s); /* close up pipeline */
1.17      timbl     654:                    fclose(fp);
1.19      timbl     655:                    free(full_address);
1.17      timbl     656:                    return YES;
                    657:                } else {
                    658:                    fprintf(stderr, "***** Can't read cache file %s !\n",
1.20      luotonen  659:                            item->filename);
1.17      timbl     660:                } /* file open ok */
                    661:            } /* stream ok */
                    662:        } /* next cache item */
                    663:     } /* if cache available for this anchor */
1.1       timbl     664:     
1.35      luotonen  665:     status = HTLoad(request);
1.2       timbl     666: 
                    667:     
1.1       timbl     668: /*     Log the access if necessary
                    669: */
1.42      frystyk   670:     if (HTlogfile) {
1.1       timbl     671:        time_t theTime;
                    672:        time(&theTime);
1.42      frystyk   673:        fprintf(HTlogfile, "%24.24s %s %s %s\n",
1.1       timbl     674:            ctime(&theTime),
                    675:            HTClientHost ? HTClientHost : "local",
                    676:            status<0 ? "FAIL" : "GET",
                    677:            full_address);
1.42      frystyk   678:        fflush(HTlogfile);      /* Actually update it on disk */
1.1       timbl     679:        if (TRACE) fprintf(stderr, "Log: %24.24s %s %s %s\n",
                    680:            ctime(&theTime),
                    681:            HTClientHost ? HTClientHost : "local",
                    682:            status<0 ? "FAIL" : "GET",
                    683:            full_address);
                    684:     }
                    685: 
                    686:     if (status == HT_LOADED) {
                    687:        if (TRACE) {
                    688:            fprintf(stderr, "HTAccess: `%s' has been accessed.\n",
                    689:            full_address);
                    690:        }
1.19      timbl     691:        free(full_address);
1.1       timbl     692:        return YES;
                    693:     }
                    694:     
                    695:     if (status == HT_NO_DATA) {
                    696:        if (TRACE) {
                    697:            fprintf(stderr, 
                    698:            "HTAccess: `%s' has been accessed, No data left.\n",
                    699:            full_address);
                    700:        }
1.19      timbl     701:        free(full_address);
1.1       timbl     702:        return NO;
                    703:     }
                    704:     
1.34      frystyk   705:     /* Bug fix thanks to Lou Montulli. Henrik 10/03-94 */
                    706:     if (status<=0) {                 /* Failure in accessing a document */
1.1       timbl     707: #ifdef CURSES
                    708:         user_message("Can't access `%s'", full_address);
                    709: #else
1.5       timbl     710:        if (TRACE) fprintf(stderr, 
                    711:                "HTAccess: Can't access `%s'\n", full_address);
1.1       timbl     712: #endif
1.32      frystyk   713:        /* This is done in the specific load procedures... Henrik 07/03-94 */
1.48      luotonen  714:        if (request->error_stack)
                    715:            HTErrorMsg(request);
1.19      timbl     716:        free(full_address);
1.1       timbl     717:        return NO;
                    718:     }
1.9       timbl     719:  
                    720:     /* If you get this, then please find which routine is returning
                    721:        a positive unrecognised error code! */
1.1       timbl     722:     fprintf(stderr,
1.50      frystyk   723:     "**** HTAccess: Internal software error in CERN WWWLib version %s ****\n\nPlease mail www-bug@info.cern.ch quoting what software and what version you are using\nand the URL: %s that caused the problem, thanks!\n",
                    724:            HTLibraryVersion,
                    725:            full_address);
1.19      timbl     726:     free(full_address);
                    727:    
1.1       timbl     728:     exit(-6996);
1.20      luotonen  729:     return NO; /* For gcc :-( */
1.2       timbl     730: } /* HTLoadDocument */
1.1       timbl     731: 
                    732: 
                    733: 
                    734: /*             Load a document from absolute name
                    735: **             ---------------
                    736: **
                    737: **    On Entry,
                    738: **        addr     The absolute address of the document to be accessed.
                    739: **        filter   if YES, treat document as HTML
                    740: **
                    741: **    On Exit,
                    742: **        returns    YES     Success in opening document
                    743: **                   NO      Failure 
                    744: **
                    745: **
                    746: */
                    747: 
1.15      timbl     748: PUBLIC BOOL HTLoadAbsolute ARGS2(CONST char *,addr, HTRequest*, request)
1.2       timbl     749: {
1.19      timbl     750:    HTAnchor * anchor = HTAnchor_findAddress(addr);
                    751:    request->anchor = HTAnchor_parent(anchor);
                    752:    request->childAnchor = ((HTAnchor*)request->anchor == anchor) ?
                    753:                        NULL : (HTChildAnchor*) anchor;
                    754:    return HTLoadDocument(request);
1.2       timbl     755: }
                    756: 
                    757: 
                    758: /*             Load a document from absolute name to stream
                    759: **             --------------------------------------------
                    760: **
                    761: **    On Entry,
                    762: **        addr     The absolute address of the document to be accessed.
1.15      timbl     763: **        request->output_stream     if non-NULL, send data down this stream
1.2       timbl     764: **
                    765: **    On Exit,
                    766: **        returns    YES     Success in opening document
                    767: **                   NO      Failure 
                    768: **
                    769: **
                    770: */
                    771: 
                    772: PUBLIC BOOL HTLoadToStream ARGS3(
                    773:                CONST char *,   addr,
                    774:                BOOL,           filter,
1.15      timbl     775:                HTRequest*,     request)
1.1       timbl     776: {
1.19      timbl     777:    HTAnchor * anchor = HTAnchor_findAddress(addr);
                    778:    request->anchor = HTAnchor_parent(anchor);
                    779:    request->childAnchor = ((HTAnchor*)request->anchor == anchor) ? NULL :
                    780:        (HTChildAnchor*) anchor;
1.15      timbl     781:     request->output_stream = request->output_stream;
1.19      timbl     782:     return HTLoadDocument(request);
1.1       timbl     783: }
                    784: 
                    785: 
1.2       timbl     786: 
                    787: 
1.1       timbl     788: /*             Load a document from relative name
                    789: **             ---------------
                    790: **
                    791: **    On Entry,
1.2       timbl     792: **        relative_name     The relative address of the document
                    793: **                         to be accessed.
1.1       timbl     794: **
                    795: **    On Exit,
                    796: **        returns    YES     Success in opening document
                    797: **                   NO      Failure 
                    798: **
                    799: **
                    800: */
                    801: 
1.15      timbl     802: PUBLIC BOOL HTLoadRelative ARGS3(
1.2       timbl     803:                CONST char *,           relative_name,
1.15      timbl     804:                HTParentAnchor *,       here,
1.20      luotonen  805:                HTRequest *,            request)
1.1       timbl     806: {
                    807:     char *             full_address = 0;
                    808:     BOOL                       result;
                    809:     char *             mycopy = 0;
                    810:     char *             stripped = 0;
                    811:     char *             current_address =
1.2       timbl     812:                                HTAnchor_address((HTAnchor*)here);
1.1       timbl     813: 
                    814:     StrAllocCopy(mycopy, relative_name);
                    815: 
                    816:     stripped = HTStrip(mycopy);
                    817:     full_address = HTParse(stripped,
                    818:                   current_address,
                    819:                   PARSE_ACCESS|PARSE_HOST|PARSE_PATH|PARSE_PUNCTUATION);
1.15      timbl     820:     result = HTLoadAbsolute(full_address, request);
1.1       timbl     821:     free(full_address);
                    822:     free(current_address);
                    823:     free(mycopy);  /* Memory leak fixed 10/7/92 -- JFG */
                    824:     return result;
                    825: }
                    826: 
                    827: 
                    828: /*             Load if necessary, and select an anchor
                    829: **             --------------------------------------
                    830: **
                    831: **    On Entry,
                    832: **        destination              The child or parenet anchor to be loaded.
                    833: **
                    834: **    On Exit,
                    835: **        returns    YES     Success
                    836: **                   NO      Failure 
                    837: **
                    838: */
                    839: 
1.15      timbl     840: PUBLIC BOOL HTLoadAnchor ARGS2(HTAnchor*, anchor, HTRequest *, request)
1.1       timbl     841: {
1.15      timbl     842:     if (!anchor) return NO;    /* No link */
1.1       timbl     843:     
1.15      timbl     844:     request->anchor  = HTAnchor_parent(anchor);
1.19      timbl     845:     request->childAnchor = ((HTAnchor*)request->anchor == anchor) ? NULL
                    846:                                        : (HTChildAnchor*) anchor;
1.1       timbl     847:     
1.19      timbl     848:     return HTLoadDocument(request) ? YES : NO;
1.1       timbl     849:        
                    850: } /* HTLoadAnchor */
                    851: 
                    852: 
                    853: /*             Search
                    854: **             ------
                    855: **  Performs a keyword search on word given by the user. Adds the keyword to 
                    856: **  the end of the current address and attempts to open the new address.
                    857: **
                    858: **  On Entry,
                    859: **       *keywords     space-separated keyword list or similar search list
1.2       timbl     860: **     here            is anchor search is to be done on.
1.1       timbl     861: */
                    862: 
1.2       timbl     863: PRIVATE char hex(i)
                    864:     int i;
                    865: {
1.13      timbl     866:     char * hexchars = "0123456789ABCDEF";
                    867:     return hexchars[i];
1.2       timbl     868: }
1.1       timbl     869: 
1.15      timbl     870: PUBLIC BOOL HTSearch ARGS3(
1.2       timbl     871:        CONST char *,           keywords,
1.15      timbl     872:        HTParentAnchor *,       here,
                    873:        HTRequest *,            request)
1.1       timbl     874: {
1.2       timbl     875: 
                    876: #define acceptable \
                    877: "1234567890abcdefghijlkmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ.-_"
                    878: 
                    879:     char *q, *u;
                    880:     CONST char * p, *s, *e;            /* Pointers into keywords */
                    881:     char * address = HTAnchor_address((HTAnchor*)here);
1.1       timbl     882:     BOOL result;
1.2       timbl     883:     char * escaped = malloc(strlen(keywords)*3+1);
                    884: 
1.29      frystyk   885:     /* static CONST BOOL isAcceptable[96] = */
                    886:     /* static AND const is not good for a gnu compiler! Frystyk 25/02-94 */
1.30      luotonen  887:     static BOOL isAcceptable[96] =
1.2       timbl     888:     /*   0 1 2 3 4 5 6 7 8 9 A B C D E F */
                    889:     {    0,0,0,0,0,0,0,0,0,0,1,0,0,1,1,0,      /* 2x   !"#$%&'()*+,-./  */
                    890:          1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,      /* 3x  0123456789:;<=>?  */
                    891:         1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,       /* 4x  @ABCDEFGHIJKLMNO  */
                    892:         1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,1,       /* 5X  PQRSTUVWXYZ[\]^_  */
                    893:         0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,       /* 6x  `abcdefghijklmno  */
                    894:         1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0 };     /* 7X  pqrstuvwxyz{\}~  DEL */
                    895: 
                    896:     if (escaped == NULL) outofmem(__FILE__, "HTSearch");
                    897:     
1.29      frystyk   898: /* Convert spaces to + and hex escape unacceptable characters */
1.2       timbl     899: 
1.29      frystyk   900:     for(s=keywords; *s && WHITE(*s); s++); /*scan */    /* Skip white space */
                    901:     for(e = s + strlen(s); e>s && WHITE(*(e-1)) ; e--);     /* Skip trailers */
                    902:     for(q=escaped, p=s; p<e; p++) {                  /* scan stripped field */
1.2       timbl     903:         int c = (int)TOASCII(*p);
                    904:         if (WHITE(*p)) {
                    905:            *q++ = '+';
1.29      frystyk   906:        } else if (c>=32 && c<=127 && isAcceptable[c-32] != 0) {
1.13      timbl     907:            *q++ = *p;                  /* 930706 TBL for MVS bug */
1.2       timbl     908:        } else {
                    909:            *q++ = '%';
                    910:            *q++ = hex(c / 16);
                    911:            *q++ = hex(c % 16);
                    912:        }
                    913:     } /* Loop over string */
1.1       timbl     914:     
1.2       timbl     915:     *q=0;
                    916:                                /* terminate escaped sctring */
                    917:     u=strchr(address, '?');            /* Find old search string */
                    918:     if (u) *u = 0;                             /* Chop old search off */
1.1       timbl     919: 
                    920:     StrAllocCat(address, "?");
1.2       timbl     921:     StrAllocCat(address, escaped);
                    922:     free(escaped);
1.15      timbl     923:     result = HTLoadRelative(address, here, request);
1.1       timbl     924:     free(address);
1.2       timbl     925:     
1.1       timbl     926:     return result;
1.2       timbl     927: }
                    928: 
                    929: 
                    930: /*             Search Given Indexname
                    931: **             ------
                    932: **  Performs a keyword search on word given by the user. Adds the keyword to 
                    933: **  the end of the current address and attempts to open the new address.
                    934: **
                    935: **  On Entry,
                    936: **       *keywords     space-separated keyword list or similar search list
                    937: **     *addres         is name of object search is to be done on.
                    938: */
                    939: 
1.15      timbl     940: PUBLIC BOOL HTSearchAbsolute ARGS3(
1.2       timbl     941:        CONST char *,   keywords,
1.15      timbl     942:        CONST char *,   indexname,
                    943:        HTRequest *,    request)
1.2       timbl     944: {
                    945:     HTParentAnchor * anchor =
                    946:        (HTParentAnchor*) HTAnchor_findAddress(indexname);
1.15      timbl     947:     return HTSearch(keywords, anchor, request);
1.2       timbl     948: }
                    949: 
                    950: 
                    951: /*             Generate the anchor for the home page
                    952: **             -------------------------------------
                    953: **
                    954: **     As it involves file access, this should only be done once
                    955: **     when the program first runs.
1.10      timbl     956: **     This is a default algorithm -- browser don't HAVE to use this.
                    957: **     But consistency betwen browsers is STRONGLY recommended!
1.2       timbl     958: **
1.10      timbl     959: **     Priority order is:
                    960: **
                    961: **             1       WWW_HOME environment variable (logical name, etc)
                    962: **             2       ~/WWW/default.html
                    963: **             3       /usr/local/bin/default.html
                    964: **             4       http://info.cern.ch/default.html
                    965: **
1.2       timbl     966: */
                    967: PUBLIC HTParentAnchor * HTHomeAnchor NOARGS
                    968: {
1.12      timbl     969:     char * my_home_document = NULL;
                    970:     char * home = (char *)getenv(LOGICAL_DEFAULT);
1.2       timbl     971:     char * ref;
                    972:     HTParentAnchor * anchor;
1.1       timbl     973:     
1.12      timbl     974:     if (home) {
                    975:         StrAllocCopy(my_home_document, home);
                    976:     
                    977: /*     Someone telnets in, they get a special home.
                    978: */
                    979: #define MAX_FILE_NAME 1024                                     /* @@@ */
                    980:     } else  if (HTClientHost) {                        /* Telnet server */
                    981:        FILE * fp = fopen(REMOTE_POINTER, "r");
                    982:        char * status;
                    983:        if (fp) {
                    984:            my_home_document = (char*) malloc(MAX_FILE_NAME);
                    985:            status = fgets(my_home_document, MAX_FILE_NAME, fp);
                    986:            if (!status) {
                    987:                free(my_home_document);
                    988:                my_home_document = NULL;
                    989:            }
                    990:            fclose(fp);
                    991:        }
                    992:        if (!my_home_document) StrAllocCopy(my_home_document, REMOTE_ADDRESS);
                    993:     }
                    994: 
                    995:     
                    996: 
1.2       timbl     997: #ifdef unix
1.12      timbl     998: 
1.10      timbl     999:     if (!my_home_document) {
                   1000:        FILE * fp = NULL;
                   1001:        CONST char * home =  (CONST char*)getenv("HOME");
                   1002:        if (home) { 
                   1003:            my_home_document = (char *)malloc(
                   1004:                strlen(home)+1+ strlen(PERSONAL_DEFAULT)+1);
                   1005:            if (my_home_document == NULL) outofmem(__FILE__, "HTLocalName");
                   1006:            sprintf(my_home_document, "%s/%s", home, PERSONAL_DEFAULT);
                   1007:            fp = fopen(my_home_document, "r");
                   1008:        }
                   1009:        
                   1010:        if (!fp) {
                   1011:            StrAllocCopy(my_home_document, LOCAL_DEFAULT_FILE);
                   1012:            fp = fopen(my_home_document, "r");
                   1013:        }
1.2       timbl    1014:        if (fp) {
                   1015:            fclose(fp);
                   1016:        } else {
                   1017:        if (TRACE) fprintf(stderr,
1.10      timbl    1018:            "HTBrowse: No local home document ~/%s or %s\n",
                   1019:            PERSONAL_DEFAULT, LOCAL_DEFAULT_FILE);
1.11      timbl    1020:            free(my_home_document);
                   1021:            my_home_document = NULL;
1.2       timbl    1022:        }
                   1023:     }
                   1024: #endif
1.10      timbl    1025:     ref = HTParse( my_home_document ?  my_home_document :
                   1026:                                HTClientHost ? REMOTE_ADDRESS
                   1027:                                : LAST_RESORT,
                   1028:                    "file:",
1.2       timbl    1029:                    PARSE_ACCESS|PARSE_HOST|PARSE_PATH|PARSE_PUNCTUATION);
1.10      timbl    1030:     if (my_home_document) {
1.2       timbl    1031:        if (TRACE) fprintf(stderr,
                   1032:            "HTAccess: Using custom home page %s i.e. address %s\n",
1.10      timbl    1033:            my_home_document, ref);
                   1034:        free(my_home_document);
1.2       timbl    1035:     }
                   1036:     anchor = (HTParentAnchor*) HTAnchor_findAddress(ref);
                   1037:     free(ref);
                   1038:     return anchor;
1.1       timbl    1039: }
1.26      frystyk  1040: 
                   1041: 
                   1042: /*             Bind an Anchor to the request structure
                   1043: **             ---------------------------------------
                   1044: **
                   1045: **    On Entry,
                   1046: **     anchor          The child or parenet anchor to be binded
                   1047: **     request         The request sturcture
                   1048: **    On Exit,
                   1049: **        returns    YES     Success
                   1050: **                   NO      Failure 
                   1051: **
                   1052: **  Note: Actually the same as HTLoadAnchor() but DOES NOT do the loading
                   1053: **                                             Henrik Frystyk 17/02-94
                   1054: */
                   1055: 
                   1056: PUBLIC BOOL HTBindAnchor ARGS2(HTAnchor*, anchor, HTRequest *, request)
                   1057: {
                   1058:     if (!anchor) return NO;    /* No link */
                   1059:     
                   1060:     request->anchor  = HTAnchor_parent(anchor);
                   1061:     request->childAnchor = ((HTAnchor*)request->anchor == anchor) ? NULL
                   1062:                                        : (HTChildAnchor*) anchor;
                   1063:        
1.29      frystyk  1064:     return YES;
1.26      frystyk  1065: } /* HTBindAnchor */
                   1066: 

Webmaster