Annotation of libwww/Library/src/HTAccess.c, revision 1.73

1.61      frystyk     1: /*                                                                  HTAccess.c
                      2: **     ACCESS MANAGER
                      3: **
                      4: **     (c) COPYRIGHT CERN 1994.
                      5: **     Please first read the full copyright statement in the file COPYRIGH.
1.1       timbl       6: **
                      7: ** Authors
                      8: **     TBL     Tim Berners-Lee timbl@info.cern.ch
1.4       timbl       9: **     JFG     Jean-Francois Groff jfg@dxcern.cern.ch
1.1       timbl      10: **     DD      Denis DeLaRoca (310) 825-4580  <CSP1DWD@mvs.oac.ucla.edu>
                     11: ** History
                     12: **       8 Jun 92 Telnet hopping prohibited as telnet is not secure TBL
                     13: **     26 Jun 92 When over DECnet, suppressed FTP, Gopher and News. JFG
1.42      frystyk    14: **      6 Oct 92 Moved HTClientHost and HTlogfile into here. TBL
1.1       timbl      15: **     17 Dec 92 Tn3270 added, bug fix. DD
1.2       timbl      16: **      4 Feb 93 Access registration, Search escapes bad chars TBL
1.9       timbl      17: **               PARAMETERS TO HTSEARCH AND HTLOADRELATIVE CHANGED
                     18: **     28 May 93 WAIS gateway explicit if no WAIS library linked in.
1.19      timbl      19: **        Dec 93 Bug change around, more reentrant, etc
1.42      frystyk    20: **     09 May 94 logfile renamed to HTlogfile to avoid clash with WAIS
1.53      duns       21: **      8 Jul 94 Insulate free() from _free structure element.
1.2       timbl      22: ** Bugs
                     23: **     This module assumes that the graphic object is hypertext, as it
1.9       timbl      24: **     needs to select it when it has been loaded.  A superclass needs to be
1.2       timbl      25: **     defined which accepts select and select_anchor.
1.1       timbl      26: */
                     27: 
/* Default gateway URL for WAIS requests. It is only defined when WAIS is
** not accessed directly (HT_DIRECT_WAIS) and the builder has not already
** supplied a value of its own. */
#ifndef HT_DIRECT_WAIS
#ifndef HT_DEFAULT_WAIS_GATEWAY
#define HT_DEFAULT_WAIS_GATEWAY "http://www.w3.org:8001/"
#endif /* HT_DEFAULT_WAIS_GATEWAY */
#endif /* HT_DIRECT_WAIS */
1.8       timbl      31: 
1.67      frystyk    32: /* Library include files */
                     33: #include "tcp.h"
                     34: #include "HTUtils.h"
1.1       timbl      35: #include "HTParse.h"
1.4       timbl      36: #include "HTML.h"              /* SCW */
1.2       timbl      37: #include "HTList.h"
                     38: #include "HText.h"     /* See bugs above */
                     39: #include "HTAlert.h"
1.67      frystyk    40: #include "HTFWrite.h"  /* for cache stuff */
1.70      frystyk    41: #include "HTLog.h"
1.17      timbl      42: #include "HTTee.h"
1.46      frystyk    43: #include "HTError.h"
1.67      frystyk    44: #include "HTString.h"
1.57      howcome    45: #include "HTTCP.h"      /* HWL: for HTFindRelatedName */
1.59      frystyk    46: #include "HTThread.h"
1.63      frystyk    47: #include "HTEvent.h"
1.73    ! frystyk    48: #include "HTBind.h"
1.70      frystyk    49: #include "HTInit.h"
1.67      frystyk    50: #ifndef NO_RULES
                     51: #include "HTRules.h"
                     52: #endif
                     53: #include "HTAccess.h"                                   /* Implemented here */
1.2       timbl      54: 
1.54      frystyk    55: /* These flags may be set to modify the operation of this module */
1.73    ! frystyk    56: PUBLIC int  HTMaxRedirections = 10;           /* Max number of redirections */
        !            57: 
1.54      frystyk    58: PUBLIC char * HTClientHost = 0;                 /* Name of remote login host if any */
1.70      frystyk    59: PUBLIC BOOL HTSecure = NO;              /* Disable access for telnet users? */
1.41      luotonen   60: 
1.43      luotonen   61: PUBLIC char * HTImServer = NULL;/* cern_httpd sets this to the translated URL*/
1.70      frystyk    62: PUBLIC BOOL HTImProxy = NO;                       /* cern_httpd as a proxy? */
1.1       timbl      63: 
1.70      frystyk    64: PRIVATE HTList * protocols = NULL;           /* List of registered protocols */
1.43      luotonen   65: 
1.63      frystyk    66: /* Superclass defn */
1.24      timbl      67: struct _HTStream {
                     68:        HTStreamClass * isa;
                     69:        /* ... */
                     70: };
                     71: 
1.59      frystyk    72: /* --------------------------------------------------------------------------*/
                     73: /*                     Management of the HTRequest structure                */
                     74: /* --------------------------------------------------------------------------*/
                     75: 
1.15      timbl      76: /*     Create  a request structure
                     77: **     ---------------------------
                     78: */
                     79: PUBLIC HTRequest * HTRequest_new NOARGS
                     80: {
1.28      luotonen   81:     HTRequest * me = (HTRequest*) calloc(1, sizeof(*me));  /* zero fill */
1.15      timbl      82:     if (!me) outofmem(__FILE__, "HTRequest_new()");
                     83:     
1.70      frystyk    84:     me->conversions    = HTList_new();     /* No conversions registered yet */
                     85:     me->output_format  = WWW_PRESENT;      /* default it to present to user */
1.72      frystyk    86:     me->error_format   = WWW_HTML;      /* default format of error messages */
1.70      frystyk    87:     me->HeaderMask     = DEFAULT_HEADERS;             /* Send these headers */
                     88:     me->EntityMask     = DEFAULT_ENTITY_HEADERS;              /* Also these */
1.15      timbl      89:     return me;
                     90: }
                     91: 
                     92: 
1.49      frystyk    93: /*     Clear  a request structure
                     94: **     ---------------------------
                     95: **     This function clears the reguest structure so that only the
                     96: **     conversions remain. Everything else is as if it was created from
                     97: **     scratch.
                     98: */
                     99: PUBLIC void HTRequest_clear ARGS1(HTRequest *, req)
                    100: {
                    101:     HTList *conversions;
                    102:     if (!req) {
                    103:        if (TRACE)
1.67      frystyk   104:            fprintf(TDEST, "Clear....... request: Bad argument!\n");
1.49      frystyk   105:        return;
                    106:     }
                    107:     conversions = req->conversions;                 /* Save the conversions */
                    108:     HTErrorFree(req);
                    109:     HTAACleanup(req);
                    110:     memset(req, '\0', sizeof(HTRequest));
                    111: 
                    112:     /* Now initialize as from scratch but with the old list of conversions */
                    113:     req->conversions = conversions;
                    114:     req->output_format = WWW_PRESENT;      /* default it to present to user */
                    115: }
                    116: 
                    117: 
1.20      luotonen  118: /*     Delete a request structure
                    119: **     --------------------------
                    120: */
                    121: PUBLIC void HTRequest_delete ARGS1(HTRequest *, req)
                    122: {
                    123:     if (req) {
1.59      frystyk   124:        FREE(req->redirect);
                    125:        FREE(req->authenticate);
                    126:        HTFormatDelete(req);
1.46      frystyk   127:        HTErrorFree(req);
1.34      frystyk   128:        HTAACleanup(req);
1.61      frystyk   129: 
                    130:        /* These are temporary until we get a MIME thingy */
                    131:        FREE(req->redirect);
                    132:        FREE(req->WWWAAScheme);
                    133:        FREE(req->WWWAARealm);
                    134:        FREE(req->WWWprotection);
                    135: 
1.34      frystyk   136:        FREE(req);
1.20      luotonen  137:     }
                    138: }
                    139: 
1.59      frystyk   140: /* --------------------------------------------------------------------------*/
                    141: /*                     Management of HTTP Methods                           */
                    142: /* --------------------------------------------------------------------------*/
1.20      luotonen  143: 
/* Method names, looked up by fixed index in HTMethod_enum() and
** HTMethod_name(). Slot 0 is the fallback name for an unknown method and
** the list is NULL terminated. */
static char *method_names[] = {
    "INVALID-METHOD",           /* 0 */
    "GET",                      /* 1 */
    "HEAD",                     /* 2 */
    "POST",                     /* 3 */
    "PUT",                      /* 4 */
    "DELETE",                   /* 5 */
    "LINK",                     /* 6 */
    "UNLINK",                   /* 7 */
    NULL
};
                    156: 
                    157: /*     Get method enum value
                    158: **     ---------------------
                    159: */
1.70      frystyk   160: PUBLIC HTMethod HTMethod_enum ARGS1(CONST char *, name)
1.22      luotonen  161: {
                    162:     if (name) {
1.70      frystyk   163:        if (!strcmp(name, *(method_names+1)))
                    164:            return METHOD_GET;
                    165:        else if (!strcmp(name, *(method_names+2)))
                    166:            return METHOD_HEAD;
                    167:        else if (!strcmp(name, *(method_names+3)))
                    168:            return METHOD_POST;
                    169:        else if (!strcmp(name, *(method_names+4)))
                    170:            return METHOD_PUT;
                    171:        else if (!strcmp(name, *(method_names+5)))
                    172:            return METHOD_DELETE;
                    173:        else if (!strcmp(name, *(method_names+6)))
                    174:            return METHOD_LINK;
                    175:        else if (!strcmp(name, *(method_names+7)))
                    176:            return METHOD_UNLINK;
1.22      luotonen  177:     }
                    178:     return METHOD_INVALID;
                    179: }
                    180: 
                    181: 
                    182: /*     Get method name
                    183: **     ---------------
1.70      frystyk   184: **     Returns pointer to entry in static table in memory
1.22      luotonen  185: */
1.70      frystyk   186: PUBLIC CONST char * HTMethod_name ARGS1(HTMethod, method)
1.22      luotonen  187: {
1.70      frystyk   188:     if (method & METHOD_GET)
                    189:        return *(method_names+1);
                    190:     else if (method == METHOD_HEAD)
                    191:        return *(method_names+2);
                    192:     else if (method == METHOD_POST)
                    193:        return *(method_names+3);
                    194:     else if (method == METHOD_PUT)
                    195:        return *(method_names+4);
                    196:     else if (method == METHOD_DELETE)
                    197:        return *(method_names+5);
                    198:     else if (method == METHOD_LINK)
                    199:        return *(method_names+6);
                    200:     else if (method == METHOD_UNLINK)
                    201:        return *(method_names+7);
                    202:     else
                    203:        return *method_names;
                    204: #if 0
                    205:     if ((int)METHOD_INVALID  && (int)method < (int)MAX_METHODS)
1.22      luotonen  206:        return method_names[(int)method];
                    207:     else
                    208:        return method_names[(int)METHOD_INVALID];
1.70      frystyk   209: #endif
1.22      luotonen  210: }
                    211: 
                    212: 
#if 0
/* NOT NEEDED AS METHODS IS NOT A BIT-FLAG */
/*     Is method in a list of method names?
**     -----------------------------------
**     Disabled code, kept for reference. Walks the list of names and
**     returns YES on the first case insensitive match with the name of
**     the method, NO when the list is exhausted.
*/
PUBLIC BOOL HTMethod_inList ARGS2(HTMethod,    method,
                                  HTList *,     list)
{
    char *wanted = HTMethod_name(method);
    HTList *cursor = list;
    char *candidate;

    for (candidate = (char *) HTList_nextObject(cursor);
         candidate != NULL;
         candidate = (char *) HTList_nextObject(cursor)) {
        if (PROT_TRACE)
            fprintf(TDEST, " %s", candidate);
        if (strcasecomp(candidate, wanted) == 0)
            return YES;
    }
    return NO;                                                /* Not found */
}
#endif
1.22      luotonen  234: 
1.59      frystyk   235: /* --------------------------------------------------------------------------*/
                    236: /*                   Management of the HTProtocol structure                 */
                    237: /* --------------------------------------------------------------------------*/
1.22      luotonen  238: 
1.63      frystyk   239: /*
                    240: **     Register a Protocol as an active access method
1.1       timbl     241: */
1.56      frystyk   242: PUBLIC BOOL HTRegisterProtocol ARGS1(HTProtocol *, protocol)
1.1       timbl     243: {
                    244:     if (!protocols) protocols = HTList_new();
1.59      frystyk   245:     HTList_addObject(protocols, (void *) protocol);
1.1       timbl     246:     return YES;
                    247: }
                    248: 
1.63      frystyk   249: 
                    250: /*
                    251: **     Delete the list of registered access methods. This is called from
                    252: **     within HTLibTerminate. Written by Eric Sink, eric@spyglass.com
                    253: */
                    254: PUBLIC void HTDisposeProtocols NOARGS
                    255: {
                    256:     if (protocols) {
                    257:        HTList_delete(protocols);
                    258:        protocols = NULL;
                    259:     }
                    260: }
                    261: 
                    262: 
                    263: /*
1.65      frystyk   264: **     Is a protocol registered as BLOCKING? The default behavior registered
                    265: **     when the protocol module was registered can be overridden by the
                    266: **     BlockingIO field in the HTRequest structure
1.63      frystyk   267: */
1.59      frystyk   268: PUBLIC BOOL HTProtocolBlocking ARGS1(HTRequest *, me)
                    269: {
1.65      frystyk   270:     if (me) {
                    271:        return (me->BlockingIO || (me->anchor && me->anchor->protocol &&
                    272:                ((HTProtocol *) (me->anchor->protocol))->block == SOC_BLOCK));
                    273:     }
                    274:     return NO;
1.59      frystyk   275: }
                    276: 
1.61      frystyk   277: /* --------------------------------------------------------------------------*/
                    278: /*                Initialization and Termination of the Library             */
                    279: /* --------------------------------------------------------------------------*/
                    280: 
                    281: /*                                                                  HTLibInit
                    282: **
                    283: **     This function initiates the Library and it MUST be called when
                    284: **     starting up an application. See also HTLibTerminate()
                    285: */
                    286: PUBLIC BOOL HTLibInit NOARGS
                    287: {
1.67      frystyk   288: #ifdef NO_STDIO                                                  /* Open trace file */
                    289:     if ((TDEST = fopen(TRACE_FILE, "a")) != NULL) {
                    290:        if (setvbuf(TDEST, NULL, _IOLBF, 0) < 0) {  /* Change to line buffer */
1.70      frystyk   291:            printf("WWWLibInit.. Can't initialize TRACE buffer - no TRACE\n");
1.67      frystyk   292:            fclose(TDEST);
                    293:            TDEST = NULL;
                    294:            WWW_TraceFlag = 0;
                    295:        }
                    296:     } else
                    297:        WWW_TraceFlag = 0;
                    298: #endif
                    299: 
1.61      frystyk   300:     if (TRACE)
1.67      frystyk   301:        fprintf(TDEST, "WWWLibInit.. INITIALIZING LIBRARY OF COMMON CODE\n");
1.63      frystyk   302: 
1.73    ! frystyk   303:     /* Put up a global conversion list, but leave initialization
        !           304:        to the application */
        !           305:     HTBind_init();
        !           306:     if (!HTConversions)
        !           307:        HTConversions = HTList_new();
        !           308: 
        !           309:     /* Initialize the bindings between (access method, protocol module),
        !           310:        (file extension, media type)? */
1.70      frystyk   311: #ifndef HT_NO_INIT
                    312:     HTAccessInit();             /* Bind access schemes and protocol modules */
                    313:     HTFileInit();                   /* Bind file extensions and media types */
1.63      frystyk   314: #endif
1.61      frystyk   315: 
1.62      frystyk   316: #ifdef WWWLIB_SIG
1.61      frystyk   317:     /* On Solaris (and others?) we get a BROKEN PIPE signal when connecting
1.67      frystyk   318:     ** to a port where we should get `connection refused'. We ignore this 
1.61      frystyk   319:     ** using the following function call
                    320:     */
                    321:     HTSetSignal();                                /* Set signals in library */
1.1       timbl     322: #endif
                    323: 
1.67      frystyk   324: #ifdef _WINDOWS
                    325:     /*
                    326:     ** Initialise WinSock DLL. This must also be shut down! PMH
                    327:     */
                    328:     {
                    329:         WSADATA            wsadata;
                    330:        if (WSAStartup(DESIRED_WINSOCK_VERSION, &wsadata)) {
                    331:            if (TRACE)
                    332:                fprintf(TDEST, "WWWLibInit.. Can't initialize WinSoc\n");
                    333:             WSACleanup();
                    334:             return NO;
                    335:         }
                    336:         if (wsadata.wVersion < MINIMUM_WINSOCK_VERSION) {
                    337:             if (TRACE)
                    338:                fprintf(TDEST, "WWWLibInit.. Bad version of WinSoc\n");
                    339:             WSACleanup();
                    340:             return NO;
                    341:         }
                    342:     }
                    343: #endif /* _WINDOWS */
                    344: 
1.71      frystyk   345: #ifndef NO_TIMEGM
                    346:     HTGetTimeZoneOffset();        /* Find offset from GMT if using mktime() */
                    347: #endif
1.70      frystyk   348:     HTTmp_setRoot(NULL);                    /* Set up default tmp directory */
1.61      frystyk   349:     HTThreadInit();                                /* Initialize bit arrays */
                    350:     return YES;
                    351: }
                    352: 
                    353: 
                    354: /*                                                              HTLibTerminate
                    355: **
                    356: **     This function frees memory kept by the Library and should be called
1.63      frystyk   357: **     before exit of an application (if you are on a PC platform)
1.61      frystyk   358: */
                    359: PUBLIC BOOL HTLibTerminate NOARGS
                    360: {
                    361:     if (TRACE)
1.67      frystyk   362:        fprintf(TDEST, "WWWLibTerm.. Cleaning up LIBRARY OF COMMON CODE\n");
1.63      frystyk   363:     HTAtom_deleteAll();
                    364:     HTDisposeConversions();
                    365:     HTTCPCacheRemoveAll();
1.73    ! frystyk   366: 
        !           367: #ifndef HT_NO_INIT
        !           368:     HTDisposeProtocols();    /* Remove bindings between access and protocols */
        !           369:     HTBind_deleteAll();            /* Remove bindings between suffixes, media types */
        !           370: #endif
        !           371: 
1.63      frystyk   372:     HTFreeHostName();
                    373:     HTFreeMailAddress();
1.70      frystyk   374:     HTCache_freeRoot();
                    375:     HTTmp_freeRoot();
1.67      frystyk   376: 
                    377: #ifdef _WINDOWS
                    378:     WSACleanup();
                    379: #endif
                    380: 
                    381: #ifdef NO_STDIO                                                 /* Close trace file */
                    382:     if (TDEST) {
                    383:        fclose(TDEST);
                    384:        TDEST = NULL;
                    385:        WWW_TraceFlag = 0;
                    386:     }
                    387: #endif
1.61      frystyk   388:     return YES;
                    389: }
                    390: 
1.59      frystyk   391: /* --------------------------------------------------------------------------*/
                    392: /*                     Physical Anchor Address Manager                      */
                    393: /* --------------------------------------------------------------------------*/
1.33      luotonen  394: 
                    395: /*                                                     override_proxy()
                    396: **
                    397: **     Check the no_proxy environment variable to get the list
                    398: **     of hosts for which proxy server is not consulted.
                    399: **
                    400: **     no_proxy is a comma- or space-separated list of machine
                    401: **     or domain names, with optional :port part.  If no :port
                    402: **     part is present, it applies to all ports on that domain.
                    403: **
                    404: **     Example:
                    405: **             no_proxy="cern.ch,some.domain:8001"
                    406: **
                    407: */
                    408: PRIVATE BOOL override_proxy ARGS1(CONST char *, addr)
                    409: {
                    410:     CONST char * no_proxy = getenv("no_proxy");
                    411:     char * p = NULL;
                    412:     char * host = NULL;
                    413:     int port = 0;
                    414:     int h_len = 0;
                    415: 
                    416:     if (!no_proxy || !addr || !(host = HTParse(addr, "", PARSE_HOST)))
                    417:        return NO;
                    418:     if (!*host) { free(host); return NO; }
                    419: 
1.34      frystyk   420:     if ((p = strchr(host, ':')) != NULL) {     /* Port specified */
1.33      luotonen  421:        *p++ = 0;                       /* Chop off port */
                    422:        port = atoi(p);
                    423:     }
                    424:     else {                             /* Use default port */
                    425:        char * access = HTParse(addr, "", PARSE_ACCESS);
                    426:        if (access) {
                    427:            if      (!strcmp(access,"http"))    port = 80;
                    428:            else if (!strcmp(access,"gopher"))  port = 70;
                    429:            else if (!strcmp(access,"ftp"))     port = 21;
                    430:            free(access);
                    431:        }
                    432:     }
                    433:     if (!port) port = 80;              /* Default */
                    434:     h_len = strlen(host);
                    435: 
                    436:     while (*no_proxy) {
                    437:        CONST char * end;
                    438:        CONST char * colon = NULL;
                    439:        int templ_port = 0;
                    440:        int t_len;
                    441: 
                    442:        while (*no_proxy && (WHITE(*no_proxy) || *no_proxy==','))
                    443:            no_proxy++;                 /* Skip whitespace and separators */
                    444: 
                    445:        end = no_proxy;
                    446:        while (*end && !WHITE(*end) && *end != ',') {   /* Find separator */
                    447:            if (*end==':') colon = end;                 /* Port number given */
                    448:            end++;
                    449:        }
                    450: 
                    451:        if (colon) {
                    452:            templ_port = atoi(colon+1);
                    453:            t_len = colon - no_proxy;
                    454:        }
                    455:        else {
                    456:            t_len = end - no_proxy;
                    457:        }
                    458: 
                    459:        if ((!templ_port || templ_port == port)  &&
                    460:            (t_len > 0  &&  t_len <= h_len  &&
                    461:             !strncmp(host + h_len - t_len, no_proxy, t_len))) {
                    462:            free(host);
                    463:            return YES;
                    464:        }
                    465:        if (*end) no_proxy = end+1;
                    466:        else break;
                    467:     }
                    468: 
                    469:     free(host);
                    470:     return NO;
                    471: }
                    472: 
                    473: 
                    474: 
1.2       timbl     475: /*             Find physical name and access protocol
                    476: **             --------------------------------------
1.1       timbl     477: **
                    478: **
                    479: ** On entry,
                    480: **     addr            must point to the fully qualified hypertext reference.
                    481: **     anchor          a parent anchor whose address is addr
                    482: **
1.59      frystyk   483: ** On exit,    
                    484: **     returns         HT_NO_ACCESS            no protocol module found
                    485: **                     HT_FORBIDDEN            Error has occurred.
1.2       timbl     486: **                     HT_OK                   Success
1.1       timbl     487: **
                    488: */
1.21      luotonen  489: PRIVATE int get_physical ARGS1(HTRequest *, req)
                    490: {    
1.1       timbl     491:     char * access=0;   /* Name of access method */
1.21      luotonen  492:     char * addr = HTAnchor_address((HTAnchor*)req->anchor);    /* free me */
1.27      luotonen  493: 
1.70      frystyk   494: #ifndef HT_NO_RULES
1.47      luotonen  495:     if (HTImServer) {  /* cern_httpd has already done its own translations */
1.45      luotonen  496:        HTAnchor_setPhysical(req->anchor, HTImServer);
1.47      luotonen  497:        StrAllocCopy(addr, HTImServer); /* Oops, queries thru many proxies */
                    498:                                        /* didn't work without this -- AL  */
                    499:     }
1.21      luotonen  500:     else {
1.27      luotonen  501:        char * physical = HTTranslate(addr);
1.21      luotonen  502:        if (!physical) {
1.47      luotonen  503:            free(addr);
1.21      luotonen  504:            return HT_FORBIDDEN;
                    505:        }
                    506:        HTAnchor_setPhysical(req->anchor, physical);
                    507:        free(physical);                 /* free our copy */
1.2       timbl     508:     }
                    509: #else
1.21      luotonen  510:     HTAnchor_setPhysical(req->anchor, addr);
1.70      frystyk   511: #endif /* HT_NO_RULES */
1.2       timbl     512: 
1.21      luotonen  513:     access =  HTParse(HTAnchor_physical(req->anchor),
1.27      luotonen  514:                      "file:", PARSE_ACCESS);
1.1       timbl     515: 
                    516: /*     Check whether gateway access has been set up for this
1.8       timbl     517: **     This function can be replaced by the rule system above.
1.1       timbl     518: */
1.70      frystyk   519: #ifndef HT_NO_PROXY
1.39      luotonen  520: 
                    521:     /* make sure the using_proxy variable is false */
1.70      frystyk   522:     req->using_proxy = NO;
1.39      luotonen  523: 
1.33      luotonen  524:     if (!override_proxy(addr)) {
1.27      luotonen  525:        char * gateway_parameter, *gateway, *proxy;
                    526: 
1.2       timbl     527:        gateway_parameter = (char *)malloc(strlen(access)+20);
                    528:        if (gateway_parameter == NULL) outofmem(__FILE__, "HTLoad");
1.27      luotonen  529: 
                    530:        /* search for proxy gateways */
1.2       timbl     531:        strcpy(gateway_parameter, "WWW_");
                    532:        strcat(gateway_parameter, access);
                    533:        strcat(gateway_parameter, "_GATEWAY");
                    534:        gateway = (char *)getenv(gateway_parameter); /* coerce for decstation */
1.27      luotonen  535: 
                    536:        /* search for proxy servers */
                    537:        strcpy(gateway_parameter, access);
                    538:        strcat(gateway_parameter, "_proxy");
                    539:        proxy = (char *)getenv(gateway_parameter);
                    540: 
1.2       timbl     541:        free(gateway_parameter);
1.27      luotonen  542: 
1.68      frystyk   543: #ifndef HT_DIRECT_WAIS
1.9       timbl     544:        if (!gateway && 0==strcmp(access, "wais")) {
1.69      frystyk   545:            gateway = HT_DEFAULT_WAIS_GATEWAY;
1.8       timbl     546:        }
                    547: #endif
1.27      luotonen  548: 
1.70      frystyk   549:        if (TRACE && gateway)
                    550:            fprintf(TDEST,"Gateway..... Found: `%s\'\n", gateway);
                    551:        if (TRACE && proxy)
                    552:            fprintf(TDEST,"Proxy....... Found: `%s\'\n", proxy);
                    553: 
1.27      luotonen  554:        /* proxy servers have precedence over gateway servers */
1.60      frystyk   555:        if (proxy && *proxy) {
1.27      luotonen  556:            char * gatewayed=0;
                    557: 
                    558:             StrAllocCopy(gatewayed,proxy);
                    559:            StrAllocCat(gatewayed,addr);
1.70      frystyk   560:            req->using_proxy = YES;
1.27      luotonen  561:            HTAnchor_setPhysical(req->anchor, gatewayed);
                    562:            free(gatewayed);
                    563:            free(access);
                    564: 
                    565:            access =  HTParse(HTAnchor_physical(req->anchor),
                    566:                              "http:", PARSE_ACCESS);
1.60      frystyk   567:        } else if (gateway && *gateway) {
1.9       timbl     568:            char * path = HTParse(addr, "",
                    569:                PARSE_HOST + PARSE_PATH + PARSE_PUNCTUATION);
                    570:                /* Chop leading / off to make host into part of path */
                    571:            char * gatewayed = HTParse(path+1, gateway, PARSE_ALL);
                    572:            free(path);
1.21      luotonen  573:             HTAnchor_setPhysical(req->anchor, gatewayed);
1.9       timbl     574:            free(gatewayed);
1.2       timbl     575:            free(access);
1.9       timbl     576:            
1.21      luotonen  577:            access =  HTParse(HTAnchor_physical(req->anchor),
1.8       timbl     578:                "http:", PARSE_ACCESS);
1.2       timbl     579:        }
                    580:     }
1.70      frystyk   581: #endif /* HT_NO_PROXY */
1.1       timbl     582: 
1.19      timbl     583:     free(addr);
1.1       timbl     584: 
1.61      frystyk   585:     /* Search registered protocols to find suitable one */
1.1       timbl     586:     {
1.61      frystyk   587:        HTList *cur = protocols;
1.20      luotonen  588:        HTProtocol *p;
1.61      frystyk   589:        if (!cur) {
                    590:            if (TRACE)
1.67      frystyk   591:                fprintf(TDEST, "HTAccess.... NO PROTOCOL MODULES INITIATED\n");
1.61      frystyk   592:        } else {
                    593:            while ((p = (HTProtocol*)HTList_nextObject(cur))) {
                    594:                if (strcmp(p->name, access)==0) {
                    595:                    HTAnchor_setProtocol(req->anchor, p);
                    596:                    free(access);
                    597:                    return (HT_OK);
                    598:                }
1.1       timbl     599:            }
                    600:        }
                    601:     }
                    602:     free(access);
1.2       timbl     603:     return HT_NO_ACCESS;
1.1       timbl     604: }
                    605: 
1.59      frystyk   606: /* --------------------------------------------------------------------------*/
                    607: /*                             Document Loader                              */
                    608: /* --------------------------------------------------------------------------*/
1.1       timbl     609: 
                    610: /*             Load a document
                    611: **             ---------------
                    612: **
1.2       timbl     613: **     This is an internal routine, which has an address AND a matching
                    614: **     anchor.  (The public routines are called with one OR the other.)
                    615: **
                    616: ** On entry,
1.15      timbl     617: **     request->
1.35      luotonen  618: **         anchor              a parent anchor with fully qualified
                    619: **                             hypertext reference as its address set
1.15      timbl     620: **         output_format       valid
                    621: **         output_stream       valid on NULL
1.2       timbl     622: **
                    623: ** On exit,
1.59      frystyk   624: **     returns         HT_WOULD_BLOCK  An I/O operation would block
                    625: **                     HT_ERROR        Error has occured
1.2       timbl     626: **                     HT_LOADED       Success
                    627: **                     HT_NO_DATA      Success, but no document loaded.
1.8       timbl     628: **                                     (telnet sesssion started etc)
1.72      frystyk   629: **                     HT_RETRY        if service isn't available before
                    630: **                                     request->retry_after
1.2       timbl     631: */
1.52      frystyk   632: PUBLIC int HTLoad ARGS2(HTRequest *, request, BOOL, keep_error_stack)
1.2       timbl     633: {
1.25      frystyk   634:     char       *arg = NULL;
                    635:     HTProtocol *p;
                    636:     int        status;
                    637: 
1.22      luotonen  638:     if (request->method == METHOD_INVALID)
                    639:        request->method = METHOD_GET;
1.52      frystyk   640:     if (!keep_error_stack) {
                    641:        HTErrorFree(request);
                    642:        request->error_block = NO;
                    643:     }
                    644: 
1.59      frystyk   645:     if ((status = get_physical(request)) < 0) {
                    646:        if (status == HT_FORBIDDEN) {
                    647:            char *url = HTAnchor_address((HTAnchor *) request->anchor);
                    648:            if (url) {
                    649:                HTUnEscape(url);
                    650:                HTErrorAdd(request, ERR_FATAL, NO, HTERR_FORBIDDEN,
                    651:                           (void *) url, (int) strlen(url), "HTLoad");
                    652:                free(url);
                    653:            } else {
                    654:                HTErrorAdd(request, ERR_FATAL, NO, HTERR_FORBIDDEN,
                    655:                           NULL, 0, "HTLoad");
                    656:            }
                    657:        } 
                    658:        return HT_ERROR;                       /* Can't resolve or forbidden */
1.2       timbl     659:     }
1.25      frystyk   660: 
                    661:     if(!(arg = HTAnchor_physical(request->anchor)) || !*arg) 
1.59      frystyk   662:        return HT_ERROR;
1.27      luotonen  663: 
1.56      frystyk   664:     p = (HTProtocol *) HTAnchor_protocol(request->anchor);
1.17      timbl     665:     return (*(p->load))(request);
1.2       timbl     666: }
                    667: 
                    668: 
1.61      frystyk   669: /*             Terminate a LOAD
                    670: **             ----------------
                    671: **
                    672: **     This function looks at the status code from the HTLoadDocument
                    673: **     function and updates logfiles, creates error messages etc.
                    674: **
                    675: **    On Entry,
                    676: **     Status code from load function
                    677: */
                    678: PUBLIC BOOL HTLoadTerminate ARGS2(HTRequest *, request, int, status)
                    679: {
                    680:     char * uri = HTAnchor_address((HTAnchor*)request->anchor);
                    681: 
1.70      frystyk   682:     HTLog_request(request);
1.61      frystyk   683: 
                    684:     /* The error stack might contain general information to the client
                    685:        about what has been going on in the library (not only errors) */
                    686:     if (!HTImProxy && request->error_stack)
                    687:        HTErrorMsg(request);
                    688: 
                    689:     switch (status) {
                    690:       case HT_LOADED:
                    691:        if (PROT_TRACE) {
1.72      frystyk   692:            fprintf(TDEST, "HTAccess.... OK: `%s\' has been accessed.\n", uri);
1.61      frystyk   693:        }
                    694:        break;
                    695: 
                    696:       case HT_NO_DATA:
                    697:        if (PROT_TRACE) {
1.72      frystyk   698:            fprintf(TDEST, "HTAccess.... OK BUT NO DATA: `%s\'\n", uri);
1.61      frystyk   699:        }
                    700:        break;
                    701: 
                    702:       case HT_WOULD_BLOCK:
                    703:        if (PROT_TRACE) {
1.72      frystyk   704:            fprintf(TDEST, "HTAccess.... WOULD BLOCK: `%s\'\n", uri);
                    705:        }
                    706:        break;
                    707: 
                    708:       case HT_RETRY:
                    709:        if (PROT_TRACE) {
                    710:            fprintf(TDEST, "HTAccess.... NOT AVAILABLE, RETRY AT `%s\'\n",uri);
1.61      frystyk   711:        }
                    712:        break;
                    713: 
                    714:       case HT_ERROR:
                    715:        if (HTImProxy)
                    716:            HTErrorMsg(request);                     /* Only on a real error */
                    717:        if (PROT_TRACE) {
1.72      frystyk   718:            fprintf(TDEST, "HTAccess.... ERROR: Can't access `%s\'\n", uri);
1.61      frystyk   719:        }
                    720:        break;
                    721: 
                    722:       default:
                    723:        if (PROT_TRACE) {
1.67      frystyk   724:            fprintf(TDEST, "HTAccess.... **** Internal software error in CERN WWWLib version %s ****\n", HTLibraryVersion);
                    725:            fprintf(TDEST, "............ Please mail libwww@info.cern.ch quoting what software\n");
                    726:            fprintf(TDEST, "............ and version you are using including the URL:\n");
                    727:            fprintf(TDEST, "............ `%s\'\n", uri);
                    728:            fprintf(TDEST, "............ that caused the problem, thanks!\n");
1.61      frystyk   729:        }
                    730:        break;
                    731:     }
                    732:     free(uri);
                    733:     return YES;
                    734: }
                    735: 
                    736: 
1.2       timbl     737: /*             Load a document - with logging etc
                    738: **             ----------------------------------
                    739: **
                    740: **     - Checks or documents already loaded
                    741: **     - Logs the access
                    742: **     - Trace ouput and error messages
                    743: **
1.1       timbl     744: **    On Entry,
1.19      timbl     745: **        request->anchor      valid for of the document to be accessed.
                    746: **      request->childAnchor   optional anchor within doc to be selected
                    747: **
1.15      timbl     748: **       request->anchor   is the node_anchor for the document
                    749: **       request->output_format is valid
                    750: **
1.59      frystyk   751: ** On exit,
                    752: **     returns         HT_WOULD_BLOCK  An I/O operation would block
                    753: **                     HT_ERROR        Error has occured
                    754: **                     HT_LOADED       Success
                    755: **                     HT_NO_DATA      Success, but no document loaded.
                    756: **                                     (telnet sesssion started etc)
1.72      frystyk   757: **                     HT_RETRY        if service isn't available before
                    758: **                                     request->retry_after
1.1       timbl     759: */
1.59      frystyk   760: PRIVATE int HTLoadDocument ARGS2(HTRequest *,  request,
                    761:                                 BOOL,          keep_error_stack)
1.1       timbl     762: 
                    763: {
                    764:     int                status;
                    765:     HText *    text;
1.19      timbl     766:     char * full_address = HTAnchor_address((HTAnchor*)request->anchor);
1.54      frystyk   767: 
1.67      frystyk   768:     if (PROT_TRACE) fprintf (TDEST, "HTAccess.... Loading document %s\n",
1.59      frystyk   769:                             full_address);
1.1       timbl     770: 
1.18      timbl     771:     request->using_cache = NULL;
                    772:     
1.15      timbl     773:     if (!request->output_format) request->output_format = WWW_PRESENT;
1.25      frystyk   774: 
1.67      frystyk   775:     /* Check if document is already loaded or in cache */
1.70      frystyk   776:     if (!request->ForceReload) {
1.67      frystyk   777:        if ((text=(HText *)HTAnchor_document(request->anchor))) {
                    778:            if (PROT_TRACE)
                    779:                fprintf(TDEST, "HTAccess.... Document already in memory.\n");
                    780:            if (request->childAnchor) {
                    781:                HText_selectAnchor(text, request->childAnchor);
                    782:            } else {
                    783:                HText_select(text);     
                    784:            }
                    785:            free(full_address);
                    786:            return HT_LOADED;
1.19      timbl     787:        }
1.67      frystyk   788:        
                    789:        /* Check the Cache */
                    790:        /* Bug: for each format, we only check whether it is ok, we
                    791:           don't check them all and chose the best */
                    792:        if (request->anchor->cacheItems) {
                    793:            HTList * list = request->anchor->cacheItems;
                    794:            HTList * cur = list;
                    795:            HTCacheItem * item;
                    796:            while ((item = (HTCacheItem*)HTList_nextObject(cur))) {
                    797:                HTStream * s;
                    798:                request->using_cache = item;
                    799:                s = HTStreamStack(item->format, request->output_format,
                    800:                                  request->output_stream, request, NO);
                    801:                if (s) {        /* format was suitable */
                    802:                    FILE * fp = fopen(item->filename, "r");
                    803:                    if (PROT_TRACE) 
1.70      frystyk   804:                        fprintf(TDEST, "Cache....... HIT file %s for %s\n",
1.67      frystyk   805:                                item->filename, 
                    806:                                full_address);
                    807:                    if (fp) {
                    808:                        HTFileCopy(fp, s);
                    809:                        (*s->isa->_free)(s); /* close up pipeline */
                    810:                        fclose(fp);
                    811:                        free(full_address);
                    812:                        return HT_LOADED;
                    813:                    } else {
                    814:                        fprintf(TDEST, "***** Can't read cache file %s !\n",
                    815:                                item->filename);
                    816:                    } /* file open ok */
                    817:                } /* stream ok */
                    818:            } /* next cache item */
                    819:        } /* if cache available for this anchor */
1.70      frystyk   820:     } else {                     /* Make sure that we don't use old headers */
                    821:        HTAnchor_clearHeader(request->anchor);
                    822:        request->HeaderMask += HT_PRAGMA;       /* Force reload through proxy */
1.1       timbl     823:     }
1.61      frystyk   824:     if ((status = HTLoad(request, keep_error_stack)) != HT_WOULD_BLOCK)
                    825:        HTLoadTerminate(request, status);
1.19      timbl     826:     free(full_address);
1.59      frystyk   827:     return status;
1.58      frystyk   828: }
1.1       timbl     829: 
                    830: 
                    831: /*             Load a document from absolute name
                    832: **             ---------------
                    833: **
1.59      frystyk   834: ** On Entry,
1.1       timbl     835: **        addr     The absolute address of the document to be accessed.
                    836: **
1.59      frystyk   837: ** On exit,
                    838: **     returns         HT_WOULD_BLOCK  An I/O operation would block
                    839: **                     HT_ERROR        Error has occured
                    840: **                     HT_LOADED       Success
                    841: **                     HT_NO_DATA      Success, but no document loaded.
                    842: **                                     (telnet sesssion started etc)
1.72      frystyk   843: **                     HT_RETRY        if service isn't available before
                    844: **                                     request->retry_after
1.1       timbl     845: */
1.59      frystyk   846: PUBLIC int HTLoadAbsolute ARGS2(CONST char *,addr, HTRequest*, request)
1.2       timbl     847: {
1.19      timbl     848:    HTAnchor * anchor = HTAnchor_findAddress(addr);
                    849:    request->anchor = HTAnchor_parent(anchor);
                    850:    request->childAnchor = ((HTAnchor*)request->anchor == anchor) ?
                    851:                        NULL : (HTChildAnchor*) anchor;
1.52      frystyk   852:    return HTLoadDocument(request, NO);
1.2       timbl     853: }
                    854: 
                    855: 
                    856: /*             Load a document from absolute name to stream
                    857: **             --------------------------------------------
                    858: **
1.59      frystyk   859: ** On Entry,
1.2       timbl     860: **        addr     The absolute address of the document to be accessed.
1.15      timbl     861: **        request->output_stream     if non-NULL, send data down this stream
1.2       timbl     862: **
1.59      frystyk   863: ** On exit,
                    864: **     returns         HT_WOULD_BLOCK  An I/O operation would block
                    865: **                     HT_ERROR        Error has occured
                    866: **                     HT_LOADED       Success
                    867: **                     HT_NO_DATA      Success, but no document loaded.
                    868: **                                     (telnet sesssion started etc)
1.72      frystyk   869: **                     HT_RETRY        if service isn't available before
                    870: **                                     request->retry_after
1.2       timbl     871: */
1.59      frystyk   872: PUBLIC int HTLoadToStream ARGS3(CONST char *,  addr,
                    873:                                BOOL,           filter,
                    874:                                HTRequest*,     request)
1.1       timbl     875: {
1.63      frystyk   876:     HTAnchor * anchor = HTAnchor_findAddress(addr);
                    877:     request->anchor = HTAnchor_parent(anchor);
                    878:     request->childAnchor = ((HTAnchor*)request->anchor == anchor) ? NULL :
1.19      timbl     879:        (HTChildAnchor*) anchor;
1.15      timbl     880:     request->output_stream = request->output_stream;
1.52      frystyk   881:     return HTLoadDocument(request, NO);
1.1       timbl     882: }
                    883: 
                    884: 
                    885: /*             Load a document from relative name
                    886: **             ---------------
                    887: **
1.59      frystyk   888: ** On Entry,
1.2       timbl     889: **        relative_name     The relative address of the document
                    890: **                         to be accessed.
1.1       timbl     891: **
1.59      frystyk   892: ** On exit,
                    893: **     returns         HT_WOULD_BLOCK  An I/O operation would block
                    894: **                     HT_ERROR        Error has occured
                    895: **                     HT_LOADED       Success
                    896: **                     HT_NO_DATA      Success, but no document loaded.
                    897: **                                     (telnet sesssion started etc)
1.72      frystyk   898: **                     HT_RETRY        if service isn't available before
                    899: **                                     request->retry_after
1.1       timbl     900: */
1.59      frystyk   901: PUBLIC int HTLoadRelative ARGS3(CONST char *,          relative_name,
                    902:                                HTParentAnchor *,       here,
                    903:                                HTRequest *,            request)
1.1       timbl     904: {
                    905:     char *             full_address = 0;
1.65      frystyk   906:     int                result;
1.1       timbl     907:     char *             mycopy = 0;
                    908:     char *             stripped = 0;
                    909:     char *             current_address =
1.2       timbl     910:                                HTAnchor_address((HTAnchor*)here);
1.1       timbl     911: 
                    912:     StrAllocCopy(mycopy, relative_name);
                    913: 
                    914:     stripped = HTStrip(mycopy);
                    915:     full_address = HTParse(stripped,
                    916:                   current_address,
                    917:                   PARSE_ACCESS|PARSE_HOST|PARSE_PATH|PARSE_PUNCTUATION);
1.15      timbl     918:     result = HTLoadAbsolute(full_address, request);
1.1       timbl     919:     free(full_address);
                    920:     free(current_address);
                    921:     free(mycopy);  /* Memory leak fixed 10/7/92 -- JFG */
                    922:     return result;
                    923: }
                    924: 
                    925: 
                    926: /*             Load if necessary, and select an anchor
                    927: **             --------------------------------------
                    928: **
1.59      frystyk   929: ** On Entry,
1.1       timbl     930: **        destination              The child or parenet anchor to be loaded.
                    931: **
1.59      frystyk   932: ** On exit,
                    933: **     returns         HT_WOULD_BLOCK  An I/O operation would block
                    934: **                     HT_ERROR        Error has occured
                    935: **                     HT_LOADED       Success
                    936: **                     HT_NO_DATA      Success, but no document loaded.
                    937: **                                     (telnet sesssion started etc)
1.72      frystyk   938: **                     HT_RETRY        if service isn't available before
                    939: **                                     request->retry_after
1.1       timbl     940: */
1.59      frystyk   941: PUBLIC int HTLoadAnchor ARGS2(HTAnchor*, anchor, HTRequest *, request)
1.1       timbl     942: {
1.70      frystyk   943:     if (!anchor || !request)
                    944:        return HT_ERROR;
                    945:     request->anchor = HTAnchor_parent(anchor);
1.59      frystyk   946:     request->childAnchor = ((HTAnchor *) request->anchor == anchor) ?
                    947:        NULL : (HTChildAnchor*) anchor;
                    948:     return HTLoadDocument(request, NO);
                    949: }
1.52      frystyk   950: 
                    951: 
                    952: /*             Load if necessary, and select an anchor
                    953: **             --------------------------------------
                    954: **
                    955: **     This function is almost identical to HTLoadAnchor, but it doesn't
                    956: **     clear the error stack so that the information in there is kept.
                    957: **
1.59      frystyk   958: ** On Entry,
1.52      frystyk   959: **        destination              The child or parenet anchor to be loaded.
                    960: **
1.59      frystyk   961: ** On exit,
                    962: **     returns         HT_WOULD_BLOCK  An I/O operation would block
                    963: **                     HT_ERROR        Error has occured
                    964: **                     HT_LOADED       Success
                    965: **                     HT_NO_DATA      Success, but no document loaded.
                    966: **                                     (telnet sesssion started etc)
1.72      frystyk   967: **                     HT_RETRY        if service isn't available before
                    968: **                                     request->retry_after
1.52      frystyk   969: */
1.59      frystyk   970: PUBLIC int HTLoadAnchorRecursive ARGS2(HTAnchor*,      anchor,
                    971:                                       HTRequest *,     request)
1.52      frystyk   972: {
1.59      frystyk   973:     if (!anchor) return HT_ERROR;                                /* No link */
1.52      frystyk   974:     
                    975:     request->anchor  = HTAnchor_parent(anchor);
1.59      frystyk   976:     request->childAnchor = ((HTAnchor *) request->anchor == anchor) ?
                    977:        NULL : (HTChildAnchor*) anchor;
1.52      frystyk   978:     
1.59      frystyk   979:     return HTLoadDocument(request, YES);
                    980: }
1.1       timbl     981: 
                    982: 
                    983: /*             Search
                    984: **             ------
                    985: **  Performs a keyword search on word given by the user. Adds the keyword to 
                    986: **  the end of the current address and attempts to open the new address.
                    987: **
                    988: **  On Entry,
                    989: **       *keywords     space-separated keyword list or similar search list
1.2       timbl     990: **     here            is anchor search is to be done on.
1.59      frystyk   991: **
                    992: ** On exit,
                    993: **     returns         HT_WOULD_BLOCK  An I/O operation would block
                    994: **                     HT_ERROR        Error has occured
                    995: **                     HT_LOADED       Success
                    996: **                     HT_NO_DATA      Success, but no document loaded.
                    997: **                                     (telnet sesssion started etc)
1.72      frystyk   998: **                     HT_RETRY        if service isn't available before
                    999: **                                     request->retry_after
1.1       timbl    1000: */
1.56      frystyk  1001: PRIVATE char hex ARGS1(int, i)
1.2       timbl    1002: {
1.13      timbl    1003:     char * hexchars = "0123456789ABCDEF";
                   1004:     return hexchars[i];
1.2       timbl    1005: }
1.1       timbl    1006: 
1.59      frystyk  1007: PUBLIC int HTSearch ARGS3(CONST char *,                keywords,
                   1008:                          HTParentAnchor *,     here,
                   1009:                          HTRequest *,          request)
1.1       timbl    1010: {
1.2       timbl    1011: 
                   1012: #define acceptable \
                   1013: "1234567890abcdefghijlkmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ.-_"
                   1014: 
                   1015:     char *q, *u;
                   1016:     CONST char * p, *s, *e;            /* Pointers into keywords */
                   1017:     char * address = HTAnchor_address((HTAnchor*)here);
1.65      frystyk  1018:     int result;
1.56      frystyk  1019:     char * escaped = (char *) malloc(strlen(keywords)*3+1);
1.2       timbl    1020: 
1.29      frystyk  1021:     /* static CONST BOOL isAcceptable[96] = */
                   1022:     /* static AND const is not good for a gnu compiler! Frystyk 25/02-94 */
1.30      luotonen 1023:     static BOOL isAcceptable[96] =
1.2       timbl    1024:     /*   0 1 2 3 4 5 6 7 8 9 A B C D E F */
                   1025:     {    0,0,0,0,0,0,0,0,0,0,1,0,0,1,1,0,      /* 2x   !"#$%&'()*+,-./  */
                   1026:          1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,      /* 3x  0123456789:;<=>?  */
                   1027:         1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,       /* 4x  @ABCDEFGHIJKLMNO  */
                   1028:         1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,1,       /* 5X  PQRSTUVWXYZ[\]^_  */
                   1029:         0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,       /* 6x  `abcdefghijklmno  */
                   1030:         1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0 };     /* 7X  pqrstuvwxyz{\}~  DEL */
                   1031: 
                   1032:     if (escaped == NULL) outofmem(__FILE__, "HTSearch");
                   1033:     
1.29      frystyk  1034: /* Convert spaces to + and hex escape unacceptable characters */
1.2       timbl    1035: 
1.29      frystyk  1036:     for(s=keywords; *s && WHITE(*s); s++); /*scan */    /* Skip white space */
                   1037:     for(e = s + strlen(s); e>s && WHITE(*(e-1)) ; e--);     /* Skip trailers */
                   1038:     for(q=escaped, p=s; p<e; p++) {                  /* scan stripped field */
1.2       timbl    1039:         int c = (int)TOASCII(*p);
                   1040:         if (WHITE(*p)) {
                   1041:            *q++ = '+';
1.29      frystyk  1042:        } else if (c>=32 && c<=127 && isAcceptable[c-32] != 0) {
1.13      timbl    1043:            *q++ = *p;                  /* 930706 TBL for MVS bug */
1.2       timbl    1044:        } else {
                   1045:            *q++ = '%';
                   1046:            *q++ = hex(c / 16);
                   1047:            *q++ = hex(c % 16);
                   1048:        }
                   1049:     } /* Loop over string */
1.1       timbl    1050:     
1.2       timbl    1051:     *q=0;
                   1052:                                /* terminate escaped sctring */
                   1053:     u=strchr(address, '?');            /* Find old search string */
                   1054:     if (u) *u = 0;                             /* Chop old search off */
1.1       timbl    1055: 
                   1056:     StrAllocCat(address, "?");
1.2       timbl    1057:     StrAllocCat(address, escaped);
                   1058:     free(escaped);
1.15      timbl    1059:     result = HTLoadRelative(address, here, request);
1.1       timbl    1060:     free(address);
1.2       timbl    1061:     
1.1       timbl    1062:     return result;
1.2       timbl    1063: }
                   1064: 
                   1065: 
                   1066: /*             Search Given Indexname
                   1067: **             ------
                   1068: **  Performs a keyword search on word given by the user. Adds the keyword to 
                   1069: **  the end of the current address and attempts to open the new address.
                   1070: **
1.59      frystyk  1071: ** On Entry,
1.2       timbl    1072: **       *keywords     space-separated keyword list or similar search list
                   1073: **     *addres         is name of object search is to be done on.
1.59      frystyk  1074: ** On exit,
                   1075: **     returns         HT_WOULD_BLOCK  An I/O operation would block
                   1076: **                     HT_ERROR        Error has occured
                   1077: **                     HT_LOADED       Success
                   1078: **                     HT_NO_DATA      Success, but no document loaded.
                   1079: **                                     (telnet sesssion started etc)
1.72      frystyk  1080: **                     HT_RETRY        if service isn't available before
                   1081: **                                     request->retry_after
1.2       timbl    1082: */
1.59      frystyk  1083: PUBLIC int HTSearchAbsolute ARGS3(CONST char *,        keywords,
                   1084:                                  CONST char *,         indexname,
                   1085:                                  HTRequest *,          request)
1.2       timbl    1086: {
                   1087:     HTParentAnchor * anchor =
                   1088:        (HTParentAnchor*) HTAnchor_findAddress(indexname);
1.15      timbl    1089:     return HTSearch(keywords, anchor, request);
1.57      howcome  1090: }
                   1091: 
1.70      frystyk  1092: /* --------------------------------------------------------------------------*/
                   1093: /*                             Document Poster                              */
                   1094: /* --------------------------------------------------------------------------*/
                   1095: 
                   1096: /*             Get a save stream for a document
                   1097: **             --------------------------------
                   1098: */
                   1099: PUBLIC HTStream *HTSaveStream ARGS1(HTRequest *, request)
                   1100: {
                   1101:     HTProtocol * p;
                   1102:     int status;
                   1103:     request->method = METHOD_PUT;
                   1104:     status = get_physical(request);
                   1105:     if (status == HT_FORBIDDEN) {
                   1106:        char *url = HTAnchor_address((HTAnchor *) request->anchor);
                   1107:        if (url) {
                   1108:            HTUnEscape(url);
                   1109:            HTErrorAdd(request, ERR_FATAL, NO, HTERR_FORBIDDEN,
                   1110:                       (void *) url, (int) strlen(url), "HTLoad");
                   1111:            free(url);
                   1112:        } else {
                   1113:            HTErrorAdd(request, ERR_FATAL, NO, HTERR_FORBIDDEN,
                   1114:                       NULL, 0, "HTLoad");
                   1115:        }
                   1116:        return NULL;    /* should return error status? */
                   1117:     }
                   1118:     if (status < 0) return NULL; /* @@ error. Can't resolve or forbidden */
                   1119:     
                   1120:     p = (HTProtocol *) HTAnchor_protocol(request->anchor);
                   1121:     if (!p) return NULL;
                   1122:     
                   1123:     return (*p->saveStream)(request);
                   1124:     
                   1125: }
                   1126: 
                   1127: /*     COPY AN ANCHOR
                   1128: **     --------------
                   1129: **  Fetch the URL (possibly local file URL) and send it using either PUT
                   1130: **  or POST to the remote destination using HTTP. The caller can decide the
                   1131: **  exact method used and which HTTP header fields to transmit by setting the
                   1132: **  user fields in the request structure.
                   1133: **
                   1134: **     returns         HT_WOULD_BLOCK  An I/O operation would block
                   1135: **                     HT_ERROR        Error has occured
                   1136: **                     HT_LOADED       Success
                   1137: **                     HT_NO_DATA      Success, but no document loaded.
1.72      frystyk  1138: **                     HT_RETRY        if service isn't available before
                   1139: **                                     request->retry_after
1.70      frystyk  1140: */
                   1141: PUBLIC int HTCopyAnchor ARGS4(HTAnchor *,      src_anchor,
                   1142:                              HTRequest *,      src_req,
                   1143:                              HTParentAnchor *, dest_anchor,
                   1144:                              HTRequest *,      dest_req)
                   1145: {
                   1146:     if (!(src_anchor && src_req && dest_anchor && dest_req))
                   1147:        return HT_ERROR;
                   1148: 
                   1149:     if (!(dest_anchor->methods & dest_req->method)) {
                   1150:        char buf[80];
                   1151:        sprintf(buf, "It might not be allowed to %s to this destination, continue?", HTMethod_name(dest_req->method));
                   1152:        if (!HTConfirm(buf))
                   1153:            return HT_ERROR;
                   1154:     }
                   1155: 
                   1156:     /* First open the destination then open the source */
                   1157:     if (HTLoadAnchor((HTAnchor *) dest_anchor, dest_req) != HT_ERROR) {
                   1158:        src_req->ForceReload = YES;
                   1159:        src_req->HeaderMask += HT_DATE;                  /* Send date header */
                   1160:        if (src_req->output_format == WWW_PRESENT)             /* Use source */
                   1161:            src_req->output_format = WWW_SOURCE;
                   1162: 
                   1163:        /* Now make the link between the two request structures. First setup
                   1164:           the output stream of the source so that data get redirected to
                   1165:           the destination. Then set up the call back function so that
                   1166:           the destination can call for more data */
                   1167:        src_req->output_stream = dest_req->input_stream;
                   1168:        dest_req->CopyRequest = src_req;
                   1169:        dest_req->PostCallBack = HTSocketRead;
                   1170: 
                   1171:        return HTLoadAnchor(src_anchor, src_req);
                   1172:     }
                   1173:     return HT_ERROR;
                   1174: }
                   1175: 
                   1176: 
                   1177: /*     UPLOAD AN ANCHOR
                   1178: **     ----------------
                   1179: **  Send the contents (in hyperdoc) of the source anchor using either PUT
                   1180: **  or POST to the remote destination using HTTP. The caller can decide the
                   1181: **  exact method used and which HTTP header fields to transmit by setting the
                   1182: **  user fields in the request structure.
                   1183: **
                   1184: **     returns         HT_WOULD_BLOCK  An I/O operation would block
                   1185: **                     HT_ERROR        Error has occured
                   1186: **                     HT_LOADED       Success
                   1187: **                     HT_NO_DATA      Success, but no document loaded.
1.72      frystyk  1188: **                     HT_RETRY        if service isn't available before
                   1189: **                                     request->retry_after
1.70      frystyk  1190: */
                   1191: PUBLIC int HTUploadAnchor ARGS3(HTAnchor *,            src_anchor,
                   1192:                                HTParentAnchor *,       dest_anchor,
                   1193:                                HTRequest *,            dest_req)
                   1194: {
                   1195:     if (!(src_anchor && dest_anchor && dest_req))
                   1196:        return HT_ERROR;
                   1197: 
                   1198:     if (!(dest_anchor->methods & dest_req->method)) {
                   1199:        char buf[80];
                   1200:        sprintf(buf, "It might not be allowed to %s to this destination, continue?", HTMethod_name(dest_req->method));
                   1201:        if (!HTConfirm(buf))
                   1202:            return HT_ERROR;
                   1203:     }
                   1204: 
                   1205:     return HT_ERROR;
                   1206: }
                   1207: 
                   1208: /* --------------------------------------------------------------------------*/
                   1209: /*                             Anchor help routines                         */
                   1210: /* --------------------------------------------------------------------------*/
1.57      howcome  1211: 
                   1212: /*
                   1213: **             Find Related Name
                   1214: **
                   1215: **  Creates a string that can be used as a related name when 
                   1216: **  calling HTParse initially. 
                   1217: **  
                   1218: **  The code for this routine originates from the Linemode 
                   1219: **  browser and was moved here by howcome@dxcern.cern.ch
                   1220: **  in order for all clients to take advantage.
                   1221: **
1.59      frystyk  1222: **  The string returned must be freed by the caller
1.57      howcome  1223: */
                   1224: PUBLIC char * HTFindRelatedName NOARGS
                   1225: {
1.59      frystyk  1226:     char* default_default = NULL;            /* Parse home relative to this */
                   1227:     CONST char *host = HTGetHostName(); 
1.57      howcome  1228:     StrAllocCopy(default_default, "file://");
1.59      frystyk  1229:     if (host)
                   1230:        StrAllocCat(default_default, host);
                   1231:     else
                   1232:        StrAllocCat(default_default, "localhost");
                   1233:     {
                   1234:        char wd[HT_MAX_PATH+1];
1.67      frystyk  1235: 
                   1236: #ifdef NO_GETWD
                   1237: #ifdef HAS_GETCWD            /* System V variant SIGN CHANGED TBL 921006 !! */
                   1238:        char *result = (char *) getcwd(wd, sizeof(wd)); 
                   1239: #else
                   1240:        char *result = NULL;
                   1241:        HTAlert("This platform does not support neither getwd nor getcwd\n");
                   1242: #endif
                   1243: #else
                   1244:        char *result = (char *) getwd(wd);
                   1245: #endif
1.59      frystyk  1246:        *(wd+HT_MAX_PATH) = '\0';
1.57      howcome  1247:        if (result) {
                   1248: #ifdef VMS 
                   1249:             /* convert directory name to Unix-style syntax */
                   1250:            char * disk = strchr (wd, ':');
                   1251:            char * dir = strchr (wd, '[');
                   1252:            if (disk) {
                   1253:                *disk = '\0';
                   1254:                StrAllocCat (default_default, "/");  /* needs delimiter */
                   1255:                StrAllocCat (default_default, wd);
                   1256:            }
                   1257:            if (dir) {
                   1258:                char *p;
                   1259:                *dir = '/';  /* Convert leading '[' */
                   1260:                for (p = dir ; *p != ']'; ++p)
                   1261:                        if (*p == '.') *p = '/';
                   1262:                *p = '\0';  /* Cut on final ']' */
                   1263:                StrAllocCat (default_default, dir);
                   1264:            }
1.70      frystyk  1265: #else
                   1266: #ifdef WIN32
                   1267:            char * p = wd ;     /* a colon */
                   1268:            StrAllocCat(default_default, "/");
                   1269: 
                   1270:            /**p++ = '|' ;        /* change to '|' */
                   1271:            while( *p != 0 ) { 
                   1272:                if (*p == '\\')                  /* change to one true slash */
                   1273:                    *p = '/' ;
                   1274:                p++;
                   1275:            }
                   1276:            StrAllocCat( default_default, wd) ;
                   1277: #else
1.57      howcome  1278:            StrAllocCat (default_default, wd);
1.70      frystyk  1279: #endif /* not WIN32 */
1.67      frystyk  1280: #endif /* not VMS */
1.57      howcome  1281:        }
1.67      frystyk  1282:     }
1.57      howcome  1283:     StrAllocCat(default_default, "/default.html");
                   1284:     return default_default;
1.2       timbl    1285: }
                   1286: 
                   1287: 
                   1288: /*             Generate the anchor for the home page
                   1289: **             -------------------------------------
                   1290: **
                   1291: **     As it involves file access, this should only be done once
                   1292: **     when the program first runs.
1.10      timbl    1293: **     This is a default algorithm -- browser don't HAVE to use this.
                   1294: **     But consistency betwen browsers is STRONGLY recommended!
1.2       timbl    1295: **
1.10      timbl    1296: **     Priority order is:
                   1297: **
                   1298: **             1       WWW_HOME environment variable (logical name, etc)
                   1299: **             2       ~/WWW/default.html
                   1300: **             3       /usr/local/bin/default.html
1.70      frystyk  1301: **             4       http://www.w3.org/default.html
1.10      timbl    1302: **
1.2       timbl    1303: */
                   1304: PUBLIC HTParentAnchor * HTHomeAnchor NOARGS
                   1305: {
1.12      timbl    1306:     char * my_home_document = NULL;
1.70      frystyk  1307:     char * home = (char *) getenv(LOGICAL_DEFAULT);
1.2       timbl    1308:     char * ref;
                   1309:     HTParentAnchor * anchor;
1.1       timbl    1310:     
1.70      frystyk  1311:     /* Someone telnets in, they get a special home */
1.12      timbl    1312:     if (home) {
                   1313:         StrAllocCopy(my_home_document, home);
1.70      frystyk  1314:     } else  if (HTClientHost) {                                    /* Telnet server */
1.12      timbl    1315:        FILE * fp = fopen(REMOTE_POINTER, "r");
                   1316:        char * status;
                   1317:        if (fp) {
1.59      frystyk  1318:            my_home_document = (char*) malloc(HT_MAX_PATH);
                   1319:            status = fgets(my_home_document, HT_MAX_PATH, fp);
1.12      timbl    1320:            if (!status) {
                   1321:                free(my_home_document);
                   1322:                my_home_document = NULL;
                   1323:            }
                   1324:            fclose(fp);
                   1325:        }
                   1326:        if (!my_home_document) StrAllocCopy(my_home_document, REMOTE_ADDRESS);
                   1327:     }
                   1328: 
1.67      frystyk  1329: #ifdef unix
1.10      timbl    1330:     if (!my_home_document) {
                   1331:        FILE * fp = NULL;
1.70      frystyk  1332:        char * home = (char *) getenv("HOME");
1.10      timbl    1333:        if (home) { 
                   1334:            my_home_document = (char *)malloc(
                   1335:                strlen(home)+1+ strlen(PERSONAL_DEFAULT)+1);
                   1336:            if (my_home_document == NULL) outofmem(__FILE__, "HTLocalName");
                   1337:            sprintf(my_home_document, "%s/%s", home, PERSONAL_DEFAULT);
                   1338:            fp = fopen(my_home_document, "r");
                   1339:        }
                   1340:        
                   1341:        if (!fp) {
                   1342:            StrAllocCopy(my_home_document, LOCAL_DEFAULT_FILE);
                   1343:            fp = fopen(my_home_document, "r");
                   1344:        }
1.2       timbl    1345:        if (fp) {
                   1346:            fclose(fp);
                   1347:        } else {
1.62      frystyk  1348:            if (TRACE)
1.67      frystyk  1349:                fprintf(TDEST,
1.62      frystyk  1350:                        "HTBrowse: No local home document ~/%s or %s\n",
                   1351:                        PERSONAL_DEFAULT, LOCAL_DEFAULT_FILE);
1.11      timbl    1352:            free(my_home_document);
                   1353:            my_home_document = NULL;
1.2       timbl    1354:        }
                   1355:     }
1.67      frystyk  1356: #endif
1.70      frystyk  1357:     ref = HTParse(my_home_document ? my_home_document :
                   1358:                  HTClientHost ? REMOTE_ADDRESS : LAST_RESORT, "file:",
                   1359:                  PARSE_ACCESS|PARSE_HOST|PARSE_PATH|PARSE_PUNCTUATION);
1.10      timbl    1360:     if (my_home_document) {
1.62      frystyk  1361:        if (TRACE)
1.67      frystyk  1362:            fprintf(TDEST,
1.62      frystyk  1363:                   "HTAccess.... `%s\' used for custom home page as\n`%s\'\n",
                   1364:                    my_home_document, ref);
1.10      timbl    1365:        free(my_home_document);
1.2       timbl    1366:     }
                   1367:     anchor = (HTParentAnchor*) HTAnchor_findAddress(ref);
                   1368:     free(ref);
                   1369:     return anchor;
1.1       timbl    1370: }
1.26      frystyk  1371: 
                   1372: 
                   1373: /*             Bind an Anchor to the request structure
                   1374: **             ---------------------------------------
                   1375: **
                   1376: **    On Entry,
                   1377: **     anchor          The child or parenet anchor to be binded
                   1378: **     request         The request sturcture
                   1379: **    On Exit,
                   1380: **        returns    YES     Success
                   1381: **                   NO      Failure 
                   1382: **
                   1383: **  Note: Actually the same as HTLoadAnchor() but DOES NOT do the loading
                   1384: **                                             Henrik Frystyk 17/02-94
                   1385: */
                   1386: 
                   1387: PUBLIC BOOL HTBindAnchor ARGS2(HTAnchor*, anchor, HTRequest *, request)
                   1388: {
                   1389:     if (!anchor) return NO;    /* No link */
                   1390:     
                   1391:     request->anchor  = HTAnchor_parent(anchor);
                   1392:     request->childAnchor = ((HTAnchor*)request->anchor == anchor) ? NULL
                   1393:                                        : (HTChildAnchor*) anchor;
                   1394:        
1.29      frystyk  1395:     return YES;
1.70      frystyk  1396: }
1.59      frystyk  1397: 
1.26      frystyk  1398: 

Webmaster