Annotation of libwww/Library/src/HTAccess.c, revision 1.78

1.61      frystyk     1: /*                                                                  HTAccess.c
                      2: **     ACCESS MANAGER
                      3: **
1.75      frystyk     4: **     (c) COPYRIGHT MIT 1995.
1.61      frystyk     5: **     Please first read the full copyright statement in the file COPYRIGH.
1.1       timbl       6: **
                      7: ** Authors
                      8: **     TBL     Tim Berners-Lee timbl@info.cern.ch
1.4       timbl       9: **     JFG     Jean-Francois Groff jfg@dxcern.cern.ch
1.1       timbl      10: **     DD      Denis DeLaRoca (310) 825-4580  <CSP1DWD@mvs.oac.ucla.edu>
                     11: ** History
                     12: **       8 Jun 92 Telnet hopping prohibited as telnet is not secure TBL
                     13: **     26 Jun 92 When over DECnet, suppressed FTP, Gopher and News. JFG
1.42      frystyk    14: **      6 Oct 92 Moved HTClientHost and HTlogfile into here. TBL
1.1       timbl      15: **     17 Dec 92 Tn3270 added, bug fix. DD
1.2       timbl      16: **      4 Feb 93 Access registration, Search escapes bad chars TBL
1.9       timbl      17: **               PARAMETERS TO HTSEARCH AND HTLOADRELATIVE CHANGED
                     18: **     28 May 93 WAIS gateway explicit if no WAIS library linked in.
1.19      timbl      19: **        Dec 93 Bug change around, more reentrant, etc
1.42      frystyk    20: **     09 May 94 logfile renamed to HTlogfile to avoid clash with WAIS
1.53      duns       21: **      8 Jul 94 Insulate free() from _free structure element.
1.2       timbl      22: ** Bugs
                     23: **     This module assumes that the graphic object is hypertext, as it
1.9       timbl      24: **     needs to select it when it has been loaded.  A superclass needs to be
1.2       timbl      25: **     defined which accepts select and select_anchor.
1.1       timbl      26: */
                     27: 
1.68      frystyk    28: #if !defined(HT_DIRECT_WAIS) && !defined(HT_DEFAULT_WAIS_GATEWAY)
                         /* Fallback WAIS gateway URL. It is only defined here when the build
                         ** neither links WAIS directly (HT_DIRECT_WAIS) nor supplies its own
                         ** value. HTLibInit() registers it for the "wais" access scheme via
                         ** HTProxy_setGateway() when HT_DIRECT_WAIS is not defined.
                         */
                     29: #define HT_DEFAULT_WAIS_GATEWAY "http://www.w3.org:8001/"
1.54      frystyk    30: #endif
1.8       timbl      31: 
1.67      frystyk    32: /* Library include files */
                     33: #include "tcp.h"
                     34: #include "HTUtils.h"
1.78    ! frystyk    35: #include "HTString.h"
1.1       timbl      36: #include "HTParse.h"
1.78    ! frystyk    37: #include "HTAlert.h"
        !            38: #include "HTError.h"
1.2       timbl      39: #include "HTList.h"
1.78    ! frystyk    40: #include "HTAABrow.h"                          /* Should be HTAAUtil.html! */
1.67      frystyk    41: #include "HTFWrite.h"  /* for cache stuff */
1.70      frystyk    42: #include "HTLog.h"
1.77      frystyk    43: #include "HTSocket.h"
1.57      howcome    44: #include "HTTCP.h"      /* HWL: for HTFindRelatedName */
1.59      frystyk    45: #include "HTThread.h"
1.63      frystyk    46: #include "HTEvent.h"
1.73      frystyk    47: #include "HTBind.h"
1.70      frystyk    48: #include "HTInit.h"
1.77      frystyk    49: #include "HTProxy.h"
1.78    ! frystyk    50: #include "HTML.h"              /* SCW */
        !            51: #include "HText.h"     /* See bugs above */
1.74      frystyk    52: 
1.67      frystyk    53: #ifndef NO_RULES
                     54: #include "HTRules.h"
                     55: #endif
1.74      frystyk    56: 
1.67      frystyk    57: #include "HTAccess.h"                                   /* Implemented here */
1.2       timbl      58: 
1.54      frystyk    59: /* These flags may be set to modify the operation of this module */
1.73      frystyk    60: PUBLIC int  HTMaxRedirections = 10;           /* Max number of redirections */
                     61: 
1.78    ! frystyk    62: PUBLIC char * HTClientHost = NULL;      /* Name of remote login host if any */
1.70      frystyk    63: PUBLIC BOOL HTSecure = NO;              /* Disable access for telnet users? */
1.41      luotonen   64: 
                         /* When HTImServer is non-NULL, get_physical() uses it directly as the
                         ** physical address of the anchor instead of running the rule
                         ** translation (only in builds without HT_NO_RULES).
                         */
1.43      luotonen   65: PUBLIC char * HTImServer = NULL;/* cern_httpd sets this to the translated URL*/
1.70      frystyk    66: PUBLIC BOOL HTImProxy = NO;                       /* cern_httpd as a proxy? */
1.1       timbl      67: 
1.74      frystyk    68: #ifdef _WINDOWS 
                         /* Window handle and message id that HTRequest_new() and
                         ** HTRequest_clear() copy into the hwnd / winMsg fields of a request.
                         ** NOTE(review): HTwinMsg has no PUBLIC/PRIVATE qualifier, unlike the
                         ** other globals here - confirm the linkage is intended.
                         */
                     69: PUBLIC HWND HTsocketWin = 0 ;
                     70: unsigned long HTwinMsg = 0 ;
                     71: #endif 
                     72: 
1.78    ! frystyk    73: /* Variables and typedefs local to this module */
                         /* Created on first use by HTRegisterProtocol() and released again by
                         ** HTDisposeProtocols(), which also resets the pointer to NULL.
                         */
        !            74: PRIVATE HTList * protocols = NULL;           /* List of registered protocols */
        !            75: 
1.63      frystyk    76: /* Superclass defn */
                         /* Only the leading class pointer is declared in this module; the
                         ** remaining members are elided.
                         ** NOTE(review): presumably every concrete stream type starts with the
                         ** same `isa' member so that it can be addressed through this view -
                         ** confirm against the stream implementations.
                         */
1.24      timbl      77: struct _HTStream {
                     78:        HTStreamClass * isa;
                     79:        /* ... */
                     80: };
                     81: 
1.59      frystyk    82: /* --------------------------------------------------------------------------*/
                     83: /*                     Management of the HTRequest structure                */
                     84: /* --------------------------------------------------------------------------*/
                     85: 
1.15      timbl      86: /*     Create  a request structure
                     87: **     ---------------------------
                     88: */
                     89: PUBLIC HTRequest * HTRequest_new NOARGS
                     90: {
1.28      luotonen   91:     HTRequest * me = (HTRequest*) calloc(1, sizeof(*me));  /* zero fill */
1.15      timbl      92:     if (!me) outofmem(__FILE__, "HTRequest_new()");
                     93:     
1.77      frystyk    94:     /* User preferences for this particular request. Only empty lists! */
                     95:     me->conversions = HTList_new();
                     96:     me->encodings = HTList_new();
                     97:     me->languages = HTList_new();
                     98:     me->charsets = HTList_new();
                     99: 
                    100:     /* Format of output */
1.70      frystyk   101:     me->output_format  = WWW_PRESENT;      /* default it to present to user */
1.72      frystyk   102:     me->error_format   = WWW_HTML;      /* default format of error messages */
1.77      frystyk   103: 
                    104:     /* HTTP headers */
                    105:     me->GenMask                = DEFAULT_GENERAL_HEADERS;
                    106:     me->RequestMask    = DEFAULT_REQUEST_HEADERS;
                    107:     me->EntityMask     = DEFAULT_ENTITY_HEADERS;
                    108: 
                    109:     /* Content negotiation */
                    110:     me->ContentNegotiation = NO;                      /* Do this by default */
1.74      frystyk   111: 
                    112: #ifdef _WINDOWS
                    113:     me->hwnd = HTsocketWin;
                    114:     me->winMsg = HTwinMsg;
                    115: #endif
                    116: 
1.15      timbl     117:     return me;
                    118: }
                    119: 
                    120: 
1.49      frystyk   121: /*     Clear  a request structure
                    122: **     ---------------------------
                    123: **     This function clears the reguest structure so that only the
                    124: **     conversions remain. Everything else is as if it was created from
                    125: **     scratch.
                    126: */
                    127: PUBLIC void HTRequest_clear ARGS1(HTRequest *, req)
                    128: {
                    129:     HTList *conversions;
                    130:     if (!req) {
                    131:        if (TRACE)
1.67      frystyk   132:            fprintf(TDEST, "Clear....... request: Bad argument!\n");
1.49      frystyk   133:        return;
                    134:     }
                    135:     conversions = req->conversions;                 /* Save the conversions */
                    136:     HTErrorFree(req);
                    137:     HTAACleanup(req);
                    138:     memset(req, '\0', sizeof(HTRequest));
                    139: 
                    140:     /* Now initialize as from scratch but with the old list of conversions */
                    141:     req->conversions = conversions;
                    142:     req->output_format = WWW_PRESENT;      /* default it to present to user */
1.74      frystyk   143: 
                    144: #ifdef _WINDOWS   
1.76      frystyk   145:     req->hwnd = HTsocketWin ;
                    146:     req->winMsg = HTwinMsg ;
1.74      frystyk   147: #endif 
                    148: 
1.49      frystyk   149: }
                    150: 
                    151: 
1.20      luotonen  152: /*     Delete a request structure
                    153: **     --------------------------
                    154: */
                    155: PUBLIC void HTRequest_delete ARGS1(HTRequest *, req)
                    156: {
                    157:     if (req) {
1.59      frystyk   158:        FREE(req->redirect);
                    159:        FREE(req->authenticate);
                    160:        HTFormatDelete(req);
1.46      frystyk   161:        HTErrorFree(req);
1.34      frystyk   162:        HTAACleanup(req);
1.78    ! frystyk   163:        if (req->CopyRequest)
        !           164:            HTRequest_delete(req->CopyRequest);
1.61      frystyk   165: 
                    166:        /* These are temporary until we get a MIME thingy */
                    167:        FREE(req->redirect);
                    168:        FREE(req->WWWAAScheme);
                    169:        FREE(req->WWWAARealm);
                    170:        FREE(req->WWWprotection);
                    171: 
1.34      frystyk   172:        FREE(req);
1.20      luotonen  173:     }
                    174: }
                    175: 
1.59      frystyk   176: /* --------------------------------------------------------------------------*/
                    177: /*                     Management of HTTP Methods                           */
                    178: /* --------------------------------------------------------------------------*/
1.20      luotonen  179: 
1.70      frystyk   180: static char *method_names[] =
1.22      luotonen  181: {
                         /* Entry 0 is the name handed out for unknown methods. Entries 1-7
                         ** are referenced by position from HTMethod_enum() and HTMethod_name(),
                         ** so the order of this table must not be changed. The list is
                         ** terminated by NULL.
                         */
                    182:     "INVALID-METHOD",
                    183:     "GET",
                    184:     "HEAD",
                    185:     "POST",
                    186:     "PUT",
                    187:     "DELETE",
                    188:     "LINK",
                    189:     "UNLINK",
                    190:     NULL
                    191: };
                    192: 
                    193: /*     Get method enum value
                    194: **     ---------------------
                    195: */
1.70      frystyk   196: PUBLIC HTMethod HTMethod_enum ARGS1(CONST char *, name)
1.22      luotonen  197: {
                    198:     if (name) {
1.70      frystyk   199:        if (!strcmp(name, *(method_names+1)))
                    200:            return METHOD_GET;
                    201:        else if (!strcmp(name, *(method_names+2)))
                    202:            return METHOD_HEAD;
                    203:        else if (!strcmp(name, *(method_names+3)))
                    204:            return METHOD_POST;
                    205:        else if (!strcmp(name, *(method_names+4)))
                    206:            return METHOD_PUT;
                    207:        else if (!strcmp(name, *(method_names+5)))
                    208:            return METHOD_DELETE;
                    209:        else if (!strcmp(name, *(method_names+6)))
                    210:            return METHOD_LINK;
                    211:        else if (!strcmp(name, *(method_names+7)))
                    212:            return METHOD_UNLINK;
1.22      luotonen  213:     }
                    214:     return METHOD_INVALID;
                    215: }
                    216: 
                    217: 
                    218: /*     Get method name
                    219: **     ---------------
1.70      frystyk   220: **     Returns pointer to entry in static table in memory
1.22      luotonen  221: */
1.70      frystyk   222: PUBLIC CONST char * HTMethod_name ARGS1(HTMethod, method)
1.22      luotonen  223: {
1.70      frystyk   224:     if (method & METHOD_GET)
                    225:        return *(method_names+1);
                    226:     else if (method == METHOD_HEAD)
                    227:        return *(method_names+2);
                    228:     else if (method == METHOD_POST)
                    229:        return *(method_names+3);
                    230:     else if (method == METHOD_PUT)
                    231:        return *(method_names+4);
                    232:     else if (method == METHOD_DELETE)
                    233:        return *(method_names+5);
                    234:     else if (method == METHOD_LINK)
                    235:        return *(method_names+6);
                    236:     else if (method == METHOD_UNLINK)
                    237:        return *(method_names+7);
                    238:     else
                    239:        return *method_names;
                    240: #if 0
                    241:     if ((int)METHOD_INVALID  && (int)method < (int)MAX_METHODS)
1.22      luotonen  242:        return method_names[(int)method];
                    243:     else
                    244:        return method_names[(int)METHOD_INVALID];
1.70      frystyk   245: #endif
1.22      luotonen  246: }
                    247: 
                    248: 
#if 0
/* NOT NEEDED AS METHODS IS NOT A BIT-FLAG */
/*     Is method in a list of method names?
**     -----------------------------------
**     Disabled code, kept for reference only.
**
** On entry,
**     method          method whose name is searched for.
**     list            HTList of char * method names.
** On exit,
**     returns         YES if the name of `method' equals one of the list
**                     items according to strcasecomp(), otherwise NO.
**                     With PROT_TRACE set every visited item is written
**                     to TDEST.
*/
PUBLIC BOOL HTMethod_inList ARGS2(HTMethod,    method,
                                  HTList *,     list)
{
    /* HTMethod_name() returns a CONST pointer into a static table */
    CONST char * method_name = HTMethod_name(method);
    HTList *cur = list;
    char *item;

    while (NULL != (item = (char*)HTList_nextObject(cur))) {
        if (PROT_TRACE)
            fprintf(TDEST, " %s", item);
        if (0==strcasecomp(item, method_name))
            return YES;
    }
    return NO; /* Not found */
}
#endif
1.22      luotonen  270: 
1.59      frystyk   271: /* --------------------------------------------------------------------------*/
                    272: /*                   Management of the HTProtocol structure                 */
                    273: /* --------------------------------------------------------------------------*/
1.22      luotonen  274: 
1.63      frystyk   275: /*
                    276: **     Register a Protocol as an active access method
1.1       timbl     277: */
1.56      frystyk   278: PUBLIC BOOL HTRegisterProtocol ARGS1(HTProtocol *, protocol)
1.1       timbl     279: {
                    280:     if (!protocols) protocols = HTList_new();
1.59      frystyk   281:     HTList_addObject(protocols, (void *) protocol);
1.1       timbl     282:     return YES;
                    283: }
                    284: 
1.63      frystyk   285: 
                    286: /*
                    287: **     Delete the list of registered access methods. This is called from
                    288: **     within HTLibTerminate. Written by Eric Sink, eric@spyglass.com
                    289: */
                    290: PUBLIC void HTDisposeProtocols NOARGS
                    291: {
                    292:     if (protocols) {
                    293:        HTList_delete(protocols);
                    294:        protocols = NULL;
                    295:     }
                    296: }
                    297: 
                    298: 
                    299: /*
1.65      frystyk   300: **     Is a protocol registered as BLOCKING? The default behavior registered
                    301: **     when the protocol module was registered can be overridden by the
                    302: **     BlockingIO field in the HTRequest structure
1.63      frystyk   303: */
1.59      frystyk   304: PUBLIC BOOL HTProtocolBlocking ARGS1(HTRequest *, me)
                    305: {
1.65      frystyk   306:     if (me) {
                    307:        return (me->BlockingIO || (me->anchor && me->anchor->protocol &&
                    308:                ((HTProtocol *) (me->anchor->protocol))->block == SOC_BLOCK));
                    309:     }
                    310:     return NO;
1.59      frystyk   311: }
                    312: 
1.61      frystyk   313: /* --------------------------------------------------------------------------*/
                    314: /*                Initialization and Termination of the Library             */
                    315: /* --------------------------------------------------------------------------*/
                    316: 
                    317: /*                                                                  HTLibInit
                    318: **
                    319: **     This function initiates the Library and it MUST be called when
                    320: **     starting up an application. See also HTLibTerminate()
                    321: */
                    322: PUBLIC BOOL HTLibInit NOARGS
                    323: {
1.67      frystyk   324: #ifdef NO_STDIO                                                  /* Open trace file */
                    325:     if ((TDEST = fopen(TRACE_FILE, "a")) != NULL) {
                    326:        if (setvbuf(TDEST, NULL, _IOLBF, 0) < 0) {  /* Change to line buffer */
1.70      frystyk   327:            printf("WWWLibInit.. Can't initialize TRACE buffer - no TRACE\n");
1.67      frystyk   328:            fclose(TDEST);
                    329:            TDEST = NULL;
                    330:            WWW_TraceFlag = 0;
                    331:        }
                    332:     } else
                    333:        WWW_TraceFlag = 0;
                    334: #endif
                    335: 
1.61      frystyk   336:     if (TRACE)
1.67      frystyk   337:        fprintf(TDEST, "WWWLibInit.. INITIALIZING LIBRARY OF COMMON CODE\n");
1.63      frystyk   338: 
1.77      frystyk   339:     /* Set up User preferences, but leave initialization to the application */
1.73      frystyk   340:     if (!HTConversions)
                    341:        HTConversions = HTList_new();
1.77      frystyk   342:     if (!HTEncodings)
                    343:        HTEncodings = HTList_new();
                    344:     if (!HTLanguages)
                    345:        HTLanguages = HTList_new();
                    346:     if (!HTCharsets)
                    347:        HTCharsets = HTList_new();
                    348: 
                    349:     /* Set up bindings to the local file system */
                    350:     HTBind_init();
1.73      frystyk   351: 
1.70      frystyk   352: #ifndef HT_NO_INIT
                    353:     HTAccessInit();             /* Bind access schemes and protocol modules */
                    354:     HTFileInit();                   /* Bind file extensions and media types */
1.63      frystyk   355: #endif
1.61      frystyk   356: 
1.77      frystyk   357: #ifndef HT_DIRECT_WAIS
                    358:     HTProxy_setGateway("wais", HT_DEFAULT_WAIS_GATEWAY);
                    359: #endif
                    360: 
1.62      frystyk   361: #ifdef WWWLIB_SIG
1.61      frystyk   362:     /* On Solaris (and others?) we get a BROKEN PIPE signal when connecting
1.67      frystyk   363:     ** to a port where we should get `connection refused'. We ignore this 
1.61      frystyk   364:     ** using the following function call
                    365:     */
                    366:     HTSetSignal();                                /* Set signals in library */
1.1       timbl     367: #endif
                    368: 
1.67      frystyk   369: #ifdef _WINDOWS
                    370:     /*
                    371:     ** Initialise WinSock DLL. This must also be shut down! PMH
                    372:     */
                    373:     {
                    374:         WSADATA            wsadata;
                    375:        if (WSAStartup(DESIRED_WINSOCK_VERSION, &wsadata)) {
                    376:            if (TRACE)
                    377:                fprintf(TDEST, "WWWLibInit.. Can't initialize WinSoc\n");
                    378:             WSACleanup();
                    379:             return NO;
                    380:         }
                    381:         if (wsadata.wVersion < MINIMUM_WINSOCK_VERSION) {
                    382:             if (TRACE)
                    383:                fprintf(TDEST, "WWWLibInit.. Bad version of WinSoc\n");
                    384:             WSACleanup();
                    385:             return NO;
                    386:         }
                    387:     }
                    388: #endif /* _WINDOWS */
                    389: 
1.71      frystyk   390: #ifndef NO_TIMEGM
                    391:     HTGetTimeZoneOffset();        /* Find offset from GMT if using mktime() */
                    392: #endif
1.70      frystyk   393:     HTTmp_setRoot(NULL);                    /* Set up default tmp directory */
1.61      frystyk   394:     HTThreadInit();                                /* Initialize bit arrays */
                    395:     return YES;
                    396: }
                    397: 
                    398: 
                    399: /*                                                              HTLibTerminate
                    400: **
                    401: **     This function frees memory kept by the Library and should be called
1.63      frystyk   402: **     before exit of an application (if you are on a PC platform)
                         **
                         **     It is the counterpart of HTLibInit(). On _WINDOWS it also shuts
                         **     WinSock down, and with NO_STDIO it closes the trace file and
                         **     turns tracing off.
                         **
                         ** On exit,
                         **     returns         always YES
1.61      frystyk   403: */
                    404: PUBLIC BOOL HTLibTerminate NOARGS
                    405: {
                    406:     if (TRACE)
1.67      frystyk   407:        fprintf(TDEST, "WWWLibTerm.. Cleaning up LIBRARY OF COMMON CODE\n");
1.63      frystyk   408:     HTAtom_deleteAll();
                         /* NOTE(review): HTLibInit() also creates the HTEncodings, HTLanguages
                         ** and HTCharsets lists. Whether HTDisposeConversions() releases them
                         ** as well cannot be seen from this module - confirm.
                         */
                    409:     HTDisposeConversions();
                    410:     HTTCPCacheRemoveAll();
1.73      frystyk   411: 
                         /* NOTE(review): HTLibInit() calls HTBind_init() unconditionally, and
                         ** HTRegisterProtocol() is public, yet both clean-ups below are skipped
                         ** when HT_NO_INIT is defined - confirm that this is intended.
                         */
                    412: #ifndef HT_NO_INIT
                    413:     HTDisposeProtocols();    /* Remove bindings between access and protocols */
                    414:     HTBind_deleteAll();            /* Remove bindings between suffixes, media types */
                    415: #endif
                    416: 
1.77      frystyk   417:     HTProxy_deleteProxy();        /* Clean up lists of proxies and gateways */
                    418:     HTProxy_deleteNoProxy();
                    419:     HTProxy_deleteGateway();
                    420: 
                    421:     HTFreeHostName();                      /* Free up some internal strings */
1.63      frystyk   422:     HTFreeMailAddress();
1.70      frystyk   423:     HTCache_freeRoot();
                    424:     HTTmp_freeRoot();
1.67      frystyk   425: 
                         /* Pairs with the WSAStartup() call made in HTLibInit() */
                    426: #ifdef _WINDOWS
                    427:     WSACleanup();
                    428: #endif
                    429: 
                         /* Done last so that the trace file stays usable during the clean-up */
                    430: #ifdef NO_STDIO                                                 /* Close trace file */
                    431:     if (TDEST) {
                    432:        fclose(TDEST);
                    433:        TDEST = NULL;
                    434:        WWW_TraceFlag = 0;
                    435:     }
                    436: #endif
1.61      frystyk   437:     return YES;
                    438: }
                    439: 
1.59      frystyk   440: /* --------------------------------------------------------------------------*/
                    441: /*                     Physical Anchor Address Manager                      */
                    442: /* --------------------------------------------------------------------------*/
1.33      luotonen  443: 
1.77      frystyk   444: #ifdef OLD_CODE
1.33      luotonen  445: /*                                                     override_proxy()
                    446: **
                    447: **     Check the no_proxy environment variable to get the list
                    448: **     of hosts for which proxy server is not consulted.
                    449: **
                    450: **     no_proxy is a comma- or space-separated list of machine
                    451: **     or domain names, with optional :port part.  If no :port
                    452: **     part is present, it applies to all ports on that domain.
                    453: **
                    454: **     Example:
                    455: **             no_proxy="cern.ch,some.domain:8001"
                    456: **
                         **     This function is only compiled when OLD_CODE is defined.
                         **
                         ** On entry,
                         **     addr            URL whose host (and port) is checked.
                         ** On exit,
                         **     returns         YES if an entry of no_proxy matches, NO if none
                         **                     matches, if no_proxy is not set, if addr is NULL
                         **                     or if addr has no host part.
                         **
                         **     An entry matches when its text is a trailing part of the host name
                         **     (compared case sensitively, not on label boundaries) and its port
                         **     is either absent or equal to the port of addr.
                    457: */
                    458: PRIVATE BOOL override_proxy ARGS1(CONST char *, addr)
                    459: {
                    460:     CONST char * no_proxy = getenv("no_proxy");
                    461:     char * p = NULL;
                    462:     char * host = NULL;
                    463:     int port = 0;
                    464:     int h_len = 0;
                    465: 
                         /* host is a heap copy returned by HTParse(); it is freed on every
                         ** return path below.
                         */
                    466:     if (!no_proxy || !addr || !(host = HTParse(addr, "", PARSE_HOST)))
                    467:        return NO;
                    468:     if (!*host) { free(host); return NO; }
                    469: 
1.34      frystyk   470:     if ((p = strchr(host, ':')) != NULL) {     /* Port specified */
1.33      luotonen  471:        *p++ = 0;                       /* Chop off port */
                    472:        port = atoi(p);
                    473:     }
                    474:     else {                             /* Use default port */
                         /* Known schemes get their well-known port; any other scheme leaves
                         ** port at 0 and is given 80 below.
                         */
                    475:        char * access = HTParse(addr, "", PARSE_ACCESS);
                    476:        if (access) {
                    477:            if      (!strcmp(access,"http"))    port = 80;
                    478:            else if (!strcmp(access,"gopher"))  port = 70;
                    479:            else if (!strcmp(access,"ftp"))     port = 21;
                    480:            free(access);
                    481:        }
                    482:     }
                    483:     if (!port) port = 80;              /* Default */
                    484:     h_len = strlen(host);
                    485: 
                         /* Walk the no_proxy list one entry at a time */
                    486:     while (*no_proxy) {
                    487:        CONST char * end;
                    488:        CONST char * colon = NULL;
                    489:        int templ_port = 0;
                    490:        int t_len;
                    491: 
                    492:        while (*no_proxy && (WHITE(*no_proxy) || *no_proxy==','))
                    493:            no_proxy++;                 /* Skip whitespace and separators */
                    494: 
                    495:        end = no_proxy;
                    496:        while (*end && !WHITE(*end) && *end != ',') {   /* Find separator */
                    497:            if (*end==':') colon = end;                 /* Port number given */
                    498:            end++;
                    499:        }
                    500: 
                         /* t_len is the length of the host part of the entry, without any
                         ** ":port" suffix. A port of 0 (none given, or one atoi() turns into
                         ** 0) makes the entry apply to every port.
                         */
                    501:        if (colon) {
                    502:            templ_port = atoi(colon+1);
                    503:            t_len = colon - no_proxy;
                    504:        }
                    505:        else {
                    506:            t_len = end - no_proxy;
                    507:        }
                    508: 
                         /* Compare the entry with the last t_len characters of the host */
                    509:        if ((!templ_port || templ_port == port)  &&
                    510:            (t_len > 0  &&  t_len <= h_len  &&
                    511:             !strncmp(host + h_len - t_len, no_proxy, t_len))) {
                    512:            free(host);
                    513:            return YES;
                    514:        }
                         /* Step over the separator, or stop at the end of the list */
                    515:        if (*end) no_proxy = end+1;
                    516:        else break;
                    517:     }
                    518: 
                    519:     free(host);
                    520:     return NO;
                    521: }
1.77      frystyk   522: #endif /* OLD_CODE */
1.33      luotonen  523: 
                    524: 
1.2       timbl     525: /*             Find physical name and access protocol
                    526: **             --------------------------------------
1.1       timbl     527: **
                    528: **
                    529: ** On entry,
                    530: **     addr            must point to the fully qualified hypertext reference.
                    531: **     anchor          a parent anchor whose address is addr
                    532: **
1.59      frystyk   533: ** On exit,    
                    534: **     returns         HT_NO_ACCESS            no protocol module found
                    535: **                     HT_FORBIDDEN            Error has occurred.
1.2       timbl     536: **                     HT_OK                   Success
1.1       timbl     537: **
                    538: */
1.21      luotonen  539: PRIVATE int get_physical ARGS1(HTRequest *, req)
                    540: {    
                    541:     char * addr = HTAnchor_address((HTAnchor*)req->anchor);    /* free me */
1.27      luotonen  542: 
1.70      frystyk   543: #ifndef HT_NO_RULES
1.47      luotonen  544:     if (HTImServer) {  /* cern_httpd has already done its own translations */
1.45      luotonen  545:        HTAnchor_setPhysical(req->anchor, HTImServer);
1.47      luotonen  546:        StrAllocCopy(addr, HTImServer); /* Oops, queries thru many proxies */
                    547:                                        /* didn't work without this -- AL  */
                    548:     }
1.21      luotonen  549:     else {
1.27      luotonen  550:        char * physical = HTTranslate(addr);
1.21      luotonen  551:        if (!physical) {
1.47      luotonen  552:            free(addr);
1.21      luotonen  553:            return HT_FORBIDDEN;
                    554:        }
                    555:        HTAnchor_setPhysical(req->anchor, physical);
                    556:        free(physical);                 /* free our copy */
1.2       timbl     557:     }
                    558: #else
1.21      luotonen  559:     HTAnchor_setPhysical(req->anchor, addr);
1.70      frystyk   560: #endif /* HT_NO_RULES */
1.2       timbl     561: 
1.77      frystyk   562: #ifdef OLDCODE
1.21      luotonen  563:     access =  HTParse(HTAnchor_physical(req->anchor),
1.27      luotonen  564:                      "file:", PARSE_ACCESS);
1.1       timbl     565: 
1.77      frystyk   566:     if (!override_proxy(addr)) {
1.39      luotonen  567:     /* make sure the using_proxy variable is false */
1.70      frystyk   568:     req->using_proxy = NO;
1.39      luotonen  569: 
1.27      luotonen  570:        char * gateway_parameter, *gateway, *proxy;
                    571: 
1.2       timbl     572:        gateway_parameter = (char *)malloc(strlen(access)+20);
                    573:        if (gateway_parameter == NULL) outofmem(__FILE__, "HTLoad");
1.27      luotonen  574: 
                    575:        /* search for proxy gateways */
1.2       timbl     576:        strcpy(gateway_parameter, "WWW_");
                    577:        strcat(gateway_parameter, access);
                    578:        strcat(gateway_parameter, "_GATEWAY");
                    579:        gateway = (char *)getenv(gateway_parameter); /* coerce for decstation */
1.27      luotonen  580: 
                    581:        /* search for proxy servers */
                    582:        strcpy(gateway_parameter, access);
                    583:        strcat(gateway_parameter, "_proxy");
                    584:        proxy = (char *)getenv(gateway_parameter);
                    585: 
1.2       timbl     586:        free(gateway_parameter);
1.27      luotonen  587: 
1.68      frystyk   588: #ifndef HT_DIRECT_WAIS
1.9       timbl     589:        if (!gateway && 0==strcmp(access, "wais")) {
1.69      frystyk   590:            gateway = HT_DEFAULT_WAIS_GATEWAY;
1.8       timbl     591:        }
                    592: #endif
1.27      luotonen  593: 
1.70      frystyk   594:        if (TRACE && gateway)
                    595:            fprintf(TDEST,"Gateway..... Found: `%s\'\n", gateway);
                    596:        if (TRACE && proxy)
                    597:            fprintf(TDEST,"Proxy....... Found: `%s\'\n", proxy);
1.77      frystyk   598: #endif /* OLD_CODE */
1.70      frystyk   599: 
1.77      frystyk   600:     /*
                    601:     **  Check whether gateway or proxy access has been set up for this url
                    602:     */
                    603:     {
                    604:        char *proxy = HTProxy_getProxy(addr);
                    605:        char *gateway = HTProxy_getGateway(addr);
1.27      luotonen  606: 
1.77      frystyk   607:        /* Proxy servers have precedence over gateway servers */
                    608:        if (proxy) {
                    609:            StrAllocCat(proxy, addr);
1.70      frystyk   610:            req->using_proxy = YES;
1.77      frystyk   611:            HTAnchor_setPhysical(req->anchor, proxy);
                    612:            free(proxy);
                    613:        } else if (gateway) {
1.9       timbl     614:            char * path = HTParse(addr, "",
1.77      frystyk   615:                                  PARSE_HOST + PARSE_PATH + PARSE_PUNCTUATION);
1.9       timbl     616:                /* Chop leading / off to make host into part of path */
                    617:            char * gatewayed = HTParse(path+1, gateway, PARSE_ALL);
1.77      frystyk   618:             HTAnchor_setPhysical(req->anchor, gatewayed);
1.9       timbl     619:            free(path);
                    620:            free(gatewayed);
1.77      frystyk   621:            free(gateway);
                    622:        } else {
                    623:            req->using_proxy = NO;          /* We don't use proxy or gateway */
1.2       timbl     624:        }
                    625:     }
1.77      frystyk   626:     FREE(addr);
1.1       timbl     627: 
1.77      frystyk   628:     /*
                    629:     ** Search registered protocols to find suitable one
                    630:     */
1.1       timbl     631:     {
1.77      frystyk   632:        char *access = HTParse(HTAnchor_physical(req->anchor),"",PARSE_ACCESS);
1.61      frystyk   633:        HTList *cur = protocols;
1.20      luotonen  634:        HTProtocol *p;
1.61      frystyk   635:        if (!cur) {
                    636:            if (TRACE)
1.67      frystyk   637:                fprintf(TDEST, "HTAccess.... NO PROTOCOL MODULES INITIATED\n");
1.61      frystyk   638:        } else {
                    639:            while ((p = (HTProtocol*)HTList_nextObject(cur))) {
1.77      frystyk   640:                if (strcmp(p->name, access)==0) {       /* Case insensitive? */
1.61      frystyk   641:                    HTAnchor_setProtocol(req->anchor, p);
                    642:                    free(access);
                    643:                    return (HT_OK);
                    644:                }
1.1       timbl     645:            }
                    646:        }
1.77      frystyk   647:        free(access);
1.1       timbl     648:     }
1.2       timbl     649:     return HT_NO_ACCESS;
1.1       timbl     650: }
                    651: 
1.59      frystyk   652: /* --------------------------------------------------------------------------*/
                    653: /*                             Document Loader                              */
                    654: /* --------------------------------------------------------------------------*/
1.1       timbl     655: 
                    656: /*             Load a document
                    657: **             ---------------
                    658: **
1.2       timbl     659: **     This is an internal routine, which has an address AND a matching
                    660: **     anchor.  (The public routines are called with one OR the other.)
                    661: **
                    662: ** On entry,
1.15      timbl     663: **     request->
1.35      luotonen  664: **         anchor              a parent anchor with fully qualified
                    665: **                             hypertext reference as its address set
1.15      timbl     666: **         output_format       valid
                    667: **         output_stream       valid on NULL
1.2       timbl     668: **
                    669: ** On exit,
1.59      frystyk   670: **     returns         HT_WOULD_BLOCK  An I/O operation would block
                    671: **                     HT_ERROR        Error has occured
1.2       timbl     672: **                     HT_LOADED       Success
                    673: **                     HT_NO_DATA      Success, but no document loaded.
1.8       timbl     674: **                                     (telnet sesssion started etc)
1.72      frystyk   675: **                     HT_RETRY        if service isn't available before
                    676: **                                     request->retry_after
1.2       timbl     677: */
1.52      frystyk   678: PUBLIC int HTLoad ARGS2(HTRequest *, request, BOOL, keep_error_stack)
1.2       timbl     679: {
1.25      frystyk   680:     char       *arg = NULL;
                    681:     HTProtocol *p;
                    682:     int        status;
                    683: 
1.22      luotonen  684:     if (request->method == METHOD_INVALID)
                    685:        request->method = METHOD_GET;
1.52      frystyk   686:     if (!keep_error_stack) {
                    687:        HTErrorFree(request);
                    688:        request->error_block = NO;
                    689:     }
                    690: 
1.59      frystyk   691:     if ((status = get_physical(request)) < 0) {
                    692:        if (status == HT_FORBIDDEN) {
                    693:            char *url = HTAnchor_address((HTAnchor *) request->anchor);
                    694:            if (url) {
                    695:                HTUnEscape(url);
                    696:                HTErrorAdd(request, ERR_FATAL, NO, HTERR_FORBIDDEN,
                    697:                           (void *) url, (int) strlen(url), "HTLoad");
                    698:                free(url);
                    699:            } else {
                    700:                HTErrorAdd(request, ERR_FATAL, NO, HTERR_FORBIDDEN,
                    701:                           NULL, 0, "HTLoad");
                    702:            }
                    703:        } 
                    704:        return HT_ERROR;                       /* Can't resolve or forbidden */
1.2       timbl     705:     }
1.25      frystyk   706: 
                    707:     if(!(arg = HTAnchor_physical(request->anchor)) || !*arg) 
1.59      frystyk   708:        return HT_ERROR;
1.27      luotonen  709: 
1.56      frystyk   710:     p = (HTProtocol *) HTAnchor_protocol(request->anchor);
1.17      timbl     711:     return (*(p->load))(request);
1.2       timbl     712: }
                    713: 
                    714: 
1.61      frystyk   715: /*             Terminate a LOAD
                    716: **             ----------------
                    717: **
                    718: **     This function looks at the status code from the HTLoadDocument
                    719: **     function and updates logfiles, creates error messages etc.
                    720: **
                    721: **    On Entry,
                    722: **     Status code from load function
                    723: */
                    724: PUBLIC BOOL HTLoadTerminate ARGS2(HTRequest *, request, int, status)
                    725: {
                    726:     char * uri = HTAnchor_address((HTAnchor*)request->anchor);
                    727: 
1.70      frystyk   728:     HTLog_request(request);
1.61      frystyk   729: 
                    730:     /* The error stack might contain general information to the client
                    731:        about what has been going on in the library (not only errors) */
                    732:     if (!HTImProxy && request->error_stack)
                    733:        HTErrorMsg(request);
                    734: 
                    735:     switch (status) {
                    736:       case HT_LOADED:
                    737:        if (PROT_TRACE) {
1.72      frystyk   738:            fprintf(TDEST, "HTAccess.... OK: `%s\' has been accessed.\n", uri);
1.61      frystyk   739:        }
                    740:        break;
                    741: 
1.78    ! frystyk   742:       case HT_OK:
        !           743:        if (PROT_TRACE) {
        !           744:            fprintf(TDEST,"HTAccess.... SOURCE FINISHED LOADING: `%s\'\n",uri);
        !           745:        }
        !           746:        break;
        !           747: 
1.61      frystyk   748:       case HT_NO_DATA:
                    749:        if (PROT_TRACE) {
1.72      frystyk   750:            fprintf(TDEST, "HTAccess.... OK BUT NO DATA: `%s\'\n", uri);
1.61      frystyk   751:        }
                    752:        break;
                    753: 
                    754:       case HT_WOULD_BLOCK:
                    755:        if (PROT_TRACE) {
1.72      frystyk   756:            fprintf(TDEST, "HTAccess.... WOULD BLOCK: `%s\'\n", uri);
                    757:        }
                    758:        break;
                    759: 
                    760:       case HT_RETRY:
                    761:        if (PROT_TRACE) {
                    762:            fprintf(TDEST, "HTAccess.... NOT AVAILABLE, RETRY AT `%s\'\n",uri);
1.61      frystyk   763:        }
                    764:        break;
                    765: 
                    766:       case HT_ERROR:
                    767:        if (HTImProxy)
                    768:            HTErrorMsg(request);                     /* Only on a real error */
                    769:        if (PROT_TRACE) {
1.72      frystyk   770:            fprintf(TDEST, "HTAccess.... ERROR: Can't access `%s\'\n", uri);
1.61      frystyk   771:        }
                    772:        break;
                    773: 
                    774:       default:
                    775:        if (PROT_TRACE) {
1.67      frystyk   776:            fprintf(TDEST, "HTAccess.... **** Internal software error in CERN WWWLib version %s ****\n", HTLibraryVersion);
                    777:            fprintf(TDEST, "............ Please mail libwww@info.cern.ch quoting what software\n");
                    778:            fprintf(TDEST, "............ and version you are using including the URL:\n");
                    779:            fprintf(TDEST, "............ `%s\'\n", uri);
                    780:            fprintf(TDEST, "............ that caused the problem, thanks!\n");
1.61      frystyk   781:        }
                    782:        break;
                    783:     }
                    784:     free(uri);
                    785:     return YES;
                    786: }
                    787: 
                    788: 
1.2       timbl     789: /*             Load a document - with logging etc
                    790: **             ----------------------------------
                    791: **
                    792: **     - Checks or documents already loaded
                    793: **     - Logs the access
                    794: **     - Trace ouput and error messages
                    795: **
1.1       timbl     796: **    On Entry,
1.19      timbl     797: **        request->anchor      valid for of the document to be accessed.
                    798: **      request->childAnchor   optional anchor within doc to be selected
                    799: **
1.15      timbl     800: **       request->anchor   is the node_anchor for the document
                    801: **       request->output_format is valid
                    802: **
1.59      frystyk   803: ** On exit,
                    804: **     returns         HT_WOULD_BLOCK  An I/O operation would block
                    805: **                     HT_ERROR        Error has occured
                    806: **                     HT_LOADED       Success
                    807: **                     HT_NO_DATA      Success, but no document loaded.
                    808: **                                     (telnet sesssion started etc)
1.72      frystyk   809: **                     HT_RETRY        if service isn't available before
                    810: **                                     request->retry_after
1.1       timbl     811: */
1.59      frystyk   812: PRIVATE int HTLoadDocument ARGS2(HTRequest *,  request,
                    813:                                 BOOL,          keep_error_stack)
1.1       timbl     814: 
                    815: {
                    816:     int                status;
                    817:     HText *    text;
1.19      timbl     818:     char * full_address = HTAnchor_address((HTAnchor*)request->anchor);
1.54      frystyk   819: 
1.67      frystyk   820:     if (PROT_TRACE) fprintf (TDEST, "HTAccess.... Loading document %s\n",
1.59      frystyk   821:                             full_address);
1.1       timbl     822: 
1.18      timbl     823:     request->using_cache = NULL;
                    824:     
1.15      timbl     825:     if (!request->output_format) request->output_format = WWW_PRESENT;
1.25      frystyk   826: 
1.67      frystyk   827:     /* Check if document is already loaded or in cache */
1.70      frystyk   828:     if (!request->ForceReload) {
1.67      frystyk   829:        if ((text=(HText *)HTAnchor_document(request->anchor))) {
                    830:            if (PROT_TRACE)
                    831:                fprintf(TDEST, "HTAccess.... Document already in memory.\n");
                    832:            if (request->childAnchor) {
                    833:                HText_selectAnchor(text, request->childAnchor);
                    834:            } else {
                    835:                HText_select(text);     
                    836:            }
                    837:            free(full_address);
                    838:            return HT_LOADED;
1.19      timbl     839:        }
1.67      frystyk   840:        
                    841:        /* Check the Cache */
                    842:        /* Bug: for each format, we only check whether it is ok, we
                    843:           don't check them all and chose the best */
                    844:        if (request->anchor->cacheItems) {
                    845:            HTList * list = request->anchor->cacheItems;
                    846:            HTList * cur = list;
                    847:            HTCacheItem * item;
                    848:            while ((item = (HTCacheItem*)HTList_nextObject(cur))) {
                    849:                HTStream * s;
                    850:                request->using_cache = item;
                    851:                s = HTStreamStack(item->format, request->output_format,
                    852:                                  request->output_stream, request, NO);
                    853:                if (s) {        /* format was suitable */
                    854:                    FILE * fp = fopen(item->filename, "r");
                    855:                    if (PROT_TRACE) 
1.70      frystyk   856:                        fprintf(TDEST, "Cache....... HIT file %s for %s\n",
1.67      frystyk   857:                                item->filename, 
                    858:                                full_address);
                    859:                    if (fp) {
                    860:                        HTFileCopy(fp, s);
                    861:                        (*s->isa->_free)(s); /* close up pipeline */
                    862:                        fclose(fp);
                    863:                        free(full_address);
                    864:                        return HT_LOADED;
                    865:                    } else {
                    866:                        fprintf(TDEST, "***** Can't read cache file %s !\n",
                    867:                                item->filename);
                    868:                    } /* file open ok */
                    869:                } /* stream ok */
                    870:            } /* next cache item */
                    871:        } /* if cache available for this anchor */
1.70      frystyk   872:     } else {                     /* Make sure that we don't use old headers */
                    873:        HTAnchor_clearHeader(request->anchor);
1.77      frystyk   874:        request->RequestMask += HT_PRAGMA;     /* Force reload through proxy */
1.1       timbl     875:     }
1.61      frystyk   876:     if ((status = HTLoad(request, keep_error_stack)) != HT_WOULD_BLOCK)
                    877:        HTLoadTerminate(request, status);
1.19      timbl     878:     free(full_address);
1.59      frystyk   879:     return status;
1.58      frystyk   880: }
1.1       timbl     881: 
                    882: 
                    883: /*             Load a document from absolute name
                    884: **             ---------------
                    885: **
1.59      frystyk   886: ** On Entry,
1.1       timbl     887: **        addr     The absolute address of the document to be accessed.
                    888: **
1.59      frystyk   889: ** On exit,
                    890: **     returns         HT_WOULD_BLOCK  An I/O operation would block
                    891: **                     HT_ERROR        Error has occured
                    892: **                     HT_LOADED       Success
                    893: **                     HT_NO_DATA      Success, but no document loaded.
                    894: **                                     (telnet sesssion started etc)
1.72      frystyk   895: **                     HT_RETRY        if service isn't available before
                    896: **                                     request->retry_after
1.1       timbl     897: */
1.59      frystyk   898: PUBLIC int HTLoadAbsolute ARGS2(CONST char *,addr, HTRequest*, request)
1.2       timbl     899: {
1.19      timbl     900:    HTAnchor * anchor = HTAnchor_findAddress(addr);
                    901:    request->anchor = HTAnchor_parent(anchor);
                    902:    request->childAnchor = ((HTAnchor*)request->anchor == anchor) ?
                    903:                        NULL : (HTChildAnchor*) anchor;
1.52      frystyk   904:    return HTLoadDocument(request, NO);
1.2       timbl     905: }
                    906: 
                    907: 
                    908: /*             Load a document from absolute name to stream
                    909: **             --------------------------------------------
                    910: **
1.59      frystyk   911: ** On Entry,
1.2       timbl     912: **        addr     The absolute address of the document to be accessed.
1.15      timbl     913: **        request->output_stream     if non-NULL, send data down this stream
1.2       timbl     914: **
1.59      frystyk   915: ** On exit,
                    916: **     returns         HT_WOULD_BLOCK  An I/O operation would block
                    917: **                     HT_ERROR        Error has occured
                    918: **                     HT_LOADED       Success
                    919: **                     HT_NO_DATA      Success, but no document loaded.
                    920: **                                     (telnet sesssion started etc)
1.72      frystyk   921: **                     HT_RETRY        if service isn't available before
                    922: **                                     request->retry_after
1.2       timbl     923: */
1.59      frystyk   924: PUBLIC int HTLoadToStream ARGS3(CONST char *,  addr,
                    925:                                BOOL,           filter,
                    926:                                HTRequest*,     request)
1.1       timbl     927: {
1.63      frystyk   928:     HTAnchor * anchor = HTAnchor_findAddress(addr);
                    929:     request->anchor = HTAnchor_parent(anchor);
                    930:     request->childAnchor = ((HTAnchor*)request->anchor == anchor) ? NULL :
1.19      timbl     931:        (HTChildAnchor*) anchor;
1.15      timbl     932:     request->output_stream = request->output_stream;
1.52      frystyk   933:     return HTLoadDocument(request, NO);
1.1       timbl     934: }
                    935: 
                    936: 
                    937: /*             Load a document from relative name
                    938: **             ---------------
                    939: **
1.59      frystyk   940: ** On Entry,
1.2       timbl     941: **        relative_name     The relative address of the document
                    942: **                         to be accessed.
1.1       timbl     943: **
1.59      frystyk   944: ** On exit,
                    945: **     returns         HT_WOULD_BLOCK  An I/O operation would block
                    946: **                     HT_ERROR        Error has occured
                    947: **                     HT_LOADED       Success
                    948: **                     HT_NO_DATA      Success, but no document loaded.
                    949: **                                     (telnet sesssion started etc)
1.72      frystyk   950: **                     HT_RETRY        if service isn't available before
                    951: **                                     request->retry_after
1.1       timbl     952: */
1.59      frystyk   953: PUBLIC int HTLoadRelative ARGS3(CONST char *,          relative_name,
                    954:                                HTParentAnchor *,       here,
                    955:                                HTRequest *,            request)
1.1       timbl     956: {
                    957:     char *             full_address = 0;
1.65      frystyk   958:     int                result;
1.1       timbl     959:     char *             mycopy = 0;
                    960:     char *             stripped = 0;
                    961:     char *             current_address =
1.2       timbl     962:                                HTAnchor_address((HTAnchor*)here);
1.1       timbl     963: 
                    964:     StrAllocCopy(mycopy, relative_name);
                    965: 
                    966:     stripped = HTStrip(mycopy);
                    967:     full_address = HTParse(stripped,
                    968:                   current_address,
                    969:                   PARSE_ACCESS|PARSE_HOST|PARSE_PATH|PARSE_PUNCTUATION);
1.15      timbl     970:     result = HTLoadAbsolute(full_address, request);
1.1       timbl     971:     free(full_address);
                    972:     free(current_address);
                    973:     free(mycopy);  /* Memory leak fixed 10/7/92 -- JFG */
                    974:     return result;
                    975: }
                    976: 
                    977: 
                    978: /*             Load if necessary, and select an anchor
                    979: **             --------------------------------------
                    980: **
1.59      frystyk   981: ** On Entry,
1.1       timbl     982: **        destination              The child or parenet anchor to be loaded.
                    983: **
1.59      frystyk   984: ** On exit,
                    985: **     returns         HT_WOULD_BLOCK  An I/O operation would block
                    986: **                     HT_ERROR        Error has occured
                    987: **                     HT_LOADED       Success
                    988: **                     HT_NO_DATA      Success, but no document loaded.
                    989: **                                     (telnet sesssion started etc)
1.72      frystyk   990: **                     HT_RETRY        if service isn't available before
                    991: **                                     request->retry_after
1.1       timbl     992: */
1.59      frystyk   993: PUBLIC int HTLoadAnchor ARGS2(HTAnchor*, anchor, HTRequest *, request)
1.1       timbl     994: {
1.70      frystyk   995:     if (!anchor || !request)
                    996:        return HT_ERROR;
                    997:     request->anchor = HTAnchor_parent(anchor);
1.59      frystyk   998:     request->childAnchor = ((HTAnchor *) request->anchor == anchor) ?
                    999:        NULL : (HTChildAnchor*) anchor;
                   1000:     return HTLoadDocument(request, NO);
                   1001: }
1.52      frystyk  1002: 
                   1003: 
                   1004: /*             Load if necessary, and select an anchor
                   1005: **             --------------------------------------
                   1006: **
                   1007: **     This function is almost identical to HTLoadAnchor, but it doesn't
                   1008: **     clear the error stack so that the information in there is kept.
                   1009: **
1.59      frystyk  1010: ** On Entry,
1.52      frystyk  1011: **        destination              The child or parenet anchor to be loaded.
                   1012: **
1.59      frystyk  1013: ** On exit,
                   1014: **     returns         HT_WOULD_BLOCK  An I/O operation would block
                   1015: **                     HT_ERROR        Error has occured
                   1016: **                     HT_LOADED       Success
                   1017: **                     HT_NO_DATA      Success, but no document loaded.
                   1018: **                                     (telnet sesssion started etc)
1.72      frystyk  1019: **                     HT_RETRY        if service isn't available before
                   1020: **                                     request->retry_after
1.52      frystyk  1021: */
1.59      frystyk  1022: PUBLIC int HTLoadAnchorRecursive ARGS2(HTAnchor*,      anchor,
                   1023:                                       HTRequest *,     request)
1.52      frystyk  1024: {
1.59      frystyk  1025:     if (!anchor) return HT_ERROR;                                /* No link */
1.52      frystyk  1026:     
                   1027:     request->anchor  = HTAnchor_parent(anchor);
1.59      frystyk  1028:     request->childAnchor = ((HTAnchor *) request->anchor == anchor) ?
                   1029:        NULL : (HTChildAnchor*) anchor;
1.52      frystyk  1030:     
1.59      frystyk  1031:     return HTLoadDocument(request, YES);
                   1032: }
1.1       timbl    1033: 
                   1034: 
/*              Search
**              ------
**  Performs a keyword search on the words given by the user. Adds the
**  keywords to the end of the current address and attempts to open the new
**  address.
**
** On Entry,
**      *keywords       space-separated keyword list or similar search list
**      here            is the anchor the search is to be done on.
**
** On exit,
**      returns         HT_WOULD_BLOCK  An I/O operation would block
**                      HT_ERROR        Error has occurred
**                      HT_LOADED       Success
**                      HT_NO_DATA      Success, but no document loaded.
**                                      (telnet session started etc)
**                      HT_RETRY        if service isn't available before
**                                      request->retry_after
*/
/*      Return the upper-case hexadecimal digit for the low four bits of i.
**      Masking keeps the index inside the digit string for any argument.
*/
PRIVATE char hex ARGS1(int, i)
{
    CONST char * hexchars = "0123456789ABCDEF";
    return hexchars[i & 0x0F];
}
1.1       timbl    1058: 
1.59      frystyk  1059: PUBLIC int HTSearch ARGS3(CONST char *,                keywords,
                   1060:                          HTParentAnchor *,     here,
                   1061:                          HTRequest *,          request)
1.1       timbl    1062: {
1.2       timbl    1063: 
                   1064: #define acceptable \
                   1065: "1234567890abcdefghijlkmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ.-_"
                   1066: 
                   1067:     char *q, *u;
                   1068:     CONST char * p, *s, *e;            /* Pointers into keywords */
                   1069:     char * address = HTAnchor_address((HTAnchor*)here);
1.65      frystyk  1070:     int result;
1.56      frystyk  1071:     char * escaped = (char *) malloc(strlen(keywords)*3+1);
1.2       timbl    1072: 
1.29      frystyk  1073:     /* static CONST BOOL isAcceptable[96] = */
                   1074:     /* static AND const is not good for a gnu compiler! Frystyk 25/02-94 */
1.30      luotonen 1075:     static BOOL isAcceptable[96] =
1.2       timbl    1076:     /*   0 1 2 3 4 5 6 7 8 9 A B C D E F */
                   1077:     {    0,0,0,0,0,0,0,0,0,0,1,0,0,1,1,0,      /* 2x   !"#$%&'()*+,-./  */
                   1078:          1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,      /* 3x  0123456789:;<=>?  */
                   1079:         1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,       /* 4x  @ABCDEFGHIJKLMNO  */
                   1080:         1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,1,       /* 5X  PQRSTUVWXYZ[\]^_  */
                   1081:         0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,       /* 6x  `abcdefghijklmno  */
                   1082:         1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0 };     /* 7X  pqrstuvwxyz{\}~  DEL */
                   1083: 
                   1084:     if (escaped == NULL) outofmem(__FILE__, "HTSearch");
                   1085:     
1.29      frystyk  1086: /* Convert spaces to + and hex escape unacceptable characters */
1.2       timbl    1087: 
1.29      frystyk  1088:     for(s=keywords; *s && WHITE(*s); s++); /*scan */    /* Skip white space */
                   1089:     for(e = s + strlen(s); e>s && WHITE(*(e-1)) ; e--);     /* Skip trailers */
                   1090:     for(q=escaped, p=s; p<e; p++) {                  /* scan stripped field */
1.2       timbl    1091:         int c = (int)TOASCII(*p);
                   1092:         if (WHITE(*p)) {
                   1093:            *q++ = '+';
1.29      frystyk  1094:        } else if (c>=32 && c<=127 && isAcceptable[c-32] != 0) {
1.13      timbl    1095:            *q++ = *p;                  /* 930706 TBL for MVS bug */
1.2       timbl    1096:        } else {
                   1097:            *q++ = '%';
                   1098:            *q++ = hex(c / 16);
                   1099:            *q++ = hex(c % 16);
                   1100:        }
                   1101:     } /* Loop over string */
1.1       timbl    1102:     
1.2       timbl    1103:     *q=0;
                   1104:                                /* terminate escaped sctring */
                   1105:     u=strchr(address, '?');            /* Find old search string */
                   1106:     if (u) *u = 0;                             /* Chop old search off */
1.1       timbl    1107: 
                   1108:     StrAllocCat(address, "?");
1.2       timbl    1109:     StrAllocCat(address, escaped);
                   1110:     free(escaped);
1.15      timbl    1111:     result = HTLoadRelative(address, here, request);
1.1       timbl    1112:     free(address);
1.2       timbl    1113:     
1.1       timbl    1114:     return result;
1.2       timbl    1115: }
                   1116: 
                   1117: 
                   1118: /*             Search Given Indexname
                   1119: **             ------
                   1120: **  Performs a keyword search on word given by the user. Adds the keyword to 
                   1121: **  the end of the current address and attempts to open the new address.
                   1122: **
1.59      frystyk  1123: ** On Entry,
1.2       timbl    1124: **       *keywords     space-separated keyword list or similar search list
                   1125: **     *addres         is name of object search is to be done on.
1.59      frystyk  1126: ** On exit,
                   1127: **     returns         HT_WOULD_BLOCK  An I/O operation would block
                   1128: **                     HT_ERROR        Error has occured
                   1129: **                     HT_LOADED       Success
                   1130: **                     HT_NO_DATA      Success, but no document loaded.
                   1131: **                                     (telnet sesssion started etc)
1.72      frystyk  1132: **                     HT_RETRY        if service isn't available before
                   1133: **                                     request->retry_after
1.2       timbl    1134: */
1.59      frystyk  1135: PUBLIC int HTSearchAbsolute ARGS3(CONST char *,        keywords,
                   1136:                                  CONST char *,         indexname,
                   1137:                                  HTRequest *,          request)
1.2       timbl    1138: {
                   1139:     HTParentAnchor * anchor =
                   1140:        (HTParentAnchor*) HTAnchor_findAddress(indexname);
1.15      timbl    1141:     return HTSearch(keywords, anchor, request);
1.57      howcome  1142: }
                   1143: 
1.70      frystyk  1144: /* --------------------------------------------------------------------------*/
                   1145: /*                             Document Poster                              */
                   1146: /* --------------------------------------------------------------------------*/
                   1147: 
                   1148: /*             Get a save stream for a document
                   1149: **             --------------------------------
                   1150: */
                   1151: PUBLIC HTStream *HTSaveStream ARGS1(HTRequest *, request)
                   1152: {
                   1153:     HTProtocol * p;
                   1154:     int status;
                   1155:     request->method = METHOD_PUT;
                   1156:     status = get_physical(request);
                   1157:     if (status == HT_FORBIDDEN) {
                   1158:        char *url = HTAnchor_address((HTAnchor *) request->anchor);
                   1159:        if (url) {
                   1160:            HTUnEscape(url);
                   1161:            HTErrorAdd(request, ERR_FATAL, NO, HTERR_FORBIDDEN,
                   1162:                       (void *) url, (int) strlen(url), "HTLoad");
                   1163:            free(url);
                   1164:        } else {
                   1165:            HTErrorAdd(request, ERR_FATAL, NO, HTERR_FORBIDDEN,
                   1166:                       NULL, 0, "HTLoad");
                   1167:        }
                   1168:        return NULL;    /* should return error status? */
                   1169:     }
                   1170:     if (status < 0) return NULL; /* @@ error. Can't resolve or forbidden */
                   1171:     
                   1172:     p = (HTProtocol *) HTAnchor_protocol(request->anchor);
                   1173:     if (!p) return NULL;
                   1174:     
                   1175:     return (*p->saveStream)(request);
                   1176:     
                   1177: }
                   1178: 
                   1179: /*     COPY AN ANCHOR
                   1180: **     --------------
                   1181: **  Fetch the URL (possibly local file URL) and send it using either PUT
                   1182: **  or POST to the remote destination using HTTP. The caller can decide the
                   1183: **  exact method used and which HTTP header fields to transmit by setting the
                   1184: **  user fields in the request structure.
                   1185: **
                   1186: **     returns         HT_WOULD_BLOCK  An I/O operation would block
                   1187: **                     HT_ERROR        Error has occured
                   1188: **                     HT_LOADED       Success
                   1189: **                     HT_NO_DATA      Success, but no document loaded.
1.72      frystyk  1190: **                     HT_RETRY        if service isn't available before
                   1191: **                                     request->retry_after
1.70      frystyk  1192: */
1.78    ! frystyk  1193: PUBLIC int HTCopyAnchor ARGS3(HTAnchor *,      src_anchor,
1.70      frystyk  1194:                              HTParentAnchor *, dest_anchor,
                   1195:                              HTRequest *,      dest_req)
                   1196: {
1.78    ! frystyk  1197:     HTRequest *src_req;
        !          1198:     if (!(src_anchor && dest_anchor && dest_req))
1.70      frystyk  1199:        return HT_ERROR;
                   1200: 
                   1201:     if (!(dest_anchor->methods & dest_req->method)) {
                   1202:        char buf[80];
                   1203:        sprintf(buf, "It might not be allowed to %s to this destination, continue?", HTMethod_name(dest_req->method));
                   1204:        if (!HTConfirm(buf))
                   1205:            return HT_ERROR;
                   1206:     }
                   1207: 
1.78    ! frystyk  1208:     /* Get an internal HTRequest structure to handle the source part */
        !          1209:     src_req = HTRequest_new();
        !          1210:     HTAnchor_clearHeader((HTParentAnchor *) src_anchor);
        !          1211:     src_req->ForceReload = YES;
        !          1212: 
        !          1213:     /* Mark the source request so that there only is one HTThreadTerminate()
        !          1214:        function call handling the outcome of the destination request */
        !          1215:     src_req->Source = YES;
        !          1216: 
        !          1217:     /* Use SOURCE but at some point we can introduce format conversion here! */
        !          1218:     src_req->output_format = WWW_SOURCE;
        !          1219: 
        !          1220: #ifdef NO_UNIX_IO
        !          1221:     {
        !          1222:        char * addr = HTAnchor_address((HTAnchor *) src_anchor);
        !          1223:        char *access = HTParse(addr, "", PARSE_ACCESS);
        !          1224:        if (*access && !strcmp(access, "file"))
        !          1225:            dest_req->BlockingIO = YES;
        !          1226:        free(addr);
        !          1227:        free(access);
        !          1228:     }
        !          1229: #endif
        !          1230: 
        !          1231:     dest_req->GenMask += HT_DATE;                       /* Send date header */
        !          1232:     dest_req->CopyRequest = src_req;
        !          1233: 
1.70      frystyk  1234:     /* First open the destination then open the source */
                   1235:     if (HTLoadAnchor((HTAnchor *) dest_anchor, dest_req) != HT_ERROR) {
1.78    ! frystyk  1236:        int status;
        !          1237:        /*
        !          1238:        ** Now make the link between the two request structures. First setup
        !          1239:        ** the output stream of the source so that data get redirected to
        !          1240:        ** the destination. Then set up the call back function so that
        !          1241:        ** the destination can call for more data
        !          1242:        */
1.70      frystyk  1243:        src_req->output_stream = dest_req->input_stream;
                   1244:        dest_req->PostCallBack = HTSocketRead;
1.78    ! frystyk  1245:        if ((status = HTLoadAnchor(src_anchor, src_req)) != HT_LOADED)
        !          1246:            return status;
1.70      frystyk  1247:     }
                   1248:     return HT_ERROR;
                   1249: }
                   1250: 
                   1251: 
                   1252: /*     UPLOAD AN ANCHOR
                   1253: **     ----------------
                   1254: **  Send the contents (in hyperdoc) of the source anchor using either PUT
                   1255: **  or POST to the remote destination using HTTP. The caller can decide the
                   1256: **  exact method used and which HTTP header fields to transmit by setting the
                   1257: **  user fields in the request structure.
                   1258: **
                   1259: **     returns         HT_WOULD_BLOCK  An I/O operation would block
                   1260: **                     HT_ERROR        Error has occured
                   1261: **                     HT_LOADED       Success
                   1262: **                     HT_NO_DATA      Success, but no document loaded.
1.72      frystyk  1263: **                     HT_RETRY        if service isn't available before
                   1264: **                                     request->retry_after
1.70      frystyk  1265: */
                   1266: PUBLIC int HTUploadAnchor ARGS3(HTAnchor *,            src_anchor,
                   1267:                                HTParentAnchor *,       dest_anchor,
                   1268:                                HTRequest *,            dest_req)
                   1269: {
                   1270:     if (!(src_anchor && dest_anchor && dest_req))
                   1271:        return HT_ERROR;
                   1272: 
                   1273:     if (!(dest_anchor->methods & dest_req->method)) {
                   1274:        char buf[80];
                   1275:        sprintf(buf, "It might not be allowed to %s to this destination, continue?", HTMethod_name(dest_req->method));
                   1276:        if (!HTConfirm(buf))
                   1277:            return HT_ERROR;
                   1278:     }
1.77      frystyk  1279: 
                   1280:     /* @@@ NOT FINISHED @@@ */
1.70      frystyk  1281: 
                   1282:     return HT_ERROR;
                   1283: }
                   1284: 
                   1285: /* --------------------------------------------------------------------------*/
                   1286: /*                             Anchor help routines                         */
                   1287: /* --------------------------------------------------------------------------*/
1.57      howcome  1288: 
                   1289: /*
                   1290: **             Find Related Name
                   1291: **
                   1292: **  Creates a string that can be used as a related name when 
                   1293: **  calling HTParse initially. 
                   1294: **  
                   1295: **  The code for this routine originates from the Linemode 
                   1296: **  browser and was moved here by howcome@dxcern.cern.ch
                   1297: **  in order for all clients to take advantage.
                   1298: **
1.59      frystyk  1299: **  The string returned must be freed by the caller
1.57      howcome  1300: */
                   1301: PUBLIC char * HTFindRelatedName NOARGS
                   1302: {
1.59      frystyk  1303:     char* default_default = NULL;            /* Parse home relative to this */
                   1304:     CONST char *host = HTGetHostName(); 
1.57      howcome  1305:     StrAllocCopy(default_default, "file://");
1.59      frystyk  1306:     if (host)
                   1307:        StrAllocCat(default_default, host);
                   1308:     else
                   1309:        StrAllocCat(default_default, "localhost");
                   1310:     {
                   1311:        char wd[HT_MAX_PATH+1];
1.67      frystyk  1312: 
                   1313: #ifdef NO_GETWD
                   1314: #ifdef HAS_GETCWD            /* System V variant SIGN CHANGED TBL 921006 !! */
                   1315:        char *result = (char *) getcwd(wd, sizeof(wd)); 
                   1316: #else
                   1317:        char *result = NULL;
                   1318:        HTAlert("This platform does not support neither getwd nor getcwd\n");
                   1319: #endif
                   1320: #else
                   1321:        char *result = (char *) getwd(wd);
                   1322: #endif
1.59      frystyk  1323:        *(wd+HT_MAX_PATH) = '\0';
1.57      howcome  1324:        if (result) {
                   1325: #ifdef VMS 
                   1326:             /* convert directory name to Unix-style syntax */
                   1327:            char * disk = strchr (wd, ':');
                   1328:            char * dir = strchr (wd, '[');
                   1329:            if (disk) {
                   1330:                *disk = '\0';
                   1331:                StrAllocCat (default_default, "/");  /* needs delimiter */
                   1332:                StrAllocCat (default_default, wd);
                   1333:            }
                   1334:            if (dir) {
                   1335:                char *p;
                   1336:                *dir = '/';  /* Convert leading '[' */
                   1337:                for (p = dir ; *p != ']'; ++p)
                   1338:                        if (*p == '.') *p = '/';
                   1339:                *p = '\0';  /* Cut on final ']' */
                   1340:                StrAllocCat (default_default, dir);
                   1341:            }
1.74      frystyk  1342: #else  /* not VMS */
1.70      frystyk  1343: #ifdef WIN32
                   1344:            char * p = wd ;     /* a colon */
                   1345:            StrAllocCat(default_default, "/");
                   1346:            while( *p != 0 ) { 
                   1347:                if (*p == '\\')                  /* change to one true slash */
                   1348:                    *p = '/' ;
                   1349:                p++;
                   1350:            }
1.74      frystyk  1351:            StrAllocCat( default_default, wd);
                   1352: #else /* not WIN32 */
1.57      howcome  1353:            StrAllocCat (default_default, wd);
1.70      frystyk  1354: #endif /* not WIN32 */
1.67      frystyk  1355: #endif /* not VMS */
1.57      howcome  1356:        }
1.67      frystyk  1357:     }
1.57      howcome  1358:     StrAllocCat(default_default, "/default.html");
                   1359:     return default_default;
1.2       timbl    1360: }
                   1361: 
                   1362: 
                   1363: /*             Generate the anchor for the home page
                   1364: **             -------------------------------------
                   1365: **
                   1366: **     As it involves file access, this should only be done once
                   1367: **     when the program first runs.
1.10      timbl    1368: **     This is a default algorithm -- browser don't HAVE to use this.
                   1369: **     But consistency betwen browsers is STRONGLY recommended!
1.2       timbl    1370: **
1.10      timbl    1371: **     Priority order is:
                   1372: **
                   1373: **             1       WWW_HOME environment variable (logical name, etc)
                   1374: **             2       ~/WWW/default.html
                   1375: **             3       /usr/local/bin/default.html
1.70      frystyk  1376: **             4       http://www.w3.org/default.html
1.10      timbl    1377: **
1.2       timbl    1378: */
                   1379: PUBLIC HTParentAnchor * HTHomeAnchor NOARGS
                   1380: {
1.12      timbl    1381:     char * my_home_document = NULL;
1.70      frystyk  1382:     char * home = (char *) getenv(LOGICAL_DEFAULT);
1.2       timbl    1383:     char * ref;
                   1384:     HTParentAnchor * anchor;
1.1       timbl    1385:     
1.70      frystyk  1386:     /* Someone telnets in, they get a special home */
1.12      timbl    1387:     if (home) {
                   1388:         StrAllocCopy(my_home_document, home);
1.70      frystyk  1389:     } else  if (HTClientHost) {                                    /* Telnet server */
1.12      timbl    1390:        FILE * fp = fopen(REMOTE_POINTER, "r");
                   1391:        char * status;
                   1392:        if (fp) {
1.59      frystyk  1393:            my_home_document = (char*) malloc(HT_MAX_PATH);
                   1394:            status = fgets(my_home_document, HT_MAX_PATH, fp);
1.12      timbl    1395:            if (!status) {
                   1396:                free(my_home_document);
                   1397:                my_home_document = NULL;
                   1398:            }
                   1399:            fclose(fp);
                   1400:        }
                   1401:        if (!my_home_document) StrAllocCopy(my_home_document, REMOTE_ADDRESS);
                   1402:     }
                   1403: 
1.67      frystyk  1404: #ifdef unix
1.10      timbl    1405:     if (!my_home_document) {
                   1406:        FILE * fp = NULL;
1.70      frystyk  1407:        char * home = (char *) getenv("HOME");
1.10      timbl    1408:        if (home) { 
                   1409:            my_home_document = (char *)malloc(
                   1410:                strlen(home)+1+ strlen(PERSONAL_DEFAULT)+1);
                   1411:            if (my_home_document == NULL) outofmem(__FILE__, "HTLocalName");
                   1412:            sprintf(my_home_document, "%s/%s", home, PERSONAL_DEFAULT);
                   1413:            fp = fopen(my_home_document, "r");
                   1414:        }
                   1415:        
                   1416:        if (!fp) {
                   1417:            StrAllocCopy(my_home_document, LOCAL_DEFAULT_FILE);
                   1418:            fp = fopen(my_home_document, "r");
                   1419:        }
1.2       timbl    1420:        if (fp) {
                   1421:            fclose(fp);
                   1422:        } else {
1.62      frystyk  1423:            if (TRACE)
1.67      frystyk  1424:                fprintf(TDEST,
1.62      frystyk  1425:                        "HTBrowse: No local home document ~/%s or %s\n",
                   1426:                        PERSONAL_DEFAULT, LOCAL_DEFAULT_FILE);
1.11      timbl    1427:            free(my_home_document);
                   1428:            my_home_document = NULL;
1.2       timbl    1429:        }
                   1430:     }
1.67      frystyk  1431: #endif
1.70      frystyk  1432:     ref = HTParse(my_home_document ? my_home_document :
                   1433:                  HTClientHost ? REMOTE_ADDRESS : LAST_RESORT, "file:",
                   1434:                  PARSE_ACCESS|PARSE_HOST|PARSE_PATH|PARSE_PUNCTUATION);
1.10      timbl    1435:     if (my_home_document) {
1.62      frystyk  1436:        if (TRACE)
1.67      frystyk  1437:            fprintf(TDEST,
1.62      frystyk  1438:                   "HTAccess.... `%s\' used for custom home page as\n`%s\'\n",
                   1439:                    my_home_document, ref);
1.10      timbl    1440:        free(my_home_document);
1.2       timbl    1441:     }
                   1442:     anchor = (HTParentAnchor*) HTAnchor_findAddress(ref);
                   1443:     free(ref);
                   1444:     return anchor;
1.1       timbl    1445: }
1.26      frystyk  1446: 
                   1447: 
                   1448: /*             Bind an Anchor to the request structure
                   1449: **             ---------------------------------------
                   1450: **
                   1451: **    On Entry,
                   1452: **     anchor          The child or parenet anchor to be binded
                   1453: **     request         The request sturcture
                   1454: **    On Exit,
                   1455: **        returns    YES     Success
                   1456: **                   NO      Failure 
                   1457: **
                   1458: **  Note: Actually the same as HTLoadAnchor() but DOES NOT do the loading
                   1459: **                                             Henrik Frystyk 17/02-94
                   1460: */
                   1461: 
                   1462: PUBLIC BOOL HTBindAnchor ARGS2(HTAnchor*, anchor, HTRequest *, request)
                   1463: {
                   1464:     if (!anchor) return NO;    /* No link */
                   1465:     
                   1466:     request->anchor  = HTAnchor_parent(anchor);
                   1467:     request->childAnchor = ((HTAnchor*)request->anchor == anchor) ? NULL
                   1468:                                        : (HTChildAnchor*) anchor;
                   1469:        
1.29      frystyk  1470:     return YES;
1.70      frystyk  1471: }
1.59      frystyk  1472: 
1.26      frystyk  1473: 

Webmaster