Annotation of libwww/Library/src/HTAccess.c, revision 1.70

1.61      frystyk     1: /*                                                                  HTAccess.c
                      2: **     ACCESS MANAGER
                      3: **
                      4: **     (c) COPYRIGHT CERN 1994.
                      5: **     Please first read the full copyright statement in the file COPYRIGH.
1.1       timbl       6: **
                      7: ** Authors
                      8: **     TBL     Tim Berners-Lee timbl@info.cern.ch
1.4       timbl       9: **     JFG     Jean-Francois Groff jfg@dxcern.cern.ch
1.1       timbl      10: **     DD      Denis DeLaRoca (310) 825-4580  <CSP1DWD@mvs.oac.ucla.edu>
                     11: ** History
                     12: **       8 Jun 92 Telnet hopping prohibited as telnet is not secure TBL
                     13: **     26 Jun 92 When over DECnet, suppressed FTP, Gopher and News. JFG
1.42      frystyk    14: **      6 Oct 92 Moved HTClientHost and HTlogfile into here. TBL
1.1       timbl      15: **     17 Dec 92 Tn3270 added, bug fix. DD
1.2       timbl      16: **      4 Feb 93 Access registration, Search escapes bad chars TBL
1.9       timbl      17: **               PARAMETERS TO HTSEARCH AND HTLOADRELATIVE CHANGED
                     18: **     28 May 93 WAIS gateway explicit if no WAIS library linked in.
1.19      timbl      19: **        Dec 93 Bug change around, more reentrant, etc
1.42      frystyk    20: **     09 May 94 logfile renamed to HTlogfile to avoid clash with WAIS
1.53      duns       21: **      8 Jul 94 Insulate free() from _free structure element.
1.2       timbl      22: ** Bugs
                     23: **     This module assumes that the graphic object is hypertext, as it
1.9       timbl      24: **     needs to select it when it has been loaded.  A superclass needs to be
1.2       timbl      25: **     defined which accepts select and select_anchor.
1.1       timbl      26: */
                     27: 
1.68      frystyk    28: #if !defined(HT_DIRECT_WAIS) && !defined(HT_DEFAULT_WAIS_GATEWAY)
                     29: #define HT_DEFAULT_WAIS_GATEWAY "http://www.w3.org:8001/"
1.54      frystyk    30: #endif
1.8       timbl      31: 
1.67      frystyk    32: /* Library include files */
                     33: #include "tcp.h"
                     34: #include "HTUtils.h"
1.1       timbl      35: #include "HTParse.h"
1.4       timbl      36: #include "HTML.h"              /* SCW */
1.2       timbl      37: #include "HTList.h"
                     38: #include "HText.h"     /* See bugs above */
                     39: #include "HTAlert.h"
1.67      frystyk    40: #include "HTFWrite.h"  /* for cache stuff */
1.70    ! frystyk    41: #include "HTLog.h"
1.17      timbl      42: #include "HTTee.h"
1.46      frystyk    43: #include "HTError.h"
1.67      frystyk    44: #include "HTString.h"
1.57      howcome    45: #include "HTTCP.h"      /* HWL: for HTFindRelatedName */
1.63      frystyk    46: #include "HTFile.h"
1.59      frystyk    47: #include "HTThread.h"
1.63      frystyk    48: #include "HTEvent.h"
1.70    ! frystyk    49: #include "HTInit.h"
1.67      frystyk    50: #ifndef NO_RULES
                     51: #include "HTRules.h"
                     52: #endif
                     53: #include "HTAccess.h"                                   /* Implemented here */
1.2       timbl      54: 
1.54      frystyk    55: /* These flags may be set to modify the operation of this module */
                     56: PUBLIC char * HTClientHost = 0;                 /* Name of remote login host if any */
1.70    ! frystyk    57: PUBLIC BOOL HTSecure = NO;              /* Disable access for telnet users? */
1.41      luotonen   58: 
1.43      luotonen   59: PUBLIC char * HTImServer = NULL;/* cern_httpd sets this to the translated URL*/
                         /* When HTImServer is non-NULL (and HT_NO_RULES is not defined),
                            get_physical() uses it as the physical address and does not
                            call HTTranslate() */
1.70    ! frystyk    60: PUBLIC BOOL HTImProxy = NO;                       /* cern_httpd as a proxy? */
1.1       timbl      61: 
1.70    ! frystyk    62: PRIVATE HTList * protocols = NULL;           /* List of registered protocols */
                         /* The list is created on the first HTRegisterProtocol() call and
                            released (pointer reset to NULL) by HTDisposeProtocols() */
1.43      luotonen   63: 
1.63      frystyk    64: /* Superclass defn: only the leading isa member is declared in this file */
1.24      timbl      65: struct _HTStream {
                     66:        HTStreamClass * isa;
                     67:        /* ... */
                     68: };
                     69: 
1.59      frystyk    70: /* --------------------------------------------------------------------------*/
                     71: /*                     Management of the HTRequest structure                */
                     72: /* --------------------------------------------------------------------------*/
                     73: 
1.15      timbl      74: /*     Create  a request structure
                     75: **     ---------------------------
                         **     Allocates a zero-filled HTRequest with calloc() and then sets the
                         **     non-zero defaults: an empty conversions list, output_format
                         **     WWW_PRESENT, and the DEFAULT_HEADERS / DEFAULT_ENTITY_HEADERS masks.
                         **
                         **     Returns the new request. If calloc() fails, outofmem() is called;
                         **     the pointer is dereferenced unconditionally afterwards, so outofmem()
                         **     presumably does not return - TODO confirm.
                     76: */
                     77: PUBLIC HTRequest * HTRequest_new NOARGS
                     78: {
1.28      luotonen   79:     HTRequest * me = (HTRequest*) calloc(1, sizeof(*me));  /* zero fill */
1.15      timbl      80:     if (!me) outofmem(__FILE__, "HTRequest_new()");
                     81:     
1.70    ! frystyk    82:     me->conversions    = HTList_new();     /* No conversions registered yet */
        !            83:     me->output_format  = WWW_PRESENT;      /* default it to present to user */
        !            84:     me->HeaderMask     = DEFAULT_HEADERS;             /* Send these headers */
        !            85:     me->EntityMask     = DEFAULT_ENTITY_HEADERS;              /* Also these */
1.15      timbl      86:     return me;
                     87: }
                     88: 
                     89: 
1.49      frystyk    90: /*     Clear  a request structure
                     91: **     ---------------------------
                     92: **     This function clears the request structure so that only the
                     93: **     conversions remain. Everything else is as if it was created from
                     94: **     scratch.
                         **
                         **     A NULL req is reported on the trace stream (when TRACE is on) and
                         **     otherwise ignored.
                         **
                         **     NOTE(review): unlike HTRequest_new(), HeaderMask and EntityMask are
                         **     not restored to their defaults here; they stay zero after memset().
                         **     NOTE(review): pointer members other than conversions are zeroed
                         **     without an explicit free in this function (HTRequest_delete() FREEs
                         **     redirect, authenticate, WWWAAScheme, WWWAARealm and WWWprotection)
                         **     - confirm whether HTErrorFree()/HTAACleanup() release them.
                     95: */
                     96: PUBLIC void HTRequest_clear ARGS1(HTRequest *, req)
                     97: {
                     98:     HTList *conversions;
                     99:     if (!req) {
                    100:        if (TRACE)
1.67      frystyk   101:            fprintf(TDEST, "Clear....... request: Bad argument!\n");
1.49      frystyk   102:        return;
                    103:     }
                    104:     conversions = req->conversions;                 /* Save the conversions */
                    105:     HTErrorFree(req);
                    106:     HTAACleanup(req);
                    107:     memset(req, '\0', sizeof(HTRequest));     /* Wipe every member to zero */
                    108: 
                    109:     /* Now initialize as from scratch but with the old list of conversions */
                    110:     req->conversions = conversions;
                    111:     req->output_format = WWW_PRESENT;      /* default it to present to user */
                    112: }
                    113: 
                    114: 
1.20      luotonen  115: /*     Delete a request structure
                    116: **     --------------------------
                         **     Releases the members freed below and then the request itself.
                         **     A NULL req is a no-op.
                         **
                         **     NOTE(review): req->redirect is passed to FREE() twice (lines 121
                         **     and 128). This is only harmless if the FREE macro resets the pointer
                         **     to NULL after freeing - TODO confirm against its definition.
                    117: */
                    118: PUBLIC void HTRequest_delete ARGS1(HTRequest *, req)
                    119: {
                    120:     if (req) {
1.59      frystyk   121:        FREE(req->redirect);
                    122:        FREE(req->authenticate);
                    123:        HTFormatDelete(req);
1.46      frystyk   124:        HTErrorFree(req);
1.34      frystyk   125:        HTAACleanup(req);
1.61      frystyk   126: 
                    127:        /* These are temporary until we get a MIME thingy */
                    128:        FREE(req->redirect);
                    129:        FREE(req->WWWAAScheme);
                    130:        FREE(req->WWWAARealm);
                    131:        FREE(req->WWWprotection);
                    132: 
1.34      frystyk   133:        FREE(req);
1.20      luotonen  134:     }
                    135: }
                    136: 
1.59      frystyk   137: /* --------------------------------------------------------------------------*/
                    138: /*                     Management of HTTP Methods                           */
                    139: /* --------------------------------------------------------------------------*/
1.20      luotonen  140: 
1.70    ! frystyk   141: static char *method_names[] =
                         /* Name table for HTMethod_enum() and HTMethod_name(). Both functions
                            address entries by fixed offset (+1 .. +7), so the order below must
                            not change; entry 0 is the name returned for unknown methods and the
                            table ends with a NULL sentinel. */
1.22      luotonen  142: {
                    143:     "INVALID-METHOD",
                    144:     "GET",
                    145:     "HEAD",
                    146:     "POST",
                    147:     "PUT",
                    148:     "DELETE",
                    149:     "LINK",
                    150:     "UNLINK",
                    151:     NULL
                    152: };
                    153: 
                    154: /*     Get method enum value
                    155: **     ---------------------
                         **     Maps a method name to its HTMethod value by comparing it, with
                         **     strcmp() (so case-sensitively), against entries 1..7 of
                         **     method_names. A NULL name or a name that matches no entry yields
                         **     METHOD_INVALID.
                    156: */
1.70    ! frystyk   157: PUBLIC HTMethod HTMethod_enum ARGS1(CONST char *, name)
1.22      luotonen  158: {
                    159:     if (name) {
1.70    ! frystyk   160:        if (!strcmp(name, *(method_names+1)))
        !           161:            return METHOD_GET;
        !           162:        else if (!strcmp(name, *(method_names+2)))
        !           163:            return METHOD_HEAD;
        !           164:        else if (!strcmp(name, *(method_names+3)))
        !           165:            return METHOD_POST;
        !           166:        else if (!strcmp(name, *(method_names+4)))
        !           167:            return METHOD_PUT;
        !           168:        else if (!strcmp(name, *(method_names+5)))
        !           169:            return METHOD_DELETE;
        !           170:        else if (!strcmp(name, *(method_names+6)))
        !           171:            return METHOD_LINK;
        !           172:        else if (!strcmp(name, *(method_names+7)))
        !           173:            return METHOD_UNLINK;
1.22      luotonen  174:     }
                    175:     return METHOD_INVALID;
                    176: }
                    177: 
                    178: 
                    179: /*     Get method name
                    180: **     ---------------
1.70    ! frystyk   181: **     Returns pointer to entry in static table in memory
                         **     (method_names); the caller must not free or modify it. Any value
                         **     not matched below yields entry 0, "INVALID-METHOD".
                         **
                         **     NOTE(review): METHOD_GET is tested with a bitwise AND while every
                         **     other method is tested with ==, so any value sharing a set bit with
                         **     METHOD_GET is reported as "GET" - confirm this is intended.
                         **     The block under #if 0 after the chain is the older table-index
                         **     lookup; it is compiled out.
1.22      luotonen  182: */
1.70    ! frystyk   183: PUBLIC CONST char * HTMethod_name ARGS1(HTMethod, method)
1.22      luotonen  184: {
1.70    ! frystyk   185:     if (method & METHOD_GET)
        !           186:        return *(method_names+1);
        !           187:     else if (method == METHOD_HEAD)
        !           188:        return *(method_names+2);
        !           189:     else if (method == METHOD_POST)
        !           190:        return *(method_names+3);
        !           191:     else if (method == METHOD_PUT)
        !           192:        return *(method_names+4);
        !           193:     else if (method == METHOD_DELETE)
        !           194:        return *(method_names+5);
        !           195:     else if (method == METHOD_LINK)
        !           196:        return *(method_names+6);
        !           197:     else if (method == METHOD_UNLINK)
        !           198:        return *(method_names+7);
        !           199:     else
        !           200:        return *method_names;
        !           201: #if 0
        !           202:     if ((int)METHOD_INVALID  && (int)method < (int)MAX_METHODS)
1.22      luotonen  203:        return method_names[(int)method];
                    204:     else
                    205:        return method_names[(int)METHOD_INVALID];
1.70    ! frystyk   206: #endif
1.22      luotonen  207: }
                    208: 
                    209: 
1.70    ! frystyk   210: #if 0
        !           211: /* NOT NEEDED AS METHODS IS NOT A BIT-FLAG */
                         /* NOTE(review): the banner above conflicts with HTMethod_name(), which
                            tests METHOD_GET with a bitwise AND; it may have been meant to read
                            "IS NOW A BIT-FLAG" - TODO confirm. This function is compiled out. */
1.22      luotonen  212: /*     Is method in a list of method names?
                    213: **     -----------------------------------
                         **     Walks list and compares each item with the name of method using
                         **     strcasecomp(); returns YES on the first match, otherwise NO.
                    214: */
                    215: PUBLIC BOOL HTMethod_inList ARGS2(HTMethod,    method,
                    216:                                  HTList *,     list)
                    217: {
                    218:     char * method_name = HTMethod_name(method);
                    219:     HTList *cur = list;
                    220:     char *item;
                    221: 
                    222:     while (NULL != (item = (char*)HTList_nextObject(cur))) {
1.67      frystyk   223:        if (PROT_TRACE)
                    224:            fprintf(TDEST, " %s", item);
1.22      luotonen  225:        if (0==strcasecomp(item, method_name))
                    226:            return YES;
                    227:     }
                    228:     return NO; /* Not found */
                    229: }
1.70    ! frystyk   230: #endif
1.22      luotonen  231: 
1.59      frystyk   232: /* --------------------------------------------------------------------------*/
                    233: /*                   Management of the HTProtocol structure                 */
                    234: /* --------------------------------------------------------------------------*/
1.22      luotonen  235: 
1.63      frystyk   236: /*
                    237: **     Register a Protocol as an active access method
                         **
                         **     Creates the module-level protocols list on first use and adds the
                         **     given pointer to it. The pointer is stored as-is (not copied) and is
                         **     not checked for NULL. Always returns YES; the result of
                         **     HTList_addObject() is not inspected.
1.1       timbl     238: */
1.56      frystyk   239: PUBLIC BOOL HTRegisterProtocol ARGS1(HTProtocol *, protocol)
1.1       timbl     240: {
                    241:     if (!protocols) protocols = HTList_new();
1.59      frystyk   242:     HTList_addObject(protocols, (void *) protocol);
1.1       timbl     243:     return YES;
                    244: }
                    245: 
1.63      frystyk   246: 
                    247: /*
                    248: **     Delete the list of registered access methods. This is called from
                    249: **     within HTLibTerminate. Written by Eric Sink, eric@spyglass.com
                         **
                         **     Only the list is deleted (HTList_delete) and the pointer reset to
                         **     NULL, so a later HTRegisterProtocol() starts a fresh list. Nothing
                         **     here frees the registered HTProtocol objects themselves.
                    250: */
                    251: PUBLIC void HTDisposeProtocols NOARGS
                    252: {
                    253:     if (protocols) {
                    254:        HTList_delete(protocols);
                    255:        protocols = NULL;
                    256:     }
                    257: }
                    258: 
                    259: 
                    260: /*
1.65      frystyk   261: **     Is a protocol registered as BLOCKING? The default behavior registered
                    262: **     when the protocol module was registered can be overridden by the
                    263: **     BlockingIO field in the HTRequest structure
                         **
                         **     Returns YES when me->BlockingIO is set, or when the request has an
                         **     anchor whose protocol has block == SOC_BLOCK. Returns NO otherwise,
                         **     including for a NULL request or a request without anchor/protocol.
1.63      frystyk   264: */
1.59      frystyk   265: PUBLIC BOOL HTProtocolBlocking ARGS1(HTRequest *, me)
                    266: {
1.65      frystyk   267:     if (me) {
                    268:        return (me->BlockingIO || (me->anchor && me->anchor->protocol &&
                    269:                ((HTProtocol *) (me->anchor->protocol))->block == SOC_BLOCK));
                    270:     }
                    271:     return NO;
1.59      frystyk   272: }
                    273: 
1.61      frystyk   274: /* --------------------------------------------------------------------------*/
                    275: /*                Initialization and Termination of the Library             */
                    276: /* --------------------------------------------------------------------------*/
                    277: 
1.1       timbl     278: /*     Register all known protocols
                    279: **     ----------------------------
                    280: **
1.61      frystyk   281: **     Add to or subtract from this list if you add or remove protocol
                    282: **     modules. This function is called from HTLibInit()
1.1       timbl     283: **
1.70    ! frystyk   284: **     Compiling with HT_NO_INIT prevents all known protocols from being
        !           285: **     forced in at link time.
                         **
                         **     When DECNET is defined, FTP, News, (NNTP), Gopher and WAIS are not
                         **     registered. NNTP is registered only with NEW_CODE, WAIS only with
                         **     HT_DIRECT_WAIS.
                         **
                         **     NOTE(review): with NEW_CODE defined, HTFTP, HTNews and HTGopher are
                         **     declared by two consecutive GLOBALREF lines (293 and 295).
1.1       timbl     286: */
1.70    ! frystyk   287: #ifndef HT_NO_INIT
1.61      frystyk   288: PRIVATE void HTAccessInit NOARGS
1.1       timbl     289: {
1.59      frystyk   290:     GLOBALREF HTProtocol HTTP, HTFile, HTTelnet, HTTn3270, HTRlogin;
1.1       timbl     291: #ifndef DECNET
1.54      frystyk   292: #ifdef NEW_CODE
1.59      frystyk   293:     GLOBALREF  HTProtocol HTFTP, HTNews, HTNNTP, HTGopher;
                    294: #endif
                    295:     GLOBALREF  HTProtocol HTFTP, HTNews, HTGopher;
1.68      frystyk   296: #ifdef HT_DIRECT_WAIS
1.59      frystyk   297:     GLOBALREF  HTProtocol HTWAIS;
1.54      frystyk   298: #endif
1.42      frystyk   299: 
1.2       timbl     300:     HTRegisterProtocol(&HTFTP);
                    301:     HTRegisterProtocol(&HTNews);
1.54      frystyk   302: #ifdef NEW_CODE
                    303:     HTRegisterProtocol(&HTNNTP);
                    304: #endif
1.2       timbl     305:     HTRegisterProtocol(&HTGopher);
1.42      frystyk   306: 
1.68      frystyk   307: #ifdef HT_DIRECT_WAIS
1.3       timbl     308:     HTRegisterProtocol(&HTWAIS);
                    309: #endif
1.1       timbl     310: 
1.54      frystyk   311: #endif /* !DECNET */
1.2       timbl     312:     HTRegisterProtocol(&HTTP);
                    313:     HTRegisterProtocol(&HTFile);
                    314:     HTRegisterProtocol(&HTTelnet);
                    315:     HTRegisterProtocol(&HTTn3270);
                    316:     HTRegisterProtocol(&HTRlogin);
1.1       timbl     317: }
1.70    ! frystyk   318: #endif /* !HT_NO_INIT */
1.61      frystyk   319: 
                    320: /*                                                                  HTLibInit
                    321: **
                    322: **     This function initiates the Library and it MUST be called when
                    323: **     starting up an application. See also HTLibTerminate()
                         **
                         **     Steps, in order:
                         **       - with NO_STDIO: open TRACE_FILE in append mode as TDEST and make
                         **         it line buffered; on failure tracing is switched off
                         **       - unless HT_NO_INIT: HTAccessInit() and HTFileInit()
                         **       - create the global HTConversions list if it does not exist
                         **       - with WWWLIB_SIG: HTSetSignal()
                         **       - with _WINDOWS: start WinSock and check its version
                         **       - HTGetTimeZoneOffset(), HTTmp_setRoot(NULL), HTThreadInit()
                         **
                         **     Returns YES on success. NO is returned only on the _WINDOWS path,
                         **     when WinSock cannot be started or is too old; the steps already
                         **     performed above are not undone in that case.
                         **
                         **     NOTE(review): WSACleanup() is also called when WSAStartup() itself
                         **     failed - check against the WinSock documentation whether that call
                         **     is appropriate.
                    324: */
                    325: PUBLIC BOOL HTLibInit NOARGS
                    326: {
1.67      frystyk   327: #ifdef NO_STDIO                                                  /* Open trace file */
                    328:     if ((TDEST = fopen(TRACE_FILE, "a")) != NULL) {
                    329:        if (setvbuf(TDEST, NULL, _IOLBF, 0) < 0) {  /* Change to line buffer */
1.70    ! frystyk   330:            printf("WWWLibInit.. Can't initialize TRACE buffer - no TRACE\n");
1.67      frystyk   331:            fclose(TDEST);
                    332:            TDEST = NULL;
                    333:            WWW_TraceFlag = 0;
                    334:        }
                    335:     } else
                    336:        WWW_TraceFlag = 0;            /* No trace file, so no tracing */
                    337: #endif
                    338: 
1.61      frystyk   339:     if (TRACE)
1.67      frystyk   340:        fprintf(TDEST, "WWWLibInit.. INITIALIZING LIBRARY OF COMMON CODE\n");
1.63      frystyk   341: 
1.70    ! frystyk   342: /* Shall we initialize the bindings between (access method, protocol module),
        !           343:    (file extension, media type)? */
        !           344: #ifndef HT_NO_INIT
        !           345:     HTAccessInit();             /* Bind access schemes and protocol modules */
        !           346:     HTFileInit();                   /* Bind file extensions and media types */
1.63      frystyk   347: #endif
1.61      frystyk   348: 
1.70    ! frystyk   349:     /* Put up a global conversion list, but leave initialization
        !           350:        to the application */
        !           351:     if (!HTConversions)
        !           352:        HTConversions = HTList_new();
        !           353: 
1.62      frystyk   354: #ifdef WWWLIB_SIG
1.61      frystyk   355:     /* On Solaris (and others?) we get a BROKEN PIPE signal when connecting
1.67      frystyk   356:     ** to a port where we should get `connection refused'. We ignore this 
1.61      frystyk   357:     ** using the following function call
                    358:     */
                    359:     HTSetSignal();                                /* Set signals in library */
1.1       timbl     360: #endif
                    361: 
1.67      frystyk   362: #ifdef _WINDOWS
                    363:     /*
                    364:     ** Initialise WinSock DLL. This must also be shut down! PMH
                    365:     */
                    366:     {
                    367:         WSADATA            wsadata;
                    368:        if (WSAStartup(DESIRED_WINSOCK_VERSION, &wsadata)) {
                    369:            if (TRACE)
                    370:                fprintf(TDEST, "WWWLibInit.. Can't initialize WinSoc\n");
                    371:             WSACleanup();
                    372:             return NO;
                    373:         }
                    374:         if (wsadata.wVersion < MINIMUM_WINSOCK_VERSION) {
                    375:             if (TRACE)
                    376:                fprintf(TDEST, "WWWLibInit.. Bad version of WinSoc\n");
                    377:             WSACleanup();
                    378:             return NO;
                    379:         }
                    380:     }
                    381: #endif /* _WINDOWS */
                    382: 
1.70    ! frystyk   383:     HTGetTimeZoneOffset();                          /* Find offset from GMT */
        !           384:     HTTmp_setRoot(NULL);                    /* Set up default tmp directory */
1.61      frystyk   385:     HTThreadInit();                                /* Initialize bit arrays */
                    386:     return YES;
                    387: }
                    388: 
                    389: 
                    390: /*                                                              HTLibTerminate
                    391: **
                    392: **     This function frees memory kept by the Library and should be called
1.63      frystyk   393: **     before exit of an application (if you are on a PC platform)
                         **
                         **     Calls the cleanup routines listed below, shuts WinSock down under
                         **     _WINDOWS, and under NO_STDIO closes the trace file and switches
                         **     tracing off. Always returns YES.
1.61      frystyk   394: */
                    395: PUBLIC BOOL HTLibTerminate NOARGS
                    396: {
                    397:     if (TRACE)
1.67      frystyk   398:        fprintf(TDEST, "WWWLibTerm.. Cleaning up LIBRARY OF COMMON CODE\n");
1.63      frystyk   399:     HTAtom_deleteAll();
                    400:     HTDisposeProtocols();
                    401:     HTDisposeConversions();
                    402:     HTFile_deleteSuffixes();
                    403:     HTTCPCacheRemoveAll();
                    404:     HTFreeHostName();
                    405:     HTFreeMailAddress();
1.70    ! frystyk   406:     HTCache_freeRoot();
        !           407:     HTTmp_freeRoot();
1.67      frystyk   408: 
                    409: #ifdef _WINDOWS
                    410:     WSACleanup();
                    411: #endif
                    412: 
                    413: #ifdef NO_STDIO                                                 /* Close trace file */
                    414:     if (TDEST) {
                    415:        fclose(TDEST);
                    416:        TDEST = NULL;
                    417:        WWW_TraceFlag = 0;
                    418:     }
                    419: #endif
1.61      frystyk   420:     return YES;
                    421: }
                    422: 
1.59      frystyk   423: /* --------------------------------------------------------------------------*/
                    424: /*                     Physical Anchor Address Manager                      */
                    425: /* --------------------------------------------------------------------------*/
1.33      luotonen  426: 
                    427: /*                                                     override_proxy()
                    428: **
                    429: **     Check the no_proxy environment variable to get the list
                    430: **     of hosts for which proxy server is not consulted.
                    431: **
                    432: **     no_proxy is a comma- or space-separated list of machine
                    433: **     or domain names, with optional :port part.  If no :port
                    434: **     part is present, it applies to all ports on that domain.
                    435: **
                    436: **     Example:
                    437: **             no_proxy="cern.ch,some.domain:8001"
                    438: **
                         **     On entry,
                         **     addr            the address (URL) about to be accessed; may be NULL
                         **
                         **     On exit,
                         **     returns         YES     an entry of no_proxy matches the host of addr
                         **                     NO      no match, no_proxy unset, addr NULL, or addr
                         **                             has no host part
                         **
                         **     An entry matches when it equals the tail of the host name
                         **     (case-sensitive strncmp) and its port, if given, equals the port of
                         **     addr. The port of addr is taken from the URL, else from the access
                         **     scheme (http 80, gopher 70, ftp 21), else 80.
                    439: */
                    440: PRIVATE BOOL override_proxy ARGS1(CONST char *, addr)
                    441: {
                    442:     CONST char * no_proxy = getenv("no_proxy");
                    443:     char * p = NULL;
                    444:     char * host = NULL;
                    445:     int port = 0;
                    446:     int h_len = 0;
                    447: 
                    448:     if (!no_proxy || !addr || !(host = HTParse(addr, "", PARSE_HOST)))
                    449:        return NO;
                    450:     if (!*host) { free(host); return NO; }     /* No host part in addr */
                    451: 
1.34      frystyk   452:     if ((p = strchr(host, ':')) != NULL) {     /* Port specified */
1.33      luotonen  453:        *p++ = 0;                       /* Chop off port */
                    454:        port = atoi(p);
                    455:     }
                    456:     else {                             /* Use default port */
                    457:        char * access = HTParse(addr, "", PARSE_ACCESS);
                    458:        if (access) {
                    459:            if      (!strcmp(access,"http"))    port = 80;
                    460:            else if (!strcmp(access,"gopher"))  port = 70;
                    461:            else if (!strcmp(access,"ftp"))     port = 21;
                    462:            free(access);
                    463:        }
                    464:     }
                    465:     if (!port) port = 80;              /* Default */
                    466:     h_len = strlen(host);
                    467: 
                    468:     while (*no_proxy) {                /* One list entry per iteration */
                    469:        CONST char * end;
                    470:        CONST char * colon = NULL;
                    471:        int templ_port = 0;
                    472:        int t_len;
                    473: 
                    474:        while (*no_proxy && (WHITE(*no_proxy) || *no_proxy==','))
                    475:            no_proxy++;                 /* Skip whitespace and separators */
                    476: 
                    477:        end = no_proxy;
                    478:        while (*end && !WHITE(*end) && *end != ',') {   /* Find separator */
                    479:            if (*end==':') colon = end;                 /* Port number given */
                    480:            end++;
                    481:        }
                    482: 
                    483:        if (colon) {
                    484:            templ_port = atoi(colon+1);
                    485:            t_len = colon - no_proxy;   /* Name length without the :port */
                    486:        }
                    487:        else {
                    488:            t_len = end - no_proxy;
                    489:        }
                    490: 
                    491:        if ((!templ_port || templ_port == port)  &&
                    492:            (t_len > 0  &&  t_len <= h_len  &&
                    493:             !strncmp(host + h_len - t_len, no_proxy, t_len))) {
                    494:            free(host);
                    495:            return YES;
                    496:        }
                    497:        if (*end) no_proxy = end+1;     /* Step past the separator */
                    498:        else break;
                    499:     }
                    500: 
                    501:     free(host);
                    502:     return NO;
                    503: }
                    504: 
                    505: 
                    506: 
1.2       timbl     507: /*             Find physical name and access protocol
                    508: **             --------------------------------------
1.1       timbl     509: **
                    510: **
                    511: ** On entry,
                    512: **     addr            must point to the fully qualified hypertext reference.
                    513: **     anchor          a parent anchor whose address is addr
                    514: **
1.59      frystyk   515: ** On exit,    
                    516: **     returns         HT_NO_ACCESS            no protocol module found
                    517: **                     HT_FORBIDDEN            Error has occurred.
1.2       timbl     518: **                     HT_OK                   Success
1.1       timbl     519: **
                    520: */
1.21      luotonen  521: PRIVATE int get_physical ARGS1(HTRequest *, req)
                    522: {    
1.1       timbl     523:     char * access=0;   /* Name of access method */
1.21      luotonen  524:     char * addr = HTAnchor_address((HTAnchor*)req->anchor);    /* free me */
1.27      luotonen  525: 
1.70    ! frystyk   526: #ifndef HT_NO_RULES
1.47      luotonen  527:     if (HTImServer) {  /* cern_httpd has already done its own translations */
1.45      luotonen  528:        HTAnchor_setPhysical(req->anchor, HTImServer);
1.47      luotonen  529:        StrAllocCopy(addr, HTImServer); /* Oops, queries thru many proxies */
                    530:                                        /* didn't work without this -- AL  */
                    531:     }
1.21      luotonen  532:     else {
1.27      luotonen  533:        char * physical = HTTranslate(addr);
1.21      luotonen  534:        if (!physical) {
1.47      luotonen  535:            free(addr);
1.21      luotonen  536:            return HT_FORBIDDEN;
                    537:        }
                    538:        HTAnchor_setPhysical(req->anchor, physical);
                    539:        free(physical);                 /* free our copy */
1.2       timbl     540:     }
                    541: #else
1.21      luotonen  542:     HTAnchor_setPhysical(req->anchor, addr);
1.70    ! frystyk   543: #endif /* HT_NO_RULES */
1.2       timbl     544: 
1.21      luotonen  545:     access =  HTParse(HTAnchor_physical(req->anchor),
1.27      luotonen  546:                      "file:", PARSE_ACCESS);
1.1       timbl     547: 
                    548: /*     Check whether gateway access has been set up for this
1.8       timbl     549: **     This function can be replaced by the rule system above.
1.1       timbl     550: */
1.70    ! frystyk   551: #ifndef HT_NO_PROXY
1.39      luotonen  552: 
                    553:     /* make sure the using_proxy variable is false */
1.70    ! frystyk   554:     req->using_proxy = NO;
1.39      luotonen  555: 
1.33      luotonen  556:     if (!override_proxy(addr)) {
1.27      luotonen  557:        char * gateway_parameter, *gateway, *proxy;
                    558: 
1.2       timbl     559:        gateway_parameter = (char *)malloc(strlen(access)+20);
                    560:        if (gateway_parameter == NULL) outofmem(__FILE__, "HTLoad");
1.27      luotonen  561: 
                    562:        /* search for proxy gateways */
1.2       timbl     563:        strcpy(gateway_parameter, "WWW_");
                    564:        strcat(gateway_parameter, access);
                    565:        strcat(gateway_parameter, "_GATEWAY");
                    566:        gateway = (char *)getenv(gateway_parameter); /* coerce for decstation */
1.27      luotonen  567: 
                    568:        /* search for proxy servers */
                    569:        strcpy(gateway_parameter, access);
                    570:        strcat(gateway_parameter, "_proxy");
                    571:        proxy = (char *)getenv(gateway_parameter);
                    572: 
1.2       timbl     573:        free(gateway_parameter);
1.27      luotonen  574: 
1.68      frystyk   575: #ifndef HT_DIRECT_WAIS
1.9       timbl     576:        if (!gateway && 0==strcmp(access, "wais")) {
1.69      frystyk   577:            gateway = HT_DEFAULT_WAIS_GATEWAY;
1.8       timbl     578:        }
                    579: #endif
1.27      luotonen  580: 
1.70    ! frystyk   581:        if (TRACE && gateway)
        !           582:            fprintf(TDEST,"Gateway..... Found: `%s\'\n", gateway);
        !           583:        if (TRACE && proxy)
        !           584:            fprintf(TDEST,"Proxy....... Found: `%s\'\n", proxy);
        !           585: 
1.27      luotonen  586:        /* proxy servers have precedence over gateway servers */
1.60      frystyk   587:        if (proxy && *proxy) {
1.27      luotonen  588:            char * gatewayed=0;
                    589: 
                    590:             StrAllocCopy(gatewayed,proxy);
                    591:            StrAllocCat(gatewayed,addr);
1.70    ! frystyk   592:            req->using_proxy = YES;
1.27      luotonen  593:            HTAnchor_setPhysical(req->anchor, gatewayed);
                    594:            free(gatewayed);
                    595:            free(access);
                    596: 
                    597:            access =  HTParse(HTAnchor_physical(req->anchor),
                    598:                              "http:", PARSE_ACCESS);
1.60      frystyk   599:        } else if (gateway && *gateway) {
1.9       timbl     600:            char * path = HTParse(addr, "",
                    601:                PARSE_HOST + PARSE_PATH + PARSE_PUNCTUATION);
                    602:                /* Chop leading / off to make host into part of path */
                    603:            char * gatewayed = HTParse(path+1, gateway, PARSE_ALL);
                    604:            free(path);
1.21      luotonen  605:             HTAnchor_setPhysical(req->anchor, gatewayed);
1.9       timbl     606:            free(gatewayed);
1.2       timbl     607:            free(access);
1.9       timbl     608:            
1.21      luotonen  609:            access =  HTParse(HTAnchor_physical(req->anchor),
1.8       timbl     610:                "http:", PARSE_ACCESS);
1.2       timbl     611:        }
                    612:     }
1.70    ! frystyk   613: #endif /* HT_NO_PROXY */
1.1       timbl     614: 
1.19      timbl     615:     free(addr);
1.1       timbl     616: 
1.61      frystyk   617:     /* Search registered protocols to find suitable one */
1.1       timbl     618:     {
1.61      frystyk   619:        HTList *cur = protocols;
1.20      luotonen  620:        HTProtocol *p;
1.61      frystyk   621:        if (!cur) {
                    622:            if (TRACE)
1.67      frystyk   623:                fprintf(TDEST, "HTAccess.... NO PROTOCOL MODULES INITIATED\n");
1.61      frystyk   624:        } else {
                    625:            while ((p = (HTProtocol*)HTList_nextObject(cur))) {
                    626:                if (strcmp(p->name, access)==0) {
                    627:                    HTAnchor_setProtocol(req->anchor, p);
                    628:                    free(access);
                    629:                    return (HT_OK);
                    630:                }
1.1       timbl     631:            }
                    632:        }
                    633:     }
                    634:     free(access);
1.2       timbl     635:     return HT_NO_ACCESS;
1.1       timbl     636: }
                    637: 
1.59      frystyk   638: /* --------------------------------------------------------------------------*/
                    639: /*                             Document Loader                              */
                    640: /* --------------------------------------------------------------------------*/
1.1       timbl     641: 
                    642: /*             Load a document
                    643: **             ---------------
                    644: **
1.2       timbl     645: **     This is an internal routine, which has an address AND a matching
                    646: **     anchor.  (The public routines are called with one OR the other.)
                    647: **
                    648: ** On entry,
1.15      timbl     649: **     request->
1.35      luotonen  650: **         anchor              a parent anchor with fully qualified
                    651: **                             hypertext reference as its address set
1.15      timbl     652: **         output_format       valid
                    653: **         output_stream       valid on NULL
1.2       timbl     654: **
                    655: ** On exit,
1.59      frystyk   656: **     returns         HT_WOULD_BLOCK  An I/O operation would block
                    657: **                     HT_ERROR        Error has occured
1.2       timbl     658: **                     HT_LOADED       Success
                    659: **                     HT_NO_DATA      Success, but no document loaded.
1.8       timbl     660: **                                     (telnet sesssion started etc)
1.2       timbl     661: **
                    662: */
1.52      frystyk   663: PUBLIC int HTLoad ARGS2(HTRequest *, request, BOOL, keep_error_stack)
1.2       timbl     664: {
1.25      frystyk   665:     char       *arg = NULL;
                    666:     HTProtocol *p;
                    667:     int        status;
                    668: 
1.22      luotonen  669:     if (request->method == METHOD_INVALID)
                    670:        request->method = METHOD_GET;
1.52      frystyk   671:     if (!keep_error_stack) {
                    672:        HTErrorFree(request);
                    673:        request->error_block = NO;
                    674:     }
                    675: 
1.59      frystyk   676:     if ((status = get_physical(request)) < 0) {
                    677:        if (status == HT_FORBIDDEN) {
                    678:            char *url = HTAnchor_address((HTAnchor *) request->anchor);
                    679:            if (url) {
                    680:                HTUnEscape(url);
                    681:                HTErrorAdd(request, ERR_FATAL, NO, HTERR_FORBIDDEN,
                    682:                           (void *) url, (int) strlen(url), "HTLoad");
                    683:                free(url);
                    684:            } else {
                    685:                HTErrorAdd(request, ERR_FATAL, NO, HTERR_FORBIDDEN,
                    686:                           NULL, 0, "HTLoad");
                    687:            }
                    688:        } 
                    689:        return HT_ERROR;                       /* Can't resolve or forbidden */
1.2       timbl     690:     }
1.25      frystyk   691: 
                    692:     if(!(arg = HTAnchor_physical(request->anchor)) || !*arg) 
1.59      frystyk   693:        return HT_ERROR;
1.27      luotonen  694: 
1.56      frystyk   695:     p = (HTProtocol *) HTAnchor_protocol(request->anchor);
1.17      timbl     696:     return (*(p->load))(request);
1.2       timbl     697: }
                    698: 
                    699: 
1.61      frystyk   700: /*             Terminate a LOAD
                    701: **             ----------------
                    702: **
                    703: **     This function looks at the status code from the HTLoadDocument
                    704: **     function and updates logfiles, creates error messages etc.
                    705: **
                    706: **    On Entry,
                    707: **     Status code from load function
                    708: */
                    709: PUBLIC BOOL HTLoadTerminate ARGS2(HTRequest *, request, int, status)
                    710: {
                    711:     char * uri = HTAnchor_address((HTAnchor*)request->anchor);
                    712: 
1.70    ! frystyk   713:     HTLog_request(request);
1.61      frystyk   714: 
                    715:     /* The error stack might contain general information to the client
                    716:        about what has been going on in the library (not only errors) */
                    717:     if (!HTImProxy && request->error_stack)
                    718:        HTErrorMsg(request);
                    719: 
                    720:     switch (status) {
                    721:       case HT_LOADED:
                    722:        if (PROT_TRACE) {
1.67      frystyk   723:            fprintf(TDEST, "HTAccess.... OK: `%s' has been accessed.\n", uri);
1.61      frystyk   724:        }
                    725:        break;
                    726: 
                    727:       case HT_NO_DATA:
                    728:        if (PROT_TRACE) {
1.67      frystyk   729:            fprintf(TDEST, "HTAccess.... OK BUT NO DATA: `%s'\n", uri);
1.61      frystyk   730:        }
                    731:        break;
                    732: 
                    733:       case HT_WOULD_BLOCK:
                    734:        if (PROT_TRACE) {
1.67      frystyk   735:            fprintf(TDEST, "HTAccess.... WOULD BLOCK: `%s'\n", uri);
1.61      frystyk   736:        }
                    737:        break;
                    738: 
                    739:       case HT_ERROR:
                    740:        if (HTImProxy)
                    741:            HTErrorMsg(request);                     /* Only on a real error */
                    742:        if (PROT_TRACE) {
1.67      frystyk   743:            fprintf(TDEST, "HTAccess.... ERROR: Can't access `%s'\n", uri);
1.61      frystyk   744:        }
                    745:        break;
                    746: 
                    747:       default:
                    748:        if (PROT_TRACE) {
1.67      frystyk   749:            fprintf(TDEST, "HTAccess.... **** Internal software error in CERN WWWLib version %s ****\n", HTLibraryVersion);
                    750:            fprintf(TDEST, "............ Please mail libwww@info.cern.ch quoting what software\n");
                    751:            fprintf(TDEST, "............ and version you are using including the URL:\n");
                    752:            fprintf(TDEST, "............ `%s\'\n", uri);
                    753:            fprintf(TDEST, "............ that caused the problem, thanks!\n");
1.61      frystyk   754:        }
                    755:        break;
                    756:     }
                    757:     free(uri);
                    758:     return YES;
                    759: }
                    760: 
                    761: 
1.2       timbl     762: /*             Load a document - with logging etc
                    763: **             ----------------------------------
                    764: **
                    765: **     - Checks or documents already loaded
                    766: **     - Logs the access
                    767: **     - Trace ouput and error messages
                    768: **
1.1       timbl     769: **    On Entry,
1.19      timbl     770: **        request->anchor      valid for of the document to be accessed.
                    771: **      request->childAnchor   optional anchor within doc to be selected
                    772: **
1.15      timbl     773: **       request->anchor   is the node_anchor for the document
                    774: **       request->output_format is valid
                    775: **
1.59      frystyk   776: ** On exit,
                    777: **     returns         HT_WOULD_BLOCK  An I/O operation would block
                    778: **                     HT_ERROR        Error has occured
                    779: **                     HT_LOADED       Success
                    780: **                     HT_NO_DATA      Success, but no document loaded.
                    781: **                                     (telnet sesssion started etc)
1.1       timbl     782: */
1.59      frystyk   783: PRIVATE int HTLoadDocument ARGS2(HTRequest *,  request,
                    784:                                 BOOL,          keep_error_stack)
1.1       timbl     785: 
                    786: {
                    787:     int                status;
                    788:     HText *    text;
1.19      timbl     789:     char * full_address = HTAnchor_address((HTAnchor*)request->anchor);
1.54      frystyk   790: 
1.67      frystyk   791:     if (PROT_TRACE) fprintf (TDEST, "HTAccess.... Loading document %s\n",
1.59      frystyk   792:                             full_address);
1.1       timbl     793: 
1.18      timbl     794:     request->using_cache = NULL;
                    795:     
1.15      timbl     796:     if (!request->output_format) request->output_format = WWW_PRESENT;
1.25      frystyk   797: 
1.67      frystyk   798:     /* Check if document is already loaded or in cache */
1.70    ! frystyk   799:     if (!request->ForceReload) {
1.67      frystyk   800:        if ((text=(HText *)HTAnchor_document(request->anchor))) {
                    801:            if (PROT_TRACE)
                    802:                fprintf(TDEST, "HTAccess.... Document already in memory.\n");
                    803:            if (request->childAnchor) {
                    804:                HText_selectAnchor(text, request->childAnchor);
                    805:            } else {
                    806:                HText_select(text);     
                    807:            }
                    808:            free(full_address);
                    809:            return HT_LOADED;
1.19      timbl     810:        }
1.67      frystyk   811:        
                    812:        /* Check the Cache */
                    813:        /* Bug: for each format, we only check whether it is ok, we
                    814:           don't check them all and chose the best */
                    815:        if (request->anchor->cacheItems) {
                    816:            HTList * list = request->anchor->cacheItems;
                    817:            HTList * cur = list;
                    818:            HTCacheItem * item;
                    819:            while ((item = (HTCacheItem*)HTList_nextObject(cur))) {
                    820:                HTStream * s;
                    821:                request->using_cache = item;
                    822:                s = HTStreamStack(item->format, request->output_format,
                    823:                                  request->output_stream, request, NO);
                    824:                if (s) {        /* format was suitable */
                    825:                    FILE * fp = fopen(item->filename, "r");
                    826:                    if (PROT_TRACE) 
1.70    ! frystyk   827:                        fprintf(TDEST, "Cache....... HIT file %s for %s\n",
1.67      frystyk   828:                                item->filename, 
                    829:                                full_address);
                    830:                    if (fp) {
                    831:                        HTFileCopy(fp, s);
                    832:                        (*s->isa->_free)(s); /* close up pipeline */
                    833:                        fclose(fp);
                    834:                        free(full_address);
                    835:                        return HT_LOADED;
                    836:                    } else {
                    837:                        fprintf(TDEST, "***** Can't read cache file %s !\n",
                    838:                                item->filename);
                    839:                    } /* file open ok */
                    840:                } /* stream ok */
                    841:            } /* next cache item */
                    842:        } /* if cache available for this anchor */
1.70    ! frystyk   843:     } else {                     /* Make sure that we don't use old headers */
        !           844:        HTAnchor_clearHeader(request->anchor);
        !           845:        request->HeaderMask += HT_PRAGMA;       /* Force reload through proxy */
1.1       timbl     846:     }
1.61      frystyk   847:     if ((status = HTLoad(request, keep_error_stack)) != HT_WOULD_BLOCK)
                    848:        HTLoadTerminate(request, status);
1.19      timbl     849:     free(full_address);
1.59      frystyk   850:     return status;
1.58      frystyk   851: }
1.1       timbl     852: 
                    853: 
                    854: /*             Load a document from absolute name
                    855: **             ---------------
                    856: **
1.59      frystyk   857: ** On Entry,
1.1       timbl     858: **        addr     The absolute address of the document to be accessed.
                    859: **
1.59      frystyk   860: ** On exit,
                    861: **     returns         HT_WOULD_BLOCK  An I/O operation would block
                    862: **                     HT_ERROR        Error has occured
                    863: **                     HT_LOADED       Success
                    864: **                     HT_NO_DATA      Success, but no document loaded.
                    865: **                                     (telnet sesssion started etc)
1.1       timbl     866: */
                    867: 
1.59      frystyk   868: PUBLIC int HTLoadAbsolute ARGS2(CONST char *,addr, HTRequest*, request)
1.2       timbl     869: {
1.19      timbl     870:    HTAnchor * anchor = HTAnchor_findAddress(addr);
                    871:    request->anchor = HTAnchor_parent(anchor);
                    872:    request->childAnchor = ((HTAnchor*)request->anchor == anchor) ?
                    873:                        NULL : (HTChildAnchor*) anchor;
1.52      frystyk   874:    return HTLoadDocument(request, NO);
1.2       timbl     875: }
                    876: 
                    877: 
                    878: /*             Load a document from absolute name to stream
                    879: **             --------------------------------------------
                    880: **
1.59      frystyk   881: ** On Entry,
1.2       timbl     882: **        addr     The absolute address of the document to be accessed.
1.15      timbl     883: **        request->output_stream     if non-NULL, send data down this stream
1.2       timbl     884: **
1.59      frystyk   885: ** On exit,
                    886: **     returns         HT_WOULD_BLOCK  An I/O operation would block
                    887: **                     HT_ERROR        Error has occured
                    888: **                     HT_LOADED       Success
                    889: **                     HT_NO_DATA      Success, but no document loaded.
                    890: **                                     (telnet sesssion started etc)
1.2       timbl     891: */
                    892: 
1.59      frystyk   893: PUBLIC int HTLoadToStream ARGS3(CONST char *,  addr,
                    894:                                BOOL,           filter,
                    895:                                HTRequest*,     request)
1.1       timbl     896: {
1.63      frystyk   897:     HTAnchor * anchor = HTAnchor_findAddress(addr);
                    898:     request->anchor = HTAnchor_parent(anchor);
                    899:     request->childAnchor = ((HTAnchor*)request->anchor == anchor) ? NULL :
1.19      timbl     900:        (HTChildAnchor*) anchor;
1.15      timbl     901:     request->output_stream = request->output_stream;
1.52      frystyk   902:     return HTLoadDocument(request, NO);
1.1       timbl     903: }
                    904: 
                    905: 
                    906: /*             Load a document from relative name
                    907: **             ---------------
                    908: **
1.59      frystyk   909: ** On Entry,
1.2       timbl     910: **        relative_name     The relative address of the document
                    911: **                         to be accessed.
1.1       timbl     912: **
1.59      frystyk   913: ** On exit,
                    914: **     returns         HT_WOULD_BLOCK  An I/O operation would block
                    915: **                     HT_ERROR        Error has occured
                    916: **                     HT_LOADED       Success
                    917: **                     HT_NO_DATA      Success, but no document loaded.
                    918: **                                     (telnet sesssion started etc)
1.1       timbl     919: */
                    920: 
1.59      frystyk   921: PUBLIC int HTLoadRelative ARGS3(CONST char *,          relative_name,
                    922:                                HTParentAnchor *,       here,
                    923:                                HTRequest *,            request)
1.1       timbl     924: {
                    925:     char *             full_address = 0;
1.65      frystyk   926:     int                result;
1.1       timbl     927:     char *             mycopy = 0;
                    928:     char *             stripped = 0;
                    929:     char *             current_address =
1.2       timbl     930:                                HTAnchor_address((HTAnchor*)here);
1.1       timbl     931: 
                    932:     StrAllocCopy(mycopy, relative_name);
                    933: 
                    934:     stripped = HTStrip(mycopy);
                    935:     full_address = HTParse(stripped,
                    936:                   current_address,
                    937:                   PARSE_ACCESS|PARSE_HOST|PARSE_PATH|PARSE_PUNCTUATION);
1.15      timbl     938:     result = HTLoadAbsolute(full_address, request);
1.1       timbl     939:     free(full_address);
                    940:     free(current_address);
                    941:     free(mycopy);  /* Memory leak fixed 10/7/92 -- JFG */
                    942:     return result;
                    943: }
                    944: 
                    945: 
                    946: /*             Load if necessary, and select an anchor
                    947: **             --------------------------------------
                    948: **
1.59      frystyk   949: ** On Entry,
1.1       timbl     950: **        destination              The child or parenet anchor to be loaded.
                    951: **
1.59      frystyk   952: ** On exit,
                    953: **     returns         HT_WOULD_BLOCK  An I/O operation would block
                    954: **                     HT_ERROR        Error has occured
                    955: **                     HT_LOADED       Success
                    956: **                     HT_NO_DATA      Success, but no document loaded.
                    957: **                                     (telnet sesssion started etc)
1.1       timbl     958: */
                    959: 
1.59      frystyk   960: PUBLIC int HTLoadAnchor ARGS2(HTAnchor*, anchor, HTRequest *, request)
1.1       timbl     961: {
1.70    ! frystyk   962:     if (!anchor || !request)
        !           963:        return HT_ERROR;
        !           964:     request->anchor = HTAnchor_parent(anchor);
1.59      frystyk   965:     request->childAnchor = ((HTAnchor *) request->anchor == anchor) ?
                    966:        NULL : (HTChildAnchor*) anchor;
                    967:     return HTLoadDocument(request, NO);
                    968: }
1.52      frystyk   969: 
                    970: 
                    971: /*             Load if necessary, and select an anchor
                    972: **             --------------------------------------
                    973: **
                    974: **     This function is almost identical to HTLoadAnchor, but it doesn't
                    975: **     clear the error stack so that the information in there is kept.
                    976: **
1.59      frystyk   977: ** On Entry,
1.52      frystyk   978: **        destination              The child or parenet anchor to be loaded.
                    979: **
1.59      frystyk   980: ** On exit,
                    981: **     returns         HT_WOULD_BLOCK  An I/O operation would block
                    982: **                     HT_ERROR        Error has occured
                    983: **                     HT_LOADED       Success
                    984: **                     HT_NO_DATA      Success, but no document loaded.
                    985: **                                     (telnet sesssion started etc)
1.52      frystyk   986: */
                    987: 
1.59      frystyk   988: PUBLIC int HTLoadAnchorRecursive ARGS2(HTAnchor*,      anchor,
                    989:                                       HTRequest *,     request)
1.52      frystyk   990: {
1.59      frystyk   991:     if (!anchor) return HT_ERROR;                                /* No link */
1.52      frystyk   992:     
                    993:     request->anchor  = HTAnchor_parent(anchor);
1.59      frystyk   994:     request->childAnchor = ((HTAnchor *) request->anchor == anchor) ?
                    995:        NULL : (HTChildAnchor*) anchor;
1.52      frystyk   996:     
1.59      frystyk   997:     return HTLoadDocument(request, YES);
                    998: }
1.1       timbl     999: 
                   1000: 
                   1001: /*             Search
                   1002: **             ------
                   1003: **  Performs a keyword search on word given by the user. Adds the keyword to 
                   1004: **  the end of the current address and attempts to open the new address.
                   1005: **
                   1006: **  On Entry,
                   1007: **       *keywords     space-separated keyword list or similar search list
1.2       timbl    1008: **     here            is the anchor the search is to be done on.
1.59      frystyk  1009: **
                   1010: ** On exit,
                   1011: **     returns         HT_WOULD_BLOCK  An I/O operation would block
                   1012: **                     HT_ERROR        Error has occurred
                   1013: **                     HT_LOADED       Success
                   1014: **                     HT_NO_DATA      Success, but no document loaded.
                   1015: **                                     (telnet session started etc)
1.1       timbl    1016: */
                   1017: 
1.56      frystyk  1018: PRIVATE char hex ARGS1(int, i)
1.2       timbl    1019: {
1.13      timbl    1020:     char * hexchars = "0123456789ABCDEF";
                   1021:     return hexchars[i];
1.2       timbl    1022: }
1.1       timbl    1023: 
1.59      frystyk  1024: PUBLIC int HTSearch ARGS3(CONST char *,                keywords,
                   1025:                          HTParentAnchor *,     here,
                   1026:                          HTRequest *,          request)
1.1       timbl    1027: {
1.2       timbl    1028: 
                   1029: #define acceptable \
                   1030: "1234567890abcdefghijlkmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ.-_"
                   1031: 
                   1032:     char *q, *u;
                   1033:     CONST char * p, *s, *e;            /* Pointers into keywords */
                   1034:     char * address = HTAnchor_address((HTAnchor*)here);
1.65      frystyk  1035:     int result;
1.56      frystyk  1036:     char * escaped = (char *) malloc(strlen(keywords)*3+1);
1.2       timbl    1037: 
1.29      frystyk  1038:     /* static CONST BOOL isAcceptable[96] = */
                   1039:     /* static AND const is not good for a gnu compiler! Frystyk 25/02-94 */
1.30      luotonen 1040:     static BOOL isAcceptable[96] =
1.2       timbl    1041:     /*   0 1 2 3 4 5 6 7 8 9 A B C D E F */
                   1042:     {    0,0,0,0,0,0,0,0,0,0,1,0,0,1,1,0,      /* 2x   !"#$%&'()*+,-./  */
                   1043:          1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,      /* 3x  0123456789:;<=>?  */
                   1044:         1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,       /* 4x  @ABCDEFGHIJKLMNO  */
                   1045:         1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,1,       /* 5X  PQRSTUVWXYZ[\]^_  */
                   1046:         0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,       /* 6x  `abcdefghijklmno  */
                   1047:         1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0 };     /* 7X  pqrstuvwxyz{\}~  DEL */
                   1048: 
                   1049:     if (escaped == NULL) outofmem(__FILE__, "HTSearch");
                   1050:     
1.29      frystyk  1051: /* Convert spaces to + and hex escape unacceptable characters */
1.2       timbl    1052: 
1.29      frystyk  1053:     for(s=keywords; *s && WHITE(*s); s++); /*scan */    /* Skip white space */
                   1054:     for(e = s + strlen(s); e>s && WHITE(*(e-1)) ; e--);     /* Skip trailers */
                   1055:     for(q=escaped, p=s; p<e; p++) {                  /* scan stripped field */
1.2       timbl    1056:         int c = (int)TOASCII(*p);
                   1057:         if (WHITE(*p)) {
                   1058:            *q++ = '+';
1.29      frystyk  1059:        } else if (c>=32 && c<=127 && isAcceptable[c-32] != 0) {
1.13      timbl    1060:            *q++ = *p;                  /* 930706 TBL for MVS bug */
1.2       timbl    1061:        } else {
                   1062:            *q++ = '%';
                   1063:            *q++ = hex(c / 16);
                   1064:            *q++ = hex(c % 16);
                   1065:        }
                   1066:     } /* Loop over string */
1.1       timbl    1067:     
1.2       timbl    1068:     *q=0;
                   1069:                                /* terminate escaped sctring */
                   1070:     u=strchr(address, '?');            /* Find old search string */
                   1071:     if (u) *u = 0;                             /* Chop old search off */
1.1       timbl    1072: 
                   1073:     StrAllocCat(address, "?");
1.2       timbl    1074:     StrAllocCat(address, escaped);
                   1075:     free(escaped);
1.15      timbl    1076:     result = HTLoadRelative(address, here, request);
1.1       timbl    1077:     free(address);
1.2       timbl    1078:     
1.1       timbl    1079:     return result;
1.2       timbl    1080: }
                   1081: 
                   1082: 
                   1083: /*             Search Given Indexname
                   1084: **             ------
                   1085: **  Performs a keyword search on word given by the user. Adds the keyword to 
                   1086: **  the end of the current address and attempts to open the new address.
                   1087: **
1.59      frystyk  1088: ** On Entry,
1.2       timbl    1089: **       *keywords     space-separated keyword list or similar search list
                   1090: **     *addres         is name of object search is to be done on.
1.59      frystyk  1091: ** On exit,
                   1092: **     returns         HT_WOULD_BLOCK  An I/O operation would block
                   1093: **                     HT_ERROR        Error has occured
                   1094: **                     HT_LOADED       Success
                   1095: **                     HT_NO_DATA      Success, but no document loaded.
                   1096: **                                     (telnet sesssion started etc)
1.2       timbl    1097: */
                   1098: 
1.59      frystyk  1099: PUBLIC int HTSearchAbsolute ARGS3(CONST char *,        keywords,
                   1100:                                  CONST char *,         indexname,
                   1101:                                  HTRequest *,          request)
1.2       timbl    1102: {
                   1103:     HTParentAnchor * anchor =
                   1104:        (HTParentAnchor*) HTAnchor_findAddress(indexname);
1.15      timbl    1105:     return HTSearch(keywords, anchor, request);
1.57      howcome  1106: }
                   1107: 
1.70    ! frystyk  1108: /* --------------------------------------------------------------------------*/
        !          1109: /*                             Document Poster                              */
        !          1110: /* --------------------------------------------------------------------------*/
        !          1111: 
        !          1112: /*             Get a save stream for a document
        !          1113: **             --------------------------------
        !          1114: */
        !          1115: PUBLIC HTStream *HTSaveStream ARGS1(HTRequest *, request)
        !          1116: {
        !          1117:     HTProtocol * p;
        !          1118:     int status;
        !          1119:     request->method = METHOD_PUT;
        !          1120:     status = get_physical(request);
        !          1121:     if (status == HT_FORBIDDEN) {
        !          1122:        char *url = HTAnchor_address((HTAnchor *) request->anchor);
        !          1123:        if (url) {
        !          1124:            HTUnEscape(url);
        !          1125:            HTErrorAdd(request, ERR_FATAL, NO, HTERR_FORBIDDEN,
        !          1126:                       (void *) url, (int) strlen(url), "HTLoad");
        !          1127:            free(url);
        !          1128:        } else {
        !          1129:            HTErrorAdd(request, ERR_FATAL, NO, HTERR_FORBIDDEN,
        !          1130:                       NULL, 0, "HTLoad");
        !          1131:        }
        !          1132:        return NULL;    /* should return error status? */
        !          1133:     }
        !          1134:     if (status < 0) return NULL; /* @@ error. Can't resolve or forbidden */
        !          1135:     
        !          1136:     p = (HTProtocol *) HTAnchor_protocol(request->anchor);
        !          1137:     if (!p) return NULL;
        !          1138:     
        !          1139:     return (*p->saveStream)(request);
        !          1140:     
        !          1141: }
        !          1142: 
        !          1143: /*     COPY AN ANCHOR
        !          1144: **     --------------
        !          1145: **  Fetch the URL (possibly local file URL) and send it using either PUT
        !          1146: **  or POST to the remote destination using HTTP. The caller can decide the
        !          1147: **  exact method used and which HTTP header fields to transmit by setting the
        !          1148: **  user fields in the request structure.
        !          1149: **
        !          1150: **     returns         HT_WOULD_BLOCK  An I/O operation would block
        !          1151: **                     HT_ERROR        Error has occured
        !          1152: **                     HT_LOADED       Success
        !          1153: **                     HT_NO_DATA      Success, but no document loaded.
        !          1154: */
        !          1155: PUBLIC int HTCopyAnchor ARGS4(HTAnchor *,      src_anchor,
        !          1156:                              HTRequest *,      src_req,
        !          1157:                              HTParentAnchor *, dest_anchor,
        !          1158:                              HTRequest *,      dest_req)
        !          1159: {
        !          1160:     if (!(src_anchor && src_req && dest_anchor && dest_req))
        !          1161:        return HT_ERROR;
        !          1162: 
        !          1163:     if (!(dest_anchor->methods & dest_req->method)) {
        !          1164:        char buf[80];
        !          1165:        sprintf(buf, "It might not be allowed to %s to this destination, continue?", HTMethod_name(dest_req->method));
        !          1166:        if (!HTConfirm(buf))
        !          1167:            return HT_ERROR;
        !          1168:     }
        !          1169: 
        !          1170:     /* First open the destination then open the source */
        !          1171:     if (HTLoadAnchor((HTAnchor *) dest_anchor, dest_req) != HT_ERROR) {
        !          1172:        src_req->ForceReload = YES;
        !          1173:        src_req->HeaderMask += HT_DATE;                  /* Send date header */
        !          1174:        if (src_req->output_format == WWW_PRESENT)             /* Use source */
        !          1175:            src_req->output_format = WWW_SOURCE;
        !          1176: 
        !          1177:        /* Now make the link between the two request structures. First setup
        !          1178:           the output stream of the source so that data get redirected to
        !          1179:           the destination. Then set up the call back function so that
        !          1180:           the destination can call for more data */
        !          1181:        src_req->output_stream = dest_req->input_stream;
        !          1182:        dest_req->CopyRequest = src_req;
        !          1183:        dest_req->PostCallBack = HTSocketRead;
        !          1184: 
        !          1185:        return HTLoadAnchor(src_anchor, src_req);
        !          1186:     }
        !          1187:     return HT_ERROR;
        !          1188: }
        !          1189: 
        !          1190: 
        !          1191: /*     UPLOAD AN ANCHOR
        !          1192: **     ----------------
        !          1193: **  Send the contents (in hyperdoc) of the source anchor using either PUT
        !          1194: **  or POST to the remote destination using HTTP. The caller can decide the
        !          1195: **  exact method used and which HTTP header fields to transmit by setting the
        !          1196: **  user fields in the request structure.
        !          1197: **
        !          1198: **     returns         HT_WOULD_BLOCK  An I/O operation would block
        !          1199: **                     HT_ERROR        Error has occured
        !          1200: **                     HT_LOADED       Success
        !          1201: **                     HT_NO_DATA      Success, but no document loaded.
        !          1202: */
        !          1203: PUBLIC int HTUploadAnchor ARGS3(HTAnchor *,            src_anchor,
        !          1204:                                HTParentAnchor *,       dest_anchor,
        !          1205:                                HTRequest *,            dest_req)
        !          1206: {
        !          1207:     if (!(src_anchor && dest_anchor && dest_req))
        !          1208:        return HT_ERROR;
        !          1209: 
        !          1210:     if (!(dest_anchor->methods & dest_req->method)) {
        !          1211:        char buf[80];
        !          1212:        sprintf(buf, "It might not be allowed to %s to this destination, continue?", HTMethod_name(dest_req->method));
        !          1213:        if (!HTConfirm(buf))
        !          1214:            return HT_ERROR;
        !          1215:     }
        !          1216: 
        !          1217:     return HT_ERROR;
        !          1218: }
        !          1219: 
        !          1220: /* --------------------------------------------------------------------------*/
        !          1221: /*                             Anchor help routines                         */
        !          1222: /* --------------------------------------------------------------------------*/
1.57      howcome  1223: 
                   1224: /*
                   1225: **             Find Related Name
                   1226: **
                   1227: **  Creates a string that can be used as a related name when 
                   1228: **  calling HTParse initially. 
                   1229: **  
                   1230: **  The code for this routine originates from the Linemode 
                   1231: **  browser and was moved here by howcome@dxcern.cern.ch
                   1232: **  in order for all clients to take advantage.
                   1233: **
1.59      frystyk  1234: **  The string returned must be freed by the caller
1.57      howcome  1235: */
                   1236: PUBLIC char * HTFindRelatedName NOARGS
                   1237: {
1.59      frystyk  1238:     char* default_default = NULL;            /* Parse home relative to this */
                   1239:     CONST char *host = HTGetHostName(); 
1.57      howcome  1240:     StrAllocCopy(default_default, "file://");
1.59      frystyk  1241:     if (host)
                   1242:        StrAllocCat(default_default, host);
                   1243:     else
                   1244:        StrAllocCat(default_default, "localhost");
                   1245:     {
                   1246:        char wd[HT_MAX_PATH+1];
1.67      frystyk  1247: 
                   1248: #ifdef NO_GETWD
                   1249: #ifdef HAS_GETCWD            /* System V variant SIGN CHANGED TBL 921006 !! */
                   1250:        char *result = (char *) getcwd(wd, sizeof(wd)); 
                   1251: #else
                   1252:        char *result = NULL;
                   1253:        HTAlert("This platform does not support neither getwd nor getcwd\n");
                   1254: #endif
                   1255: #else
                   1256:        char *result = (char *) getwd(wd);
                   1257: #endif
1.59      frystyk  1258:        *(wd+HT_MAX_PATH) = '\0';
1.57      howcome  1259:        if (result) {
                   1260: #ifdef VMS 
                   1261:             /* convert directory name to Unix-style syntax */
                   1262:            char * disk = strchr (wd, ':');
                   1263:            char * dir = strchr (wd, '[');
                   1264:            if (disk) {
                   1265:                *disk = '\0';
                   1266:                StrAllocCat (default_default, "/");  /* needs delimiter */
                   1267:                StrAllocCat (default_default, wd);
                   1268:            }
                   1269:            if (dir) {
                   1270:                char *p;
                   1271:                *dir = '/';  /* Convert leading '[' */
                   1272:                for (p = dir ; *p != ']'; ++p)
                   1273:                        if (*p == '.') *p = '/';
                   1274:                *p = '\0';  /* Cut on final ']' */
                   1275:                StrAllocCat (default_default, dir);
                   1276:            }
1.70    ! frystyk  1277: #else
        !          1278: #ifdef WIN32
        !          1279:            char * p = wd ;     /* a colon */
        !          1280:            StrAllocCat(default_default, "/");
        !          1281: 
        !          1282:            /**p++ = '|' ;        /* change to '|' */
        !          1283:            while( *p != 0 ) { 
        !          1284:                if (*p == '\\')                  /* change to one true slash */
        !          1285:                    *p = '/' ;
        !          1286:                p++;
        !          1287:            }
        !          1288:            StrAllocCat( default_default, wd) ;
        !          1289: #else
1.57      howcome  1290:            StrAllocCat (default_default, wd);
1.70    ! frystyk  1291: #endif /* not WIN32 */
1.67      frystyk  1292: #endif /* not VMS */
1.57      howcome  1293:        }
1.67      frystyk  1294:     }
1.57      howcome  1295:     StrAllocCat(default_default, "/default.html");
                   1296:     return default_default;
1.2       timbl    1297: }
                   1298: 
                   1299: 
                   1300: /*             Generate the anchor for the home page
                   1301: **             -------------------------------------
                   1302: **
                   1303: **     As it involves file access, this should only be done once
                   1304: **     when the program first runs.
1.10      timbl    1305: **     This is a default algorithm -- browser don't HAVE to use this.
                   1306: **     But consistency betwen browsers is STRONGLY recommended!
1.2       timbl    1307: **
1.10      timbl    1308: **     Priority order is:
                   1309: **
                   1310: **             1       WWW_HOME environment variable (logical name, etc)
                   1311: **             2       ~/WWW/default.html
                   1312: **             3       /usr/local/bin/default.html
1.70    ! frystyk  1313: **             4       http://www.w3.org/default.html
1.10      timbl    1314: **
1.2       timbl    1315: */
                   1316: PUBLIC HTParentAnchor * HTHomeAnchor NOARGS
                   1317: {
1.12      timbl    1318:     char * my_home_document = NULL;
1.70    ! frystyk  1319:     char * home = (char *) getenv(LOGICAL_DEFAULT);
1.2       timbl    1320:     char * ref;
                   1321:     HTParentAnchor * anchor;
1.1       timbl    1322:     
1.70    ! frystyk  1323:     /* Someone telnets in, they get a special home */
1.12      timbl    1324:     if (home) {
                   1325:         StrAllocCopy(my_home_document, home);
1.70    ! frystyk  1326:     } else  if (HTClientHost) {                                    /* Telnet server */
1.12      timbl    1327:        FILE * fp = fopen(REMOTE_POINTER, "r");
                   1328:        char * status;
                   1329:        if (fp) {
1.59      frystyk  1330:            my_home_document = (char*) malloc(HT_MAX_PATH);
                   1331:            status = fgets(my_home_document, HT_MAX_PATH, fp);
1.12      timbl    1332:            if (!status) {
                   1333:                free(my_home_document);
                   1334:                my_home_document = NULL;
                   1335:            }
                   1336:            fclose(fp);
                   1337:        }
                   1338:        if (!my_home_document) StrAllocCopy(my_home_document, REMOTE_ADDRESS);
                   1339:     }
                   1340: 
1.67      frystyk  1341: #ifdef unix
1.10      timbl    1342:     if (!my_home_document) {
                   1343:        FILE * fp = NULL;
1.70    ! frystyk  1344:        char * home = (char *) getenv("HOME");
1.10      timbl    1345:        if (home) { 
                   1346:            my_home_document = (char *)malloc(
                   1347:                strlen(home)+1+ strlen(PERSONAL_DEFAULT)+1);
                   1348:            if (my_home_document == NULL) outofmem(__FILE__, "HTLocalName");
                   1349:            sprintf(my_home_document, "%s/%s", home, PERSONAL_DEFAULT);
                   1350:            fp = fopen(my_home_document, "r");
                   1351:        }
                   1352:        
                   1353:        if (!fp) {
                   1354:            StrAllocCopy(my_home_document, LOCAL_DEFAULT_FILE);
                   1355:            fp = fopen(my_home_document, "r");
                   1356:        }
1.2       timbl    1357:        if (fp) {
                   1358:            fclose(fp);
                   1359:        } else {
1.62      frystyk  1360:            if (TRACE)
1.67      frystyk  1361:                fprintf(TDEST,
1.62      frystyk  1362:                        "HTBrowse: No local home document ~/%s or %s\n",
                   1363:                        PERSONAL_DEFAULT, LOCAL_DEFAULT_FILE);
1.11      timbl    1364:            free(my_home_document);
                   1365:            my_home_document = NULL;
1.2       timbl    1366:        }
                   1367:     }
1.67      frystyk  1368: #endif
1.70    ! frystyk  1369:     ref = HTParse(my_home_document ? my_home_document :
        !          1370:                  HTClientHost ? REMOTE_ADDRESS : LAST_RESORT, "file:",
        !          1371:                  PARSE_ACCESS|PARSE_HOST|PARSE_PATH|PARSE_PUNCTUATION);
1.10      timbl    1372:     if (my_home_document) {
1.62      frystyk  1373:        if (TRACE)
1.67      frystyk  1374:            fprintf(TDEST,
1.62      frystyk  1375:                   "HTAccess.... `%s\' used for custom home page as\n`%s\'\n",
                   1376:                    my_home_document, ref);
1.10      timbl    1377:        free(my_home_document);
1.2       timbl    1378:     }
                   1379:     anchor = (HTParentAnchor*) HTAnchor_findAddress(ref);
                   1380:     free(ref);
                   1381:     return anchor;
1.1       timbl    1382: }
1.26      frystyk  1383: 
                   1384: 
                   1385: /*             Bind an Anchor to the request structure
                   1386: **             ---------------------------------------
                   1387: **
                   1388: **    On Entry,
                   1389: **     anchor          The child or parenet anchor to be binded
                   1390: **     request         The request sturcture
                   1391: **    On Exit,
                   1392: **        returns    YES     Success
                   1393: **                   NO      Failure 
                   1394: **
                   1395: **  Note: Actually the same as HTLoadAnchor() but DOES NOT do the loading
                   1396: **                                             Henrik Frystyk 17/02-94
                   1397: */
                   1398: 
                   1399: PUBLIC BOOL HTBindAnchor ARGS2(HTAnchor*, anchor, HTRequest *, request)
                   1400: {
                   1401:     if (!anchor) return NO;    /* No link */
                   1402:     
                   1403:     request->anchor  = HTAnchor_parent(anchor);
                   1404:     request->childAnchor = ((HTAnchor*)request->anchor == anchor) ? NULL
                   1405:                                        : (HTChildAnchor*) anchor;
                   1406:        
1.29      frystyk  1407:     return YES;
1.70    ! frystyk  1408: }
1.59      frystyk  1409: 
1.26      frystyk  1410: 

Webmaster