Annotation of libwww/Library/src/HTTP.c, revision 1.21

1.1       timbl       1: /*     HyperText Transfer Protocol     - Client implementation         HTTP.c
                      2: **     ==========================
1.2       timbl       3: **
                      4: ** Bugs:
                      5: **     Not implemented:
                      6: **             Forward
                      7: **             Redirection
                      8: **             Error handling
1.1       timbl       9: */
                     10: 
                     11: /*     Module parameters:
                     12: **     -----------------
                     13: **
                     14: **  These may be undefined and redefined by syspec.h
                     15: */
1.2       timbl      16: 
1.12      timbl      17: /*     MOSAIC_HACK2 is a kludge to guess the file type of a transferred
                     18: **     file from the URL.  It is STRICTLY illegal to do this!
                     19: */
                     20: 
1.2       timbl      21: /* Implements:
                     22: */
                     23: #include "HTTP.h"
                     24: 
                     25: #define HTTP_VERSION   "HTTP/1.0"
                     26: #define HTTP2                          /* Version is greater than 0.9 */
                     27: 
                     28: #define INIT_LINE_SIZE         1024    /* Start with line buffer this big */
                     29: #define LINE_EXTEND_THRESH     256     /* Minimum read size */
                     30: #define VERSION_LENGTH                 20      /* for returned protocol version */
                     31: 
                     32: /* Uses:
                     33: */
1.1       timbl      34: #include "HTParse.h"
                     35: #include "HTUtils.h"
                     36: #include "tcp.h"
                     37: #include "HTTCP.h"
                     38: #include "HTFormat.h"
1.2       timbl      39: #include <ctype.h>
                     40: #include "HTAlert.h"
                     41: #include "HTMIME.h"
1.5       timbl      42: #include "HTML.h"              /* SCW */
                     43: #include "HTInit.h"            /* SCW */
1.21    ! luotonen   44: #include "HTAccess.h"          /* HTRequest */
1.14      luotonen   45: #include "HTAABrow.h"          /* Access Authorization */
1.20      timbl      46: #include "HTTee.h"             /* Tee off a cache stream */
                     47: #include "HTFWriter.h"         /* Write to cache file */
1.1       timbl      48: 
1.2       timbl      49: struct _HTStream {
                     50:        HTStreamClass * isa;            /* method table; this file only calls isa->put_block and isa->free */
                     51: };
                     52: 
                     53: 
1.6       timbl      54: extern char * HTAppName;       /* Application name, defined elsewhere; sent in User-Agent, "unknown" if NULL */
                     55: extern char * HTAppVersion;    /* Application version, defined elsewhere; sent in User-Agent, "0.0" if NULL */
                     56: 
1.19      timbl      57: PUBLIC BOOL HTCacheHTTP = YES; /* When set, HTLoadHTTP tees the reply into HTCacheWriter() */
                     58: 
1.21    ! luotonen   59: 
        !            60: PRIVATE void parse_401_headers ARGS2(HTRequest *,      req,
        !            61:                                     HTInputSocket *,   isoc)
        !            62: {
        !            63:     HTAAScheme scheme;
        !            64:     char *line;
        !            65:     int num_schemes = 0;
        !            66:     HTList *valid_schemes = HTList_new();
        !            67:     HTAssocList **scheme_specifics = NULL;
        !            68:     char *template = NULL;
        !            69: 
        !            70:     /* Read server reply header lines */
        !            71: 
        !            72:     if (TRACE)
        !            73:        fprintf(stderr, "Server 401 reply header lines:\n");
        !            74: 
        !            75:     while (NULL != (line = HTInputSocket_getUnfoldedLine(isoc)) &&
        !            76:           *line != 0) {
        !            77: 
        !            78:        if (TRACE) fprintf(stderr, "%s\n", line);
        !            79: 
        !            80:        if (strchr(line, ':')) {        /* Valid header line */
        !            81: 
        !            82:            char *p = line;
        !            83:            char *fieldname = HTNextField(&p);
        !            84:            char *arg1 = HTNextField(&p);
        !            85:            char *args = p;
        !            86:            
        !            87:            if (0==strcasecomp(fieldname, "WWW-Authenticate:")) {
        !            88:                if (HTAA_UNKNOWN != (scheme = HTAAScheme_enum(arg1))) {
        !            89:                    HTList_addObject(valid_schemes, (void*)scheme);
        !            90:                    if (!scheme_specifics) {
        !            91:                        int i;
        !            92:                        scheme_specifics = (HTAssocList**)
        !            93:                            malloc(HTAA_MAX_SCHEMES * sizeof(HTAssocList*));
        !            94:                        if (!scheme_specifics)
        !            95:                            outofmem(__FILE__, "parse_401_headers");
        !            96:                        for (i=0; i < HTAA_MAX_SCHEMES; i++)
        !            97:                            scheme_specifics[i] = NULL;
        !            98:                    }
        !            99:                    scheme_specifics[scheme] = HTAA_parseArgList(args);
        !           100:                    num_schemes++;
        !           101:                }
        !           102:                else if (TRACE) {
        !           103:                    fprintf(stderr, "Unknown scheme `%s' %s\n",
        !           104:                            (arg1 ? arg1 : "(null)"),
        !           105:                            "in WWW-Authenticate: field");
        !           106:                }
        !           107:            }
        !           108: 
        !           109:            else if (0==strcasecomp(fieldname, "WWW-Protection-Template:")) {
        !           110:                if (TRACE)
        !           111:                    fprintf(stderr, "Protection template set to `%s'\n", arg1);
        !           112:                StrAllocCopy(template, arg1);
        !           113:            }
        !           114: 
        !           115:        } /* if a valid header line */
        !           116:        else if (TRACE) {
        !           117:            fprintf(stderr, "Invalid header line `%s' ignored\n", line);
        !           118:        } /* else invalid header line */
        !           119:     } /* while header lines remain */
        !           120: 
        !           121:     req->valid_schemes = valid_schemes;
        !           122:     req->scheme_specifics = scheme_specifics;
        !           123:     req->prot_template = template;
        !           124: }
        !           125: 
        !           126: 
        !           127: 
1.1       timbl     128: /*             Load Document from HTTP Server                  HTLoadHTTP()
                    129: **             ==============================
                    130: **
                    131: **     Given a hypertext address, this routine loads a document.
                    132: **
                    133: **
                    134: ** On entry,
                    135: **     arg     is the hypertext reference of the article to be loaded.
                    136: **
                    137: ** On exit,
                    138: **     returns >=0     If no error, a good socket number
                    139: **             <0      Error.
                    140: **
                    141: **     The socket must be closed by the caller after the document has been
                    142: **     read.
                    143: **
                    144: */
1.19      timbl     145: PUBLIC int HTLoadHTTP ARGS1 (HTRequest *, request)
1.1       timbl     146: {
1.19      timbl     147:     CONST char * arg = HTAnchor_physical(request->anchor);
1.1       timbl     148:     int s;                             /* Socket number for returned data */
                    149:     int status;                                /* tcp return */
1.10      timbl     150:     char crlf[3];                      /* A CR LF equivalent string */
1.3       timbl     151:     HTStream * target = NULL;          /* Unconverted data */
                    152:     
1.2       timbl     153:     CONST char* gate = 0;              /* disable this feature */
1.1       timbl     154:     SockA soc_address;                 /* Binary network address */
                    155:     SockA * sin = &soc_address;
1.2       timbl     156:     BOOL extensions = YES;             /* Assume good HTTP server */
1.17      timbl     157: 
1.1       timbl     158:     if (!arg) return -3;               /* Bad if no name sepcified     */
                    159:     if (!*arg) return -2;              /* Bad if name had zero length  */
                    160: 
                    161: /*  Set up defaults:
                    162: */
                    163: #ifdef DECNET
1.2       timbl     164:     sin->sdn_family = AF_DECnet;           /* Family = DECnet, host order */
                    165:     sin->sdn_objnum = DNP_OBJ;          /* Default: http object number */
1.1       timbl     166: #else  /* Internet */
1.2       timbl     167:     sin->sin_family = AF_INET;     /* Family = internet, host order */
                    168:     sin->sin_port = htons(TCP_PORT);    /* Default: http port    */
1.1       timbl     169: #endif
                    170: 
1.10      timbl     171:     sprintf(crlf, "%c%c", CR, LF);     /* To be corect on Mac, VM, etc */
                    172:     
1.1       timbl     173:     if (TRACE) {
                    174:         if (gate) fprintf(stderr,
                    175:                "HTTPAccess: Using gateway %s for %s\n", gate, arg);
                    176:         else fprintf(stderr, "HTTPAccess: Direct access for %s\n", arg);
                    177:     }
                    178:     
                    179: /* Get node name and optional port number:
                    180: */
                    181:     {
                    182:        char *p1 = HTParse(gate ? gate : arg, "", PARSE_HOST);
                    183:        int status = HTParseInet(sin, p1);  /* TBL 920622 */
                    184:         free(p1);
                    185:        if (status) return status;   /* No such host for example */
                    186:     }
                    187:     
1.15      luotonen  188: /*
                    189: ** Compose authorization information (this was moved here
                    190: ** from after the making of the connection so that the connection
                    191: ** wouldn't have to wait while prompting username and password
                    192: ** from the user).                             -- AL 13.10.93
                    193: */
                    194: #ifdef ACCESS_AUTH
1.21    ! luotonen  195:     StrAllocCopy(request->argument, arg);
        !           196:     HTAA_composeAuth(request);
        !           197:     if (TRACE) {
        !           198:        if (request->authorization)
        !           199:            fprintf(stderr, "HTTP: Sending Authorization: %s\n",
        !           200:                    request->authorization);
        !           201:        else
        !           202:            fprintf(stderr, "HTTP: Not sending authorization (yet)\n");
1.15      luotonen  203:     }
                    204: #endif /* ACCESS_AUTH */
1.1       timbl     205:    
1.10      timbl     206: /*     Now, let's get a socket set up from the server for the data:
1.1       timbl     207: */      
                    208: #ifdef DECNET
                    209:     s = socket(AF_DECnet, SOCK_STREAM, 0);
                    210: #else
                    211:     s = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
                    212: #endif
                    213:     status = connect(s, (struct sockaddr*)&soc_address, sizeof(soc_address));
                    214:     if (status < 0) {
                    215:            if (TRACE) fprintf(stderr, 
                    216:              "HTTP: Unable to connect to remote host for `%s' (errno = %d).\n", arg, errno);
1.17      timbl     217: 
1.1       timbl     218:            return HTInetStatus("connect");
                    219:       }
                    220:     
                    221:     if (TRACE) fprintf(stderr, "HTTP connected, socket %d\n", s);
                    222: 
1.17      timbl     223: 
                    224: /*     Compose and send command
                    225: **     ------------------------
                    226: */
                    227:     {
                    228:         char *command;                 /* The whole command */
                    229:        
1.1       timbl     230: /*     Ask that node for the document,
                    231: **     omitting the host name & anchor if not gatewayed.
                    232: */        
1.17      timbl     233:        if (gate) {
                    234:            command = malloc(4 + strlen(arg)+ 2 + 31);
                    235:            if (command == NULL) outofmem(__FILE__, "HTLoadHTTP");
                    236:            strcpy(command, "GET ");
                    237:            strcat(command, arg);
                    238:        } else { /* not gatewayed */
                    239:            char * p1 = HTParse(arg, "", PARSE_PATH|PARSE_PUNCTUATION);
                    240:            command = malloc(4 + strlen(p1)+ 2 + 31);
                    241:            if (command == NULL) outofmem(__FILE__, "HTLoadHTTP");
                    242:            strcpy(command, "GET ");
                    243:            strcat(command, p1);
                    244:            free(p1);
                    245:        }
1.2       timbl     246: #ifdef HTTP2
1.17      timbl     247:        if (extensions) {
                    248:            strcat(command, " ");
                    249:            strcat(command, HTTP_VERSION);
                    250:        }
1.2       timbl     251: #endif
1.17      timbl     252:     
                    253:        strcat(command, crlf);  /* CR LF, as in rfc 977 */
                    254:     
                    255:        if (extensions) {
1.21    ! luotonen  256: 
1.17      timbl     257:            int i;
                    258:            HTAtom * present = WWW_PRESENT;
                    259:            char line[256];    /*@@@@ */
1.21    ! luotonen  260:            HTList *conversions[2];
        !           261: 
        !           262:            if (!HTConversions) HTFormatInit(HTConversions);
        !           263:            conversions[0] = HTConversions;
        !           264:            conversions[1] = request->conversions;
        !           265: 
        !           266:            for (i=0; i<2; i++) {
        !           267:                HTList *cur = conversions[i];
        !           268:                HTPresentation *pres;
        !           269: 
        !           270:                while ((pres = (HTPresentation*)HTList_nextObject(cur))) {
        !           271:                    if (pres->rep_out == present) {
        !           272:                        if (pres->quality != 1.0) {
        !           273:                            sprintf(line, "Accept: %s q=%.3f%c%c",
        !           274:                                    HTAtom_name(pres->rep),
        !           275:                                    pres->quality, CR, LF);
        !           276:                        } else {
        !           277:                            sprintf(line, "Accept: %s%c%c",
        !           278:                                    HTAtom_name(pres->rep), CR, LF);
        !           279:                        }
        !           280:                        StrAllocCat(command, line);
1.17      timbl     281:                    }
                    282:                }
1.2       timbl     283:            }
1.17      timbl     284:            
                    285:            sprintf(line, "User-Agent:  %s/%s  libwww/%s%c%c",
                    286:                    HTAppName ? HTAppName : "unknown",
                    287:                    HTAppVersion ? HTAppVersion : "0.0",
                    288:                    HTLibraryVersion, CR, LF);
                    289:                    StrAllocCat(command, line);
                    290:     
1.14      luotonen  291: #ifdef ACCESS_AUTH
1.21    ! luotonen  292:            if (request->authorization != NULL) {
        !           293:                sprintf(line, "Authorization: %s%c%c",
        !           294:                        request->authorization, CR, LF);
1.17      timbl     295:                StrAllocCat(command, line);
                    296:            }
                    297: #endif /* ACCESS_AUTH */
1.14      luotonen  298:        }
1.17      timbl     299:     
                    300:        StrAllocCat(command, crlf);     /* Blank line means "end" */
                    301:     
                    302:        if (TRACE) fprintf(stderr, "HTTP Tx: %s\n", command);
                    303:     
                    304:     /* Translate into ASCII if necessary
                    305:     */
1.4       timbl     306: #ifdef NOT_ASCII
1.17      timbl     307:        {
                    308:            char * p;
                    309:            for(p = command; *p; p++) {
                    310:                *p = TOASCII(*p);
                    311:            }
1.1       timbl     312:        }
1.3       timbl     313: #endif
1.17      timbl     314:     
                    315:        status = NETWRITE(s, command, (int)strlen(command));
                    316:        free(command);
                    317:        if (status<0) {
                    318:            if (TRACE) fprintf(stderr,
                    319:                "HTTPAccess: Unable to send command.\n");
1.1       timbl     320:            return HTInetStatus("send");
1.17      timbl     321:        }
                    322:     } /* compose and send command */
                    323:     
1.2       timbl     324: 
1.17      timbl     325: /*     Read the response
                    326: **     -----------------
1.11      timbl     327: **
                    328: **     HTTP0 servers must return ASCII style text, though it can in
                    329: **     principle be just text without any markup at all.
                    330: **     Full HTTP servers must return a response
                    331: **     line and RFC822 style header.  The response must therefore in
                    332: **     either case have a CRLF somewhere soon.
                    333: **
                    334: **     This is the theory.  In practice, there are (1993) unfortunately
                    335: **     many binary documents just served up with HTTP0.9.  This
                    336: **     means we have to preserve the binary buffer (on the assumption that
                    337: **     conversion from ASCII may lose information) in case it turns
                    338: **     out that we want the binary original.
1.2       timbl     339: */
1.3       timbl     340: 
1.21    ! luotonen  341:     {  /* read response */
        !           342: 
1.17      timbl     343:        HTFormat format_in;             /* Format arriving in the message */
1.21    ! luotonen  344:        HTInputSocket *isoc = HTInputSocket_new(s);
        !           345:        char * status_line = HTInputSocket_getStatusLine(isoc);
1.2       timbl     346: 
1.11      timbl     347: /* Kludge to trap binary responses from illegal HTTP0.9 servers.
                    348: ** First time we have enough, look at the stub in ASCII
                    349: ** and get out of here if it doesn't look right.
                    350: **
                    351: ** We also check for characters above 128 in the first few bytes, and
                    352: ** if we find them we forget the html default.
                    353: **
                    354: ** Bugs: A HTTP0.9 server returning a document starting "HTTP/"
                    355: **     will be taken as a HTTP 1.0 server.  Failure.
                    356: **     An HTTP 0.9 server returning a binary document with
                    357: **     characters < 128 will be read as ASCII.
                    358: */
1.21    ! luotonen  359:        if (!status_line) {     /* HTTP0 response */
        !           360:            if (HTInputSocket_seemsBinary(isoc)) {
        !           361:                format_in = HTAtom_for("www/unknown");
        !           362:            }
        !           363:            else {
        !           364:                format_in = WWW_HTML;
        !           365:            }
        !           366:            goto copy;
        !           367:        } /* end kludge */
        !           368: 
        !           369:        if (status_line) {      /* Decode full HTTP response */
        !           370:            /*
        !           371:            ** We now have a terminated server status line, and we have
        !           372:            ** checked that it is most probably a legal one.  Parse it.
        !           373:            */
        !           374:            char server_version[VERSION_LENGTH+1];
        !           375:            int server_status;
        !           376: 
        !           377:            if (TRACE)
        !           378:                fprintf(stderr, "HTTP Status Line: Rx: %.70s\n", status_line);
1.17      timbl     379:     
1.21    ! luotonen  380:            sscanf(status_line, "%20s%d", server_version, &server_status);
1.2       timbl     381: 
1.21    ! luotonen  382:            format_in = HTAtom_for("www/mime");
1.7       timbl     383:     
1.21    ! luotonen  384:            switch (server_status / 100) {
1.2       timbl     385: 
1.21    ! luotonen  386:              default:          /* bad number */
        !           387:                HTAlert("Unknown status reply from server!");
        !           388:                break;
1.17      timbl     389:                    
1.21    ! luotonen  390:              case 3:           /* Various forms of redirection */
        !           391:                HTAlert(
1.17      timbl     392:            "Redirection response from server is not handled by this client");
1.21    ! luotonen  393:                break;
1.17      timbl     394:                    
1.21    ! luotonen  395:              case 4:           /* Access Authorization problem */
1.14      luotonen  396: #ifdef ACCESS_AUTH
1.21    ! luotonen  397:                switch (server_status) {
        !           398:                  case 401:
        !           399:                    parse_401_headers(request, isoc);
        !           400: 
        !           401:                    if (TRACE) fprintf(stderr, "%s %d %s\n",
        !           402:                                       "HTTP: close socket", s,
        !           403:                                       "to retry with Access Authorization");
        !           404:                    HTInputSocket_free(isoc);
        !           405:                    (void)NETCLOSE(s);
        !           406:                    if (HTAA_retryWithAuth(request, &HTLoadHTTP)) {
        !           407:                        status = HT_LOADED;/* @@ THIS ONLY WORKS ON LINEMODE */
        !           408:                        goto clean_up;
        !           409:                    }
        !           410:                    /* else falltrough */
        !           411:                  default:
1.14      luotonen  412:                    {
1.21    ! luotonen  413:                        char *p1 = HTParse(gate ? gate : arg, "",
        !           414:                                           PARSE_HOST);
        !           415:                        char * message;
        !           416: 
        !           417:                        if (!(message = (char*)malloc(strlen(status_line) +
        !           418:                                                      strlen(p1) + 100)))
        !           419:                            outofmem(__FILE__, "HTTP 4xx status");
1.14      luotonen  420:                        sprintf(message,
1.21    ! luotonen  421:                                "HTTP server at %s replies:\n%s\n\n%s\n",
        !           422:                                p1, status_line,
        !           423:                                ((server_status == 401) 
        !           424:                                 ? "Access Authorization package giving up.\n"
        !           425:                                 : ""));
        !           426:                        status = HTLoadError(request->output_stream,
        !           427:                                             server_status, message);
1.14      luotonen  428:                        free(message);
                    429:                        free(p1);
                    430:                        goto clean_up;
                    431:                    }
1.21    ! luotonen  432:                } /* switch */
        !           433:                goto clean_up;
        !           434:                break;
        !           435: #else
        !           436:                /* case 4 without Access Authorization falls through */
        !           437:                /* to case 5 (previously "I think I goofed").  -- AL */
        !           438: #endif /* ACCESS_AUTH */
        !           439: 
        !           440:              case 5:           /* I think you goofed */
        !           441:                {
        !           442:                    char *p1 = HTParse(gate ? gate : arg, "", PARSE_HOST);
        !           443:                    char * message = (char*)malloc(strlen(status_line) + 
        !           444:                                                   strlen(p1) + 100);
        !           445:                    if (!message) outofmem(__FILE__, "HTTP 5xx status");
        !           446:                    sprintf(message,
        !           447:                            "HTTP server at %s replies:\n%s", p1, status_line);
        !           448:                    status = HTLoadError(request->output_stream,
        !           449:                                         server_status, message);
        !           450:                    free(message);
        !           451:                    free(p1);
        !           452:                    goto clean_up;
        !           453:                }
        !           454:                break;
1.17      timbl     455:                    
1.21    ! luotonen  456:              case 2:           /* Good: Got MIME object */
        !           457:                break;
1.17      timbl     458:     
1.21    ! luotonen  459:            } /* switch on response code */
1.17      timbl     460:            
1.21    ! luotonen  461:        } /* Full HTTP reply */
1.17      timbl     462:            
                    463:     
1.3       timbl     464: /*     Set up the stream stack to handle the body of the message
                    465: */
1.21    ! luotonen  466: 
1.13      duns      467: copy:
1.21    ! luotonen  468: 
1.18      timbl     469:        target = HTStreamStack(format_in, request);
1.21    ! luotonen  470: 
1.17      timbl     471:        if (!target) {
                    472:            char buffer[1024];  /* @@@@@@@@ */
                    473:            sprintf(buffer, "Sorry, no known way of converting %s to %s.",
                    474:                    HTAtom_name(format_in), HTAtom_name(request->output_format));
                    475:            fprintf(stderr, "HTTP: %s", buffer);
                    476:            status = HTLoadError(request->output_stream, 501, buffer);
                    477:            goto clean_up;
                    478:        }
                    479:     
1.19      timbl     480:         /* @@ Bug: The decision of whether or not to cache should also be
1.21    ! luotonen  481:        ** made contingent on a IP address match or non match.
        !           482:        */
1.19      timbl     483:         if (HTCacheHTTP) {
                    484:            target = HTTee(target, HTCacheWriter(request, NULL, format_in,
1.21    ! luotonen  485:                                                 request->output_format,
        !           486:                                                 request->output_stream));
1.19      timbl     487:        }
                    488:        
1.11      timbl     489: /*     Push the data down the stream
1.3       timbl     490: **     We have to remember the end of the first buffer we just read
1.2       timbl     491: */
1.17      timbl     492:        if (format_in == WWW_HTML) {
                    493:            target = HTNetToText(target);       /* Pipe through CR stripper */
                    494:        }
1.21    ! luotonen  495: 
1.17      timbl     496:        (*target->isa->put_block)(target,
1.21    ! luotonen  497:                                  isoc->input_pointer,
        !           498:                                  isoc->input_limit - isoc->input_pointer);
        !           499:        HTInputSocket_free(isoc);
1.17      timbl     500:        HTCopy(s, target);
                    501:            
                    502:        (*target->isa->free)(target);
                    503:        status = HT_LOADED;
1.11      timbl     504:     
1.2       timbl     505: /*     Clean up
1.1       timbl     506: */
1.17      timbl     507:        
                    508: clean_up: 
                    509:        if (TRACE) fprintf(stderr, "HTTP: close socket %d.\n", s);
                    510:        (void) NETCLOSE(s);
                    511:     
                    512:        return status;                  /* Good return */
1.3       timbl     513:     
1.17      timbl     514:     } /* read response */
                    515: } /* load HTTP */
1.1       timbl     516: 
                    517: /*     Protocol descriptor
                    518: **     Pairs the access scheme name http with the HTLoadHTTP loader
                         **     defined in this file; the two remaining fields are left as 0.
                         */
                    519: 
1.17      timbl     520: GLOBALDEF PUBLIC HTProtocol HTTP = { "http", HTLoadHTTP, 0, 0 };
1.21    ! luotonen  521: 

Webmaster