Annotation of libwww/Library/src/HTTP.c, revision 1.21
1.1 timbl 1: /* HyperText Transfer Protocol - Client implementation HTTP.c
2: ** ==========================
1.2 timbl 3: **
4: ** Bugs:
5: ** Not implemented:
6: ** Forward
7: ** Redirection
8: ** Error handling
1.1 timbl 9: */
10:
11: /* Module parameters:
12: ** -----------------
13: **
14: ** These may be undefined and redefined by syspec.h
15: */
1.2 timbl 16:
1.12 timbl 17: /* MOSAIC_HACK2 is a kludge to guess the file type of transferred
18: ** file from the URL. It is STRICTLY illegal to do this!
19: */
20:
1.2 timbl 21: /* Implements:
22: */
23: #include "HTTP.h"
24:
25: #define HTTP_VERSION "HTTP/1.0"
26: #define HTTP2 /* Version is greater than 0.9 */
27:
28: #define INIT_LINE_SIZE 1024 /* Start with line buffer this big */
29: #define LINE_EXTEND_THRESH 256 /* Minimum read size */
30: #define VERSION_LENGTH 20 /* for returned protocol version */
31:
32: /* Uses:
33: */
1.1 timbl 34: #include "HTParse.h"
35: #include "HTUtils.h"
36: #include "tcp.h"
37: #include "HTTCP.h"
38: #include "HTFormat.h"
1.2 timbl 39: #include <ctype.h>
40: #include "HTAlert.h"
41: #include "HTMIME.h"
1.5 timbl 42: #include "HTML.h" /* SCW */
43: #include "HTInit.h" /* SCW */
1.21 ! luotonen 44: #include "HTAccess.h" /* HTRequest */
1.14 luotonen 45: #include "HTAABrow.h" /* Access Authorization */
1.20 timbl 46: #include "HTTee.h" /* Tee off a cache stream */
47: #include "HTFWriter.h" /* Write to cache file */
1.1 timbl 48:
1.2 timbl 49: struct _HTStream {
50: HTStreamClass * isa; /* all we need to know */
51: };
52:
53:
1.6 timbl 54: extern char * HTAppName; /* Application name: please supply */
55: extern char * HTAppVersion; /* Application version: please supply */
56:
1.19 timbl 57: PUBLIC BOOL HTCacheHTTP = YES; /* Enable caching of HTTP-retrieved files */
58:
1.21 ! luotonen 59:
! 60: PRIVATE void parse_401_headers ARGS2(HTRequest *, req,
! 61: HTInputSocket *, isoc)
! 62: {
! 63: HTAAScheme scheme;
! 64: char *line;
! 65: int num_schemes = 0;
! 66: HTList *valid_schemes = HTList_new();
! 67: HTAssocList **scheme_specifics = NULL;
! 68: char *template = NULL;
! 69:
! 70: /* Read server reply header lines */
! 71:
! 72: if (TRACE)
! 73: fprintf(stderr, "Server 401 reply header lines:\n");
! 74:
! 75: while (NULL != (line = HTInputSocket_getUnfoldedLine(isoc)) &&
! 76: *line != 0) {
! 77:
! 78: if (TRACE) fprintf(stderr, "%s\n", line);
! 79:
! 80: if (strchr(line, ':')) { /* Valid header line */
! 81:
! 82: char *p = line;
! 83: char *fieldname = HTNextField(&p);
! 84: char *arg1 = HTNextField(&p);
! 85: char *args = p;
! 86:
! 87: if (0==strcasecomp(fieldname, "WWW-Authenticate:")) {
! 88: if (HTAA_UNKNOWN != (scheme = HTAAScheme_enum(arg1))) {
! 89: HTList_addObject(valid_schemes, (void*)scheme);
! 90: if (!scheme_specifics) {
! 91: int i;
! 92: scheme_specifics = (HTAssocList**)
! 93: malloc(HTAA_MAX_SCHEMES * sizeof(HTAssocList*));
! 94: if (!scheme_specifics)
! 95: outofmem(__FILE__, "parse_401_headers");
! 96: for (i=0; i < HTAA_MAX_SCHEMES; i++)
! 97: scheme_specifics[i] = NULL;
! 98: }
! 99: scheme_specifics[scheme] = HTAA_parseArgList(args);
! 100: num_schemes++;
! 101: }
! 102: else if (TRACE) {
! 103: fprintf(stderr, "Unknown scheme `%s' %s\n",
! 104: (arg1 ? arg1 : "(null)"),
! 105: "in WWW-Authenticate: field");
! 106: }
! 107: }
! 108:
! 109: else if (0==strcasecomp(fieldname, "WWW-Protection-Template:")) {
! 110: if (TRACE)
! 111: fprintf(stderr, "Protection template set to `%s'\n", arg1);
! 112: StrAllocCopy(template, arg1);
! 113: }
! 114:
! 115: } /* if a valid header line */
! 116: else if (TRACE) {
! 117: fprintf(stderr, "Invalid header line `%s' ignored\n", line);
! 118: } /* else invalid header line */
! 119: } /* while header lines remain */
! 120:
! 121: req->valid_schemes = valid_schemes;
! 122: req->scheme_specifics = scheme_specifics;
! 123: req->prot_template = template;
! 124: }
! 125:
! 126:
! 127:
1.1 timbl 128: /* Load Document from HTTP Server HTLoadHTTP()
129: ** ==============================
130: **
131: ** Given a hypertext address, this routine loads a document.
132: **
133: **
134: ** On entry,
135: ** arg is the hypertext reference of the article to be loaded.
136: **
137: ** On exit,
138: ** returns >=0 If no error, a good socket number
139: ** <0 Error.
140: **
141: ** The socket must be closed by the caller after the document has been
142: ** read.
143: **
144: */
1.19 timbl 145: PUBLIC int HTLoadHTTP ARGS1 (HTRequest *, request)
1.1 timbl 146: {
1.19 timbl 147: CONST char * arg = HTAnchor_physical(request->anchor);
1.1 timbl 148: int s; /* Socket number for returned data */
149: int status; /* tcp return */
1.10 timbl 150: char crlf[3]; /* A CR LF equivalent string */
1.3 timbl 151: HTStream * target = NULL; /* Unconverted data */
152:
1.2 timbl 153: CONST char* gate = 0; /* disable this feature */
1.1 timbl 154: SockA soc_address; /* Binary network address */
155: SockA * sin = &soc_address;
1.2 timbl 156: BOOL extensions = YES; /* Assume good HTTP server */
1.17 timbl 157:
1.1 timbl 158: if (!arg) return -3; /* Bad if no name sepcified */
159: if (!*arg) return -2; /* Bad if name had zero length */
160:
161: /* Set up defaults:
162: */
163: #ifdef DECNET
1.2 timbl 164: sin->sdn_family = AF_DECnet; /* Family = DECnet, host order */
165: sin->sdn_objnum = DNP_OBJ; /* Default: http object number */
1.1 timbl 166: #else /* Internet */
1.2 timbl 167: sin->sin_family = AF_INET; /* Family = internet, host order */
168: sin->sin_port = htons(TCP_PORT); /* Default: http port */
1.1 timbl 169: #endif
170:
1.10 timbl 171: sprintf(crlf, "%c%c", CR, LF); /* To be corect on Mac, VM, etc */
172:
1.1 timbl 173: if (TRACE) {
174: if (gate) fprintf(stderr,
175: "HTTPAccess: Using gateway %s for %s\n", gate, arg);
176: else fprintf(stderr, "HTTPAccess: Direct access for %s\n", arg);
177: }
178:
179: /* Get node name and optional port number:
180: */
181: {
182: char *p1 = HTParse(gate ? gate : arg, "", PARSE_HOST);
183: int status = HTParseInet(sin, p1); /* TBL 920622 */
184: free(p1);
185: if (status) return status; /* No such host for example */
186: }
187:
1.15 luotonen 188: /*
189: ** Compose authorization information (this was moved here
190: ** from after the making of the connection so that the connection
191: ** wouldn't have to wait while prompting username and password
192: ** from the user). -- AL 13.10.93
193: */
194: #ifdef ACCESS_AUTH
1.21 ! luotonen 195: StrAllocCopy(request->argument, arg);
! 196: HTAA_composeAuth(request);
! 197: if (TRACE) {
! 198: if (request->authorization)
! 199: fprintf(stderr, "HTTP: Sending Authorization: %s\n",
! 200: request->authorization);
! 201: else
! 202: fprintf(stderr, "HTTP: Not sending authorization (yet)\n");
1.15 luotonen 203: }
204: #endif /* ACCESS_AUTH */
1.1 timbl 205:
1.10 timbl 206: /* Now, let's get a socket set up from the server for the data:
1.1 timbl 207: */
208: #ifdef DECNET
209: s = socket(AF_DECnet, SOCK_STREAM, 0);
210: #else
211: s = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
212: #endif
213: status = connect(s, (struct sockaddr*)&soc_address, sizeof(soc_address));
214: if (status < 0) {
215: if (TRACE) fprintf(stderr,
216: "HTTP: Unable to connect to remote host for `%s' (errno = %d).\n", arg, errno);
1.17 timbl 217:
1.1 timbl 218: return HTInetStatus("connect");
219: }
220:
221: if (TRACE) fprintf(stderr, "HTTP connected, socket %d\n", s);
222:
1.17 timbl 223:
224: /* Compose and send command
225: ** ------------------------
226: */
227: {
228: char *command; /* The whole command */
229:
1.1 timbl 230: /* Ask that node for the document,
231: ** omitting the host name & anchor if not gatewayed.
232: */
1.17 timbl 233: if (gate) {
234: command = malloc(4 + strlen(arg)+ 2 + 31);
235: if (command == NULL) outofmem(__FILE__, "HTLoadHTTP");
236: strcpy(command, "GET ");
237: strcat(command, arg);
238: } else { /* not gatewayed */
239: char * p1 = HTParse(arg, "", PARSE_PATH|PARSE_PUNCTUATION);
240: command = malloc(4 + strlen(p1)+ 2 + 31);
241: if (command == NULL) outofmem(__FILE__, "HTLoadHTTP");
242: strcpy(command, "GET ");
243: strcat(command, p1);
244: free(p1);
245: }
1.2 timbl 246: #ifdef HTTP2
1.17 timbl 247: if (extensions) {
248: strcat(command, " ");
249: strcat(command, HTTP_VERSION);
250: }
1.2 timbl 251: #endif
1.17 timbl 252:
253: strcat(command, crlf); /* CR LF, as in rfc 977 */
254:
255: if (extensions) {
1.21 ! luotonen 256:
1.17 timbl 257: int i;
258: HTAtom * present = WWW_PRESENT;
259: char line[256]; /*@@@@ */
1.21 ! luotonen 260: HTList *conversions[2];
! 261:
! 262: if (!HTConversions) HTFormatInit(HTConversions);
! 263: conversions[0] = HTConversions;
! 264: conversions[1] = request->conversions;
! 265:
! 266: for (i=0; i<2; i++) {
! 267: HTList *cur = conversions[i];
! 268: HTPresentation *pres;
! 269:
! 270: while ((pres = (HTPresentation*)HTList_nextObject(cur))) {
! 271: if (pres->rep_out == present) {
! 272: if (pres->quality != 1.0) {
! 273: sprintf(line, "Accept: %s q=%.3f%c%c",
! 274: HTAtom_name(pres->rep),
! 275: pres->quality, CR, LF);
! 276: } else {
! 277: sprintf(line, "Accept: %s%c%c",
! 278: HTAtom_name(pres->rep), CR, LF);
! 279: }
! 280: StrAllocCat(command, line);
1.17 timbl 281: }
282: }
1.2 timbl 283: }
1.17 timbl 284:
285: sprintf(line, "User-Agent: %s/%s libwww/%s%c%c",
286: HTAppName ? HTAppName : "unknown",
287: HTAppVersion ? HTAppVersion : "0.0",
288: HTLibraryVersion, CR, LF);
289: StrAllocCat(command, line);
290:
1.14 luotonen 291: #ifdef ACCESS_AUTH
1.21 ! luotonen 292: if (request->authorization != NULL) {
! 293: sprintf(line, "Authorization: %s%c%c",
! 294: request->authorization, CR, LF);
1.17 timbl 295: StrAllocCat(command, line);
296: }
297: #endif /* ACCESS_AUTH */
1.14 luotonen 298: }
1.17 timbl 299:
300: StrAllocCat(command, crlf); /* Blank line means "end" */
301:
302: if (TRACE) fprintf(stderr, "HTTP Tx: %s\n", command);
303:
304: /* Translate into ASCII if necessary
305: */
1.4 timbl 306: #ifdef NOT_ASCII
1.17 timbl 307: {
308: char * p;
309: for(p = command; *p; p++) {
310: *p = TOASCII(*p);
311: }
1.1 timbl 312: }
1.3 timbl 313: #endif
1.17 timbl 314:
315: status = NETWRITE(s, command, (int)strlen(command));
316: free(command);
317: if (status<0) {
318: if (TRACE) fprintf(stderr,
319: "HTTPAccess: Unable to send command.\n");
1.1 timbl 320: return HTInetStatus("send");
1.17 timbl 321: }
322: } /* compose and send command */
323:
1.2 timbl 324:
1.17 timbl 325: /* Read the response
326: ** -----------------
1.11 timbl 327: **
328: ** HTTP0 servers must return ASCII style text, though it can in
329: ** principle be just text without any markup at all.
330: ** Full HTTP servers must return a response
331: ** line and RFC822 style header. The response must therefore in
332: ** either case have a CRLF somewhere soon.
333: **
334: ** This is the theory. In practice, there are (1993) unfortunately
335: ** many binary documents just served up with HTTP0.9. This
336: ** means we have to preserve the binary buffer (on the assumption that
337: ** conversion from ASCII may lose information) in case it turns
338: ** out that we want the binary original.
1.2 timbl 339: */
1.3 timbl 340:
1.21 ! luotonen 341: { /* read response */
! 342:
1.17 timbl 343: HTFormat format_in; /* Format arriving in the message */
1.21 ! luotonen 344: HTInputSocket *isoc = HTInputSocket_new(s);
! 345: char * status_line = HTInputSocket_getStatusLine(isoc);
1.2 timbl 346:
1.11 timbl 347: /* Kludge to trap binary responses from illegal HTTP0.9 servers.
348: ** First time we have enough, look at the stub in ASCII
349: ** and get out of here if it doesn't look right.
350: **
351: ** We also check for characters above 128 in the first few bytes, and
352: ** if we find them we forget the html default.
353: **
354: ** Bugs: A HTTP0.9 server returning a document starting "HTTP/"
355: ** will be taken as a HTTP 1.0 server. Failure.
356: ** An HTTP 0.9 server returning a binary document with
357: ** characters < 128 will be read as ASCII.
358: */
1.21 ! luotonen 359: if (!status_line) { /* HTTP0 response */
! 360: if (HTInputSocket_seemsBinary(isoc)) {
! 361: format_in = HTAtom_for("www/unknown");
! 362: }
! 363: else {
! 364: format_in = WWW_HTML;
! 365: }
! 366: goto copy;
! 367: } /* end kludge */
! 368:
! 369: if (status_line) { /* Decode full HTTP response */
! 370: /*
! 371: ** We now have a terminated server status line, and we have
! 372: ** checked that it is most probably a legal one. Parse it.
! 373: */
! 374: char server_version[VERSION_LENGTH+1];
! 375: int server_status;
! 376:
! 377: if (TRACE)
! 378: fprintf(stderr, "HTTP Status Line: Rx: %.70s\n", status_line);
1.17 timbl 379:
1.21 ! luotonen 380: sscanf(status_line, "%20s%d", server_version, &server_status);
1.2 timbl 381:
1.21 ! luotonen 382: format_in = HTAtom_for("www/mime");
1.7 timbl 383:
1.21 ! luotonen 384: switch (server_status / 100) {
1.2 timbl 385:
1.21 ! luotonen 386: default: /* bad number */
! 387: HTAlert("Unknown status reply from server!");
! 388: break;
1.17 timbl 389:
1.21 ! luotonen 390: case 3: /* Various forms of redirection */
! 391: HTAlert(
1.17 timbl 392: "Redirection response from server is not handled by this client");
1.21 ! luotonen 393: break;
1.17 timbl 394:
1.21 ! luotonen 395: case 4: /* Access Authorization problem */
1.14 luotonen 396: #ifdef ACCESS_AUTH
1.21 ! luotonen 397: switch (server_status) {
! 398: case 401:
! 399: parse_401_headers(request, isoc);
! 400:
! 401: if (TRACE) fprintf(stderr, "%s %d %s\n",
! 402: "HTTP: close socket", s,
! 403: "to retry with Access Authorization");
! 404: HTInputSocket_free(isoc);
! 405: (void)NETCLOSE(s);
! 406: if (HTAA_retryWithAuth(request, &HTLoadHTTP)) {
! 407: status = HT_LOADED;/* @@ THIS ONLY WORKS ON LINEMODE */
! 408: goto clean_up;
! 409: }
! 410: /* else falltrough */
! 411: default:
1.14 luotonen 412: {
1.21 ! luotonen 413: char *p1 = HTParse(gate ? gate : arg, "",
! 414: PARSE_HOST);
! 415: char * message;
! 416:
! 417: if (!(message = (char*)malloc(strlen(status_line) +
! 418: strlen(p1) + 100)))
! 419: outofmem(__FILE__, "HTTP 4xx status");
1.14 luotonen 420: sprintf(message,
1.21 ! luotonen 421: "HTTP server at %s replies:\n%s\n\n%s\n",
! 422: p1, status_line,
! 423: ((server_status == 401)
! 424: ? "Access Authorization package giving up.\n"
! 425: : ""));
! 426: status = HTLoadError(request->output_stream,
! 427: server_status, message);
1.14 luotonen 428: free(message);
429: free(p1);
430: goto clean_up;
431: }
1.21 ! luotonen 432: } /* switch */
! 433: goto clean_up;
! 434: break;
! 435: #else
! 436: /* case 4 without Access Authorization falls through */
! 437: /* to case 5 (previously "I think I goofed"). -- AL */
! 438: #endif /* ACCESS_AUTH */
! 439:
! 440: case 5: /* I think you goofed */
! 441: {
! 442: char *p1 = HTParse(gate ? gate : arg, "", PARSE_HOST);
! 443: char * message = (char*)malloc(strlen(status_line) +
! 444: strlen(p1) + 100);
! 445: if (!message) outofmem(__FILE__, "HTTP 5xx status");
! 446: sprintf(message,
! 447: "HTTP server at %s replies:\n%s", p1, status_line);
! 448: status = HTLoadError(request->output_stream,
! 449: server_status, message);
! 450: free(message);
! 451: free(p1);
! 452: goto clean_up;
! 453: }
! 454: break;
1.17 timbl 455:
1.21 ! luotonen 456: case 2: /* Good: Got MIME object */
! 457: break;
1.17 timbl 458:
1.21 ! luotonen 459: } /* switch on response code */
1.17 timbl 460:
1.21 ! luotonen 461: } /* Full HTTP reply */
1.17 timbl 462:
463:
1.3 timbl 464: /* Set up the stream stack to handle the body of the message
465: */
1.21 ! luotonen 466:
1.13 duns 467: copy:
1.21 ! luotonen 468:
1.18 timbl 469: target = HTStreamStack(format_in, request);
1.21 ! luotonen 470:
1.17 timbl 471: if (!target) {
472: char buffer[1024]; /* @@@@@@@@ */
473: sprintf(buffer, "Sorry, no known way of converting %s to %s.",
474: HTAtom_name(format_in), HTAtom_name(request->output_format));
475: fprintf(stderr, "HTTP: %s", buffer);
476: status = HTLoadError(request->output_stream, 501, buffer);
477: goto clean_up;
478: }
479:
1.19 timbl 480: /* @@ Bug: The decision of whether or not to cache should also be
1.21 ! luotonen 481: ** made contingent on a IP address match or non match.
! 482: */
1.19 timbl 483: if (HTCacheHTTP) {
484: target = HTTee(target, HTCacheWriter(request, NULL, format_in,
1.21 ! luotonen 485: request->output_format,
! 486: request->output_stream));
1.19 timbl 487: }
488:
1.11 timbl 489: /* Push the data down the stream
1.3 timbl 490: ** We have to remember the end of the first buffer we just read
1.2 timbl 491: */
1.17 timbl 492: if (format_in == WWW_HTML) {
493: target = HTNetToText(target); /* Pipe through CR stripper */
494: }
1.21 ! luotonen 495:
1.17 timbl 496: (*target->isa->put_block)(target,
1.21 ! luotonen 497: isoc->input_pointer,
! 498: isoc->input_limit - isoc->input_pointer);
! 499: HTInputSocket_free(isoc);
1.17 timbl 500: HTCopy(s, target);
501:
502: (*target->isa->free)(target);
503: status = HT_LOADED;
1.11 timbl 504:
1.2 timbl 505: /* Clean up
1.1 timbl 506: */
1.17 timbl 507:
508: clean_up:
509: if (TRACE) fprintf(stderr, "HTTP: close socket %d.\n", s);
510: (void) NETCLOSE(s);
511:
512: return status; /* Good return */
1.3 timbl 513:
1.17 timbl 514: } /* read response */
515: } /* load HTTP */
1.1 timbl 516:
517: /* Protocol descriptor
518: */
519:
1.17 timbl 520: GLOBALDEF PUBLIC HTProtocol HTTP = { "http", HTLoadHTTP, 0, 0 };
1.21 ! luotonen 521:
Webmaster