Annotation of libwww/Library/src/HTTP.c, revision 1.13
1.1 timbl 1: /* HyperText Transfer Protocol - Client implementation HTTP.c
2: ** ==========================
1.2 timbl 3: **
4: ** Bugs:
5: ** Not implemented:
6: ** Forward
7: ** Redirection
8: ** Error handling
1.1 timbl 9: */
10:
11: /* Module parameters:
12: ** -----------------
13: **
14: ** These may be undefined and redefined by syspec.h
15: */
1.2 timbl 16:
1.12 timbl 17: /* MOSAIC_HACK2 is a kludge to guess the file type of a transferred
18: ** file from the URL. It is STRICTLY illegal to do this!
19: */
20:
1.2 timbl 21: /* Implements:
22: */
23: #include "HTTP.h"
24:
25: #define HTTP_VERSION "HTTP/1.0"
26: #define HTTP2 /* Version is greater than 0.9 */
27:
28: #define INIT_LINE_SIZE 1024 /* Start with line buffer this big */
29: #define LINE_EXTEND_THRESH 256 /* Minimum read size */
30: #define VERSION_LENGTH 20 /* for returned protocol version */
31:
32: /* Uses:
33: */
1.1 timbl 34: #include "HTParse.h"
35: #include "HTUtils.h"
36: #include "tcp.h"
37: #include "HTTCP.h"
38: #include "HTFormat.h"
1.2 timbl 39: #include <ctype.h>
40: #include "HTAlert.h"
41: #include "HTMIME.h"
1.5 timbl 42: #include "HTML.h" /* SCW */
43: #include "HTInit.h" /* SCW */
1.1 timbl 44:
1.2 timbl 45: struct _HTStream {
46: HTStreamClass * isa; /* all we need to know */
47: };
48:
49:
1.6 timbl 50: extern char * HTAppName; /* Application name: please supply */
51: extern char * HTAppVersion; /* Application version: please supply */
52:
1.1 timbl 53: /* Load Document from HTTP Server HTLoadHTTP()
54: ** ==============================
55: **
56: ** Given a hypertext address, this routine loads a document.
57: **
58: **
59: ** On entry,
60: ** arg is the hypertext reference of the article to be loaded.
61: ** gate is nill if no gateway, else the gateway address.
62: **
63: ** On exit,
64: ** returns >=0 If no error, a good socket number
65: ** <0 Error.
66: **
67: ** The socket must be closed by the caller after the document has been
68: ** read.
69: **
70: */
1.2 timbl 71: PUBLIC int HTLoadHTTP ARGS4 (
72: CONST char *, arg,
73: /* CONST char *, gate, */
74: HTParentAnchor *, anAnchor,
75: HTFormat, format_out,
76: HTStream*, sink)
1.1 timbl 77: {
78: int s; /* Socket number for returned data */
79: char *command; /* The whole command */
1.3 timbl 80: char * eol = 0; /* End of line if found */
1.7 timbl 81: char * start_of_data; /* Start of body of reply */
1.11 timbl 82: int length; /* Number of valid bytes in buffer */
1.1 timbl 83: int status; /* tcp return */
1.10 timbl 84: char crlf[3]; /* A CR LF equivalent string */
1.3 timbl 85: HTStream * target = NULL; /* Unconverted data */
86: HTFormat format_in; /* Format arriving in the message */
87:
1.2 timbl 88: CONST char* gate = 0; /* disable this feature */
1.1 timbl 89: SockA soc_address; /* Binary network address */
90: SockA * sin = &soc_address;
1.2 timbl 91: BOOL had_header = NO; /* Have we had at least one header? */
1.11 timbl 92: char * text_buffer = NULL;
93: char * binary_buffer = NULL;
1.2 timbl 94: BOOL extensions = YES; /* Assume good HTTP server */
1.1 timbl 95: if (!arg) return -3; /* Bad if no name sepcified */
96: if (!*arg) return -2; /* Bad if name had zero length */
97:
98: /* Set up defaults:
99: */
100: #ifdef DECNET
1.2 timbl 101: sin->sdn_family = AF_DECnet; /* Family = DECnet, host order */
102: sin->sdn_objnum = DNP_OBJ; /* Default: http object number */
1.1 timbl 103: #else /* Internet */
1.2 timbl 104: sin->sin_family = AF_INET; /* Family = internet, host order */
105: sin->sin_port = htons(TCP_PORT); /* Default: http port */
1.1 timbl 106: #endif
107:
1.10 timbl 108: sprintf(crlf, "%c%c", CR, LF); /* To be corect on Mac, VM, etc */
109:
1.1 timbl 110: if (TRACE) {
111: if (gate) fprintf(stderr,
112: "HTTPAccess: Using gateway %s for %s\n", gate, arg);
113: else fprintf(stderr, "HTTPAccess: Direct access for %s\n", arg);
114: }
115:
116: /* Get node name and optional port number:
117: */
118: {
119: char *p1 = HTParse(gate ? gate : arg, "", PARSE_HOST);
120: int status = HTParseInet(sin, p1); /* TBL 920622 */
121: free(p1);
122: if (status) return status; /* No such host for example */
123: }
124:
1.2 timbl 125: retry:
1.1 timbl 126:
1.10 timbl 127: /* Now, let's get a socket set up from the server for the data:
1.1 timbl 128: */
129: #ifdef DECNET
130: s = socket(AF_DECnet, SOCK_STREAM, 0);
131: #else
132: s = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
133: #endif
134: status = connect(s, (struct sockaddr*)&soc_address, sizeof(soc_address));
135: if (status < 0) {
136: if (TRACE) fprintf(stderr,
137: "HTTP: Unable to connect to remote host for `%s' (errno = %d).\n", arg, errno);
138: /* free(command); BUG OUT TBL 921121 */
139: return HTInetStatus("connect");
140: }
141:
142: if (TRACE) fprintf(stderr, "HTTP connected, socket %d\n", s);
143:
144: /* Ask that node for the document,
145: ** omitting the host name & anchor if not gatewayed.
146: */
147: if (gate) {
1.2 timbl 148: command = malloc(4 + strlen(arg)+ 2 + 31);
1.1 timbl 149: if (command == NULL) outofmem(__FILE__, "HTLoadHTTP");
150: strcpy(command, "GET ");
151: strcat(command, arg);
152: } else { /* not gatewayed */
153: char * p1 = HTParse(arg, "", PARSE_PATH|PARSE_PUNCTUATION);
1.2 timbl 154: command = malloc(4 + strlen(p1)+ 2 + 31);
1.1 timbl 155: if (command == NULL) outofmem(__FILE__, "HTLoadHTTP");
156: strcpy(command, "GET ");
157: strcat(command, p1);
158: free(p1);
159: }
1.2 timbl 160: #ifdef HTTP2
161: if (extensions) {
162: strcat(command, " ");
163: strcat(command, HTTP_VERSION);
164: }
165: #endif
1.10 timbl 166:
167: strcat(command, crlf); /* CR LF, as in rfc 977 */
1.1 timbl 168:
1.2 timbl 169: if (extensions) {
170:
171: int n;
172: int i;
173: HTAtom * present = WWW_PRESENT;
174: char line[256]; /*@@@@ */
175:
176: if (!HTPresentations) HTFormatInit();
177: n = HTList_count(HTPresentations);
178:
179: for(i=0; i<n; i++) {
180: HTPresentation * pres = HTList_objectAt(HTPresentations, i);
181: if (pres->rep_out == present) {
182: if (pres->quality != 1.0) {
1.3 timbl 183: sprintf(line, "Accept: %s q=%.3f%c%c",
184: HTAtom_name(pres->rep), pres->quality, CR, LF);
1.2 timbl 185: } else {
1.3 timbl 186: sprintf(line, "Accept: %s%c%c",
187: HTAtom_name(pres->rep), CR, LF);
1.2 timbl 188: }
189: StrAllocCat(command, line);
190:
191: }
192: }
1.6 timbl 193:
194: sprintf(line, "User-Agent: %s/%s libwww/%s%c%c",
195: HTAppName ? HTAppName : "unknown",
196: HTAppVersion ? HTAppVersion : "0.0",
197: HTLibraryVersion, CR, LF);
198: StrAllocCat(command, line);
1.2 timbl 199: }
1.3 timbl 200:
1.10 timbl 201: StrAllocCat(command, crlf); /* Blank line means "end" */
202:
203: if (TRACE) fprintf(stderr, "HTTP Tx: %s\n", command);
204:
205: /* Translate into ASCII if necessary
206: */
1.4 timbl 207: #ifdef NOT_ASCII
1.1 timbl 208: {
209: char * p;
210: for(p = command; *p; p++) {
211: *p = TOASCII(*p);
212: }
1.4 timbl 213: }
1.3 timbl 214: #endif
1.1 timbl 215:
216: status = NETWRITE(s, command, (int)strlen(command));
217: free(command);
218: if (status<0) {
219: if (TRACE) fprintf(stderr, "HTTPAccess: Unable to send command.\n");
220: return HTInetStatus("send");
221: }
222:
1.2 timbl 223:
1.7 timbl 224: /* Read the first line of the response
225: ** -----------------------------------
1.11 timbl 226: **
227: ** HTTP0 servers must return ASCII style text, though it can in
228: ** principle be just text without any markup at all.
229: ** Full HTTP servers must return a response
230: ** line and RFC822 style header. The response must therefore in
231: ** either case have a CRLF somewhere soon.
232: **
233: ** This is the theory. In practice, there are (1993) unfortunately
234: ** many binary documents just served up with HTTP0.9. This
235: ** means we have to preserve the binary buffer (on the assumption that
236: ** conversion from ASCII may lose information) in case it turns
237: ** out that we want the binary original.
1.2 timbl 238: */
1.3 timbl 239:
1.2 timbl 240: {
241:
242: /* Get numeric status etc */
243:
244: BOOL end_of_file = NO;
245: HTAtom * encoding = HTAtom_for("7bit");
246: int buffer_length = INIT_LINE_SIZE; /* Why not? */
247:
1.11 timbl 248: binary_buffer = (char *) malloc(buffer_length * sizeof(char));
249: if (!binary_buffer) outofmem(__FILE__, "HTLoadHTTP");
250: text_buffer = (char *) malloc(buffer_length * sizeof(char));
251: if (!text_buffer) outofmem(__FILE__, "HTLoadHTTP");
252: length = 0;
1.2 timbl 253:
1.7 timbl 254: do { /* Loop to read in the first line */
1.2 timbl 255:
256: /* Extend line buffer if necessary for those crazy WAIS URLs ;-) */
257:
258: if (buffer_length - length < LINE_EXTEND_THRESH) {
259: buffer_length = buffer_length + buffer_length;
1.11 timbl 260: binary_buffer = (char *) realloc(
261: binary_buffer, buffer_length * sizeof(char));
262: if (!binary_buffer) outofmem(__FILE__, "HTLoadHTTP");
263: text_buffer = (char *) realloc(
264: text_buffer, buffer_length * sizeof(char));
265: if (!text_buffer) outofmem(__FILE__, "HTLoadHTTP");
1.2 timbl 266: }
1.11 timbl 267: status = NETREAD(s, binary_buffer + length,
1.2 timbl 268: buffer_length - length -1);
269: if (status < 0) {
270: HTAlert("Unexpected network read error on response");
1.9 timbl 271: NETCLOSE(s);
1.2 timbl 272: return status;
273: }
1.10 timbl 274:
275: if (TRACE) fprintf(stderr, "HTTP: read returned %d bytes.\n",
276: status);
277:
1.2 timbl 278: if (status == 0) {
279: end_of_file = YES;
280: break;
281: }
1.11 timbl 282: binary_buffer[length+status] = 0;
283:
284:
285: /* Make an ASCII *copy* of the buffer
286: */
1.2 timbl 287: #ifdef NOT_ASCII
1.10 timbl 288: if (TRACE) fprintf(stderr, "Local codes CR=%d, LF=%d\n", CR, LF);
1.11 timbl 289: #endif
1.2 timbl 290: {
291: char * p;
1.11 timbl 292: char * q;
293: for(p = binary_buffer+length, q=text_buffer+length;
294: *p; p++, q++) {
295: *q = FROMASCII(*p);
296: }
297:
298: *q++ = 0;
299: }
300:
301: /* Kludge to trap binary responses from illegal HTTP0.9 servers.
302: ** First time we have enough, look at the stub in ASCII
303: ** and get out of here if it doesn't look right.
304: **
305: ** We also check for characters above 128 in the first few bytes, and
306: ** if we find them we forget the html default.
307: **
308: ** Bugs: A HTTP0.9 server returning a document starting "HTTP/"
309: ** will be taken as a HTTP 1.0 server. Failure.
310: ** An HTTP 0.9 server returning a binary document with
311: ** characters < 128 will be read as ASCII.
312: */
313: #define STUB_LENGTH 20
314: if (length < STUB_LENGTH && length+status >= STUB_LENGTH) {
315: if(strncmp("HTTP/", text_buffer, 5)!=0) {
316: char *p;
317: start_of_data = text_buffer; /* reparse whole reply */
318: for(p=binary_buffer; p <binary_buffer+STUB_LENGTH;p++) {
1.13 ! duns 319: if (((int)*p)&128) {
1.11 timbl 320: format_in = HTAtom_for("www/unknown");
1.13 ! duns 321: length = length + status;
! 322: goto copy; /* out of while loop */
1.11 timbl 323: }
324: }
1.2 timbl 325: }
326: }
1.11 timbl 327: /* end kludge */
328:
329:
330: eol = strchr(text_buffer + length, 10);
331: if (eol) {
332: *eol = 0; /* Terminate the line */
333: if (eol[-1] = CR) eol[-1] = 0; /* Chop trailing CR */
334: }
1.2 timbl 335:
336: length = length + status;
337:
1.7 timbl 338: } while (!eol && !end_of_file); /* No LF */
339:
340: } /* Scope of loop variables */
1.2 timbl 341:
1.7 timbl 342:
343: /* We now have a terminated unfolded line. Parse it.
344: ** -------------------------------------------------
1.2 timbl 345: */
346:
1.11 timbl 347: if (TRACE)fprintf(stderr, "HTTP: Rx: %.70s\n", text_buffer);
1.7 timbl 348:
349: {
350: int fields;
351: char server_version [VERSION_LENGTH+1];
352: int server_status;
353:
1.2 timbl 354:
355: /* Kludge to work with old buggy servers. They can't handle the third word
356: ** so we try again without it.
357: */
1.7 timbl 358: if (extensions &&
1.11 timbl 359: 0==strcmp(text_buffer, /* Old buggy server? */
1.7 timbl 360: "Document address invalid or access not authorised")) {
361: extensions = NO;
1.11 timbl 362: if (binary_buffer) free(binary_buffer);
363: if (text_buffer) free(text_buffer);
1.7 timbl 364: if (TRACE) fprintf(stderr,
365: "HTTP: close socket %d to retry with HTTP0\n", s);
366: NETCLOSE(s);
367: goto retry; /* @@@@@@@@@@ */
368: }
1.11 timbl 369: /* end kludge */
1.2 timbl 370:
1.11 timbl 371: fields = sscanf(text_buffer, "%20s%d",
1.7 timbl 372: server_version,
373: &server_status);
374:
1.11 timbl 375: if (fields < 2 ||
376: strncmp(server_version, "HTTP/", 5)!=0) { /* HTTP0 reply */
1.7 timbl 377: format_in = WWW_HTML;
1.11 timbl 378: start_of_data = text_buffer; /* reread whole reply */
1.9 timbl 379: if (eol) *eol = '\n'; /* Reconstitute buffer */
1.2 timbl 380:
1.11 timbl 381: } else { /* Full HTTP reply */
1.7 timbl 382:
383: /* Decode full HTTP response */
384:
1.3 timbl 385: format_in = HTAtom_for("www/mime");
1.11 timbl 386: start_of_data = eol ? eol + 1 : text_buffer + length;
1.3 timbl 387:
1.2 timbl 388: switch (server_status / 100) {
389:
1.3 timbl 390: default: /* bad number */
391: HTAlert("Unknown status reply from server!");
392: break;
393:
1.2 timbl 394: case 3: /* Various forms of redirection */
1.7 timbl 395: HTAlert(
1.3 timbl 396: "Redirection response from server is not handled by this client");
397: break;
398:
1.2 timbl 399: case 4: /* "I think I goofed" */
400: case 5: /* I think you goofed */
1.6 timbl 401: {
402: char *p1 = HTParse(gate ? gate : arg, "", PARSE_HOST);
403: char * message = (char*)malloc(
1.11 timbl 404: strlen(text_buffer)+strlen(p1) + 100);
1.6 timbl 405: if (!message) outofmem(__FILE__, "HTTP 5xx status");
406: sprintf(message,
1.11 timbl 407: "HTTP server at %s replies:\n%s", p1, text_buffer);
1.8 timbl 408: status = HTLoadError(sink, server_status, message);
1.6 timbl 409: free(message);
410: free(p1);
411: goto clean_up;
412: }
1.3 timbl 413: break;
1.2 timbl 414:
415: case 2: /* Good: Got MIME object */
416: break;
417:
1.7 timbl 418: } /* switch on response code */
419:
420: } /* Full HTTP reply */
421:
422: } /* scope of fields */
1.2 timbl 423:
1.3 timbl 424: /* Set up the stream stack to handle the body of the message
425: */
426:
1.13 ! duns 427: copy:
! 428:
1.3 timbl 429: target = HTStreamStack(format_in,
430: format_out,
431: sink , anAnchor);
432:
433: if (!target) {
434: char buffer[1024]; /* @@@@@@@@ */
1.11 timbl 435: if (binary_buffer) free(binary_buffer);
436: if (text_buffer) free(text_buffer);
1.3 timbl 437: sprintf(buffer, "Sorry, no known way of converting %s to %s.",
438: HTAtom_name(format_in), HTAtom_name(format_out));
439: fprintf(stderr, "HTTP: %s", buffer);
1.6 timbl 440: status = HTLoadError(sink, 501, buffer);
441: goto clean_up;
1.3 timbl 442: }
443:
444:
1.11 timbl 445: /* Push the data down the stream
1.3 timbl 446: ** We have to remember the end of the first buffer we just read
1.2 timbl 447: */
1.11 timbl 448: if (format_in == WWW_HTML) {
449: target = HTNetToText(target); /* Pipe through CR stripper */
450: }
451:
452: (*target->isa->put_block)(target,
453: binary_buffer + (start_of_data - text_buffer),
454: length - (start_of_data - text_buffer));
455: HTCopy(s, target);
1.3 timbl 456:
457: (*target->isa->free)(target);
1.8 timbl 458: status = HT_LOADED;
1.2 timbl 459:
460: /* Clean up
1.1 timbl 461: */
1.3 timbl 462:
1.6 timbl 463: clean_up:
1.11 timbl 464: if (binary_buffer) free(binary_buffer);
465: if (text_buffer) free(text_buffer);
1.3 timbl 466:
1.1 timbl 467: if (TRACE) fprintf(stderr, "HTTP: close socket %d.\n", s);
1.6 timbl 468: (void) NETCLOSE(s);
1.1 timbl 469:
1.8 timbl 470: return status; /* Good return */
1.3 timbl 471:
1.1 timbl 472: }
1.7 timbl 473:
1.1 timbl 474:
475: /* Protocol descriptor
476: */
477:
1.13 ! duns 478: GLOBALDEF PUBLIC HTProtocol HTTP = { "http", HTLoadHTTP, 0 };
Webmaster