Annotation of libwww/Library/src/HTTP.c, revision 1.2
1.1 timbl 1: /* HyperText Transfer Protocol - Client implementation HTTP.c
2: ** ==========================
1.2 ! timbl 3: **
! 4: ** Bugs:
! 5: ** Not implemented:
! 6: ** Forward
! 7: ** Redirection
! 8: ** Error handling
1.1 timbl 9: */
10:
11: /* Module parameters:
12: ** -----------------
13: **
14: ** These may be undefined and redefined by syspec.h
15: */
1.2 ! timbl 16:
! 17: /* Implements:
! 18: */
! 19: #include "HTTP.h"
! 20:
/*	Protocol version token appended to the GET line when extensions are on.
*/
#define HTTP_VERSION		"HTTP/1.0"

/*	Defined: compile in the request headers and the status-line parsing
**	(protocol version greater than 0.9).
*/
#define HTTP2

/*	Sizing of the buffer used to read the first line of the reply.
*/
#define INIT_LINE_SIZE		1024	/* Initial allocation, in bytes */
#define LINE_EXTEND_THRESH	256	/* Grow when less than this is free */
#define VERSION_LENGTH		20	/* Max chars kept of server's version */
! 27:
! 28: /* Uses:
! 29: */
1.1 timbl 30: #include "HTParse.h"
31: #include "HTUtils.h"
32: #include "tcp.h"
33: #include "HTTCP.h"
34: #include "HTFormat.h"
1.2 ! timbl 35: #include <ctype.h>
! 36: #include "HTAlert.h"
! 37: #include "HTMIME.h"
1.1 timbl 38:
39:
1.2 ! timbl 40: struct _HTStream {
! 41: HTStreamClass * isa; /* all we need to know */
! 42: };
! 43:
! 44:
1.1 timbl 45: /* Load Document from HTTP Server HTLoadHTTP()
46: ** ==============================
47: **
48: ** Given a hypertext address, this routine loads a document.
49: **
50: **
51: ** On entry,
52: ** arg is the hypertext reference of the article to be loaded.
53: ** gate is nill if no gateway, else the gateway address.
54: **
55: ** On exit,
56: ** returns >=0 If no error, a good socket number
57: ** <0 Error.
58: **
59: ** The socket must be closed by the caller after the document has been
60: ** read.
61: **
62: */
1.2 ! timbl 63: PUBLIC int HTLoadHTTP ARGS4 (
! 64: CONST char *, arg,
! 65: /* CONST char *, gate, */
! 66: HTParentAnchor *, anAnchor,
! 67: HTFormat, format_out,
! 68: HTStream*, sink)
1.1 timbl 69: {
70: int s; /* Socket number for returned data */
71: char *command; /* The whole command */
72: int status; /* tcp return */
1.2 ! timbl 73: CONST char* gate = 0; /* disable this feature */
! 74: HTFormat format = WWW_HTML; /* default is HTTP2 */
1.1 timbl 75: SockA soc_address; /* Binary network address */
76: SockA * sin = &soc_address;
1.2 ! timbl 77: BOOL had_header = NO; /* Have we had at least one header? */
! 78: char * line_buffer = NULL;
! 79: BOOL extensions = YES; /* Assume good HTTP server */
1.1 timbl 80: if (!arg) return -3; /* Bad if no name sepcified */
81: if (!*arg) return -2; /* Bad if name had zero length */
82:
83: /* Set up defaults:
84: */
85: #ifdef DECNET
1.2 ! timbl 86: sin->sdn_family = AF_DECnet; /* Family = DECnet, host order */
! 87: sin->sdn_objnum = DNP_OBJ; /* Default: http object number */
1.1 timbl 88: #else /* Internet */
1.2 ! timbl 89: sin->sin_family = AF_INET; /* Family = internet, host order */
! 90: sin->sin_port = htons(TCP_PORT); /* Default: http port */
1.1 timbl 91: #endif
92:
93: if (TRACE) {
94: if (gate) fprintf(stderr,
95: "HTTPAccess: Using gateway %s for %s\n", gate, arg);
96: else fprintf(stderr, "HTTPAccess: Direct access for %s\n", arg);
97: }
98:
99: /* Get node name and optional port number:
100: */
101: {
102: char *p1 = HTParse(gate ? gate : arg, "", PARSE_HOST);
103: int status = HTParseInet(sin, p1); /* TBL 920622 */
104: free(p1);
105: if (status) return status; /* No such host for example */
106: }
107:
1.2 ! timbl 108: retry:
1.1 timbl 109:
110: /* Now, let's get a socket set up from the server for the sgml data:
111: */
112: #ifdef DECNET
113: s = socket(AF_DECnet, SOCK_STREAM, 0);
114: #else
115: s = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
116: #endif
117: status = connect(s, (struct sockaddr*)&soc_address, sizeof(soc_address));
118: if (status < 0) {
119: #ifndef DECNET
120: /* This code is temporary backward-compatibility. It should
121: go away when no server runs on port 2784 alone */
122: if (sin->sin_port == htons(TCP_PORT)) { /* Try the old one */
123: if (TRACE) printf (
124: "HTTP: Port %d doesn't answer (errno = %d). Trying good old port %d...\n",
125: TCP_PORT, errno, OLD_TCP_PORT);
126: sin->sin_port = htons(OLD_TCP_PORT);
127: /* First close current socket and open a clean one */
128: status = NETCLOSE (s);
129: s = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
130: status = connect(s, (struct sockaddr*)&soc_address,
131: sizeof(soc_address));
132: }
133: if (status < 0)
134: #endif
135: {
136: if (TRACE) fprintf(stderr,
137: "HTTP: Unable to connect to remote host for `%s' (errno = %d).\n", arg, errno);
138: /* free(command); BUG OUT TBL 921121 */
139: return HTInetStatus("connect");
140: }
141: }
142:
143: if (TRACE) fprintf(stderr, "HTTP connected, socket %d\n", s);
144:
145: /* Ask that node for the document,
146: ** omitting the host name & anchor if not gatewayed.
147: */
148: if (gate) {
1.2 ! timbl 149: command = malloc(4 + strlen(arg)+ 2 + 31);
1.1 timbl 150: if (command == NULL) outofmem(__FILE__, "HTLoadHTTP");
151: strcpy(command, "GET ");
152: strcat(command, arg);
153: } else { /* not gatewayed */
154: char * p1 = HTParse(arg, "", PARSE_PATH|PARSE_PUNCTUATION);
1.2 ! timbl 155: command = malloc(4 + strlen(p1)+ 2 + 31);
1.1 timbl 156: if (command == NULL) outofmem(__FILE__, "HTLoadHTTP");
157: strcpy(command, "GET ");
158: strcat(command, p1);
159: free(p1);
160: }
1.2 ! timbl 161: #ifdef HTTP2
! 162: if (extensions) {
! 163: strcat(command, " ");
! 164: strcat(command, HTTP_VERSION);
! 165: }
! 166: #endif
! 167: strcat(command, "\r\n"); /* Include CR for telnet compat. */
1.1 timbl 168:
169:
1.2 ! timbl 170: #ifdef HTTP2
! 171: if (extensions) {
! 172:
! 173: int n;
! 174: int i;
! 175: HTAtom * present = WWW_PRESENT;
! 176: char line[256]; /*@@@@ */
! 177:
! 178: if (!HTPresentations) HTFormatInit();
! 179: n = HTList_count(HTPresentations);
! 180:
! 181: for(i=0; i<n; i++) {
! 182: HTPresentation * pres = HTList_objectAt(HTPresentations, i);
! 183: if (pres->rep_out == present) {
! 184: if (pres->quality != 1.0) {
! 185: sprintf(line, "Accept: %s q=%.3f\r\n",
! 186: HTAtom_name(pres->rep), pres->quality);
! 187: } else {
! 188: sprintf(line, "Accept: %s\r\n",
! 189: HTAtom_name(pres->rep));
! 190: }
! 191: StrAllocCat(command, line);
! 192:
! 193: }
! 194: }
! 195: }
1.1 timbl 196:
1.2 ! timbl 197: StrAllocCat(command, "\r\n"); /* BLANK LINE means "end" */
! 198:
! 199: #endif
! 200:
1.1 timbl 201: #ifdef NOT_ASCII
202: {
203: char * p;
204: for(p = command; *p; p++) {
205: *p = TOASCII(*p);
206: }
207: }
208: #endif
209:
1.2 ! timbl 210: if (TRACE) fprintf(stderr, "HTTP Tx: %s\n", command);
1.1 timbl 211: status = NETWRITE(s, command, (int)strlen(command));
212: free(command);
213: if (status<0) {
214: if (TRACE) fprintf(stderr, "HTTPAccess: Unable to send command.\n");
215: return HTInetStatus("send");
216: }
217:
1.2 ! timbl 218:
! 219: /* Now load the data: HTTP2 response parse
! 220: */
! 221: #ifdef HTTP2
! 222: {
! 223:
! 224: /* Get numeric status etc */
! 225:
! 226: int status;
! 227: int length = 0;
! 228: char * eol = 0;
! 229: BOOL end_of_file = NO;
! 230: HTFormat format = WWW_PLAINTEXT; /* default */
! 231: HTAtom * encoding = HTAtom_for("7bit");
! 232: int buffer_length = INIT_LINE_SIZE; /* Why not? */
! 233:
! 234: line_buffer = (char *) malloc(buffer_length * sizeof(char));
! 235: if (!line_buffer) outofmem(__FILE__, "HTLoadHTTP");
! 236:
! 237: for(;;) {
! 238:
! 239: int fields;
! 240: char server_version [VERSION_LENGTH+1];
! 241: int server_status;
! 242:
! 243: /* Extend line buffer if necessary for those crazy WAIS URLs ;-) */
! 244:
! 245: if (buffer_length - length < LINE_EXTEND_THRESH) {
! 246: buffer_length = buffer_length + buffer_length;
! 247: line_buffer = (char *) realloc(
! 248: line_buffer, buffer_length * sizeof(char));
! 249: if (!line_buffer) outofmem(__FILE__, "HTLoadHTTP");
! 250: }
! 251: status = NETREAD(s, line_buffer + length,
! 252: buffer_length - length -1);
! 253: if (status < 0) {
! 254: HTAlert("Unexpected network read error on response");
! 255: return status;
! 256: }
! 257: if (status == 0) {
! 258: end_of_file = YES;
! 259: break;
! 260: }
! 261: line_buffer[length+status] = 0;
! 262: #ifdef NOT_ASCII
! 263: {
! 264: char * p;
! 265: for(p = line_buffer+length; *p; p++) {
! 266: *p = FROMASCII(*p);
! 267: }
! 268: }
! 269: #endif
! 270: eol = strchr(line_buffer + length, '\n');
! 271: if (eol && *(eol-1) == '\r') *(eol-1) = ' ';
! 272:
! 273: length = length + status;
! 274:
! 275: if (!eol && !end_of_file) continue; /* No LF */
! 276:
! 277: *eol = 0; /* Terminate the line */
! 278:
! 279:
! 280: /* We now have a terminated unfolded line.
! 281: */
! 282:
! 283: if (TRACE)fprintf(stderr, "HTTP: Rx: %s\n", line_buffer);
! 284:
! 285: /* Kludge to work with old buggy servers. They can't handle the third word
! 286: ** so we try again without it.
! 287: */
! 288: if (extensions &&
! 289: 0==strcmp(line_buffer, /* Old buggy server? */
! 290: "Document address invalid or access not authorised")) {
! 291: extensions = NO;
! 292: if (line_buffer) free(line_buffer);
! 293: if (TRACE) fprintf(stderr,
! 294: "HTTP: close socket %d to retry with HTTP0\n", s);
! 295: NETCLOSE(s);
! 296: goto retry; /* @@@@@@@@@@ */
! 297: }
! 298:
! 299: fields = sscanf(line_buffer, "%20s%d",
! 300: server_version,
! 301: &server_status);
! 302:
! 303: if (fields < 2) break;
! 304:
! 305: switch (server_status / 100) {
! 306:
! 307: case 3: /* Various forms of redirection */
! 308: case 4: /* "I think I goofed" */
! 309: case 5: /* I think you goofed */
! 310: default: /* bad number */
! 311:
! 312: HTAlert("Bad status reply from server");
! 313: /* Fall through @@@@@@@@@@@@@@@@@@@@@ */
! 314:
! 315: case 2: /* Good: Got MIME object */
! 316: {
! 317: HTStream * mime = HTStreamStack(HTAtom_for("www/mime"),
! 318: format_out, sink, anAnchor);
! 319:
! 320: if (!mime) {
! 321: if (line_buffer) free(line_buffer);
! 322: return HTLoadError(sink, 403,
! 323: "MIME: Can't convert this format");
! 324: }
! 325: mime->isa->put_string(mime, eol+1); /* Rest of buffer */
! 326: HTCopyNoCR(s, mime); /* Rest of doc */
! 327: mime->isa->end_document(mime);
! 328: mime->isa->free(mime);
! 329: goto done;
! 330: }
! 331: break;
! 332:
! 333: }
! 334:
! 335: break; /* Get out of for loop */
! 336:
! 337: } /* Loop over lines */
! 338: } /* Scope of HTTP2 handling block */
! 339:
! 340: /* Now, we can assume that we did NOT have a MIME header so behave as for HTTP0
! 341: */
! 342: {
! 343: HTParseSocket(format, format_out,
! 344: (HTParentAnchor *) anAnchor, s, sink);
! 345: }
! 346: #else
! 347: HTParseSocket(format, format_out,
! 348: (HTParentAnchor *) anAnchor, s, sink);
! 349: #endif
! 350:
! 351: /* Clean up
1.1 timbl 352: */
1.2 ! timbl 353: done:
! 354: if (line_buffer) free(line_buffer);
1.1 timbl 355:
356: if (TRACE) fprintf(stderr, "HTTP: close socket %d.\n", s);
357: status = NETCLOSE(s);
358:
359: return HT_LOADED; /* Good return */
360: }
361:
362: /* Protocol descriptor
363: */
364:
1.2 ! timbl 365: PUBLIC HTProtocol HTTP = { "http", HTLoadHTTP, 0 };
Webmaster