Annotation of libwww/Library/src/HTMIME.c, revision 2.71.2.4
2.15 frystyk 1: /* HTMIME.c
2: ** MIME MESSAGE PARSE
3: **
2.22 frystyk 4: ** (c) COPYRIGHT MIT 1995.
2.15 frystyk 5: ** Please first read the full copyright statement in the file COPYRIGH.
2.71.2.4! eric 6: ** @(#) $Id: HTMIME.c,v 2.71.2.3 1996/11/05 21:43:09 frystyk Exp $
2.1 timbl 7: **
8: ** This is RFC 1341-specific code.
9: ** The input stream pushed into this parser is assumed to be
10: ** stripped of CRs, i.e. lines end with LF, not CR LF.
11: ** (It is easy to change this except for the body part where
12: ** conversion can be slow.)
13: **
14: ** History:
15: ** Feb 92 Written Tim Berners-Lee, CERN
2.13 duns 16: ** 8 Jul 94 FM Insulate free() from _free structure element.
2.71 frystyk 17: ** 14 Mar 95 HFN Now using response for storing data. No more `\n',
2.18 frystyk 18: ** static buffers etc.
2.1 timbl 19: */
2.17 frystyk 20:
21: /* Library include files */
2.57 frystyk 22: #include "sysdep.h"
2.60 frystyk 23: #include "WWWUtil.h"
2.61 frystyk 24: #include "WWWCore.h"
2.70 frystyk 25: #include "WWWCache.h"
26: #include "WWWStream.h"
2.61 frystyk 27: #include "HTReqMan.h"
28: #include "HTNetMan.h"
2.36 frystyk 29: #include "HTHeader.h"
2.64 eric 30: #include "HTWWWStr.h"
2.14 frystyk 31: #include "HTMIME.h" /* Implemented here */
2.1 timbl 32:
/* Modulus applied to the running hash of the lower-cased header name */
#define MIME_HASH_SIZE 101
34:
/*
**  Parser mode flags. They are OR'ed into the `mode' member of the stream
**  by the HTMIMEHeader, HTMIMEFooter and HTMIMEPartial constructors; a
**  stream made by HTMIMEConvert alone has no flag set.
*/
typedef enum _HTMIMEMode {
    HT_MIME_HEADER	= 0x1,	/* Header only: pumpData stops with HT_LOADED */
    HT_MIME_FOOTER	= 0x2,	/* Footer only: no anchor update, no body */
    HT_MIME_PARTIAL	= 0x4	/* Partial response: no C-T stack, cache append */
} HTMIMEMode;
40:
/*
**  Private state of the MIME parser stream.
*/
struct _HTStream {
    const HTStreamClass *	isa;		/* Method table (&HTMIME) */
    HTRequest *			request;	/* Request being served */
    HTResponse *		response;	/* HTRequest_response(request) */
    HTNet *			net;		/* Used for byte accounting */
    HTStream *			target;		/* Downstream stream, may be NULL */
    HTFormat			target_format;	/* Output format asked for */
    HTChunk *			token;		/* Header name being collected */
    HTChunk *			value;		/* Header value being collected */
    int				hash;		/* Running hash of header name */
    HTEOLState			EOLstate;	/* Line ending state machine */
    HTMIMEMode			mode;		/* HT_MIME_xxx flags */
    BOOL			transparent;	/* YES: pass data straight through */
    BOOL			haveToken;	/* YES: header name is complete */
};
56:
2.18 frystyk 57: /* ------------------------------------------------------------------------- */
2.1 timbl 58:
2.64 eric 59: PRIVATE int pumpData (HTStream * me)
2.18 frystyk 60: {
2.64 eric 61: HTRequest * request = me->request;
2.71 frystyk 62: HTResponse * response = me->response;
63: HTFormat format = HTResponse_format(response);
64: HTEncoding transfer = HTResponse_transfer(response);
65: long length = HTResponse_length(response);
2.48 frystyk 66: me->transparent = YES; /* Pump rest of data right through */
2.27 frystyk 67:
2.71 frystyk 68: /* If this request is a source in PostWeb then pause here */
2.66 frystyk 69: if (HTRequest_isSource(request)) return HT_PAUSE;
2.47 frystyk 70:
2.71 frystyk 71: /*
72: ** Cache the metainformation in the anchor object by moving
73: ** it from the response object. This we do regardless if
74: ** we have a persistent cache or not as the memory cache will
75: ** use it as well. If we are updating a cache entry using
76: ** byte ranges then we alreayd have the metainformation and
77: ** hence we can ignore the new one as it'd better be the same.
78: */
79: if (!(me->mode & (HT_MIME_PARTIAL | HT_MIME_FOOTER)) &&
80: HTResponse_isCachable(me->response)) {
81: HTAnchor_update(HTRequest_anchor(request), me->response);
82: }
83:
84: /*
85: ** If we asked only to read the header or footer or we used a HEAD
86: ** method then we stop here as we don't expect any body part.
87: */
2.70 frystyk 88: if (me->mode & (HT_MIME_HEADER | HT_MIME_FOOTER) ||
2.71 frystyk 89: HTRequest_method(request) == METHOD_HEAD) {
2.70 frystyk 90: return HT_LOADED;
91: }
2.43 frystyk 92:
2.60 frystyk 93: /*
2.71 frystyk 94: ** If there is no content-length, no transfer encoding and no
95: ** content type then we assume that there is no body part in
96: ** the message and we can return HT_LOADED
2.68 frystyk 97: */
98: if (length<=0 && format==WWW_UNKNOWN && transfer==NULL) {
99: if (STREAM_TRACE) HTTrace("MIME Parser. No body in this messsage\n");
100: return HT_LOADED;
101: }
102:
103: /*
2.71 frystyk 104: ** Handle any Content Type
2.60 frystyk 105: */
2.71 frystyk 106: if (!(me->mode & HT_MIME_PARTIAL) &&
107: (format != WWW_UNKNOWN || length > 0 || transfer)) {
108: if (STREAM_TRACE) HTTrace("Building.... C-T stack from %s to %s\n",
109: HTAtom_name(format),
110: HTAtom_name(me->target_format));
111: me->target = HTStreamStack(format, me->target_format,
112: me->target, request, YES);
2.18 frystyk 113: }
2.60 frystyk 114:
2.71 frystyk 115: /*
116: ** Handle any Content Encoding
117: */
2.61 frystyk 118: {
2.71 frystyk 119: HTList * cc = HTResponse_encoding(response);
2.61 frystyk 120: if (cc) {
121: if (STREAM_TRACE) HTTrace("Building.... C-E stack\n");
122: me->target = HTContentDecodingStack(cc, me->target, request, NULL);
123: }
2.60 frystyk 124: }
125:
2.70 frystyk 126: /*
2.71 frystyk 127: ** Can we cache the data object? If so then create a T stream and hook it
128: ** into the stream pipe. We do it before the transfer decoding so that we
129: ** don't have to deal with that when we retrieve the object from cache.
130: ** If we are appending to a cache entry then use a different stream than
131: ** if creating a new entry.
132: */
133: if (HTCacheMode_enabled()) {
134: if (me->mode & HT_MIME_PARTIAL) {
135: HTStream * append = HTStreamStack(WWW_CACHE_APPEND,
136: me->target_format,
137: me->target, request, NO);
138: #if 0
139: if (cache) me->target = HTTee(me->target, cache, NULL);
140: me->target = HTPipeBuffer_new(me->target, request, 0);
141: #else
142: me->target = append;
143: #endif
144: } else if (HTResponse_isCachable(me->response)) {
145: HTStream * cache = HTStreamStack(WWW_CACHE, me->target_format,
146: me->target, request, NO);
147: if (cache) me->target = HTTee(me->target, cache, NULL);
148: }
2.70 frystyk 149: }
150:
2.71 frystyk 151: /*
152: ** Handle any Transfer encoding
153: */
2.61 frystyk 154: {
155: if (!HTFormat_isUnityTransfer(transfer)) {
156: if (STREAM_TRACE) HTTrace("Building.... C-T-E stack\n");
157: me->target = HTTransferCodingStack(transfer, me->target,
158: request, NULL, NO);
159: }
160: }
2.71 frystyk 161:
2.27 frystyk 162: return HT_OK;
2.1 timbl 163: }
164:
2.65 eric 165: /* _dispatchParsers - call request's MIME header parser.
166: ** Use global parser if no appropriate one is found for request.
167: */
168: PRIVATE int _dispatchParsers (HTStream * me)
169: {
170: int status;
171: char * token = HTChunk_data(me->token);
172: char * value = HTChunk_data(me->value);
2.71 frystyk 173: BOOL found = NO;
174: BOOL local = NO;
2.65 eric 175: HTMIMEParseSet * parseSet;
176:
177: /* In case we get an empty header consisting of a CRLF, we fall thru */
2.66 frystyk 178: if (STREAM_TRACE) HTTrace("MIME header. %s: %s\n",
179: token ? token : "<null>",
180: value ? value : "<null>");
181: if (!token) return HT_OK; /* Ignore noop token */
2.65 eric 182:
2.70 frystyk 183: /*
2.71 frystyk 184: ** Remember the original header
185: */
186: HTResponse_addHeader(me->response, token, value);
187:
188: /*
2.70 frystyk 189: ** Search the local set of MIME parsers
190: */
2.65 eric 191: if ((parseSet = HTRequest_MIMEParseSet(me->request, &local)) != NULL) {
192: status = HTMIMEParseSet_dispatch(parseSet, me->request,
2.71 frystyk 193: token, value, &found);
194: if (found) return status;
2.65 eric 195: }
196:
2.70 frystyk 197: /*
198: ** Search the global set of MIME parsers
199: */
2.71 frystyk 200: if (local==NO && (parseSet = HTHeader_MIMEParseSet()) != NULL) {
201: status = HTMIMEParseSet_dispatch(parseSet, me->request,
202: token, value, &found);
203: if (found) return status;
204: }
205:
2.65 eric 206: return HT_OK;
207: }
208:
2.18 frystyk 209: /*
210: ** Header is terminated by CRCR, LFLF, CRLFLF, CRLFCRLF
211: ** Folding is either of CF LWS, LF LWS, CRLF LWS
212: */
2.57 frystyk 213: PRIVATE int HTMIME_put_block (HTStream * me, const char * b, int l)
2.18 frystyk 214: {
2.57 frystyk 215: const char * start = b;
216: const char * end = start;
2.64 eric 217: const char * value = me->value->size ? b : NULL;
2.71.2.3 frystyk 218: int length = l;
2.64 eric 219: int status;
220: /* enum {Line_CHAR, Line_END, Line_FOLD, Line_LINE} line = Line_CHAR; */
221:
222: while (!me->transparent) {
2.18 frystyk 223: if (me->EOLstate == EOL_FCR) {
2.64 eric 224: if (*b == CR) /* End of header */
225: me->EOLstate = EOL_END;
226: else if (*b == LF) /* CRLF */
2.18 frystyk 227: me->EOLstate = EOL_FLF;
2.64 eric 228: else if (WHITE(*b)) /* Folding: CR SP */
229: me->EOLstate = EOL_FOLD;
230: else /* New line */
231: me->EOLstate = EOL_LINE;
2.18 frystyk 232: } else if (me->EOLstate == EOL_FLF) {
233: if (*b == CR) /* LF CR or CR LF CR */
234: me->EOLstate = EOL_SCR;
2.64 eric 235: else if (*b == LF) /* End of header */
236: me->EOLstate = EOL_END;
237: else if (WHITE(*b)) /* Folding: LF SP or CR LF SP */
238: me->EOLstate = EOL_FOLD;
239: else /* New line */
240: me->EOLstate = EOL_LINE;
241: } else if (me->EOLstate == EOL_SCR) {
242: if (*b==CR || *b==LF) /* End of header */
243: me->EOLstate = EOL_END;
244: else if (WHITE(*b)) /* Folding: LF CR SP or CR LF CR SP */
245: me->EOLstate = EOL_FOLD;
246: else /* New line */
247: me->EOLstate = EOL_LINE;
248: } else if (*b == CR)
249: me->EOLstate = EOL_FCR;
250: else if (*b == LF)
251: me->EOLstate = EOL_FLF; /* Line found */
252: else {
253: if (!me->haveToken) {
254: if (*b == ':' || isspace(*b)) {
255: HTChunk_putb(me->token, start, end-start);
256: HTChunk_putc(me->token, '\0');
257: me->haveToken = YES;
258: } else {
259: unsigned char ch = *(unsigned char *) b;
2.71.2.1 eric 260: ch = tolower(ch);
2.64 eric 261: /* if (ch >= 'A' && ch <= 'Z')
262: ch += ('a' - 'A'); */
263: me->hash = (me->hash * 3 + ch) % MIME_HASH_SIZE;
264: }
265: } else if (value == NULL && *b != ':' && !isspace(*b))
266: value = b;
267: end++;
268: }
269: switch (me->EOLstate) {
270: case EOL_LINE:
271: case EOL_END: {
272: int status;
273: HTChunk_putb(me->value, value, end-value);
274: HTChunk_putc(me->value, '\0');
2.65 eric 275: status = _dispatchParsers(me);
2.71.2.4! eric 276: HTNet_addBytesRead(me->net, b-start);
2.71.2.3 frystyk 277: start=b, end=b;
2.64 eric 278: if (me->EOLstate == EOL_END) { /* EOL_END */
2.67 frystyk 279: if (status == HT_OK) {
280: b++, l--;
2.64 eric 281: status = pumpData(me);
2.71.2.4! eric 282: HTNet_addBytesRead(me->net, 1);
2.71.2.3 frystyk 283: HTNet_setHeaderLength(me->net, HTNet_bytesRead(me->net));
2.67 frystyk 284: }
2.64 eric 285: } else { /* EOL_LINE */
286: HTChunk_clear(me->token);
287: HTChunk_clear(me->value);
288: me->haveToken = NO;
289: me->hash = 0;
290: value = NULL;
291: }
2.18 frystyk 292: me->EOLstate = EOL_BEGIN;
2.27 frystyk 293: if (status != HT_OK)
294: return status;
2.64 eric 295: break;
296: }
297: case EOL_FOLD:
2.18 frystyk 298: me->EOLstate = EOL_BEGIN;
2.64 eric 299: if (!me->haveToken) {
300: HTChunk_putb(me->token, start, end-start);
301: HTChunk_putc(me->token, '\0');
302: me->haveToken = YES;
303: } else if (value) {
304: HTChunk_putb(me->value, value, end-value);
305: HTChunk_putc(me->value, ' ');
306: }
307: start=b, end=b;
308: break;
309: default:
2.71.2.4! eric 310: b++, l--;
2.64 eric 311: if (!l) {
312: if (!me->haveToken)
313: HTChunk_putb(me->token, start, end-start);
314: else if (value)
315: HTChunk_putb(me->value, value, end-value);
316: return HT_OK;
317: }
318: }
2.18 frystyk 319: }
2.32 frystyk 320:
321: /*
322: ** Put the rest down the stream without touching the data but make sure
2.71.2.3 frystyk 323: ** that we get the correct content length of data. If we have a CL in
324: ** the headers then this stream is responsible for the accountance.
2.32 frystyk 325: */
2.66 frystyk 326: if (me->target) {
2.71.2.3 frystyk 327: HTNet * net = me->net;
2.71.2.1 eric 328: /* Check if CL at all - thanks to jwei@hal.com (John Wei) */
329: long cl = HTResponse_length(me->response);
2.71.2.3 frystyk 330: if (cl >= 0) {
331: long bodyRead = HTNet_bytesRead(net) - HTNet_headerLength(net);
2.71.2.1 eric 332:
2.71.2.3 frystyk 333: /*
334: ** If we have more than we need then just take what belongs to us.
335: */
336: if (bodyRead + l >= cl) {
337: int consume = cl - bodyRead;
338: if ((status = (*me->target->isa->put_block)(me->target, b, consume)) < 0)
339: return status;
340: HTNet_addBytesRead(net, consume);
341: HTHost_setConsumed(HTNet_host(net), HTNet_bytesRead(net));
342: return HT_LOADED;
343: } else {
344: if ((status = (*me->target->isa->put_block)(me->target, b, l)) < 0)
345: return status;
346: HTNet_addBytesRead(net, l);
2.71.2.1 eric 347: return status;
2.71.2.3 frystyk 348: }
2.71.2.1 eric 349: }
2.71.2.3 frystyk 350: return (*me->target->isa->put_block)(me->target, b, l);
2.66 frystyk 351: }
2.71.2.3 frystyk 352: HTHost_setConsumed(HTNet_host(me->net), HTNet_bytesRead(me->net));
2.66 frystyk 353: return HT_LOADED;
2.18 frystyk 354: }
355:
356:
357: /* Character handling
358: ** ------------------
359: */
2.36 frystyk 360: PRIVATE int HTMIME_put_character (HTStream * me, char c)
2.18 frystyk 361: {
362: return HTMIME_put_block(me, &c, 1);
363: }
364:
2.1 timbl 365:
366: /* String handling
367: ** ---------------
368: */
2.57 frystyk 369: PRIVATE int HTMIME_put_string (HTStream * me, const char * s)
2.1 timbl 370: {
2.18 frystyk 371: return HTMIME_put_block(me, s, (int) strlen(s));
2.1 timbl 372: }
373:
374:
2.18 frystyk 375: /* Flush an stream object
376: ** ---------------------
2.1 timbl 377: */
2.36 frystyk 378: PRIVATE int HTMIME_flush (HTStream * me)
2.1 timbl 379: {
2.47 frystyk 380: return me->target ? (*me->target->isa->flush)(me->target) : HT_OK;
2.1 timbl 381: }
382:
2.18 frystyk 383: /* Free a stream object
384: ** --------------------
2.1 timbl 385: */
2.36 frystyk 386: PRIVATE int HTMIME_free (HTStream * me)
2.1 timbl 387: {
2.18 frystyk 388: int status = HT_OK;
2.64 eric 389: if (!me->transparent)
2.65 eric 390: if (_dispatchParsers(me) == HT_OK)
2.64 eric 391: pumpData(me);
2.25 frystyk 392: if (me->target) {
393: if ((status = (*me->target->isa->_free)(me->target))==HT_WOULD_BLOCK)
394: return HT_WOULD_BLOCK;
395: }
2.26 frystyk 396: if (PROT_TRACE)
2.55 eric 397: HTTrace("MIME........ FREEING....\n");
2.64 eric 398: HTChunk_delete(me->token);
399: HTChunk_delete(me->value);
2.52 frystyk 400: HT_FREE(me);
2.18 frystyk 401: return status;
2.1 timbl 402: }
403:
404: /* End writing
405: */
2.38 frystyk 406: PRIVATE int HTMIME_abort (HTStream * me, HTList * e)
2.1 timbl 407: {
2.18 frystyk 408: int status = HT_ERROR;
2.41 frystyk 409: if (me->target) status = (*me->target->isa->abort)(me->target, e);
2.26 frystyk 410: if (PROT_TRACE)
2.55 eric 411: HTTrace("MIME........ ABORTING...\n");
2.64 eric 412: HTChunk_delete(me->token);
413: HTChunk_delete(me->value);
2.52 frystyk 414: HT_FREE(me);
2.18 frystyk 415: return status;
2.1 timbl 416: }
417:
418:
419:
/*	Structured Object Class
**	-----------------------
**	Method table of the MIME parser stream. HTMIMEConvert installs it as
**	the `isa' member of every stream that it creates. The initializer is
**	positional and must follow the member order of HTStreamClass.
*/
PRIVATE const HTStreamClass HTMIME =
{
    "MIMEParser",			/* Name of this stream class */
    HTMIME_flush,
    HTMIME_free,
    HTMIME_abort,
    HTMIME_put_character,
    HTMIME_put_string,
    HTMIME_put_block
};
433:
434:
2.48 frystyk 435: /* MIME header parser stream.
2.1 timbl 436: ** -------------------------
2.48 frystyk 437: ** This stream parses a complete MIME header and if a content type header
438: ** is found then the stream stack is called. Any left over data is pumped
439: ** right through the stream
2.1 timbl 440: */
2.36 frystyk 441: PUBLIC HTStream* HTMIMEConvert (HTRequest * request,
442: void * param,
443: HTFormat input_format,
444: HTFormat output_format,
445: HTStream * output_stream)
2.1 timbl 446: {
2.62 frystyk 447: HTStream * me;
2.52 frystyk 448: if ((me = (HTStream *) HT_CALLOC(1, sizeof(* me))) == NULL)
449: HT_OUTOFMEM("HTMIMEConvert");
2.1 timbl 450: me->isa = &HTMIME;
2.18 frystyk 451: me->request = request;
2.71 frystyk 452: me->response = HTRequest_response(request);
2.70 frystyk 453: me->net = HTRequest_net(request);
2.49 frystyk 454: me->target = output_stream;
2.18 frystyk 455: me->target_format = output_format;
2.64 eric 456: me->token = HTChunk_new(256);
457: me->value = HTChunk_new(256);
458: me->hash = 0;
2.18 frystyk 459: me->EOLstate = EOL_BEGIN;
2.64 eric 460: me->haveToken = NO;
2.1 timbl 461: return me;
462: }
2.32 frystyk 463:
2.48 frystyk 464: /* MIME header ONLY parser stream
465: ** ------------------------------
466: ** This stream parses a complete MIME header and then returnes HT_PAUSE.
467: ** It does not set up any streams and resting data stays in the buffer.
468: ** This can be used if you only want to parse the headers before you
469: ** decide what to do next. This is for example the case in a server app.
470: */
471: PUBLIC HTStream * HTMIMEHeader (HTRequest * request,
472: void * param,
473: HTFormat input_format,
474: HTFormat output_format,
475: HTStream * output_stream)
476: {
2.62 frystyk 477: HTStream * me = HTMIMEConvert(request, param, input_format,
478: output_format, output_stream);
2.70 frystyk 479: me->mode |= HT_MIME_HEADER;
2.48 frystyk 480: return me;
481: }
2.62 frystyk 482:
483: /* MIME footer ONLY parser stream
484: ** ------------------------------
485: ** Parse only a footer, for example after a chunked encoding.
486: */
487: PUBLIC HTStream * HTMIMEFooter (HTRequest * request,
488: void * param,
489: HTFormat input_format,
490: HTFormat output_format,
491: HTStream * output_stream)
492: {
493: HTStream * me = HTMIMEConvert(request, param, input_format,
494: output_format, output_stream);
2.70 frystyk 495: me->mode |= HT_MIME_FOOTER;
2.67 frystyk 496: me->EOLstate = EOL_FLF;
2.62 frystyk 497: return me;
498: }
2.71 frystyk 499:
500: /* Partial Response MIME parser stream
501: ** -----------------------------------
502: ** In case we sent a Range conditional GET we may get back a partial
503: ** response. This response must be appended to the already existing
504: ** cache entry before presented to the user.
505: ** We do this by continuing to load the new object into a temporary
506: ** buffer and at the same time start the cache load of the already
507: ** existing object. When we have loaded the cache we merge the two
508: ** buffers.
509: */
510: PUBLIC HTStream * HTMIMEPartial (HTRequest * request,
511: void * param,
512: HTFormat input_format,
513: HTFormat output_format,
514: HTStream * output_stream)
515: {
516: #if 0
517: HTParentAnchor * anchor = HTRequest_anchor(request);
518: HTStream * me = NULL;
519: HTStream * merge = NULL;
520: /*
521: ** The merge stream is a place holder for where we can put data when it
522: ** arrives. We have two feeds: one from the cache and one from the net.
523: ** We call the stream stack already now to get the right output stream.
524: ** We can do this as we already know the content type from when we got the
525: ** first part of the object.
526: */
527: {
528: HTFormat format = HTAnchor_format(anchor);
529: if (STREAM_TRACE) HTTrace("Building.... C-T stack from %s to %s\n",
530: HTAtom_name(format),
531: HTAtom_name(output_format));
532: merge = HTMerge(HTStreamStack(format, output_format, output_stream,
533: request, YES), 2);
534: }
535:
536: #else
537: /*
538: ** Set up the MIME parser as the one feed to the merge stream. The MIME
539: ** parser then calls the PIPE buffer. We use source output as the stream
540: ** stack has already been called.
541: */
542: HTStream * me = HTMIMEConvert(request, param, input_format,
543: output_format, output_stream);
544: me->mode |= HT_MIME_PARTIAL;
545: #endif
546:
547: /*
548: ** Now start the second load from the cache. First we read this data from
549: ** the cache and then we flush the data that we have read from the net.
550: ** We use the same anchor as before but with another physical address.
551: */
552: {
553: HTParentAnchor * anchor = HTRequest_anchor(request);
554: HTRequest * creq = HTRequest_new();
555: HTCache * cache = NULL;
556:
557: /* Set up the request */
558: #if 0
559: HTRequest_setOutputFormat(creq, WWW_SOURCE);
560: HTRequest_setOutputStream(creq, me);
561: #endif
562: HTRequest_setAnchor(creq, (HTAnchor *) anchor);
563:
564: /* Set up the anchor */
565: if ((cache = HTCache_find(anchor))) {
566: char * name = HTCache_name(cache);
567: HTAnchor_setPhysical(anchor, name);
568: HT_FREE(name);
569: if (STREAM_TRACE) HTTrace("Partial..... Starting cache load\n");
570: HTLoad(creq, NO);
571: }
572: }
573: return me;
574: }
575:
Webmaster