Annotation of libwww/Library/src/HTMIME.c, revision 2.71
2.15 frystyk 1: /* HTMIME.c
2: ** MIME MESSAGE PARSE
3: **
2.22 frystyk 4: ** (c) COPYRIGHT MIT 1995.
2.15 frystyk 5: ** Please first read the full copyright statement in the file COPYRIGH.
2.71 ! frystyk 6: ** @(#) $Id: HTMIME.c,v 2.70 1996/09/08 22:08:29 frystyk Exp $
2.1 timbl 7: **
8: ** This is RFC 1341-specific code.
9: ** The input stream pushed into this parser is assumed to be
10: ** stripped of CRs, i.e. lines end with LF, not CR LF.
11: ** (It is easy to change this except for the body part where
12: ** conversion can be slow.)
13: **
14: ** History:
15: ** Feb 92 Written Tim Berners-Lee, CERN
2.13 duns 16: ** 8 Jul 94 FM Insulate free() from _free structure element.
2.71 ! frystyk 17: ** 14 Mar 95 HFN Now using response for storing data. No more `\n',
2.18 frystyk 18: ** static buffers etc.
2.1 timbl 19: */
2.17 frystyk 20:
21: /* Library include files */
2.57 frystyk 22: #include "sysdep.h"
2.60 frystyk 23: #include "WWWUtil.h"
2.61 frystyk 24: #include "WWWCore.h"
2.70 frystyk 25: #include "WWWCache.h"
26: #include "WWWStream.h"
2.61 frystyk 27: #include "HTReqMan.h"
28: #include "HTNetMan.h"
2.36 frystyk 29: #include "HTHeader.h"
2.64 eric 30: #include "HTWWWStr.h"
2.14 frystyk 31: #include "HTMIME.h" /* Implemented here */
2.1 timbl 32:
2.64 eric 33: #define MIME_HASH_SIZE 101
34:
2.70 frystyk 35: typedef enum _HTMIMEMode {
36: HT_MIME_HEADER = 0x1,
2.71 ! frystyk 37: HT_MIME_FOOTER = 0x2,
! 38: HT_MIME_PARTIAL = 0x4
2.70 frystyk 39: } HTMIMEMode;
40:
2.1 timbl 41: struct _HTStream {
2.57 frystyk 42: const HTStreamClass * isa;
2.18 frystyk 43: HTRequest * request;
2.71 ! frystyk 44: HTResponse * response;
2.32 frystyk 45: HTNet * net;
2.18 frystyk 46: HTStream * target;
47: HTFormat target_format;
2.64 eric 48: HTChunk * token;
49: HTChunk * value;
50: int hash;
2.59 frystyk 51: HTEOLState EOLstate;
2.70 frystyk 52: HTMIMEMode mode;
2.18 frystyk 53: BOOL transparent;
2.64 eric 54: BOOL haveToken;
2.1 timbl 55: };
56:
2.18 frystyk 57: /* ------------------------------------------------------------------------- */
2.1 timbl 58:
2.64 eric 59: PRIVATE int pumpData (HTStream * me)
2.18 frystyk 60: {
2.64 eric 61: HTRequest * request = me->request;
2.71 ! frystyk 62: HTResponse * response = me->response;
! 63: HTFormat format = HTResponse_format(response);
! 64: HTEncoding transfer = HTResponse_transfer(response);
! 65: long length = HTResponse_length(response);
2.48 frystyk 66: me->transparent = YES; /* Pump rest of data right through */
2.27 frystyk 67:
2.71 ! frystyk 68: /* If this request is a source in PostWeb then pause here */
2.66 frystyk 69: if (HTRequest_isSource(request)) return HT_PAUSE;
2.47 frystyk 70:
2.71 ! frystyk 71: /*
! 72: ** Cache the metainformation in the anchor object by moving
! 73: ** it from the response object. This we do regardless if
! 74: ** we have a persistent cache or not as the memory cache will
! 75: ** use it as well. If we are updating a cache entry using
! 76: ** byte ranges then we alreayd have the metainformation and
! 77: ** hence we can ignore the new one as it'd better be the same.
! 78: */
! 79: if (!(me->mode & (HT_MIME_PARTIAL | HT_MIME_FOOTER)) &&
! 80: HTResponse_isCachable(me->response)) {
! 81: HTAnchor_update(HTRequest_anchor(request), me->response);
! 82: }
! 83:
! 84: /*
! 85: ** If we asked only to read the header or footer or we used a HEAD
! 86: ** method then we stop here as we don't expect any body part.
! 87: */
2.70 frystyk 88: if (me->mode & (HT_MIME_HEADER | HT_MIME_FOOTER) ||
2.71 ! frystyk 89: HTRequest_method(request) == METHOD_HEAD) {
2.70 frystyk 90: return HT_LOADED;
91: }
2.43 frystyk 92:
2.60 frystyk 93: /*
2.71 ! frystyk 94: ** If there is no content-length, no transfer encoding and no
! 95: ** content type then we assume that there is no body part in
! 96: ** the message and we can return HT_LOADED
2.68 frystyk 97: */
98: if (length<=0 && format==WWW_UNKNOWN && transfer==NULL) {
99: if (STREAM_TRACE) HTTrace("MIME Parser. No body in this messsage\n");
100: return HT_LOADED;
101: }
102:
103: /*
2.71 ! frystyk 104: ** Handle any Content Type
2.60 frystyk 105: */
2.71 ! frystyk 106: if (!(me->mode & HT_MIME_PARTIAL) &&
! 107: (format != WWW_UNKNOWN || length > 0 || transfer)) {
! 108: if (STREAM_TRACE) HTTrace("Building.... C-T stack from %s to %s\n",
! 109: HTAtom_name(format),
! 110: HTAtom_name(me->target_format));
! 111: me->target = HTStreamStack(format, me->target_format,
! 112: me->target, request, YES);
2.18 frystyk 113: }
2.60 frystyk 114:
2.71 ! frystyk 115: /*
! 116: ** Handle any Content Encoding
! 117: */
2.61 frystyk 118: {
2.71 ! frystyk 119: HTList * cc = HTResponse_encoding(response);
2.61 frystyk 120: if (cc) {
121: if (STREAM_TRACE) HTTrace("Building.... C-E stack\n");
122: me->target = HTContentDecodingStack(cc, me->target, request, NULL);
123: }
2.60 frystyk 124: }
125:
2.70 frystyk 126: /*
2.71 ! frystyk 127: ** Can we cache the data object? If so then create a T stream and hook it
! 128: ** into the stream pipe. We do it before the transfer decoding so that we
! 129: ** don't have to deal with that when we retrieve the object from cache.
! 130: ** If we are appending to a cache entry then use a different stream than
! 131: ** if creating a new entry.
! 132: */
! 133: if (HTCacheMode_enabled()) {
! 134: if (me->mode & HT_MIME_PARTIAL) {
! 135: HTStream * append = HTStreamStack(WWW_CACHE_APPEND,
! 136: me->target_format,
! 137: me->target, request, NO);
! 138: #if 0
! 139: if (cache) me->target = HTTee(me->target, cache, NULL);
! 140: me->target = HTPipeBuffer_new(me->target, request, 0);
! 141: #else
! 142: me->target = append;
! 143: #endif
! 144: } else if (HTResponse_isCachable(me->response)) {
! 145: HTStream * cache = HTStreamStack(WWW_CACHE, me->target_format,
! 146: me->target, request, NO);
! 147: if (cache) me->target = HTTee(me->target, cache, NULL);
! 148: }
2.70 frystyk 149: }
150:
2.71 ! frystyk 151: /*
! 152: ** Handle any Transfer encoding
! 153: */
2.61 frystyk 154: {
155: if (!HTFormat_isUnityTransfer(transfer)) {
156: if (STREAM_TRACE) HTTrace("Building.... C-T-E stack\n");
157: me->target = HTTransferCodingStack(transfer, me->target,
158: request, NULL, NO);
159: }
160: }
2.71 ! frystyk 161:
2.27 frystyk 162: return HT_OK;
2.1 timbl 163: }
164:
2.65 eric 165: /* _dispatchParsers - call request's MIME header parser.
166: ** Use global parser if no appropriate one is found for request.
167: */
168: PRIVATE int _dispatchParsers (HTStream * me)
169: {
170: int status;
171: char * token = HTChunk_data(me->token);
172: char * value = HTChunk_data(me->value);
2.71 ! frystyk 173: BOOL found = NO;
! 174: BOOL local = NO;
2.65 eric 175: HTMIMEParseSet * parseSet;
176:
177: /* In case we get an empty header consisting of a CRLF, we fall thru */
2.66 frystyk 178: if (STREAM_TRACE) HTTrace("MIME header. %s: %s\n",
179: token ? token : "<null>",
180: value ? value : "<null>");
181: if (!token) return HT_OK; /* Ignore noop token */
2.65 eric 182:
2.70 frystyk 183: /*
2.71 ! frystyk 184: ** Remember the original header
! 185: */
! 186: HTResponse_addHeader(me->response, token, value);
! 187:
! 188: /*
2.70 frystyk 189: ** Search the local set of MIME parsers
190: */
2.65 eric 191: if ((parseSet = HTRequest_MIMEParseSet(me->request, &local)) != NULL) {
192: status = HTMIMEParseSet_dispatch(parseSet, me->request,
2.71 ! frystyk 193: token, value, &found);
! 194: if (found) return status;
2.65 eric 195: }
196:
2.70 frystyk 197: /*
198: ** Search the global set of MIME parsers
199: */
2.71 ! frystyk 200: if (local==NO && (parseSet = HTHeader_MIMEParseSet()) != NULL) {
! 201: status = HTMIMEParseSet_dispatch(parseSet, me->request,
! 202: token, value, &found);
! 203: if (found) return status;
! 204: }
! 205:
2.65 eric 206: return HT_OK;
207: }
208:
2.18 frystyk 209: /*
210: ** Header is terminated by CRCR, LFLF, CRLFLF, CRLFCRLF
211: ** Folding is either of CF LWS, LF LWS, CRLF LWS
212: */
2.57 frystyk 213: PRIVATE int HTMIME_put_block (HTStream * me, const char * b, int l)
2.18 frystyk 214: {
2.57 frystyk 215: const char * start = b;
216: const char * end = start;
2.64 eric 217: const char * value = me->value->size ? b : NULL;
218: long cl;
219: int status;
220: /* enum {Line_CHAR, Line_END, Line_FOLD, Line_LINE} line = Line_CHAR; */
221:
222: while (!me->transparent) {
2.18 frystyk 223: if (me->EOLstate == EOL_FCR) {
2.64 eric 224: if (*b == CR) /* End of header */
225: me->EOLstate = EOL_END;
226: else if (*b == LF) /* CRLF */
2.18 frystyk 227: me->EOLstate = EOL_FLF;
2.64 eric 228: else if (WHITE(*b)) /* Folding: CR SP */
229: me->EOLstate = EOL_FOLD;
230: else /* New line */
231: me->EOLstate = EOL_LINE;
2.18 frystyk 232: } else if (me->EOLstate == EOL_FLF) {
233: if (*b == CR) /* LF CR or CR LF CR */
234: me->EOLstate = EOL_SCR;
2.64 eric 235: else if (*b == LF) /* End of header */
236: me->EOLstate = EOL_END;
237: else if (WHITE(*b)) /* Folding: LF SP or CR LF SP */
238: me->EOLstate = EOL_FOLD;
239: else /* New line */
240: me->EOLstate = EOL_LINE;
241: } else if (me->EOLstate == EOL_SCR) {
242: if (*b==CR || *b==LF) /* End of header */
243: me->EOLstate = EOL_END;
244: else if (WHITE(*b)) /* Folding: LF CR SP or CR LF CR SP */
245: me->EOLstate = EOL_FOLD;
246: else /* New line */
247: me->EOLstate = EOL_LINE;
248: } else if (*b == CR)
249: me->EOLstate = EOL_FCR;
250: else if (*b == LF)
251: me->EOLstate = EOL_FLF; /* Line found */
252: else {
253: if (!me->haveToken) {
254: if (*b == ':' || isspace(*b)) {
255: HTChunk_putb(me->token, start, end-start);
256: HTChunk_putc(me->token, '\0');
257: me->haveToken = YES;
258: } else {
259: unsigned char ch = *(unsigned char *) b;
260: tolower(ch);
261: /* if (ch >= 'A' && ch <= 'Z')
262: ch += ('a' - 'A'); */
263: me->hash = (me->hash * 3 + ch) % MIME_HASH_SIZE;
264: }
265: } else if (value == NULL && *b != ':' && !isspace(*b))
266: value = b;
267: end++;
268: }
269: switch (me->EOLstate) {
270: case EOL_LINE:
271: case EOL_END: {
272: int status;
273: HTChunk_putb(me->value, value, end-value);
274: HTChunk_putc(me->value, '\0');
275: start=b, end=b;
2.65 eric 276: status = _dispatchParsers(me);
2.64 eric 277: if (me->EOLstate == EOL_END) { /* EOL_END */
2.67 frystyk 278: if (status == HT_OK) {
279: b++, l--;
2.64 eric 280: status = pumpData(me);
2.67 frystyk 281: }
2.64 eric 282: HTNet_setBytesRead(me->net, l);
283: } else { /* EOL_LINE */
284: HTChunk_clear(me->token);
285: HTChunk_clear(me->value);
286: me->haveToken = NO;
287: me->hash = 0;
288: value = NULL;
289: }
2.18 frystyk 290: me->EOLstate = EOL_BEGIN;
2.27 frystyk 291: if (status != HT_OK)
292: return status;
2.64 eric 293: break;
294: }
295: case EOL_FOLD:
2.18 frystyk 296: me->EOLstate = EOL_BEGIN;
2.64 eric 297: if (!me->haveToken) {
298: HTChunk_putb(me->token, start, end-start);
299: HTChunk_putc(me->token, '\0');
300: me->haveToken = YES;
301: } else if (value) {
302: HTChunk_putb(me->value, value, end-value);
303: HTChunk_putc(me->value, ' ');
304: }
305: start=b, end=b;
306: break;
307: default:
308: b++;
309: l--;
310: if (!l) {
311: if (!me->haveToken)
312: HTChunk_putb(me->token, start, end-start);
313: else if (value)
314: HTChunk_putb(me->value, value, end-value);
315: return HT_OK;
316: }
317: }
2.18 frystyk 318: }
2.32 frystyk 319:
320: /*
321: ** Put the rest down the stream without touching the data but make sure
322: ** that we get the correct content length of data
323: */
2.66 frystyk 324: if (me->target) {
325: if ((status = (*me->target->isa->put_block)(me->target, b, l)) != HT_OK)
326: return status;
327: /* Check if CL at all - thanks to jwei@hal.com (John Wei) */
2.71 ! frystyk 328: cl = HTResponse_length(me->response);
2.66 frystyk 329: return (cl>=0 && HTNet_bytesRead(me->net)>=cl) ? HT_LOADED : HT_OK;
330: }
331: return HT_LOADED;
2.18 frystyk 332: }
333:
334:
335: /* Character handling
336: ** ------------------
337: */
2.36 frystyk 338: PRIVATE int HTMIME_put_character (HTStream * me, char c)
2.18 frystyk 339: {
340: return HTMIME_put_block(me, &c, 1);
341: }
342:
2.1 timbl 343:
344: /* String handling
345: ** ---------------
346: */
2.57 frystyk 347: PRIVATE int HTMIME_put_string (HTStream * me, const char * s)
2.1 timbl 348: {
2.18 frystyk 349: return HTMIME_put_block(me, s, (int) strlen(s));
2.1 timbl 350: }
351:
352:
2.18 frystyk 353: /* Flush an stream object
354: ** ---------------------
2.1 timbl 355: */
2.36 frystyk 356: PRIVATE int HTMIME_flush (HTStream * me)
2.1 timbl 357: {
2.47 frystyk 358: return me->target ? (*me->target->isa->flush)(me->target) : HT_OK;
2.1 timbl 359: }
360:
2.18 frystyk 361: /* Free a stream object
362: ** --------------------
2.1 timbl 363: */
2.36 frystyk 364: PRIVATE int HTMIME_free (HTStream * me)
2.1 timbl 365: {
2.18 frystyk 366: int status = HT_OK;
2.64 eric 367: if (!me->transparent)
2.65 eric 368: if (_dispatchParsers(me) == HT_OK)
2.64 eric 369: pumpData(me);
2.25 frystyk 370: if (me->target) {
371: if ((status = (*me->target->isa->_free)(me->target))==HT_WOULD_BLOCK)
372: return HT_WOULD_BLOCK;
373: }
2.26 frystyk 374: if (PROT_TRACE)
2.55 eric 375: HTTrace("MIME........ FREEING....\n");
2.64 eric 376: HTChunk_delete(me->token);
377: HTChunk_delete(me->value);
2.52 frystyk 378: HT_FREE(me);
2.18 frystyk 379: return status;
2.1 timbl 380: }
381:
382: /* End writing
383: */
2.38 frystyk 384: PRIVATE int HTMIME_abort (HTStream * me, HTList * e)
2.1 timbl 385: {
2.18 frystyk 386: int status = HT_ERROR;
2.41 frystyk 387: if (me->target) status = (*me->target->isa->abort)(me->target, e);
2.26 frystyk 388: if (PROT_TRACE)
2.55 eric 389: HTTrace("MIME........ ABORTING...\n");
2.64 eric 390: HTChunk_delete(me->token);
391: HTChunk_delete(me->value);
2.52 frystyk 392: HT_FREE(me);
2.18 frystyk 393: return status;
2.1 timbl 394: }
395:
396:
397:
398: /* Structured Object Class
399: ** -----------------------
400: */
2.57 frystyk 401: PRIVATE const HTStreamClass HTMIME =
2.1 timbl 402: {
403: "MIMEParser",
2.18 frystyk 404: HTMIME_flush,
2.1 timbl 405: HTMIME_free,
2.6 timbl 406: HTMIME_abort,
407: HTMIME_put_character,
408: HTMIME_put_string,
2.18 frystyk 409: HTMIME_put_block
2.1 timbl 410: };
411:
412:
2.48 frystyk 413: /* MIME header parser stream.
2.1 timbl 414: ** -------------------------
2.48 frystyk 415: ** This stream parses a complete MIME header and if a content type header
416: ** is found then the stream stack is called. Any left over data is pumped
417: ** right through the stream
2.1 timbl 418: */
2.36 frystyk 419: PUBLIC HTStream* HTMIMEConvert (HTRequest * request,
420: void * param,
421: HTFormat input_format,
422: HTFormat output_format,
423: HTStream * output_stream)
2.1 timbl 424: {
2.62 frystyk 425: HTStream * me;
2.52 frystyk 426: if ((me = (HTStream *) HT_CALLOC(1, sizeof(* me))) == NULL)
427: HT_OUTOFMEM("HTMIMEConvert");
2.1 timbl 428: me->isa = &HTMIME;
2.18 frystyk 429: me->request = request;
2.71 ! frystyk 430: me->response = HTRequest_response(request);
2.70 frystyk 431: me->net = HTRequest_net(request);
2.49 frystyk 432: me->target = output_stream;
2.18 frystyk 433: me->target_format = output_format;
2.64 eric 434: me->token = HTChunk_new(256);
435: me->value = HTChunk_new(256);
436: me->hash = 0;
2.18 frystyk 437: me->EOLstate = EOL_BEGIN;
2.64 eric 438: me->haveToken = NO;
2.1 timbl 439: return me;
440: }
2.32 frystyk 441:
2.48 frystyk 442: /* MIME header ONLY parser stream
443: ** ------------------------------
444: ** This stream parses a complete MIME header and then returnes HT_PAUSE.
445: ** It does not set up any streams and resting data stays in the buffer.
446: ** This can be used if you only want to parse the headers before you
447: ** decide what to do next. This is for example the case in a server app.
448: */
449: PUBLIC HTStream * HTMIMEHeader (HTRequest * request,
450: void * param,
451: HTFormat input_format,
452: HTFormat output_format,
453: HTStream * output_stream)
454: {
2.62 frystyk 455: HTStream * me = HTMIMEConvert(request, param, input_format,
456: output_format, output_stream);
2.70 frystyk 457: me->mode |= HT_MIME_HEADER;
2.48 frystyk 458: return me;
459: }
2.62 frystyk 460:
461: /* MIME footer ONLY parser stream
462: ** ------------------------------
463: ** Parse only a footer, for example after a chunked encoding.
464: */
465: PUBLIC HTStream * HTMIMEFooter (HTRequest * request,
466: void * param,
467: HTFormat input_format,
468: HTFormat output_format,
469: HTStream * output_stream)
470: {
471: HTStream * me = HTMIMEConvert(request, param, input_format,
472: output_format, output_stream);
2.70 frystyk 473: me->mode |= HT_MIME_FOOTER;
2.67 frystyk 474: me->EOLstate = EOL_FLF;
2.62 frystyk 475: return me;
476: }
2.71 ! frystyk 477:
! 478: /* Partial Response MIME parser stream
! 479: ** -----------------------------------
! 480: ** In case we sent a Range conditional GET we may get back a partial
! 481: ** response. This response must be appended to the already existing
! 482: ** cache entry before presented to the user.
! 483: ** We do this by continuing to load the new object into a temporary
! 484: ** buffer and at the same time start the cache load of the already
! 485: ** existing object. When we have loaded the cache we merge the two
! 486: ** buffers.
! 487: */
! 488: PUBLIC HTStream * HTMIMEPartial (HTRequest * request,
! 489: void * param,
! 490: HTFormat input_format,
! 491: HTFormat output_format,
! 492: HTStream * output_stream)
! 493: {
! 494: #if 0
! 495: HTParentAnchor * anchor = HTRequest_anchor(request);
! 496: HTStream * me = NULL;
! 497: HTStream * merge = NULL;
! 498: /*
! 499: ** The merge stream is a place holder for where we can put data when it
! 500: ** arrives. We have two feeds: one from the cache and one from the net.
! 501: ** We call the stream stack already now to get the right output stream.
! 502: ** We can do this as we already know the content type from when we got the
! 503: ** first part of the object.
! 504: */
! 505: {
! 506: HTFormat format = HTAnchor_format(anchor);
! 507: if (STREAM_TRACE) HTTrace("Building.... C-T stack from %s to %s\n",
! 508: HTAtom_name(format),
! 509: HTAtom_name(output_format));
! 510: merge = HTMerge(HTStreamStack(format, output_format, output_stream,
! 511: request, YES), 2);
! 512: }
! 513:
! 514: #else
! 515: /*
! 516: ** Set up the MIME parser as the one feed to the merge stream. The MIME
! 517: ** parser then calls the PIPE buffer. We use source output as the stream
! 518: ** stack has already been called.
! 519: */
! 520: HTStream * me = HTMIMEConvert(request, param, input_format,
! 521: output_format, output_stream);
! 522: me->mode |= HT_MIME_PARTIAL;
! 523: #endif
! 524:
! 525: /*
! 526: ** Now start the second load from the cache. First we read this data from
! 527: ** the cache and then we flush the data that we have read from the net.
! 528: ** We use the same anchor as before but with another physical address.
! 529: */
! 530: {
! 531: HTParentAnchor * anchor = HTRequest_anchor(request);
! 532: HTRequest * creq = HTRequest_new();
! 533: HTCache * cache = NULL;
! 534:
! 535: /* Set up the request */
! 536: #if 0
! 537: HTRequest_setOutputFormat(creq, WWW_SOURCE);
! 538: HTRequest_setOutputStream(creq, me);
! 539: #endif
! 540: HTRequest_setAnchor(creq, (HTAnchor *) anchor);
! 541:
! 542: /* Set up the anchor */
! 543: if ((cache = HTCache_find(anchor))) {
! 544: char * name = HTCache_name(cache);
! 545: HTAnchor_setPhysical(anchor, name);
! 546: HT_FREE(name);
! 547: if (STREAM_TRACE) HTTrace("Partial..... Starting cache load\n");
! 548: HTLoad(creq, NO);
! 549: }
! 550: }
! 551: return me;
! 552: }
! 553:
Webmaster