Annotation of libwww/Library/src/HTMIME.c, revision 2.71.2.1
2.15 frystyk 1: /* HTMIME.c
2: ** MIME MESSAGE PARSE
3: **
2.22 frystyk 4: ** (c) COPYRIGHT MIT 1995.
2.15 frystyk 5: ** Please first read the full copyright statement in the file COPYRIGH.
2.71.2.1! eric 6: ** @(#) $Id: HTMIME.c,v 2.71 1996/10/07 02:04:54 frystyk Exp $
2.1 timbl 7: **
8: ** This is RFC 1341-specific code.
9: ** The input stream pushed into this parser is assumed to be
10: ** stripped of CRs, i.e. lines end with LF, not CR LF.
11: ** (It is easy to change this except for the body part where
12: ** conversion can be slow.)
13: **
14: ** History:
15: ** Feb 92 Written Tim Berners-Lee, CERN
2.13 duns 16: ** 8 Jul 94 FM Insulate free() from _free structure element.
2.71 frystyk 17: ** 14 Mar 95 HFN Now using response for storing data. No more `\n',
2.18 frystyk 18: ** static buffers etc.
2.1 timbl 19: */
2.17 frystyk 20:
21: /* Library include files */
2.57 frystyk 22: #include "sysdep.h"
2.60 frystyk 23: #include "WWWUtil.h"
2.61 frystyk 24: #include "WWWCore.h"
2.70 frystyk 25: #include "WWWCache.h"
26: #include "WWWStream.h"
2.61 frystyk 27: #include "HTReqMan.h"
28: #include "HTNetMan.h"
2.36 frystyk 29: #include "HTHeader.h"
2.64 eric 30: #include "HTWWWStr.h"
2.14 frystyk 31: #include "HTMIME.h" /* Implemented here */
2.1 timbl 32:
2.64 eric 33: #define MIME_HASH_SIZE 101
34:
/*
**  Operating modes of the MIME parser stream. Each mode is a separate bit
**  so that several of them can be tested with a single mask.
*/
typedef enum _HTMIMEMode {
    HT_MIME_HEADER  = (1 << 0),		/* Set by HTMIMEHeader() */
    HT_MIME_FOOTER  = (1 << 1),		/* Set by HTMIMEFooter() */
    HT_MIME_PARTIAL = (1 << 2)		/* Set by HTMIMEPartial() */
} HTMIMEMode;
40:
2.1 timbl 41: struct _HTStream {
2.57 frystyk 42: const HTStreamClass * isa;
2.18 frystyk 43: HTRequest * request;
2.71 frystyk 44: HTResponse * response;
2.32 frystyk 45: HTNet * net;
2.18 frystyk 46: HTStream * target;
47: HTFormat target_format;
2.64 eric 48: HTChunk * token;
49: HTChunk * value;
50: int hash;
2.59 frystyk 51: HTEOLState EOLstate;
2.70 frystyk 52: HTMIMEMode mode;
2.18 frystyk 53: BOOL transparent;
2.64 eric 54: BOOL haveToken;
2.1 timbl 55: };
56:
2.18 frystyk 57: /* ------------------------------------------------------------------------- */
2.1 timbl 58:
2.64 eric 59: PRIVATE int pumpData (HTStream * me)
2.18 frystyk 60: {
2.64 eric 61: HTRequest * request = me->request;
2.71 frystyk 62: HTResponse * response = me->response;
63: HTFormat format = HTResponse_format(response);
64: HTEncoding transfer = HTResponse_transfer(response);
65: long length = HTResponse_length(response);
2.48 frystyk 66: me->transparent = YES; /* Pump rest of data right through */
2.27 frystyk 67:
2.71 frystyk 68: /* If this request is a source in PostWeb then pause here */
2.66 frystyk 69: if (HTRequest_isSource(request)) return HT_PAUSE;
2.47 frystyk 70:
2.71 frystyk 71: /*
72: ** Cache the metainformation in the anchor object by moving
73: ** it from the response object. This we do regardless if
74: ** we have a persistent cache or not as the memory cache will
75: ** use it as well. If we are updating a cache entry using
76: ** byte ranges then we alreayd have the metainformation and
77: ** hence we can ignore the new one as it'd better be the same.
78: */
79: if (!(me->mode & (HT_MIME_PARTIAL | HT_MIME_FOOTER)) &&
80: HTResponse_isCachable(me->response)) {
81: HTAnchor_update(HTRequest_anchor(request), me->response);
82: }
83:
84: /*
85: ** If we asked only to read the header or footer or we used a HEAD
86: ** method then we stop here as we don't expect any body part.
87: */
2.70 frystyk 88: if (me->mode & (HT_MIME_HEADER | HT_MIME_FOOTER) ||
2.71 frystyk 89: HTRequest_method(request) == METHOD_HEAD) {
2.70 frystyk 90: return HT_LOADED;
91: }
2.43 frystyk 92:
2.60 frystyk 93: /*
2.71 frystyk 94: ** If there is no content-length, no transfer encoding and no
95: ** content type then we assume that there is no body part in
96: ** the message and we can return HT_LOADED
2.68 frystyk 97: */
98: if (length<=0 && format==WWW_UNKNOWN && transfer==NULL) {
99: if (STREAM_TRACE) HTTrace("MIME Parser. No body in this messsage\n");
100: return HT_LOADED;
101: }
102:
103: /*
2.71 frystyk 104: ** Handle any Content Type
2.60 frystyk 105: */
2.71 frystyk 106: if (!(me->mode & HT_MIME_PARTIAL) &&
107: (format != WWW_UNKNOWN || length > 0 || transfer)) {
108: if (STREAM_TRACE) HTTrace("Building.... C-T stack from %s to %s\n",
109: HTAtom_name(format),
110: HTAtom_name(me->target_format));
111: me->target = HTStreamStack(format, me->target_format,
112: me->target, request, YES);
2.18 frystyk 113: }
2.60 frystyk 114:
2.71 frystyk 115: /*
116: ** Handle any Content Encoding
117: */
2.61 frystyk 118: {
2.71 frystyk 119: HTList * cc = HTResponse_encoding(response);
2.61 frystyk 120: if (cc) {
121: if (STREAM_TRACE) HTTrace("Building.... C-E stack\n");
122: me->target = HTContentDecodingStack(cc, me->target, request, NULL);
123: }
2.60 frystyk 124: }
125:
2.70 frystyk 126: /*
2.71 frystyk 127: ** Can we cache the data object? If so then create a T stream and hook it
128: ** into the stream pipe. We do it before the transfer decoding so that we
129: ** don't have to deal with that when we retrieve the object from cache.
130: ** If we are appending to a cache entry then use a different stream than
131: ** if creating a new entry.
132: */
133: if (HTCacheMode_enabled()) {
134: if (me->mode & HT_MIME_PARTIAL) {
135: HTStream * append = HTStreamStack(WWW_CACHE_APPEND,
136: me->target_format,
137: me->target, request, NO);
138: #if 0
139: if (cache) me->target = HTTee(me->target, cache, NULL);
140: me->target = HTPipeBuffer_new(me->target, request, 0);
141: #else
142: me->target = append;
143: #endif
144: } else if (HTResponse_isCachable(me->response)) {
145: HTStream * cache = HTStreamStack(WWW_CACHE, me->target_format,
146: me->target, request, NO);
147: if (cache) me->target = HTTee(me->target, cache, NULL);
148: }
2.70 frystyk 149: }
150:
2.71 frystyk 151: /*
152: ** Handle any Transfer encoding
153: */
2.61 frystyk 154: {
155: if (!HTFormat_isUnityTransfer(transfer)) {
156: if (STREAM_TRACE) HTTrace("Building.... C-T-E stack\n");
157: me->target = HTTransferCodingStack(transfer, me->target,
158: request, NULL, NO);
159: }
160: }
2.71 frystyk 161:
2.27 frystyk 162: return HT_OK;
2.1 timbl 163: }
164:
2.65 eric 165: /* _dispatchParsers - call request's MIME header parser.
166: ** Use global parser if no appropriate one is found for request.
167: */
168: PRIVATE int _dispatchParsers (HTStream * me)
169: {
170: int status;
171: char * token = HTChunk_data(me->token);
172: char * value = HTChunk_data(me->value);
2.71 frystyk 173: BOOL found = NO;
174: BOOL local = NO;
2.65 eric 175: HTMIMEParseSet * parseSet;
176:
177: /* In case we get an empty header consisting of a CRLF, we fall thru */
2.66 frystyk 178: if (STREAM_TRACE) HTTrace("MIME header. %s: %s\n",
179: token ? token : "<null>",
180: value ? value : "<null>");
181: if (!token) return HT_OK; /* Ignore noop token */
2.65 eric 182:
2.70 frystyk 183: /*
2.71 frystyk 184: ** Remember the original header
185: */
186: HTResponse_addHeader(me->response, token, value);
187:
188: /*
2.70 frystyk 189: ** Search the local set of MIME parsers
190: */
2.65 eric 191: if ((parseSet = HTRequest_MIMEParseSet(me->request, &local)) != NULL) {
192: status = HTMIMEParseSet_dispatch(parseSet, me->request,
2.71 frystyk 193: token, value, &found);
194: if (found) return status;
2.65 eric 195: }
196:
2.70 frystyk 197: /*
198: ** Search the global set of MIME parsers
199: */
2.71 frystyk 200: if (local==NO && (parseSet = HTHeader_MIMEParseSet()) != NULL) {
201: status = HTMIMEParseSet_dispatch(parseSet, me->request,
202: token, value, &found);
203: if (found) return status;
204: }
205:
2.65 eric 206: return HT_OK;
207: }
208:
2.18 frystyk 209: /*
210: ** Header is terminated by CRCR, LFLF, CRLFLF, CRLFCRLF
211: ** Folding is either of CF LWS, LF LWS, CRLF LWS
212: */
2.57 frystyk 213: PRIVATE int HTMIME_put_block (HTStream * me, const char * b, int l)
2.18 frystyk 214: {
2.57 frystyk 215: const char * start = b;
216: const char * end = start;
2.64 eric 217: const char * value = me->value->size ? b : NULL;
2.71.2.1! eric 218: long blockLength = l;
2.64 eric 219: int status;
220: /* enum {Line_CHAR, Line_END, Line_FOLD, Line_LINE} line = Line_CHAR; */
221:
222: while (!me->transparent) {
2.18 frystyk 223: if (me->EOLstate == EOL_FCR) {
2.64 eric 224: if (*b == CR) /* End of header */
225: me->EOLstate = EOL_END;
226: else if (*b == LF) /* CRLF */
2.18 frystyk 227: me->EOLstate = EOL_FLF;
2.64 eric 228: else if (WHITE(*b)) /* Folding: CR SP */
229: me->EOLstate = EOL_FOLD;
230: else /* New line */
231: me->EOLstate = EOL_LINE;
2.18 frystyk 232: } else if (me->EOLstate == EOL_FLF) {
233: if (*b == CR) /* LF CR or CR LF CR */
234: me->EOLstate = EOL_SCR;
2.64 eric 235: else if (*b == LF) /* End of header */
236: me->EOLstate = EOL_END;
237: else if (WHITE(*b)) /* Folding: LF SP or CR LF SP */
238: me->EOLstate = EOL_FOLD;
239: else /* New line */
240: me->EOLstate = EOL_LINE;
241: } else if (me->EOLstate == EOL_SCR) {
242: if (*b==CR || *b==LF) /* End of header */
243: me->EOLstate = EOL_END;
244: else if (WHITE(*b)) /* Folding: LF CR SP or CR LF CR SP */
245: me->EOLstate = EOL_FOLD;
246: else /* New line */
247: me->EOLstate = EOL_LINE;
248: } else if (*b == CR)
249: me->EOLstate = EOL_FCR;
250: else if (*b == LF)
251: me->EOLstate = EOL_FLF; /* Line found */
252: else {
253: if (!me->haveToken) {
254: if (*b == ':' || isspace(*b)) {
255: HTChunk_putb(me->token, start, end-start);
256: HTChunk_putc(me->token, '\0');
257: me->haveToken = YES;
258: } else {
259: unsigned char ch = *(unsigned char *) b;
2.71.2.1! eric 260: ch = tolower(ch);
2.64 eric 261: /* if (ch >= 'A' && ch <= 'Z')
262: ch += ('a' - 'A'); */
263: me->hash = (me->hash * 3 + ch) % MIME_HASH_SIZE;
264: }
265: } else if (value == NULL && *b != ':' && !isspace(*b))
266: value = b;
267: end++;
268: }
269: switch (me->EOLstate) {
270: case EOL_LINE:
271: case EOL_END: {
272: int status;
273: HTChunk_putb(me->value, value, end-value);
274: HTChunk_putc(me->value, '\0');
275: start=b, end=b;
2.65 eric 276: status = _dispatchParsers(me);
2.64 eric 277: if (me->EOLstate == EOL_END) { /* EOL_END */
2.67 frystyk 278: if (status == HT_OK) {
279: b++, l--;
2.64 eric 280: status = pumpData(me);
2.67 frystyk 281: }
2.71.2.1! eric 282: HTNet_setHeaderLength(me->net, HTNet_bytesRead(me->net) - l);
! 283: /* bytesRead is actually body read */
! 284: /* HTNet_setBytesRead(me->net, l); */
2.64 eric 285: } else { /* EOL_LINE */
286: HTChunk_clear(me->token);
287: HTChunk_clear(me->value);
288: me->haveToken = NO;
289: me->hash = 0;
290: value = NULL;
291: }
2.18 frystyk 292: me->EOLstate = EOL_BEGIN;
2.27 frystyk 293: if (status != HT_OK)
294: return status;
2.64 eric 295: break;
296: }
297: case EOL_FOLD:
2.18 frystyk 298: me->EOLstate = EOL_BEGIN;
2.64 eric 299: if (!me->haveToken) {
300: HTChunk_putb(me->token, start, end-start);
301: HTChunk_putc(me->token, '\0');
302: me->haveToken = YES;
303: } else if (value) {
304: HTChunk_putb(me->value, value, end-value);
305: HTChunk_putc(me->value, ' ');
306: }
307: start=b, end=b;
308: break;
309: default:
310: b++;
311: l--;
312: if (!l) {
313: if (!me->haveToken)
314: HTChunk_putb(me->token, start, end-start);
315: else if (value)
316: HTChunk_putb(me->value, value, end-value);
317: return HT_OK;
318: }
319: }
2.18 frystyk 320: }
2.32 frystyk 321:
322: /*
323: ** Put the rest down the stream without touching the data but make sure
324: ** that we get the correct content length of data
325: */
2.66 frystyk 326: if (me->target) {
2.71.2.1! eric 327: /* Check if CL at all - thanks to jwei@hal.com (John Wei) */
! 328: long cl = HTResponse_length(me->response);
! 329: long availableBody = HTNet_bytesRead(me->net) - HTNet_headerLength(me->net);
! 330:
! 331: /* If content-length delimited, just consume what is ours */
! 332: if (cl >= 0 && availableBody >= cl) {
! 333: /* now reading this request's last packet */
! 334: int unconsumed = availableBody - cl;
! 335: HTHost_setConsumed(me->net->host, blockLength - unconsumed);
! 336: if ((status = (*me->target->isa->put_block)(me->target, b, unconsumed)) != HT_OK)
! 337: return status;
! 338: return HT_LOADED;
! 339: }
2.66 frystyk 340: if ((status = (*me->target->isa->put_block)(me->target, b, l)) != HT_OK)
341: return status;
2.71.2.1! eric 342: return HT_OK;
2.66 frystyk 343: }
344: return HT_LOADED;
2.18 frystyk 345: }
346:
347:
348: /* Character handling
349: ** ------------------
350: */
2.36 frystyk 351: PRIVATE int HTMIME_put_character (HTStream * me, char c)
2.18 frystyk 352: {
353: return HTMIME_put_block(me, &c, 1);
354: }
355:
2.1 timbl 356:
357: /* String handling
358: ** ---------------
359: */
2.57 frystyk 360: PRIVATE int HTMIME_put_string (HTStream * me, const char * s)
2.1 timbl 361: {
2.18 frystyk 362: return HTMIME_put_block(me, s, (int) strlen(s));
2.1 timbl 363: }
364:
365:
2.18 frystyk 366: /* Flush an stream object
367: ** ---------------------
2.1 timbl 368: */
2.36 frystyk 369: PRIVATE int HTMIME_flush (HTStream * me)
2.1 timbl 370: {
2.47 frystyk 371: return me->target ? (*me->target->isa->flush)(me->target) : HT_OK;
2.1 timbl 372: }
373:
2.18 frystyk 374: /* Free a stream object
375: ** --------------------
2.1 timbl 376: */
2.36 frystyk 377: PRIVATE int HTMIME_free (HTStream * me)
2.1 timbl 378: {
2.18 frystyk 379: int status = HT_OK;
2.64 eric 380: if (!me->transparent)
2.65 eric 381: if (_dispatchParsers(me) == HT_OK)
2.64 eric 382: pumpData(me);
2.25 frystyk 383: if (me->target) {
384: if ((status = (*me->target->isa->_free)(me->target))==HT_WOULD_BLOCK)
385: return HT_WOULD_BLOCK;
386: }
2.26 frystyk 387: if (PROT_TRACE)
2.55 eric 388: HTTrace("MIME........ FREEING....\n");
2.64 eric 389: HTChunk_delete(me->token);
390: HTChunk_delete(me->value);
2.52 frystyk 391: HT_FREE(me);
2.18 frystyk 392: return status;
2.1 timbl 393: }
394:
395: /* End writing
396: */
2.38 frystyk 397: PRIVATE int HTMIME_abort (HTStream * me, HTList * e)
2.1 timbl 398: {
2.18 frystyk 399: int status = HT_ERROR;
2.41 frystyk 400: if (me->target) status = (*me->target->isa->abort)(me->target, e);
2.26 frystyk 401: if (PROT_TRACE)
2.55 eric 402: HTTrace("MIME........ ABORTING...\n");
2.64 eric 403: HTChunk_delete(me->token);
404: HTChunk_delete(me->value);
2.52 frystyk 405: HT_FREE(me);
2.18 frystyk 406: return status;
2.1 timbl 407: }
408:
409:
410:
411: /* Structured Object Class
412: ** -----------------------
413: */
2.57 frystyk 414: PRIVATE const HTStreamClass HTMIME =
2.1 timbl 415: {
416: "MIMEParser",
2.18 frystyk 417: HTMIME_flush,
2.1 timbl 418: HTMIME_free,
2.6 timbl 419: HTMIME_abort,
420: HTMIME_put_character,
421: HTMIME_put_string,
2.18 frystyk 422: HTMIME_put_block
2.1 timbl 423: };
424:
425:
2.48 frystyk 426: /* MIME header parser stream.
2.1 timbl 427: ** -------------------------
2.48 frystyk 428: ** This stream parses a complete MIME header and if a content type header
429: ** is found then the stream stack is called. Any left over data is pumped
430: ** right through the stream
2.1 timbl 431: */
2.36 frystyk 432: PUBLIC HTStream* HTMIMEConvert (HTRequest * request,
433: void * param,
434: HTFormat input_format,
435: HTFormat output_format,
436: HTStream * output_stream)
2.1 timbl 437: {
2.62 frystyk 438: HTStream * me;
2.52 frystyk 439: if ((me = (HTStream *) HT_CALLOC(1, sizeof(* me))) == NULL)
440: HT_OUTOFMEM("HTMIMEConvert");
2.1 timbl 441: me->isa = &HTMIME;
2.18 frystyk 442: me->request = request;
2.71 frystyk 443: me->response = HTRequest_response(request);
2.70 frystyk 444: me->net = HTRequest_net(request);
2.49 frystyk 445: me->target = output_stream;
2.18 frystyk 446: me->target_format = output_format;
2.64 eric 447: me->token = HTChunk_new(256);
448: me->value = HTChunk_new(256);
449: me->hash = 0;
2.18 frystyk 450: me->EOLstate = EOL_BEGIN;
2.64 eric 451: me->haveToken = NO;
2.1 timbl 452: return me;
453: }
2.32 frystyk 454:
2.48 frystyk 455: /* MIME header ONLY parser stream
456: ** ------------------------------
457: ** This stream parses a complete MIME header and then returnes HT_PAUSE.
458: ** It does not set up any streams and resting data stays in the buffer.
459: ** This can be used if you only want to parse the headers before you
460: ** decide what to do next. This is for example the case in a server app.
461: */
462: PUBLIC HTStream * HTMIMEHeader (HTRequest * request,
463: void * param,
464: HTFormat input_format,
465: HTFormat output_format,
466: HTStream * output_stream)
467: {
2.62 frystyk 468: HTStream * me = HTMIMEConvert(request, param, input_format,
469: output_format, output_stream);
2.70 frystyk 470: me->mode |= HT_MIME_HEADER;
2.48 frystyk 471: return me;
472: }
2.62 frystyk 473:
474: /* MIME footer ONLY parser stream
475: ** ------------------------------
476: ** Parse only a footer, for example after a chunked encoding.
477: */
478: PUBLIC HTStream * HTMIMEFooter (HTRequest * request,
479: void * param,
480: HTFormat input_format,
481: HTFormat output_format,
482: HTStream * output_stream)
483: {
484: HTStream * me = HTMIMEConvert(request, param, input_format,
485: output_format, output_stream);
2.70 frystyk 486: me->mode |= HT_MIME_FOOTER;
2.67 frystyk 487: me->EOLstate = EOL_FLF;
2.62 frystyk 488: return me;
489: }
2.71 frystyk 490:
491: /* Partial Response MIME parser stream
492: ** -----------------------------------
493: ** In case we sent a Range conditional GET we may get back a partial
494: ** response. This response must be appended to the already existing
495: ** cache entry before presented to the user.
496: ** We do this by continuing to load the new object into a temporary
497: ** buffer and at the same time start the cache load of the already
498: ** existing object. When we have loaded the cache we merge the two
499: ** buffers.
500: */
501: PUBLIC HTStream * HTMIMEPartial (HTRequest * request,
502: void * param,
503: HTFormat input_format,
504: HTFormat output_format,
505: HTStream * output_stream)
506: {
507: #if 0
508: HTParentAnchor * anchor = HTRequest_anchor(request);
509: HTStream * me = NULL;
510: HTStream * merge = NULL;
511: /*
512: ** The merge stream is a place holder for where we can put data when it
513: ** arrives. We have two feeds: one from the cache and one from the net.
514: ** We call the stream stack already now to get the right output stream.
515: ** We can do this as we already know the content type from when we got the
516: ** first part of the object.
517: */
518: {
519: HTFormat format = HTAnchor_format(anchor);
520: if (STREAM_TRACE) HTTrace("Building.... C-T stack from %s to %s\n",
521: HTAtom_name(format),
522: HTAtom_name(output_format));
523: merge = HTMerge(HTStreamStack(format, output_format, output_stream,
524: request, YES), 2);
525: }
526:
527: #else
528: /*
529: ** Set up the MIME parser as the one feed to the merge stream. The MIME
530: ** parser then calls the PIPE buffer. We use source output as the stream
531: ** stack has already been called.
532: */
533: HTStream * me = HTMIMEConvert(request, param, input_format,
534: output_format, output_stream);
535: me->mode |= HT_MIME_PARTIAL;
536: #endif
537:
538: /*
539: ** Now start the second load from the cache. First we read this data from
540: ** the cache and then we flush the data that we have read from the net.
541: ** We use the same anchor as before but with another physical address.
542: */
543: {
544: HTParentAnchor * anchor = HTRequest_anchor(request);
545: HTRequest * creq = HTRequest_new();
546: HTCache * cache = NULL;
547:
548: /* Set up the request */
549: #if 0
550: HTRequest_setOutputFormat(creq, WWW_SOURCE);
551: HTRequest_setOutputStream(creq, me);
552: #endif
553: HTRequest_setAnchor(creq, (HTAnchor *) anchor);
554:
555: /* Set up the anchor */
556: if ((cache = HTCache_find(anchor))) {
557: char * name = HTCache_name(cache);
558: HTAnchor_setPhysical(anchor, name);
559: HT_FREE(name);
560: if (STREAM_TRACE) HTTrace("Partial..... Starting cache load\n");
561: HTLoad(creq, NO);
562: }
563: }
564: return me;
565: }
566:
Webmaster