Annotation of libwww/Library/src/HTMIME.c, revision 2.70
2.15 frystyk 1: /* HTMIME.c
2: ** MIME MESSAGE PARSE
3: **
2.22 frystyk 4: ** (c) COPYRIGHT MIT 1995.
2.15 frystyk 5: ** Please first read the full copyright statement in the file COPYRIGH.
2.70 ! frystyk 6: ** @(#) $Id: HTMIME.c,v 2.69 1996/08/24 18:10:08 frystyk Exp $
2.1 timbl 7: **
8: ** This is RFC 1341-specific code.
9: ** The input stream pushed into this parser is assumed to be
10: ** stripped of CRs, i.e. lines end with LF, not CR LF.
11: ** (It is easy to change this except for the body part where
12: ** conversion can be slow.)
13: **
14: ** History:
15: ** Feb 92 Written Tim Berners-Lee, CERN
2.13 duns 16: ** 8 Jul 94 FM Insulate free() from _free structure element.
2.18 frystyk 17: ** 14 Mar 95 HFN Now using anchor for storing data. No more `\n',
18: ** static buffers etc.
2.1 timbl 19: */
2.17 frystyk 20:
21: /* Library include files */
2.57 frystyk 22: #include "sysdep.h"
2.60 frystyk 23: #include "WWWUtil.h"
2.61 frystyk 24: #include "WWWCore.h"
2.70 ! frystyk 25: #include "WWWCache.h"
! 26: #include "WWWStream.h"
2.61 frystyk 27: #include "HTReqMan.h"
28: #include "HTNetMan.h"
2.36 frystyk 29: #include "HTHeader.h"
2.64 eric 30: #include "HTWWWStr.h"
2.14 frystyk 31: #include "HTMIME.h" /* Implemented here */
2.1 timbl 32:
2.64 eric 33: #define MIME_HASH_SIZE 101
34:
/*
**	Parsing mode of a MIME stream. The values are bit flags that are
**	OR'ed into the `mode' field of the stream object.
*/
typedef enum _HTMIMEMode {
    HT_MIME_HEADER = (1 << 0),		/* Set by HTMIMEHeader() */
    HT_MIME_FOOTER = (1 << 1)		/* Set by HTMIMEFooter() */
} HTMIMEMode;
! 39:
2.1 timbl 40: struct _HTStream {
2.57 frystyk 41: const HTStreamClass * isa;
2.18 frystyk 42: HTRequest * request;
2.32 frystyk 43: HTNet * net;
44: HTParentAnchor * anchor;
2.18 frystyk 45: HTStream * target;
46: HTFormat target_format;
2.64 eric 47: HTChunk * token;
48: HTChunk * value;
49: int hash;
2.59 frystyk 50: HTEOLState EOLstate;
2.70 ! frystyk 51: HTMIMEMode mode;
2.18 frystyk 52: BOOL transparent;
2.64 eric 53: BOOL haveToken;
2.70 ! frystyk 54: BOOL cache;
2.1 timbl 55: };
56:
2.18 frystyk 57: /* ------------------------------------------------------------------------- */
2.1 timbl 58:
2.64 eric 59: PRIVATE int pumpData (HTStream * me)
2.18 frystyk 60: {
2.64 eric 61: HTRequest * request = me->request;
62: HTParentAnchor * anchor = me->anchor;
2.68 frystyk 63: HTFormat format = HTAnchor_format(anchor);
64: HTEncoding transfer = HTAnchor_transfer(anchor);
65: long length = HTAnchor_length(anchor);
2.48 frystyk 66: me->transparent = YES; /* Pump rest of data right through */
2.70 ! frystyk 67: HTAnchor_setHeaderParsed(anchor);
2.27 frystyk 68:
2.68 frystyk 69: /* If this request is a source in PostWeb then pause here */
2.66 frystyk 70: if (HTRequest_isSource(request)) return HT_PAUSE;
2.47 frystyk 71:
2.48 frystyk 72: /* If HEAD method then we just stop here */
2.70 ! frystyk 73: if (me->mode & (HT_MIME_HEADER | HT_MIME_FOOTER) ||
! 74: HTRequest_method(me->request) == METHOD_HEAD) {
! 75: return HT_LOADED;
! 76: }
2.43 frystyk 77:
2.60 frystyk 78: /*
2.68 frystyk 79: ** If there is no content-length, no transfer encoding and no
80: ** content type then we assume that there is no
81: ** bodypart in the message and we can return HT_LOADED
82: */
83: if (length<=0 && format==WWW_UNKNOWN && transfer==NULL) {
84: if (STREAM_TRACE) HTTrace("MIME Parser. No body in this messsage\n");
85: return HT_LOADED;
86: }
87:
88: /*
2.60 frystyk 89: ** Handle any Content Type
90: */
2.61 frystyk 91: {
2.69 frystyk 92: if (format != WWW_UNKNOWN || length>0 || transfer) {
2.61 frystyk 93: if (STREAM_TRACE) HTTrace("Building.... C-T stack from %s to %s\n",
94: HTAtom_name(format),
95: HTAtom_name(me->target_format));
96: me->target = HTStreamStack(format, me->target_format,
97: me->target, request, YES);
98: }
2.18 frystyk 99: }
2.60 frystyk 100:
101: /* Handle any Content Encoding */
2.61 frystyk 102: {
103: HTList * cc = HTAnchor_encoding(anchor);
104: if (cc) {
105: if (STREAM_TRACE) HTTrace("Building.... C-E stack\n");
106: me->target = HTContentDecodingStack(cc, me->target, request, NULL);
107: }
2.60 frystyk 108: }
109:
2.70 ! frystyk 110: /*
! 111: ** Can we cache the data object? If so then create a T stream and hook it
! 112: ** into the stream pipe. We do it before the transfer decoding so that we
! 113: ** don't have to deal with that when we retrieve the object from cache
! 114: */
! 115: if (HTCacheMode_enabled() && HTAnchor_cachable(anchor)) {
! 116: HTStream * cache;
! 117: if ((cache = HTStreamStack(WWW_CACHE, me->target_format,
! 118: me->target, request, NO)))
! 119: me->target = HTTee(me->target, cache, NULL);
! 120: }
! 121:
2.60 frystyk 122: /* Handle any Transfer encoding */
2.61 frystyk 123: {
124: if (!HTFormat_isUnityTransfer(transfer)) {
125: if (STREAM_TRACE) HTTrace("Building.... C-T-E stack\n");
126: me->target = HTTransferCodingStack(transfer, me->target,
127: request, NULL, NO);
128: }
129: }
2.27 frystyk 130: return HT_OK;
2.1 timbl 131: }
132:
2.65 eric 133: /* _dispatchParsers - call request's MIME header parser.
134: ** Use global parser if no appropriate one is found for request.
135: */
136: PRIVATE int _dispatchParsers (HTStream * me)
137: {
138: int status;
139: char * token = HTChunk_data(me->token);
140: char * value = HTChunk_data(me->value);
141: BOOL found, local;
142: HTMIMEParseSet * parseSet;
143:
144: /* In case we get an empty header consisting of a CRLF, we fall thru */
2.66 frystyk 145: if (STREAM_TRACE) HTTrace("MIME header. %s: %s\n",
146: token ? token : "<null>",
147: value ? value : "<null>");
148: if (!token) return HT_OK; /* Ignore noop token */
2.65 eric 149:
2.70 ! frystyk 150: /*
! 151: ** Search the local set of MIME parsers
! 152: */
2.65 eric 153: if ((parseSet = HTRequest_MIMEParseSet(me->request, &local)) != NULL) {
154: status = HTMIMEParseSet_dispatch(parseSet, me->request,
2.70 ! frystyk 155: token, value, &found, me->cache);
2.65 eric 156: if (found)
157: return status;
158: if (local)
159: return HT_OK; /* not found, but that's OK */
160: }
161:
2.70 ! frystyk 162: /*
! 163: ** Search the global set of MIME parsers
! 164: */
! 165: if ((parseSet = HTHeader_MIMEParseSet()) == NULL) return HT_OK;
2.65 eric 166: status = HTMIMEParseSet_dispatch(parseSet, me->request,
2.70 ! frystyk 167: token, value, &found, me->cache);
! 168: if (found) return status;
! 169: if (STREAM_TRACE) HTTrace("MIME header. Ignoring %s: %s\n", token, value);
2.65 eric 170: return HT_OK;
171: }
172:
2.18 frystyk 173: /*
174: ** Header is terminated by CRCR, LFLF, CRLFLF, CRLFCRLF
175: ** Folding is either of CF LWS, LF LWS, CRLF LWS
176: */
2.57 frystyk 177: PRIVATE int HTMIME_put_block (HTStream * me, const char * b, int l)
2.18 frystyk 178: {
2.57 frystyk 179: const char * start = b;
180: const char * end = start;
2.64 eric 181: const char * value = me->value->size ? b : NULL;
182: long cl;
183: int status;
184: /* enum {Line_CHAR, Line_END, Line_FOLD, Line_LINE} line = Line_CHAR; */
185:
186: while (!me->transparent) {
2.18 frystyk 187: if (me->EOLstate == EOL_FCR) {
2.64 eric 188: if (*b == CR) /* End of header */
189: me->EOLstate = EOL_END;
190: else if (*b == LF) /* CRLF */
2.18 frystyk 191: me->EOLstate = EOL_FLF;
2.64 eric 192: else if (WHITE(*b)) /* Folding: CR SP */
193: me->EOLstate = EOL_FOLD;
194: else /* New line */
195: me->EOLstate = EOL_LINE;
2.18 frystyk 196: } else if (me->EOLstate == EOL_FLF) {
197: if (*b == CR) /* LF CR or CR LF CR */
198: me->EOLstate = EOL_SCR;
2.64 eric 199: else if (*b == LF) /* End of header */
200: me->EOLstate = EOL_END;
201: else if (WHITE(*b)) /* Folding: LF SP or CR LF SP */
202: me->EOLstate = EOL_FOLD;
203: else /* New line */
204: me->EOLstate = EOL_LINE;
205: } else if (me->EOLstate == EOL_SCR) {
206: if (*b==CR || *b==LF) /* End of header */
207: me->EOLstate = EOL_END;
208: else if (WHITE(*b)) /* Folding: LF CR SP or CR LF CR SP */
209: me->EOLstate = EOL_FOLD;
210: else /* New line */
211: me->EOLstate = EOL_LINE;
212: } else if (*b == CR)
213: me->EOLstate = EOL_FCR;
214: else if (*b == LF)
215: me->EOLstate = EOL_FLF; /* Line found */
216: else {
217: if (!me->haveToken) {
218: if (*b == ':' || isspace(*b)) {
219: HTChunk_putb(me->token, start, end-start);
220: HTChunk_putc(me->token, '\0');
221: me->haveToken = YES;
222: } else {
223: unsigned char ch = *(unsigned char *) b;
224: tolower(ch);
225: /* if (ch >= 'A' && ch <= 'Z')
226: ch += ('a' - 'A'); */
227: me->hash = (me->hash * 3 + ch) % MIME_HASH_SIZE;
228: }
229: } else if (value == NULL && *b != ':' && !isspace(*b))
230: value = b;
231: end++;
232: }
233: switch (me->EOLstate) {
234: case EOL_LINE:
235: case EOL_END: {
236: int status;
237: HTChunk_putb(me->value, value, end-value);
238: HTChunk_putc(me->value, '\0');
239: start=b, end=b;
2.65 eric 240: status = _dispatchParsers(me);
2.64 eric 241: if (me->EOLstate == EOL_END) { /* EOL_END */
2.67 frystyk 242: if (status == HT_OK) {
243: b++, l--;
2.64 eric 244: status = pumpData(me);
2.67 frystyk 245: }
2.64 eric 246: HTNet_setBytesRead(me->net, l);
247: } else { /* EOL_LINE */
248: HTChunk_clear(me->token);
249: HTChunk_clear(me->value);
250: me->haveToken = NO;
251: me->hash = 0;
252: value = NULL;
253: }
2.18 frystyk 254: me->EOLstate = EOL_BEGIN;
2.27 frystyk 255: if (status != HT_OK)
256: return status;
2.64 eric 257: break;
258: }
259: case EOL_FOLD:
2.18 frystyk 260: me->EOLstate = EOL_BEGIN;
2.64 eric 261: if (!me->haveToken) {
262: HTChunk_putb(me->token, start, end-start);
263: HTChunk_putc(me->token, '\0');
264: me->haveToken = YES;
265: } else if (value) {
266: HTChunk_putb(me->value, value, end-value);
267: HTChunk_putc(me->value, ' ');
268: }
269: start=b, end=b;
270: break;
271: default:
272: b++;
273: l--;
274: if (!l) {
275: if (!me->haveToken)
276: HTChunk_putb(me->token, start, end-start);
277: else if (value)
278: HTChunk_putb(me->value, value, end-value);
279: return HT_OK;
280: }
281: }
2.18 frystyk 282: }
2.32 frystyk 283:
284: /*
285: ** Put the rest down the stream without touching the data but make sure
286: ** that we get the correct content length of data
287: */
2.66 frystyk 288: if (me->target) {
289: if ((status = (*me->target->isa->put_block)(me->target, b, l)) != HT_OK)
290: return status;
291: /* Check if CL at all - thanks to jwei@hal.com (John Wei) */
292: cl = HTAnchor_length(me->anchor);
293: return (cl>=0 && HTNet_bytesRead(me->net)>=cl) ? HT_LOADED : HT_OK;
294: }
295: return HT_LOADED;
2.18 frystyk 296: }
297:
298:
299: /* Character handling
300: ** ------------------
301: */
2.36 frystyk 302: PRIVATE int HTMIME_put_character (HTStream * me, char c)
2.18 frystyk 303: {
304: return HTMIME_put_block(me, &c, 1);
305: }
306:
2.1 timbl 307:
308: /* String handling
309: ** ---------------
310: */
2.57 frystyk 311: PRIVATE int HTMIME_put_string (HTStream * me, const char * s)
2.1 timbl 312: {
2.18 frystyk 313: return HTMIME_put_block(me, s, (int) strlen(s));
2.1 timbl 314: }
315:
316:
2.18 frystyk 317: /* Flush an stream object
318: ** ---------------------
2.1 timbl 319: */
2.36 frystyk 320: PRIVATE int HTMIME_flush (HTStream * me)
2.1 timbl 321: {
2.47 frystyk 322: return me->target ? (*me->target->isa->flush)(me->target) : HT_OK;
2.1 timbl 323: }
324:
2.18 frystyk 325: /* Free a stream object
326: ** --------------------
2.1 timbl 327: */
2.36 frystyk 328: PRIVATE int HTMIME_free (HTStream * me)
2.1 timbl 329: {
2.18 frystyk 330: int status = HT_OK;
2.64 eric 331: if (!me->transparent)
2.65 eric 332: if (_dispatchParsers(me) == HT_OK)
2.64 eric 333: pumpData(me);
2.25 frystyk 334: if (me->target) {
335: if ((status = (*me->target->isa->_free)(me->target))==HT_WOULD_BLOCK)
336: return HT_WOULD_BLOCK;
337: }
2.26 frystyk 338: if (PROT_TRACE)
2.55 eric 339: HTTrace("MIME........ FREEING....\n");
2.64 eric 340: HTChunk_delete(me->token);
341: HTChunk_delete(me->value);
2.52 frystyk 342: HT_FREE(me);
2.18 frystyk 343: return status;
2.1 timbl 344: }
345:
346: /* End writing
347: */
2.38 frystyk 348: PRIVATE int HTMIME_abort (HTStream * me, HTList * e)
2.1 timbl 349: {
2.18 frystyk 350: int status = HT_ERROR;
2.41 frystyk 351: if (me->target) status = (*me->target->isa->abort)(me->target, e);
2.26 frystyk 352: if (PROT_TRACE)
2.55 eric 353: HTTrace("MIME........ ABORTING...\n");
2.64 eric 354: HTChunk_delete(me->token);
355: HTChunk_delete(me->value);
2.52 frystyk 356: HT_FREE(me);
2.18 frystyk 357: return status;
2.1 timbl 358: }
359:
360:
361:
362: /* Structured Object Class
363: ** -----------------------
364: */
2.57 frystyk 365: PRIVATE const HTStreamClass HTMIME =
2.1 timbl 366: {
367: "MIMEParser",
2.18 frystyk 368: HTMIME_flush,
2.1 timbl 369: HTMIME_free,
2.6 timbl 370: HTMIME_abort,
371: HTMIME_put_character,
372: HTMIME_put_string,
2.18 frystyk 373: HTMIME_put_block
2.1 timbl 374: };
375:
376:
2.48 frystyk 377: /* MIME header parser stream.
2.1 timbl 378: ** -------------------------
2.48 frystyk 379: ** This stream parses a complete MIME header and if a content type header
380: ** is found then the stream stack is called. Any left over data is pumped
381: ** right through the stream
2.1 timbl 382: */
2.36 frystyk 383: PUBLIC HTStream* HTMIMEConvert (HTRequest * request,
384: void * param,
385: HTFormat input_format,
386: HTFormat output_format,
387: HTStream * output_stream)
2.1 timbl 388: {
2.62 frystyk 389: HTStream * me;
2.52 frystyk 390: if ((me = (HTStream *) HT_CALLOC(1, sizeof(* me))) == NULL)
391: HT_OUTOFMEM("HTMIMEConvert");
2.1 timbl 392: me->isa = &HTMIME;
2.18 frystyk 393: me->request = request;
2.70 ! frystyk 394: me->anchor = HTRequest_anchor(request);
! 395: me->net = HTRequest_net(request);
2.49 frystyk 396: me->target = output_stream;
2.18 frystyk 397: me->target_format = output_format;
2.64 eric 398: me->token = HTChunk_new(256);
399: me->value = HTChunk_new(256);
2.70 ! frystyk 400: me->cache = HTCacheMode_enabled() && HTAnchor_cachable(me->anchor);
2.64 eric 401: me->hash = 0;
2.18 frystyk 402: me->EOLstate = EOL_BEGIN;
2.64 eric 403: me->haveToken = NO;
2.1 timbl 404: return me;
405: }
2.32 frystyk 406:
2.48 frystyk 407: /* MIME header ONLY parser stream
408: ** ------------------------------
409: ** This stream parses a complete MIME header and then returnes HT_PAUSE.
410: ** It does not set up any streams and resting data stays in the buffer.
411: ** This can be used if you only want to parse the headers before you
412: ** decide what to do next. This is for example the case in a server app.
413: */
414: PUBLIC HTStream * HTMIMEHeader (HTRequest * request,
415: void * param,
416: HTFormat input_format,
417: HTFormat output_format,
418: HTStream * output_stream)
419: {
2.62 frystyk 420: HTStream * me = HTMIMEConvert(request, param, input_format,
421: output_format, output_stream);
2.70 ! frystyk 422: me->mode |= HT_MIME_HEADER;
2.48 frystyk 423: return me;
424: }
2.62 frystyk 425:
426: /* MIME footer ONLY parser stream
427: ** ------------------------------
428: ** Parse only a footer, for example after a chunked encoding.
429: */
430: PUBLIC HTStream * HTMIMEFooter (HTRequest * request,
431: void * param,
432: HTFormat input_format,
433: HTFormat output_format,
434: HTStream * output_stream)
435: {
436: HTStream * me = HTMIMEConvert(request, param, input_format,
437: output_format, output_stream);
2.70 ! frystyk 438: me->mode |= HT_MIME_FOOTER;
2.67 frystyk 439: me->EOLstate = EOL_FLF;
2.62 frystyk 440: return me;
441: }
Webmaster