Annotation of libwww/Library/src/HTMIME.c, revision 2.69
2.15 frystyk 1: /* HTMIME.c
2: ** MIME MESSAGE PARSE
3: **
2.22 frystyk 4: ** (c) COPYRIGHT MIT 1995.
2.15 frystyk 5: ** Please first read the full copyright statement in the file COPYRIGH.
2.69 ! frystyk 6: ** @(#) $Id: HTMIME.c,v 2.68 1996/08/19 18:30:43 frystyk Exp $
2.1 timbl 7: **
8: ** This is RFC 1341-specific code.
9: ** The input stream pushed into this parser is assumed to be
10: ** stripped of CRs, i.e. lines end with LF, not CR LF.
11: ** (It is easy to change this except for the body part where
12: ** conversion can be slow.)
13: **
14: ** History:
15: ** Feb 92 Written Tim Berners-Lee, CERN
2.13 duns 16: ** 8 Jul 94 FM Insulate free() from _free structure element.
2.18 frystyk 17: ** 14 Mar 95 HFN Now using anchor for storing data. No more `\n',
18: ** static buffers etc.
2.1 timbl 19: */
2.17 frystyk 20:
21: /* Library include files */
2.57 frystyk 22: #include "sysdep.h"
2.60 frystyk 23: #include "WWWUtil.h"
2.61 frystyk 24: #include "WWWCore.h"
25: #include "HTReqMan.h"
26: #include "HTNetMan.h"
2.36 frystyk 27: #include "HTHeader.h"
2.64 eric 28: #include "HTWWWStr.h"
2.14 frystyk 29: #include "HTMIME.h" /* Implemented here */
2.1 timbl 30:
2.64 eric 31: #define MIME_HASH_SIZE 101
32:
2.1 timbl 33: /* MIME Object
34: ** -----------
35: */
36: struct _HTStream {
2.57 frystyk 37: const HTStreamClass * isa;
2.18 frystyk 38: HTRequest * request;
2.32 frystyk 39: HTNet * net;
40: HTParentAnchor * anchor;
2.18 frystyk 41: HTStream * target;
42: HTFormat target_format;
2.64 eric 43: HTChunk * token;
44: HTChunk * value;
45: int hash;
2.59 frystyk 46: HTEOLState EOLstate;
2.18 frystyk 47: BOOL transparent;
2.48 frystyk 48: BOOL head_only;
2.62 frystyk 49: BOOL footer;
2.64 eric 50: BOOL haveToken;
2.1 timbl 51: };
52:
2.18 frystyk 53: /* ------------------------------------------------------------------------- */
2.1 timbl 54:
2.64 eric 55: PRIVATE int pumpData (HTStream * me)
2.18 frystyk 56: {
2.64 eric 57: HTRequest * request = me->request;
58: HTParentAnchor * anchor = me->anchor;
2.68 frystyk 59: HTFormat format = HTAnchor_format(anchor);
60: HTEncoding transfer = HTAnchor_transfer(anchor);
61: long length = HTAnchor_length(anchor);
2.48 frystyk 62: me->transparent = YES; /* Pump rest of data right through */
2.27 frystyk 63:
2.68 frystyk 64: /* If this request is a source in PostWeb then pause here */
2.66 frystyk 65: if (HTRequest_isSource(request)) return HT_PAUSE;
2.47 frystyk 66:
2.48 frystyk 67: /* If HEAD method then we just stop here */
2.66 frystyk 68: if (me->head_only || me->footer ||
69: request->method == METHOD_HEAD) return HT_LOADED;
2.43 frystyk 70:
2.60 frystyk 71: /*
2.68 frystyk 72: ** If there is no content-length, no transfer encoding and no
73: ** content type then we assume that there is no
74: ** bodypart in the message and we can return HT_LOADED
75: */
76: if (length<=0 && format==WWW_UNKNOWN && transfer==NULL) {
77: if (STREAM_TRACE) HTTrace("MIME Parser. No body in this messsage\n");
78: return HT_LOADED;
79: }
80:
81: /*
2.60 frystyk 82: ** Handle any Content Type
83: */
2.61 frystyk 84: {
2.69 ! frystyk 85: if (format != WWW_UNKNOWN || length>0 || transfer) {
2.61 frystyk 86: if (STREAM_TRACE) HTTrace("Building.... C-T stack from %s to %s\n",
87: HTAtom_name(format),
88: HTAtom_name(me->target_format));
89: me->target = HTStreamStack(format, me->target_format,
90: me->target, request, YES);
91: }
2.18 frystyk 92: }
2.60 frystyk 93:
94: /* Handle any Content Encoding */
2.61 frystyk 95: {
96: HTList * cc = HTAnchor_encoding(anchor);
97: if (cc) {
98: if (STREAM_TRACE) HTTrace("Building.... C-E stack\n");
99: me->target = HTContentDecodingStack(cc, me->target, request, NULL);
100: }
2.60 frystyk 101: }
102:
103: /* Handle any Transfer encoding */
2.61 frystyk 104: {
105: if (!HTFormat_isUnityTransfer(transfer)) {
106: if (STREAM_TRACE) HTTrace("Building.... C-T-E stack\n");
107: me->target = HTTransferCodingStack(transfer, me->target,
108: request, NULL, NO);
109: }
110: }
2.27 frystyk 111: return HT_OK;
2.1 timbl 112: }
113:
2.65 eric 114: /* _dispatchParsers - call request's MIME header parser.
115: ** Use global parser if no appropriate one is found for request.
116: */
117: PRIVATE int _dispatchParsers (HTStream * me)
118: {
119: int status;
120: char * token = HTChunk_data(me->token);
121: char * value = HTChunk_data(me->value);
122: BOOL found, local;
123: HTMIMEParseSet * parseSet;
124:
125: /* In case we get an empty header consisting of a CRLF, we fall thru */
2.66 frystyk 126: if (STREAM_TRACE) HTTrace("MIME header. %s: %s\n",
127: token ? token : "<null>",
128: value ? value : "<null>");
129: if (!token) return HT_OK; /* Ignore noop token */
2.65 eric 130:
131: if ((parseSet = HTRequest_MIMEParseSet(me->request, &local)) != NULL) {
132: status = HTMIMEParseSet_dispatch(parseSet, me->request,
133: token, value, &found);
134: if (found)
135: return status;
136: if (local)
137: return HT_OK; /* not found, but that's OK */
138: }
139:
140: if ((parseSet = HTHeader_MIMEParseSet()) == NULL)
141: return HT_OK;
142: status = HTMIMEParseSet_dispatch(parseSet, me->request,
143: token, value, &found);
144: if (found)
145: return status;
146: if (STREAM_TRACE) HTTrace("Ignoring MIME header: %s: %s.\n", token, value);
147:
148: return HT_OK;
149: }
150:
2.18 frystyk 151: /*
152: ** Header is terminated by CRCR, LFLF, CRLFLF, CRLFCRLF
153: ** Folding is either of CF LWS, LF LWS, CRLF LWS
154: */
2.57 frystyk 155: PRIVATE int HTMIME_put_block (HTStream * me, const char * b, int l)
2.18 frystyk 156: {
2.57 frystyk 157: const char * start = b;
158: const char * end = start;
2.64 eric 159: const char * value = me->value->size ? b : NULL;
160: long cl;
161: int status;
162: /* enum {Line_CHAR, Line_END, Line_FOLD, Line_LINE} line = Line_CHAR; */
163:
164: while (!me->transparent) {
2.18 frystyk 165: if (me->EOLstate == EOL_FCR) {
2.64 eric 166: if (*b == CR) /* End of header */
167: me->EOLstate = EOL_END;
168: else if (*b == LF) /* CRLF */
2.18 frystyk 169: me->EOLstate = EOL_FLF;
2.64 eric 170: else if (WHITE(*b)) /* Folding: CR SP */
171: me->EOLstate = EOL_FOLD;
172: else /* New line */
173: me->EOLstate = EOL_LINE;
2.18 frystyk 174: } else if (me->EOLstate == EOL_FLF) {
175: if (*b == CR) /* LF CR or CR LF CR */
176: me->EOLstate = EOL_SCR;
2.64 eric 177: else if (*b == LF) /* End of header */
178: me->EOLstate = EOL_END;
179: else if (WHITE(*b)) /* Folding: LF SP or CR LF SP */
180: me->EOLstate = EOL_FOLD;
181: else /* New line */
182: me->EOLstate = EOL_LINE;
183: } else if (me->EOLstate == EOL_SCR) {
184: if (*b==CR || *b==LF) /* End of header */
185: me->EOLstate = EOL_END;
186: else if (WHITE(*b)) /* Folding: LF CR SP or CR LF CR SP */
187: me->EOLstate = EOL_FOLD;
188: else /* New line */
189: me->EOLstate = EOL_LINE;
190: } else if (*b == CR)
191: me->EOLstate = EOL_FCR;
192: else if (*b == LF)
193: me->EOLstate = EOL_FLF; /* Line found */
194: else {
195: if (!me->haveToken) {
196: if (*b == ':' || isspace(*b)) {
197: HTChunk_putb(me->token, start, end-start);
198: HTChunk_putc(me->token, '\0');
199: me->haveToken = YES;
200: } else {
201: unsigned char ch = *(unsigned char *) b;
202: tolower(ch);
203: /* if (ch >= 'A' && ch <= 'Z')
204: ch += ('a' - 'A'); */
205: me->hash = (me->hash * 3 + ch) % MIME_HASH_SIZE;
206: }
207: } else if (value == NULL && *b != ':' && !isspace(*b))
208: value = b;
209: end++;
210: }
211: switch (me->EOLstate) {
212: case EOL_LINE:
213: case EOL_END: {
214: int status;
215: HTChunk_putb(me->value, value, end-value);
216: HTChunk_putc(me->value, '\0');
217: start=b, end=b;
2.65 eric 218: status = _dispatchParsers(me);
2.64 eric 219: if (me->EOLstate == EOL_END) { /* EOL_END */
2.67 frystyk 220: if (status == HT_OK) {
221: b++, l--;
2.64 eric 222: status = pumpData(me);
2.67 frystyk 223: }
2.64 eric 224: HTNet_setBytesRead(me->net, l);
225: } else { /* EOL_LINE */
226: HTChunk_clear(me->token);
227: HTChunk_clear(me->value);
228: me->haveToken = NO;
229: me->hash = 0;
230: value = NULL;
231: }
2.18 frystyk 232: me->EOLstate = EOL_BEGIN;
2.27 frystyk 233: if (status != HT_OK)
234: return status;
2.64 eric 235: break;
236: }
237: case EOL_FOLD:
2.18 frystyk 238: me->EOLstate = EOL_BEGIN;
2.64 eric 239: if (!me->haveToken) {
240: HTChunk_putb(me->token, start, end-start);
241: HTChunk_putc(me->token, '\0');
242: me->haveToken = YES;
243: } else if (value) {
244: HTChunk_putb(me->value, value, end-value);
245: HTChunk_putc(me->value, ' ');
246: }
247: start=b, end=b;
248: break;
249: default:
250: b++;
251: l--;
252: if (!l) {
253: if (!me->haveToken)
254: HTChunk_putb(me->token, start, end-start);
255: else if (value)
256: HTChunk_putb(me->value, value, end-value);
257: return HT_OK;
258: }
259: }
2.18 frystyk 260: }
2.32 frystyk 261:
262: /*
263: ** Put the rest down the stream without touching the data but make sure
264: ** that we get the correct content length of data
265: */
2.66 frystyk 266: if (me->target) {
267: if ((status = (*me->target->isa->put_block)(me->target, b, l)) != HT_OK)
268: return status;
269: /* Check if CL at all - thanks to jwei@hal.com (John Wei) */
270: cl = HTAnchor_length(me->anchor);
271: return (cl>=0 && HTNet_bytesRead(me->net)>=cl) ? HT_LOADED : HT_OK;
272: }
273: return HT_LOADED;
2.18 frystyk 274: }
275:
276:
277: /* Character handling
278: ** ------------------
279: */
2.36 frystyk 280: PRIVATE int HTMIME_put_character (HTStream * me, char c)
2.18 frystyk 281: {
282: return HTMIME_put_block(me, &c, 1);
283: }
284:
2.1 timbl 285:
286: /* String handling
287: ** ---------------
288: */
2.57 frystyk 289: PRIVATE int HTMIME_put_string (HTStream * me, const char * s)
2.1 timbl 290: {
2.18 frystyk 291: return HTMIME_put_block(me, s, (int) strlen(s));
2.1 timbl 292: }
293:
294:
2.18 frystyk 295: /* Flush an stream object
296: ** ---------------------
2.1 timbl 297: */
2.36 frystyk 298: PRIVATE int HTMIME_flush (HTStream * me)
2.1 timbl 299: {
2.47 frystyk 300: return me->target ? (*me->target->isa->flush)(me->target) : HT_OK;
2.1 timbl 301: }
302:
2.18 frystyk 303: /* Free a stream object
304: ** --------------------
2.1 timbl 305: */
2.36 frystyk 306: PRIVATE int HTMIME_free (HTStream * me)
2.1 timbl 307: {
2.18 frystyk 308: int status = HT_OK;
2.64 eric 309: if (!me->transparent)
2.65 eric 310: if (_dispatchParsers(me) == HT_OK)
2.64 eric 311: pumpData(me);
2.25 frystyk 312: if (me->target) {
313: if ((status = (*me->target->isa->_free)(me->target))==HT_WOULD_BLOCK)
314: return HT_WOULD_BLOCK;
315: }
2.26 frystyk 316: if (PROT_TRACE)
2.55 eric 317: HTTrace("MIME........ FREEING....\n");
2.64 eric 318: HTChunk_delete(me->token);
319: HTChunk_delete(me->value);
2.52 frystyk 320: HT_FREE(me);
2.18 frystyk 321: return status;
2.1 timbl 322: }
323:
324: /* End writing
325: */
2.38 frystyk 326: PRIVATE int HTMIME_abort (HTStream * me, HTList * e)
2.1 timbl 327: {
2.18 frystyk 328: int status = HT_ERROR;
2.41 frystyk 329: if (me->target) status = (*me->target->isa->abort)(me->target, e);
2.26 frystyk 330: if (PROT_TRACE)
2.55 eric 331: HTTrace("MIME........ ABORTING...\n");
2.64 eric 332: HTChunk_delete(me->token);
333: HTChunk_delete(me->value);
2.52 frystyk 334: HT_FREE(me);
2.18 frystyk 335: return status;
2.1 timbl 336: }
337:
338:
339:
340: /* Structured Object Class
341: ** -----------------------
342: */
2.57 frystyk 343: PRIVATE const HTStreamClass HTMIME =
2.1 timbl 344: {
345: "MIMEParser",
2.18 frystyk 346: HTMIME_flush,
2.1 timbl 347: HTMIME_free,
2.6 timbl 348: HTMIME_abort,
349: HTMIME_put_character,
350: HTMIME_put_string,
2.18 frystyk 351: HTMIME_put_block
2.1 timbl 352: };
353:
354:
2.48 frystyk 355: /* MIME header parser stream.
2.1 timbl 356: ** -------------------------
2.48 frystyk 357: ** This stream parses a complete MIME header and if a content type header
358: ** is found then the stream stack is called. Any left over data is pumped
359: ** right through the stream
2.1 timbl 360: */
2.36 frystyk 361: PUBLIC HTStream* HTMIMEConvert (HTRequest * request,
362: void * param,
363: HTFormat input_format,
364: HTFormat output_format,
365: HTStream * output_stream)
2.1 timbl 366: {
2.62 frystyk 367: HTStream * me;
2.52 frystyk 368: if ((me = (HTStream *) HT_CALLOC(1, sizeof(* me))) == NULL)
369: HT_OUTOFMEM("HTMIMEConvert");
2.1 timbl 370: me->isa = &HTMIME;
2.18 frystyk 371: me->request = request;
2.32 frystyk 372: me->anchor = request->anchor;
373: me->net = request->net;
2.49 frystyk 374: me->target = output_stream;
2.18 frystyk 375: me->target_format = output_format;
2.64 eric 376: me->token = HTChunk_new(256);
377: me->value = HTChunk_new(256);
378: me->hash = 0;
2.18 frystyk 379: me->EOLstate = EOL_BEGIN;
2.64 eric 380: me->haveToken = NO;
2.1 timbl 381: return me;
382: }
2.32 frystyk 383:
2.48 frystyk 384: /* MIME header ONLY parser stream
385: ** ------------------------------
386: ** This stream parses a complete MIME header and then returnes HT_PAUSE.
387: ** It does not set up any streams and resting data stays in the buffer.
388: ** This can be used if you only want to parse the headers before you
389: ** decide what to do next. This is for example the case in a server app.
390: */
391: PUBLIC HTStream * HTMIMEHeader (HTRequest * request,
392: void * param,
393: HTFormat input_format,
394: HTFormat output_format,
395: HTStream * output_stream)
396: {
2.62 frystyk 397: HTStream * me = HTMIMEConvert(request, param, input_format,
398: output_format, output_stream);
399: me->head_only = YES;
2.48 frystyk 400: return me;
401: }
2.62 frystyk 402:
403: /* MIME footer ONLY parser stream
404: ** ------------------------------
405: ** Parse only a footer, for example after a chunked encoding.
406: */
407: PUBLIC HTStream * HTMIMEFooter (HTRequest * request,
408: void * param,
409: HTFormat input_format,
410: HTFormat output_format,
411: HTStream * output_stream)
412: {
413: HTStream * me = HTMIMEConvert(request, param, input_format,
414: output_format, output_stream);
415: me->footer = YES;
2.67 frystyk 416: me->EOLstate = EOL_FLF;
2.62 frystyk 417: return me;
418: }
Webmaster