Annotation of libwww/Library/src/HTMIME.c, revision 2.26
2.15 frystyk 1: /* HTMIME.c
2: ** MIME MESSAGE PARSE
3: **
2.22 frystyk 4: ** (c) COPYRIGHT MIT 1995.
2.15 frystyk 5: ** Please first read the full copyright statement in the file COPYRIGH.
2.1 timbl 6: **
7: ** This is RFC 1341-specific code.
8: ** The input stream pushed into this parser is assumed to be
9: ** stripped of CRs, i.e. lines end with LF, not CR LF.
10: ** (It is easy to change this except for the body part where
11: ** conversion can be slow.)
12: **
13: ** History:
14: ** Feb 92 Written Tim Berners-Lee, CERN
2.13 duns 15: ** 8 Jul 94 FM Insulate free() from _free structure element.
2.18 frystyk 16: ** 14 Mar 95 HFN Now using anchor for storing data. No more `\n',
17: ** static buffers etc.
2.1 timbl 18: */
2.17 frystyk 19:
20: /* Library include files */
21: #include "tcp.h"
22: #include "HTUtils.h"
23: #include "HTString.h"
2.9 luotonen 24: #include "HTFormat.h"
2.18 frystyk 25: #include "HTChunk.h"
2.26 ! frystyk 26: #include "HTMethod.h"
2.24 frystyk 27: #include "HTSocket.h"
2.17 frystyk 28: #include "HTFWrite.h"
2.14 frystyk 29: #include "HTMIME.h" /* Implemented here */
2.1 timbl 30:
31: /* MIME Object
32: ** -----------
33: */
/*
**  States of the header parser in parseheader(). The numeric values follow
**  declaration order, starting at 0 for BEGINNING_OF_LINE.
*/
typedef enum _MIME_state {
    /* Scanning states */
    BEGINNING_OF_LINE = 0,              /* First character of a header line */
    CHECK,                                   /* Check against check_pointer */
    UNKNOWN,                                              /* Unknown header */
    JUNK_LINE,                                     /* Ignore rest of header */

    /* Intermediate states used while a header name is still ambiguous */
    CONTENT,
    FIRSTLETTER_D,
    FIRSTLETTER_L,
    CONTENTLETTER_L,
    CONTENTLETTER_T,

    /* Headers supported */
    ALLOW,
    AUTHENTICATE,
    CONTENT_ENCODING,
    CONTENT_LANGUAGE,
    CONTENT_LENGTH,
    CONTENT_TRANSFER_ENCODING,
    CONTENT_TYPE,
    MIME_DATE,
    DERIVED_FROM,
    EXPIRES,
    LAST_MODIFIED,
    LINK,
    LOCATION,
    PUBLIC_METHODS,
    RETRY_AFTER,
    TITLE,
    URI_HEADER,
    VERSION
} MIME_state;
65:
66: struct _HTStream {
2.18 frystyk 67: CONST HTStreamClass * isa;
68: HTRequest * request;
69: HTStream * target;
70: HTFormat target_format;
71: HTChunk * buffer;
72: HTSocketEOL EOLstate;
73: BOOL transparent;
2.1 timbl 74: };
75:
2.18 frystyk 76: /* ------------------------------------------------------------------------- */
2.1 timbl 77:
2.18 frystyk 78: /*
2.1 timbl 79: ** This is a FSM parser which is tolerant as it can be of all
80: ** syntax errors. It ignores field names it does not understand,
81: ** and resynchronises on line beginnings.
82: */
2.18 frystyk 83: PRIVATE void parseheader ARGS3(HTStream *, me, HTRequest *, request,
84: HTParentAnchor *, anchor)
85: {
86: MIME_state state = BEGINNING_OF_LINE;
87: MIME_state ok_state; /* got this state if match */
88: char *ptr = me->buffer->data-1; /* We dont change the data in length */
89: char *stop = ptr+me->buffer->size; /* When to stop */
90: char *header = ptr; /* For diagnostics */
91: CONST char * check_pointer; /* checking input */
92: char *value;
93: me->transparent = YES; /* Pump rest of data right through */
2.26 ! frystyk 94: if (!me->buffer->data) /* No header to parse */
2.18 frystyk 95: return;
96: while (ptr < stop) {
97: switch (state) {
98: case BEGINNING_OF_LINE:
99: header = ++ptr;
100: switch (TOLOWER(*ptr)) {
101: case 'a':
102: check_pointer = "llow";
103: ok_state = ALLOW;
104: state = CHECK;
105: break;
106:
107: case 'c':
108: check_pointer = "ontent-";
109: ok_state = CONTENT;
110: state = CHECK;
111: break;
112:
113: case 'd':
114: state = FIRSTLETTER_D;
115: break;
116:
117: case 'e':
118: check_pointer = "xpires";
119: ok_state = EXPIRES;
120: state = CHECK;
121: break;
122:
123: case 'l':
124: state = FIRSTLETTER_L;
125: break;
126:
127: case 'm':
128: check_pointer = "ime-version";
129: ok_state = JUNK_LINE; /* We don't use this but recognize it */
130: state = CHECK;
131: break;
132:
133: case 'p':
134: break;
135:
136: case 'r':
137: check_pointer = "etry-after";
138: ok_state = RETRY_AFTER;
139: state = CHECK;
140: break;
141:
142: case 's':
143: check_pointer = "erver";
144: ok_state = JUNK_LINE; /* We don't use this but recognize it */
145: state = CHECK;
146: break;
2.1 timbl 147:
2.18 frystyk 148: case 't':
149: check_pointer = "itle";
150: ok_state = TITLE;
151: state = CHECK;
152: break;
153:
154: case 'u':
155: check_pointer = "ri";
156: ok_state = URI_HEADER;
157: state = CHECK;
158: break;
159:
160: case 'v':
161: check_pointer = "ersion";
162: ok_state = VERSION;
163: state = CHECK;
164: break;
165:
166: case 'w':
167: check_pointer = "ww-authenticate";
168: ok_state = AUTHENTICATE;
169: state = CHECK;
170: break;
2.1 timbl 171:
2.18 frystyk 172: default:
173: state = UNKNOWN;
174: break;
175: }
176: ptr++;
2.1 timbl 177: break;
178:
2.18 frystyk 179: case FIRSTLETTER_D:
180: switch (TOLOWER(*ptr)) {
181: case 'a':
182: check_pointer = "te";
2.23 frystyk 183: ok_state = MIME_DATE;
2.18 frystyk 184: state = CHECK;
185: break;
186:
187: case 'e':
188: check_pointer = "rived-from";
189: ok_state = DERIVED_FROM;
190: state = CHECK;
191: break;
192:
193: default:
194: state = UNKNOWN;
195: break;
196: }
197: ptr++;
198: break;
199:
200: case FIRSTLETTER_L:
201: switch (TOLOWER(*ptr)) {
202: case 'a':
203: check_pointer = "st-modified";
204: ok_state = LAST_MODIFIED;
205: state = CHECK;
206: break;
207:
208: case 'i':
209: check_pointer = "nk";
210: ok_state = LINK;
211: state = CHECK;
212: break;
213:
214: case 'o':
215: check_pointer = "cation";
216: ok_state = LOCATION;
217: state = CHECK;
218: break;
219:
220: default:
221: state = UNKNOWN;
222: break;
223: }
224: ptr++;
225: break;
226:
227: case CONTENT:
228: switch (TOLOWER(*ptr)) {
229: case 'e':
230: check_pointer = "ncoding";
231: ok_state = CONTENT_ENCODING;
232: state = CHECK;
233: break;
234:
235: case 'l':
236: state = CONTENTLETTER_L;
237: break;
238:
239: case 't':
240: state = CONTENTLETTER_T;
241: break;
242:
243: default:
244: state = UNKNOWN;
245: break;
246: }
247: ptr++;
2.1 timbl 248: break;
2.14 frystyk 249:
2.18 frystyk 250: case CONTENTLETTER_L:
251: switch (TOLOWER(*ptr)) {
252: case 'a':
253: check_pointer = "nguage";
254: ok_state = CONTENT_LANGUAGE;
255: state = CHECK;
256: break;
257:
258: case 'e':
259: check_pointer = "ngth";
260: ok_state = CONTENT_LENGTH;
261: state = CHECK;
262: break;
263:
264: default:
265: state = UNKNOWN;
266: break;
267: }
268: ptr++;
2.14 frystyk 269: break;
270:
2.18 frystyk 271: case CONTENTLETTER_T:
272: switch (TOLOWER(*ptr)) {
273: case 'r':
274: check_pointer = "ansfer-encoding";
275: ok_state = CONTENT_TRANSFER_ENCODING;
276: state = CHECK;
277: break;
278:
279: case 'y':
280: check_pointer = "pe";
281: ok_state = CONTENT_TYPE;
282: state = CHECK;
283: break;
284:
285: default:
286: state = UNKNOWN;
287: break;
288: }
289: ptr++;
2.14 frystyk 290: break;
291:
2.18 frystyk 292: case CHECK: /* Check against string */
293: while (TOLOWER(*ptr) == *(check_pointer)++) ptr++;
294: if (!*--check_pointer) {
295: state = ok_state;
296: while (*ptr && (WHITE(*ptr) || *ptr==':')) /* Spool to value */
297: ptr++;
298: } else
299: state = UNKNOWN;
2.14 frystyk 300: break;
301:
2.18 frystyk 302: case ALLOW:
2.20 frystyk 303: while ((value = HTNextField(&ptr)) != NULL) {
304: HTMethod new_method;
2.26 ! frystyk 305: /* We treat them as case-insensitive! */
2.20 frystyk 306: if ((new_method = HTMethod_enum(value)) != METHOD_INVALID)
307: anchor->methods += new_method;
2.1 timbl 308: }
2.18 frystyk 309: if (STREAM_TRACE)
310: fprintf(TDEST, "MIMEParser.. Methods allowed: %d\n",
311: anchor->methods);
312: state = JUNK_LINE;
2.1 timbl 313: break;
2.18 frystyk 314:
315: case AUTHENTICATE:
316: if ((value = HTNextField(&ptr)) != NULL) {
317: StrAllocCopy(request->WWWAAScheme, value);
2.20 frystyk 318:
319: /* The parsing is done in HTSSUtils.c for the moment */
320: if (*ptr) StrAllocCopy(request->WWWAARealm, ptr);
2.1 timbl 321: }
2.18 frystyk 322: state = JUNK_LINE;
323: break;
324:
325: case CONTENT_ENCODING:
326: if ((value = HTNextField(&ptr)) != NULL) {
327: char *lc = value;
2.20 frystyk 328: while ((*lc = TOLOWER(*lc))) lc++;
2.18 frystyk 329: anchor->content_encoding = HTAtom_for(value);
330: }
331: state = JUNK_LINE;
332: break;
333:
2.21 frystyk 334: case CONTENT_LANGUAGE: /* @@@ SHOULD BE A LIST @@@ */
335: if ((value = HTNextField(&ptr)) != NULL) {
336: char *lc = value;
337: while ((*lc = TOLOWER(*lc))) lc++;
338: anchor->content_language = HTAtom_for(value);
339: }
340: state = JUNK_LINE;
2.18 frystyk 341: break;
342:
343: case CONTENT_LENGTH:
344: if ((value = HTNextField(&ptr)) != NULL)
345: anchor->content_length = atol(value);
346: state = JUNK_LINE;
347: break;
348:
349: case CONTENT_TRANSFER_ENCODING:
350: if ((value = HTNextField(&ptr)) != NULL) {
351: char *lc = value;
2.20 frystyk 352: while ((*lc = TOLOWER(*lc))) lc++;
2.18 frystyk 353: anchor->cte = HTAtom_for(value);
354: }
355: state = JUNK_LINE;
356: break;
357:
358: case CONTENT_TYPE:
359: if ((value = HTNextField(&ptr)) != NULL) {
360: char *lc = value;
361: while ((*lc = TOLOWER(*lc))) lc++;
362: anchor->content_type = HTAtom_for(value);
2.20 frystyk 363: while ((value = HTNextField(&ptr)) != NULL) { /* Charset */
364: if (!strcasecomp(value, "charset")) {
365: if ((value = HTNextField(&ptr)) != NULL) {
366: lc = value;
367: while ((*lc = TOLOWER(*lc))) lc++;
368: anchor->charset = HTAtom_for(value);
369: }
370: } else if (!strcasecomp(value, "level")) { /* Level */
371: if ((value = HTNextField(&ptr)) != NULL) {
372: lc = value;
373: while ((*lc = TOLOWER(*lc))) lc++;
374: anchor->level = HTAtom_for(value);
375: }
376: }
377: }
2.1 timbl 378: }
2.20 frystyk 379: state = JUNK_LINE;
2.18 frystyk 380: break;
381:
2.23 frystyk 382: case MIME_DATE:
2.18 frystyk 383: anchor->date = HTParseTime(ptr);
384: state = JUNK_LINE;
385: break;
386:
387: case DERIVED_FROM:
388: if ((value = HTNextField(&ptr)) != NULL)
389: StrAllocCopy(anchor->derived_from, value);
390: state = JUNK_LINE;
391: break;
392:
393: case EXPIRES:
394: anchor->expires = HTParseTime(ptr);
395: state = JUNK_LINE;
396: break;
397:
398: case LAST_MODIFIED:
399: anchor->last_modified = HTParseTime(ptr);
400: state = JUNK_LINE;
401: break;
402:
403: case LINK:
2.20 frystyk 404: state = UNKNOWN; /* @@@@@@@@@@@ */
2.18 frystyk 405: break;
406:
407: case LOCATION:
408: if ((value = HTNextField(&ptr)) != NULL)
409: StrAllocCopy(request->redirect, value);
410: state = JUNK_LINE;
411: break;
412:
413: case PUBLIC_METHODS:
2.20 frystyk 414: state = UNKNOWN; /* @@@@@@@@@@@ */
2.18 frystyk 415: break;
416:
417: case RETRY_AFTER:
2.19 frystyk 418: request->retry_after = HTParseTime(ptr);
419: state = JUNK_LINE;
2.18 frystyk 420: break;
421:
422: case TITLE: /* Can't reuse buffer as HTML version might differ */
423: if ((value = HTNextField(&ptr)) != NULL)
424: StrAllocCopy(anchor->title, value);
425: state = JUNK_LINE;
426: break;
427:
428: case URI_HEADER:
429: state = LOCATION; /* @@@ Need extended parsing */
430: break;
431:
432: case VERSION:
433: if ((value = HTNextField(&ptr)) != NULL)
434: StrAllocCopy(anchor->version, value);
435: state = JUNK_LINE;
436: break;
437:
438: case UNKNOWN:
439: if (STREAM_TRACE)
440: fprintf(TDEST,"MIMEParser.. Unknown header: `%s\'\n", header);
441: HTAnchor_addExtra(anchor, header);
442:
443: /* Fall through */
444:
445: case JUNK_LINE:
446: while (*ptr) ptr++;
447: state = BEGINNING_OF_LINE;
448: break;
2.1 timbl 449: }
2.18 frystyk 450: }
451:
452: if (STREAM_TRACE)
453: fprintf(TDEST, "MIMEParser.. Media type %s is converted to %s\n",
454: HTAtom_name(anchor->content_type),
455: HTAtom_name(me->target_format));
456: if ((me->target = HTStreamStack(anchor->content_type,
457: me->target_format, me->target,
458: me->request, YES)) == NULL) {
459: if (STREAM_TRACE)
460: fprintf(TDEST, "MIMEParser.. Can't convert media type\n");
461: me->target = HTBlackHole();
462: }
463: anchor->header_parsed = YES;
2.1 timbl 464: }
465:
466:
2.18 frystyk 467: /*
468: ** Header is terminated by CRCR, LFLF, CRLFLF, CRLFCRLF
469: ** Folding is either of CF LWS, LF LWS, CRLF LWS
470: */
471: PRIVATE int HTMIME_put_block ARGS3(HTStream *, me, CONST char *, b, int, l)
472: {
473: while (!me->transparent && l-- > 0) {
474: if (me->EOLstate == EOL_FCR) {
475: if (*b == CR) /* End of header */
476: parseheader(me, me->request, me->request->anchor);
477: else if (*b == LF) /* CRLF */
478: me->EOLstate = EOL_FLF;
479: else if (WHITE(*b)) { /* Folding: CR SP */
480: me->EOLstate = EOL_BEGIN;
481: HTChunkPutc(me->buffer, ' ');
482: } else { /* New line */
483: me->EOLstate = EOL_BEGIN;
484: HTChunkPutc(me->buffer, '\0');
485: HTChunkPutc(me->buffer, *b);
486: }
487: } else if (me->EOLstate == EOL_FLF) {
488: if (*b == CR) /* LF CR or CR LF CR */
489: me->EOLstate = EOL_SCR;
490: else if (*b == LF) /* End of header */
491: parseheader(me, me->request, me->request->anchor);
492: else if (WHITE(*b)) { /* Folding: LF SP or CR LF SP */
493: me->EOLstate = EOL_BEGIN;
494: HTChunkPutc(me->buffer, ' ');
495: } else { /* New line */
496: me->EOLstate = EOL_BEGIN;
497: HTChunkPutc(me->buffer, '\0');
498: HTChunkPutc(me->buffer, *b);
499: }
500: } else if (me->EOLstate == EOL_SCR) {
501: if (*b==CR || *b==LF) /* End of header */
502: parseheader(me, me->request, me->request->anchor);
503: else if (WHITE(*b)) { /* Folding: LF CR SP or CR LF CR SP */
504: me->EOLstate = EOL_BEGIN;
505: HTChunkPutc(me->buffer, ' ');
506: } else { /* New line */
507: me->EOLstate = EOL_BEGIN;
508: HTChunkPutc(me->buffer, '\0');
509: HTChunkPutc(me->buffer, *b);
510: }
511: } else if (*b == CR) {
512: me->EOLstate = EOL_FCR;
513: } else if (*b == LF) {
514: me->EOLstate = EOL_FLF; /* Line found */
515: } else
516: HTChunkPutc(me->buffer, *b);
517: b++;
518: }
2.26 ! frystyk 519: if (me->target) { /* Is the stream set up? */
! 520: if (l > 0) /* Anything left? */
! 521: return (*me->target->isa->put_block)(me->target, b, l);
! 522: return HT_OK;
! 523: }
! 524: return HT_WOULD_BLOCK;
2.18 frystyk 525: }
526:
527:
528: /* Character handling
529: ** ------------------
530: */
2.21 frystyk 531: PRIVATE int HTMIME_put_character ARGS2(HTStream *, me, char, c)
2.18 frystyk 532: {
533: return HTMIME_put_block(me, &c, 1);
534: }
535:
2.1 timbl 536:
537: /* String handling
538: ** ---------------
539: */
2.18 frystyk 540: PRIVATE int HTMIME_put_string ARGS2(HTStream *, me, CONST char *, s)
2.1 timbl 541: {
2.18 frystyk 542: return HTMIME_put_block(me, s, (int) strlen(s));
2.1 timbl 543: }
544:
545:
2.18 frystyk 546: /* Flush an stream object
547: ** ---------------------
2.1 timbl 548: */
2.18 frystyk 549: PRIVATE int HTMIME_flush ARGS1(HTStream *, me)
2.1 timbl 550: {
2.18 frystyk 551: return (*me->target->isa->flush)(me->target);
2.1 timbl 552: }
553:
2.18 frystyk 554: /* Free a stream object
555: ** --------------------
2.1 timbl 556: */
2.14 frystyk 557: PRIVATE int HTMIME_free ARGS1(HTStream *, me)
2.1 timbl 558: {
2.18 frystyk 559: int status = HT_OK;
2.25 frystyk 560: if (me->target) {
561: if ((status = (*me->target->isa->_free)(me->target))==HT_WOULD_BLOCK)
562: return HT_WOULD_BLOCK;
563: }
2.26 ! frystyk 564: if (PROT_TRACE)
! 565: fprintf(TDEST, "MIME........ FREEING....\n");
2.19 frystyk 566: HTChunkFree(me->buffer);
2.1 timbl 567: free(me);
2.18 frystyk 568: return status;
2.1 timbl 569: }
570:
571: /* End writing
572: */
2.14 frystyk 573: PRIVATE int HTMIME_abort ARGS2(HTStream *, me, HTError, e)
2.1 timbl 574: {
2.18 frystyk 575: int status = HT_ERROR;
576: if (me->target)
577: status = (*me->target->isa->abort)(me->target, e);
2.26 ! frystyk 578: if (PROT_TRACE)
! 579: fprintf(TDEST, "MIME........ ABORTING...\n");
! 580: HTChunkFree(me->buffer);
2.6 timbl 581: free(me);
2.18 frystyk 582: return status;
2.1 timbl 583: }
584:
585:
586:
587: /* Structured Object Class
588: ** -----------------------
589: */
2.6 timbl 590: PRIVATE CONST HTStreamClass HTMIME =
2.1 timbl 591: {
592: "MIMEParser",
2.18 frystyk 593: HTMIME_flush,
2.1 timbl 594: HTMIME_free,
2.6 timbl 595: HTMIME_abort,
596: HTMIME_put_character,
597: HTMIME_put_string,
2.18 frystyk 598: HTMIME_put_block
2.1 timbl 599: };
600:
601:
602: /* Subclass-specific Methods
603: ** -------------------------
604: */
2.7 timbl 605: PUBLIC HTStream* HTMIMEConvert ARGS5(
606: HTRequest *, request,
607: void *, param,
608: HTFormat, input_format,
609: HTFormat, output_format,
610: HTStream *, output_stream)
2.1 timbl 611: {
612: HTStream* me;
2.18 frystyk 613: if ((me=(HTStream *) calloc(1, sizeof(* me))) == NULL)
614: outofmem(__FILE__, "HTMIMEConvert");
2.1 timbl 615: me->isa = &HTMIME;
2.18 frystyk 616: me->request = request;
617: me->target = output_stream;
618: me->target_format = output_format;
619: me->buffer = HTChunkCreate(512);
620: me->EOLstate = EOL_BEGIN;
2.1 timbl 621: return me;
622: }
Webmaster