Annotation of libwww/Library/src/HTMIME.c, revision 2.31
2.15 frystyk 1: /* HTMIME.c
2: ** MIME MESSAGE PARSE
3: **
2.22 frystyk 4: ** (c) COPYRIGHT MIT 1995.
2.15 frystyk 5: ** Please first read the full copyright statement in the file COPYRIGH.
2.1 timbl 6: **
7: ** This is RFC 1341-specific code.
8: ** The input stream pushed into this parser is assumed to be
9: ** stripped of CRs, i.e. lines end with LF, not CR LF.
10: ** (It is easy to change this except for the body part where
11: ** conversion can be slow.)
12: **
13: ** History:
14: ** Feb 92 Written Tim Berners-Lee, CERN
2.13 duns 15: ** 8 Jul 94 FM Insulate free() from _free structure element.
2.18 frystyk 16: ** 14 Mar 95 HFN Now using anchor for storing data. No more `\n',
17: ** static buffers etc.
2.1 timbl 18: */
2.17 frystyk 19:
20: /* Library include files */
21: #include "tcp.h"
22: #include "HTUtils.h"
23: #include "HTString.h"
2.9 luotonen 24: #include "HTFormat.h"
2.27 frystyk 25: #include "HTCache.h"
26: #include "HTAlert.h"
2.18 frystyk 27: #include "HTChunk.h"
2.26 frystyk 28: #include "HTMethod.h"
2.24 frystyk 29: #include "HTSocket.h"
2.17 frystyk 30: #include "HTFWrite.h"
2.31 ! frystyk 31: #include "HTReqMan.h"
2.14 frystyk 32: #include "HTMIME.h" /* Implemented here */
2.1 timbl 33:
34: /* MIME Object
35: ** -----------
36: */
37: typedef enum _MIME_state {
2.23 frystyk 38: BEGINNING_OF_LINE=0,
2.18 frystyk 39: CHECK, /* check against check_pointer */
40: UNKNOWN, /* Unknown header */
41: JUNK_LINE, /* Ignore rest of header */
42:
43: CONTENT, /* Intermediate states */
44: FIRSTLETTER_D,
45: FIRSTLETTER_L,
46: CONTENTLETTER_L,
47: CONTENTLETTER_T,
48:
49: ALLOW, /* Headers supported */
50: AUTHENTICATE,
51: CONTENT_ENCODING,
52: CONTENT_LANGUAGE,
53: CONTENT_LENGTH,
2.14 frystyk 54: CONTENT_TRANSFER_ENCODING,
55: CONTENT_TYPE,
2.23 frystyk 56: MIME_DATE,
2.18 frystyk 57: DERIVED_FROM,
58: EXPIRES,
59: LAST_MODIFIED,
60: LINK,
2.14 frystyk 61: LOCATION,
2.18 frystyk 62: PUBLIC_METHODS,
63: RETRY_AFTER,
64: TITLE,
65: URI_HEADER,
66: VERSION
2.1 timbl 67: } MIME_state;
68:
69: struct _HTStream {
2.18 frystyk 70: CONST HTStreamClass * isa;
71: HTRequest * request;
72: HTStream * target;
73: HTFormat target_format;
74: HTChunk * buffer;
75: HTSocketEOL EOLstate;
76: BOOL transparent;
2.1 timbl 77: };
78:
2.18 frystyk 79: /* ------------------------------------------------------------------------- */
2.1 timbl 80:
2.18 frystyk 81: /*
2.1 timbl 82: ** This is a FSM parser which is tolerant as it can be of all
83: ** syntax errors. It ignores field names it does not understand,
84: ** and resynchronises on line beginnings.
85: */
2.27 frystyk 86: PRIVATE int parseheader ARGS3(HTStream *, me, HTRequest *, request,
87: HTParentAnchor *, anchor)
2.18 frystyk 88: {
89: MIME_state state = BEGINNING_OF_LINE;
90: MIME_state ok_state; /* got this state if match */
91: char *ptr = me->buffer->data-1; /* We dont change the data in length */
92: char *stop = ptr+me->buffer->size; /* When to stop */
93: char *header = ptr; /* For diagnostics */
94: CONST char * check_pointer; /* checking input */
95: char *value;
2.27 frystyk 96:
97: /* In case we get an empty header consisting of a CRLF, we fall thru */
2.18 frystyk 98: while (ptr < stop) {
99: switch (state) {
100: case BEGINNING_OF_LINE:
101: header = ++ptr;
102: switch (TOLOWER(*ptr)) {
103: case 'a':
104: check_pointer = "llow";
105: ok_state = ALLOW;
106: state = CHECK;
107: break;
108:
109: case 'c':
110: check_pointer = "ontent-";
111: ok_state = CONTENT;
112: state = CHECK;
113: break;
114:
115: case 'd':
116: state = FIRSTLETTER_D;
117: break;
118:
119: case 'e':
120: check_pointer = "xpires";
121: ok_state = EXPIRES;
122: state = CHECK;
123: break;
124:
125: case 'l':
126: state = FIRSTLETTER_L;
127: break;
128:
129: case 'm':
130: check_pointer = "ime-version";
131: ok_state = JUNK_LINE; /* We don't use this but recognize it */
132: state = CHECK;
133: break;
134:
135: case 'p':
136: break;
137:
138: case 'r':
139: check_pointer = "etry-after";
140: ok_state = RETRY_AFTER;
141: state = CHECK;
142: break;
143:
144: case 's':
145: check_pointer = "erver";
146: ok_state = JUNK_LINE; /* We don't use this but recognize it */
147: state = CHECK;
148: break;
2.1 timbl 149:
2.18 frystyk 150: case 't':
151: check_pointer = "itle";
152: ok_state = TITLE;
153: state = CHECK;
154: break;
155:
156: case 'u':
157: check_pointer = "ri";
158: ok_state = URI_HEADER;
159: state = CHECK;
160: break;
161:
162: case 'v':
163: check_pointer = "ersion";
164: ok_state = VERSION;
165: state = CHECK;
166: break;
167:
168: case 'w':
169: check_pointer = "ww-authenticate";
170: ok_state = AUTHENTICATE;
171: state = CHECK;
172: break;
2.1 timbl 173:
2.18 frystyk 174: default:
175: state = UNKNOWN;
176: break;
177: }
178: ptr++;
2.1 timbl 179: break;
180:
2.18 frystyk 181: case FIRSTLETTER_D:
182: switch (TOLOWER(*ptr)) {
183: case 'a':
184: check_pointer = "te";
2.23 frystyk 185: ok_state = MIME_DATE;
2.18 frystyk 186: state = CHECK;
187: break;
188:
189: case 'e':
190: check_pointer = "rived-from";
191: ok_state = DERIVED_FROM;
192: state = CHECK;
193: break;
194:
195: default:
196: state = UNKNOWN;
197: break;
198: }
199: ptr++;
200: break;
201:
202: case FIRSTLETTER_L:
203: switch (TOLOWER(*ptr)) {
204: case 'a':
205: check_pointer = "st-modified";
206: ok_state = LAST_MODIFIED;
207: state = CHECK;
208: break;
209:
210: case 'i':
211: check_pointer = "nk";
212: ok_state = LINK;
213: state = CHECK;
214: break;
215:
216: case 'o':
217: check_pointer = "cation";
218: ok_state = LOCATION;
219: state = CHECK;
220: break;
221:
222: default:
223: state = UNKNOWN;
224: break;
225: }
226: ptr++;
227: break;
228:
229: case CONTENT:
230: switch (TOLOWER(*ptr)) {
231: case 'e':
232: check_pointer = "ncoding";
233: ok_state = CONTENT_ENCODING;
234: state = CHECK;
235: break;
236:
237: case 'l':
238: state = CONTENTLETTER_L;
239: break;
240:
241: case 't':
242: state = CONTENTLETTER_T;
243: break;
244:
245: default:
246: state = UNKNOWN;
247: break;
248: }
249: ptr++;
2.1 timbl 250: break;
2.14 frystyk 251:
2.18 frystyk 252: case CONTENTLETTER_L:
253: switch (TOLOWER(*ptr)) {
254: case 'a':
255: check_pointer = "nguage";
256: ok_state = CONTENT_LANGUAGE;
257: state = CHECK;
258: break;
259:
260: case 'e':
261: check_pointer = "ngth";
262: ok_state = CONTENT_LENGTH;
263: state = CHECK;
264: break;
265:
266: default:
267: state = UNKNOWN;
268: break;
269: }
270: ptr++;
2.14 frystyk 271: break;
272:
2.18 frystyk 273: case CONTENTLETTER_T:
274: switch (TOLOWER(*ptr)) {
275: case 'r':
276: check_pointer = "ansfer-encoding";
277: ok_state = CONTENT_TRANSFER_ENCODING;
278: state = CHECK;
279: break;
280:
281: case 'y':
282: check_pointer = "pe";
283: ok_state = CONTENT_TYPE;
284: state = CHECK;
285: break;
286:
287: default:
288: state = UNKNOWN;
289: break;
290: }
291: ptr++;
2.14 frystyk 292: break;
293:
2.18 frystyk 294: case CHECK: /* Check against string */
295: while (TOLOWER(*ptr) == *(check_pointer)++) ptr++;
296: if (!*--check_pointer) {
297: state = ok_state;
298: while (*ptr && (WHITE(*ptr) || *ptr==':')) /* Spool to value */
299: ptr++;
300: } else
301: state = UNKNOWN;
2.14 frystyk 302: break;
303:
2.18 frystyk 304: case ALLOW:
2.20 frystyk 305: while ((value = HTNextField(&ptr)) != NULL) {
306: HTMethod new_method;
2.26 frystyk 307: /* We treat them as case-insensitive! */
2.20 frystyk 308: if ((new_method = HTMethod_enum(value)) != METHOD_INVALID)
309: anchor->methods += new_method;
2.1 timbl 310: }
2.18 frystyk 311: if (STREAM_TRACE)
312: fprintf(TDEST, "MIMEParser.. Methods allowed: %d\n",
313: anchor->methods);
314: state = JUNK_LINE;
2.1 timbl 315: break;
2.18 frystyk 316:
317: case AUTHENTICATE:
318: if ((value = HTNextField(&ptr)) != NULL) {
319: StrAllocCopy(request->WWWAAScheme, value);
2.20 frystyk 320:
321: /* The parsing is done in HTSSUtils.c for the moment */
322: if (*ptr) StrAllocCopy(request->WWWAARealm, ptr);
2.1 timbl 323: }
2.18 frystyk 324: state = JUNK_LINE;
325: break;
326:
327: case CONTENT_ENCODING:
328: if ((value = HTNextField(&ptr)) != NULL) {
329: char *lc = value;
2.20 frystyk 330: while ((*lc = TOLOWER(*lc))) lc++;
2.18 frystyk 331: anchor->content_encoding = HTAtom_for(value);
332: }
333: state = JUNK_LINE;
334: break;
335:
2.21 frystyk 336: case CONTENT_LANGUAGE: /* @@@ SHOULD BE A LIST @@@ */
337: if ((value = HTNextField(&ptr)) != NULL) {
338: char *lc = value;
339: while ((*lc = TOLOWER(*lc))) lc++;
340: anchor->content_language = HTAtom_for(value);
341: }
342: state = JUNK_LINE;
2.18 frystyk 343: break;
344:
345: case CONTENT_LENGTH:
346: if ((value = HTNextField(&ptr)) != NULL)
347: anchor->content_length = atol(value);
348: state = JUNK_LINE;
349: break;
350:
351: case CONTENT_TRANSFER_ENCODING:
352: if ((value = HTNextField(&ptr)) != NULL) {
353: char *lc = value;
2.20 frystyk 354: while ((*lc = TOLOWER(*lc))) lc++;
2.18 frystyk 355: anchor->cte = HTAtom_for(value);
356: }
357: state = JUNK_LINE;
358: break;
359:
360: case CONTENT_TYPE:
361: if ((value = HTNextField(&ptr)) != NULL) {
362: char *lc = value;
363: while ((*lc = TOLOWER(*lc))) lc++;
364: anchor->content_type = HTAtom_for(value);
2.20 frystyk 365: while ((value = HTNextField(&ptr)) != NULL) { /* Charset */
366: if (!strcasecomp(value, "charset")) {
367: if ((value = HTNextField(&ptr)) != NULL) {
368: lc = value;
369: while ((*lc = TOLOWER(*lc))) lc++;
370: anchor->charset = HTAtom_for(value);
371: }
372: } else if (!strcasecomp(value, "level")) { /* Level */
373: if ((value = HTNextField(&ptr)) != NULL) {
374: lc = value;
375: while ((*lc = TOLOWER(*lc))) lc++;
376: anchor->level = HTAtom_for(value);
377: }
378: }
379: }
2.1 timbl 380: }
2.20 frystyk 381: state = JUNK_LINE;
2.18 frystyk 382: break;
383:
2.23 frystyk 384: case MIME_DATE:
2.18 frystyk 385: anchor->date = HTParseTime(ptr);
386: state = JUNK_LINE;
387: break;
388:
389: case DERIVED_FROM:
390: if ((value = HTNextField(&ptr)) != NULL)
391: StrAllocCopy(anchor->derived_from, value);
392: state = JUNK_LINE;
393: break;
394:
395: case EXPIRES:
396: anchor->expires = HTParseTime(ptr);
397: state = JUNK_LINE;
398: break;
399:
400: case LAST_MODIFIED:
401: anchor->last_modified = HTParseTime(ptr);
402: state = JUNK_LINE;
403: break;
404:
405: case LINK:
2.20 frystyk 406: state = UNKNOWN; /* @@@@@@@@@@@ */
2.18 frystyk 407: break;
408:
409: case LOCATION:
2.31 ! frystyk 410: #if 0
! 411: /*
! 412: ** Doesn't work as a redirection header might contain a '='
! 413: ** Thanks to mitch@tam.net (Mitch DeShields)
! 414: */
2.18 frystyk 415: if ((value = HTNextField(&ptr)) != NULL)
416: StrAllocCopy(request->redirect, value);
2.31 ! frystyk 417: #endif
! 418: StrAllocCopy(request->redirect, ptr);
2.18 frystyk 419: state = JUNK_LINE;
420: break;
421:
422: case PUBLIC_METHODS:
2.20 frystyk 423: state = UNKNOWN; /* @@@@@@@@@@@ */
2.18 frystyk 424: break;
425:
426: case RETRY_AFTER:
2.19 frystyk 427: request->retry_after = HTParseTime(ptr);
428: state = JUNK_LINE;
2.18 frystyk 429: break;
430:
431: case TITLE: /* Can't reuse buffer as HTML version might differ */
432: if ((value = HTNextField(&ptr)) != NULL)
433: StrAllocCopy(anchor->title, value);
434: state = JUNK_LINE;
435: break;
436:
437: case URI_HEADER:
438: state = LOCATION; /* @@@ Need extended parsing */
439: break;
440:
441: case VERSION:
442: if ((value = HTNextField(&ptr)) != NULL)
443: StrAllocCopy(anchor->version, value);
444: state = JUNK_LINE;
445: break;
446:
447: case UNKNOWN:
448: if (STREAM_TRACE)
449: fprintf(TDEST,"MIMEParser.. Unknown header: `%s\'\n", header);
450: HTAnchor_addExtra(anchor, header);
451:
452: /* Fall through */
453:
454: case JUNK_LINE:
455: while (*ptr) ptr++;
456: state = BEGINNING_OF_LINE;
457: break;
2.1 timbl 458: }
2.18 frystyk 459: }
460:
2.27 frystyk 461: /*
462: ** If coming from cache then check if the document has expired. We can
463: ** either ignore this or attempt a reload
464: */
465: {
466: char *msg;
2.31 ! frystyk 467: HTExpiresMode expire_mode = HTCache_expiresMode(&msg);
2.27 frystyk 468: if (expire_mode != HT_EXPIRES_IGNORE) {
469: time_t cur = time(NULL);
470: if (anchor->expires>0 && cur>0 && anchor->expires<cur) {
471: if (expire_mode == HT_EXPIRES_NOTIFY)
2.28 frystyk 472: HTAlert(request, msg);
2.31 ! frystyk 473: else if (HTRequest_retry(request)) {
2.27 frystyk 474: if (PROT_TRACE)
475: fprintf(TDEST, "MIMEParser.. Expired - auto reload\n");
476: if (anchor->cacheHit) {
2.31 ! frystyk 477: HTRequest_addRqHd(request, HT_IMS);
! 478: HTRequest_setReloadMode(request, HT_FORCE_RELOAD);
2.27 frystyk 479: anchor->cacheHit = NO; /* Don't want to loop */
480: }
481: return HT_RELOAD;
482: }
483: }
484: }
485: }
486:
2.30 frystyk 487: if (anchor->content_type != WWW_UNKNOWN) {
2.18 frystyk 488: if (STREAM_TRACE)
2.30 frystyk 489: fprintf(TDEST, "MIMEParser.. Media type %s is converted to %s\n",
490: HTAtom_name(anchor->content_type),
491: HTAtom_name(me->target_format));
492: if ((me->target=HTStreamStack(anchor->content_type, me->target_format,
493: me->target, request, YES)) == NULL) {
494: if (STREAM_TRACE)
495: fprintf(TDEST, "MIMEParser.. Can't convert media type\n");
496: me->target = HTBlackHole();
497: }
2.18 frystyk 498: }
499: anchor->header_parsed = YES;
2.27 frystyk 500: me->transparent = YES; /* Pump rest of data right through */
501: return HT_OK;
2.1 timbl 502: }
503:
504:
2.18 frystyk 505: /*
506: ** Header is terminated by CRCR, LFLF, CRLFLF, CRLFCRLF
507: ** Folding is either of CF LWS, LF LWS, CRLF LWS
508: */
509: PRIVATE int HTMIME_put_block ARGS3(HTStream *, me, CONST char *, b, int, l)
510: {
511: while (!me->transparent && l-- > 0) {
512: if (me->EOLstate == EOL_FCR) {
2.27 frystyk 513: if (*b == CR) { /* End of header */
514: int status = parseheader(me, me->request, me->request->anchor);
2.31 ! frystyk 515: me->request->net->bytes_read = l;
2.27 frystyk 516: if (status != HT_OK)
517: return status;
518: } else if (*b == LF) /* CRLF */
2.18 frystyk 519: me->EOLstate = EOL_FLF;
520: else if (WHITE(*b)) { /* Folding: CR SP */
521: me->EOLstate = EOL_BEGIN;
522: HTChunkPutc(me->buffer, ' ');
523: } else { /* New line */
524: me->EOLstate = EOL_BEGIN;
525: HTChunkPutc(me->buffer, '\0');
526: HTChunkPutc(me->buffer, *b);
527: }
528: } else if (me->EOLstate == EOL_FLF) {
529: if (*b == CR) /* LF CR or CR LF CR */
530: me->EOLstate = EOL_SCR;
2.27 frystyk 531: else if (*b == LF) { /* End of header */
532: int status = parseheader(me, me->request, me->request->anchor);
2.31 ! frystyk 533: me->request->net->bytes_read = l;
2.27 frystyk 534: if (status != HT_OK)
535: return status;
536: } else if (WHITE(*b)) { /* Folding: LF SP or CR LF SP */
2.18 frystyk 537: me->EOLstate = EOL_BEGIN;
538: HTChunkPutc(me->buffer, ' ');
539: } else { /* New line */
540: me->EOLstate = EOL_BEGIN;
541: HTChunkPutc(me->buffer, '\0');
542: HTChunkPutc(me->buffer, *b);
543: }
544: } else if (me->EOLstate == EOL_SCR) {
2.27 frystyk 545: if (*b==CR || *b==LF) { /* End of header */
546: int status = parseheader(me, me->request, me->request->anchor);
2.31 ! frystyk 547: me->request->net->bytes_read = l;
2.27 frystyk 548: if (status != HT_OK)
549: return status;
550: } else if (WHITE(*b)) { /* Folding: LF CR SP or CR LF CR SP */
2.18 frystyk 551: me->EOLstate = EOL_BEGIN;
552: HTChunkPutc(me->buffer, ' ');
553: } else { /* New line */
554: me->EOLstate = EOL_BEGIN;
555: HTChunkPutc(me->buffer, '\0');
556: HTChunkPutc(me->buffer, *b);
557: }
558: } else if (*b == CR) {
559: me->EOLstate = EOL_FCR;
560: } else if (*b == LF) {
561: me->EOLstate = EOL_FLF; /* Line found */
562: } else
563: HTChunkPutc(me->buffer, *b);
564: b++;
565: }
2.30 frystyk 566: #if 0
2.26 frystyk 567: if (me->target) { /* Is the stream set up? */
568: if (l > 0) /* Anything left? */
569: return (*me->target->isa->put_block)(me->target, b, l);
570: return HT_OK;
571: }
572: return HT_WOULD_BLOCK;
2.30 frystyk 573: #endif
574: if (me->target && l > 0) /* Anything left? */
575: return (*me->target->isa->put_block)(me->target, b, l);
576: return HT_OK;
2.18 frystyk 577: }
578:
579:
580: /* Character handling
581: ** ------------------
582: */
2.21 frystyk 583: PRIVATE int HTMIME_put_character ARGS2(HTStream *, me, char, c)
2.18 frystyk 584: {
585: return HTMIME_put_block(me, &c, 1);
586: }
587:
2.1 timbl 588:
589: /* String handling
590: ** ---------------
591: */
2.18 frystyk 592: PRIVATE int HTMIME_put_string ARGS2(HTStream *, me, CONST char *, s)
2.1 timbl 593: {
2.18 frystyk 594: return HTMIME_put_block(me, s, (int) strlen(s));
2.1 timbl 595: }
596:
597:
2.18 frystyk 598: /* Flush an stream object
599: ** ---------------------
2.1 timbl 600: */
2.18 frystyk 601: PRIVATE int HTMIME_flush ARGS1(HTStream *, me)
2.1 timbl 602: {
2.18 frystyk 603: return (*me->target->isa->flush)(me->target);
2.1 timbl 604: }
605:
2.18 frystyk 606: /* Free a stream object
607: ** --------------------
2.1 timbl 608: */
2.14 frystyk 609: PRIVATE int HTMIME_free ARGS1(HTStream *, me)
2.1 timbl 610: {
2.18 frystyk 611: int status = HT_OK;
2.25 frystyk 612: if (me->target) {
613: if ((status = (*me->target->isa->_free)(me->target))==HT_WOULD_BLOCK)
614: return HT_WOULD_BLOCK;
615: }
2.26 frystyk 616: if (PROT_TRACE)
617: fprintf(TDEST, "MIME........ FREEING....\n");
2.19 frystyk 618: HTChunkFree(me->buffer);
2.1 timbl 619: free(me);
2.18 frystyk 620: return status;
2.1 timbl 621: }
622:
623: /* End writing
624: */
2.14 frystyk 625: PRIVATE int HTMIME_abort ARGS2(HTStream *, me, HTError, e)
2.1 timbl 626: {
2.18 frystyk 627: int status = HT_ERROR;
628: if (me->target)
629: status = (*me->target->isa->abort)(me->target, e);
2.26 frystyk 630: if (PROT_TRACE)
631: fprintf(TDEST, "MIME........ ABORTING...\n");
632: HTChunkFree(me->buffer);
2.6 timbl 633: free(me);
2.18 frystyk 634: return status;
2.1 timbl 635: }
636:
637:
638:
639: /* Structured Object Class
640: ** -----------------------
641: */
2.6 timbl 642: PRIVATE CONST HTStreamClass HTMIME =
2.1 timbl 643: {
644: "MIMEParser",
2.18 frystyk 645: HTMIME_flush,
2.1 timbl 646: HTMIME_free,
2.6 timbl 647: HTMIME_abort,
648: HTMIME_put_character,
649: HTMIME_put_string,
2.18 frystyk 650: HTMIME_put_block
2.1 timbl 651: };
652:
653:
654: /* Subclass-specific Methods
655: ** -------------------------
656: */
2.7 timbl 657: PUBLIC HTStream* HTMIMEConvert ARGS5(
658: HTRequest *, request,
659: void *, param,
660: HTFormat, input_format,
661: HTFormat, output_format,
662: HTStream *, output_stream)
2.1 timbl 663: {
664: HTStream* me;
2.18 frystyk 665: if ((me=(HTStream *) calloc(1, sizeof(* me))) == NULL)
666: outofmem(__FILE__, "HTMIMEConvert");
2.1 timbl 667: me->isa = &HTMIME;
2.18 frystyk 668: me->request = request;
669: me->target = output_stream;
670: me->target_format = output_format;
671: me->buffer = HTChunkCreate(512);
672: me->EOLstate = EOL_BEGIN;
2.1 timbl 673: return me;
674: }
Webmaster