Annotation of libwww/Library/src/HTMIME.c, revision 2.30
2.15 frystyk 1: /* HTMIME.c
2: ** MIME MESSAGE PARSE
3: **
2.22 frystyk 4: ** (c) COPYRIGHT MIT 1995.
2.15 frystyk 5: ** Please first read the full copyright statement in the file COPYRIGH.
2.1 timbl 6: **
7: ** This is RFC 1341-specific code.
8: ** The input stream pushed into this parser is assumed to be
9: ** stripped of CRs, i.e. lines end with LF, not CR LF.
10: ** (It is easy to change this except for the body part where
11: ** conversion can be slow.)
12: **
13: ** History:
14: ** Feb 92 Written Tim Berners-Lee, CERN
2.13 duns 15: ** 8 Jul 94 FM Insulate free() from _free structure element.
2.18 frystyk 16: ** 14 Mar 95 HFN Now using anchor for storing data. No more `\n',
17: ** static buffers etc.
2.1 timbl 18: */
2.17 frystyk 19:
20: /* Library include files */
21: #include "tcp.h"
22: #include "HTUtils.h"
23: #include "HTString.h"
2.9 luotonen 24: #include "HTFormat.h"
2.27 frystyk 25: #include "HTCache.h"
26: #include "HTAlert.h"
2.18 frystyk 27: #include "HTChunk.h"
2.26 frystyk 28: #include "HTMethod.h"
2.24 frystyk 29: #include "HTSocket.h"
2.17 frystyk 30: #include "HTFWrite.h"
2.14 frystyk 31: #include "HTMIME.h" /* Implemented here */
2.1 timbl 32:
33: /* MIME Object
34: ** -----------
35: */
/*
**	States of the header parser in parseheader(). The first group drives
**	the scanning itself, the second group narrows down field names that
**	share a prefix, and the last group has one state per recognised field.
*/
typedef enum _MIME_state {
    /* Scanning states */
    BEGINNING_OF_LINE = 0,          /* At the first character of a line */
    CHECK,                          /* Check against check_pointer */
    UNKNOWN,                        /* Unknown header */
    JUNK_LINE,                      /* Ignore rest of header */

    /* Intermediate states */
    CONTENT,                        /* Seen "Content-" */
    FIRSTLETTER_D,                  /* Seen 'D' */
    FIRSTLETTER_L,                  /* Seen 'L' */
    CONTENTLETTER_L,                /* Seen "Content-L" */
    CONTENTLETTER_T,                /* Seen "Content-T" */

    /* Headers supported */
    ALLOW,
    AUTHENTICATE,
    CONTENT_ENCODING,
    CONTENT_LANGUAGE,
    CONTENT_LENGTH,
    CONTENT_TRANSFER_ENCODING,
    CONTENT_TYPE,
    MIME_DATE,
    DERIVED_FROM,
    EXPIRES,
    LAST_MODIFIED,
    LINK,
    LOCATION,
    PUBLIC_METHODS,
    RETRY_AFTER,
    TITLE,
    URI_HEADER,
    VERSION
} MIME_state;
67:
68: struct _HTStream {
2.18 frystyk 69: CONST HTStreamClass * isa;
70: HTRequest * request;
71: HTStream * target;
72: HTFormat target_format;
73: HTChunk * buffer;
74: HTSocketEOL EOLstate;
75: BOOL transparent;
2.1 timbl 76: };
77:
2.18 frystyk 78: /* ------------------------------------------------------------------------- */
2.1 timbl 79:
2.18 frystyk 80: /*
2.1 timbl 81: ** This is a FSM parser which is tolerant as it can be of all
82: ** syntax errors. It ignores field names it does not understand,
83: ** and resynchronises on line beginnings.
84: */
2.27 frystyk 85: PRIVATE int parseheader ARGS3(HTStream *, me, HTRequest *, request,
86: HTParentAnchor *, anchor)
2.18 frystyk 87: {
88: MIME_state state = BEGINNING_OF_LINE;
89: MIME_state ok_state; /* got this state if match */
90: char *ptr = me->buffer->data-1; /* We dont change the data in length */
91: char *stop = ptr+me->buffer->size; /* When to stop */
92: char *header = ptr; /* For diagnostics */
93: CONST char * check_pointer; /* checking input */
94: char *value;
2.27 frystyk 95:
96: /* In case we get an empty header consisting of a CRLF, we fall thru */
2.18 frystyk 97: while (ptr < stop) {
98: switch (state) {
99: case BEGINNING_OF_LINE:
100: header = ++ptr;
101: switch (TOLOWER(*ptr)) {
102: case 'a':
103: check_pointer = "llow";
104: ok_state = ALLOW;
105: state = CHECK;
106: break;
107:
108: case 'c':
109: check_pointer = "ontent-";
110: ok_state = CONTENT;
111: state = CHECK;
112: break;
113:
114: case 'd':
115: state = FIRSTLETTER_D;
116: break;
117:
118: case 'e':
119: check_pointer = "xpires";
120: ok_state = EXPIRES;
121: state = CHECK;
122: break;
123:
124: case 'l':
125: state = FIRSTLETTER_L;
126: break;
127:
128: case 'm':
129: check_pointer = "ime-version";
130: ok_state = JUNK_LINE; /* We don't use this but recognize it */
131: state = CHECK;
132: break;
133:
134: case 'p':
135: break;
136:
137: case 'r':
138: check_pointer = "etry-after";
139: ok_state = RETRY_AFTER;
140: state = CHECK;
141: break;
142:
143: case 's':
144: check_pointer = "erver";
145: ok_state = JUNK_LINE; /* We don't use this but recognize it */
146: state = CHECK;
147: break;
2.1 timbl 148:
2.18 frystyk 149: case 't':
150: check_pointer = "itle";
151: ok_state = TITLE;
152: state = CHECK;
153: break;
154:
155: case 'u':
156: check_pointer = "ri";
157: ok_state = URI_HEADER;
158: state = CHECK;
159: break;
160:
161: case 'v':
162: check_pointer = "ersion";
163: ok_state = VERSION;
164: state = CHECK;
165: break;
166:
167: case 'w':
168: check_pointer = "ww-authenticate";
169: ok_state = AUTHENTICATE;
170: state = CHECK;
171: break;
2.1 timbl 172:
2.18 frystyk 173: default:
174: state = UNKNOWN;
175: break;
176: }
177: ptr++;
2.1 timbl 178: break;
179:
2.18 frystyk 180: case FIRSTLETTER_D:
181: switch (TOLOWER(*ptr)) {
182: case 'a':
183: check_pointer = "te";
2.23 frystyk 184: ok_state = MIME_DATE;
2.18 frystyk 185: state = CHECK;
186: break;
187:
188: case 'e':
189: check_pointer = "rived-from";
190: ok_state = DERIVED_FROM;
191: state = CHECK;
192: break;
193:
194: default:
195: state = UNKNOWN;
196: break;
197: }
198: ptr++;
199: break;
200:
201: case FIRSTLETTER_L:
202: switch (TOLOWER(*ptr)) {
203: case 'a':
204: check_pointer = "st-modified";
205: ok_state = LAST_MODIFIED;
206: state = CHECK;
207: break;
208:
209: case 'i':
210: check_pointer = "nk";
211: ok_state = LINK;
212: state = CHECK;
213: break;
214:
215: case 'o':
216: check_pointer = "cation";
217: ok_state = LOCATION;
218: state = CHECK;
219: break;
220:
221: default:
222: state = UNKNOWN;
223: break;
224: }
225: ptr++;
226: break;
227:
228: case CONTENT:
229: switch (TOLOWER(*ptr)) {
230: case 'e':
231: check_pointer = "ncoding";
232: ok_state = CONTENT_ENCODING;
233: state = CHECK;
234: break;
235:
236: case 'l':
237: state = CONTENTLETTER_L;
238: break;
239:
240: case 't':
241: state = CONTENTLETTER_T;
242: break;
243:
244: default:
245: state = UNKNOWN;
246: break;
247: }
248: ptr++;
2.1 timbl 249: break;
2.14 frystyk 250:
2.18 frystyk 251: case CONTENTLETTER_L:
252: switch (TOLOWER(*ptr)) {
253: case 'a':
254: check_pointer = "nguage";
255: ok_state = CONTENT_LANGUAGE;
256: state = CHECK;
257: break;
258:
259: case 'e':
260: check_pointer = "ngth";
261: ok_state = CONTENT_LENGTH;
262: state = CHECK;
263: break;
264:
265: default:
266: state = UNKNOWN;
267: break;
268: }
269: ptr++;
2.14 frystyk 270: break;
271:
2.18 frystyk 272: case CONTENTLETTER_T:
273: switch (TOLOWER(*ptr)) {
274: case 'r':
275: check_pointer = "ansfer-encoding";
276: ok_state = CONTENT_TRANSFER_ENCODING;
277: state = CHECK;
278: break;
279:
280: case 'y':
281: check_pointer = "pe";
282: ok_state = CONTENT_TYPE;
283: state = CHECK;
284: break;
285:
286: default:
287: state = UNKNOWN;
288: break;
289: }
290: ptr++;
2.14 frystyk 291: break;
292:
2.18 frystyk 293: case CHECK: /* Check against string */
294: while (TOLOWER(*ptr) == *(check_pointer)++) ptr++;
295: if (!*--check_pointer) {
296: state = ok_state;
297: while (*ptr && (WHITE(*ptr) || *ptr==':')) /* Spool to value */
298: ptr++;
299: } else
300: state = UNKNOWN;
2.14 frystyk 301: break;
302:
2.18 frystyk 303: case ALLOW:
2.20 frystyk 304: while ((value = HTNextField(&ptr)) != NULL) {
305: HTMethod new_method;
2.26 frystyk 306: /* We treat them as case-insensitive! */
2.20 frystyk 307: if ((new_method = HTMethod_enum(value)) != METHOD_INVALID)
308: anchor->methods += new_method;
2.1 timbl 309: }
2.18 frystyk 310: if (STREAM_TRACE)
311: fprintf(TDEST, "MIMEParser.. Methods allowed: %d\n",
312: anchor->methods);
313: state = JUNK_LINE;
2.1 timbl 314: break;
2.18 frystyk 315:
316: case AUTHENTICATE:
317: if ((value = HTNextField(&ptr)) != NULL) {
318: StrAllocCopy(request->WWWAAScheme, value);
2.20 frystyk 319:
320: /* The parsing is done in HTSSUtils.c for the moment */
321: if (*ptr) StrAllocCopy(request->WWWAARealm, ptr);
2.1 timbl 322: }
2.18 frystyk 323: state = JUNK_LINE;
324: break;
325:
326: case CONTENT_ENCODING:
327: if ((value = HTNextField(&ptr)) != NULL) {
328: char *lc = value;
2.20 frystyk 329: while ((*lc = TOLOWER(*lc))) lc++;
2.18 frystyk 330: anchor->content_encoding = HTAtom_for(value);
331: }
332: state = JUNK_LINE;
333: break;
334:
2.21 frystyk 335: case CONTENT_LANGUAGE: /* @@@ SHOULD BE A LIST @@@ */
336: if ((value = HTNextField(&ptr)) != NULL) {
337: char *lc = value;
338: while ((*lc = TOLOWER(*lc))) lc++;
339: anchor->content_language = HTAtom_for(value);
340: }
341: state = JUNK_LINE;
2.18 frystyk 342: break;
343:
344: case CONTENT_LENGTH:
345: if ((value = HTNextField(&ptr)) != NULL)
346: anchor->content_length = atol(value);
347: state = JUNK_LINE;
348: break;
349:
350: case CONTENT_TRANSFER_ENCODING:
351: if ((value = HTNextField(&ptr)) != NULL) {
352: char *lc = value;
2.20 frystyk 353: while ((*lc = TOLOWER(*lc))) lc++;
2.18 frystyk 354: anchor->cte = HTAtom_for(value);
355: }
356: state = JUNK_LINE;
357: break;
358:
359: case CONTENT_TYPE:
360: if ((value = HTNextField(&ptr)) != NULL) {
361: char *lc = value;
362: while ((*lc = TOLOWER(*lc))) lc++;
363: anchor->content_type = HTAtom_for(value);
2.20 frystyk 364: while ((value = HTNextField(&ptr)) != NULL) { /* Charset */
365: if (!strcasecomp(value, "charset")) {
366: if ((value = HTNextField(&ptr)) != NULL) {
367: lc = value;
368: while ((*lc = TOLOWER(*lc))) lc++;
369: anchor->charset = HTAtom_for(value);
370: }
371: } else if (!strcasecomp(value, "level")) { /* Level */
372: if ((value = HTNextField(&ptr)) != NULL) {
373: lc = value;
374: while ((*lc = TOLOWER(*lc))) lc++;
375: anchor->level = HTAtom_for(value);
376: }
377: }
378: }
2.1 timbl 379: }
2.20 frystyk 380: state = JUNK_LINE;
2.18 frystyk 381: break;
382:
2.23 frystyk 383: case MIME_DATE:
2.18 frystyk 384: anchor->date = HTParseTime(ptr);
385: state = JUNK_LINE;
386: break;
387:
388: case DERIVED_FROM:
389: if ((value = HTNextField(&ptr)) != NULL)
390: StrAllocCopy(anchor->derived_from, value);
391: state = JUNK_LINE;
392: break;
393:
394: case EXPIRES:
395: anchor->expires = HTParseTime(ptr);
396: state = JUNK_LINE;
397: break;
398:
399: case LAST_MODIFIED:
400: anchor->last_modified = HTParseTime(ptr);
401: state = JUNK_LINE;
402: break;
403:
404: case LINK:
2.20 frystyk 405: state = UNKNOWN; /* @@@@@@@@@@@ */
2.18 frystyk 406: break;
407:
408: case LOCATION:
409: if ((value = HTNextField(&ptr)) != NULL)
410: StrAllocCopy(request->redirect, value);
411: state = JUNK_LINE;
412: break;
413:
414: case PUBLIC_METHODS:
2.20 frystyk 415: state = UNKNOWN; /* @@@@@@@@@@@ */
2.18 frystyk 416: break;
417:
418: case RETRY_AFTER:
2.19 frystyk 419: request->retry_after = HTParseTime(ptr);
420: state = JUNK_LINE;
2.18 frystyk 421: break;
422:
423: case TITLE: /* Can't reuse buffer as HTML version might differ */
424: if ((value = HTNextField(&ptr)) != NULL)
425: StrAllocCopy(anchor->title, value);
426: state = JUNK_LINE;
427: break;
428:
429: case URI_HEADER:
430: state = LOCATION; /* @@@ Need extended parsing */
431: break;
432:
433: case VERSION:
434: if ((value = HTNextField(&ptr)) != NULL)
435: StrAllocCopy(anchor->version, value);
436: state = JUNK_LINE;
437: break;
438:
439: case UNKNOWN:
440: if (STREAM_TRACE)
441: fprintf(TDEST,"MIMEParser.. Unknown header: `%s\'\n", header);
442: HTAnchor_addExtra(anchor, header);
443:
444: /* Fall through */
445:
446: case JUNK_LINE:
447: while (*ptr) ptr++;
448: state = BEGINNING_OF_LINE;
449: break;
2.1 timbl 450: }
2.18 frystyk 451: }
452:
2.27 frystyk 453: /*
454: ** If coming from cache then check if the document has expired. We can
455: ** either ignore this or attempt a reload
456: */
457: {
458: char *msg;
459: HTExpiresMode expire_mode = HTAccess_expiresMode(&msg);
460: if (expire_mode != HT_EXPIRES_IGNORE) {
461: time_t cur = time(NULL);
462: if (anchor->expires>0 && cur>0 && anchor->expires<cur) {
463: if (expire_mode == HT_EXPIRES_NOTIFY)
2.28 frystyk 464: HTAlert(request, msg);
2.27 frystyk 465: else if (request->reloads < HTAccess_maxReload()-1) {
466: if (PROT_TRACE)
467: fprintf(TDEST, "MIMEParser.. Expired - auto reload\n");
468: if (anchor->cacheHit) {
469: request->RequestMask |= HT_IMS;
470: request->reload = HT_FORCE_RELOAD;
471: anchor->cacheHit = NO; /* Don't want to loop */
472: }
473: return HT_RELOAD;
474: }
475: }
476: }
477: }
478:
2.30 ! frystyk 479: if (anchor->content_type != WWW_UNKNOWN) {
2.18 frystyk 480: if (STREAM_TRACE)
2.30 ! frystyk 481: fprintf(TDEST, "MIMEParser.. Media type %s is converted to %s\n",
! 482: HTAtom_name(anchor->content_type),
! 483: HTAtom_name(me->target_format));
! 484: if ((me->target=HTStreamStack(anchor->content_type, me->target_format,
! 485: me->target, request, YES)) == NULL) {
! 486: if (STREAM_TRACE)
! 487: fprintf(TDEST, "MIMEParser.. Can't convert media type\n");
! 488: me->target = HTBlackHole();
! 489: }
2.18 frystyk 490: }
491: anchor->header_parsed = YES;
2.27 frystyk 492: me->transparent = YES; /* Pump rest of data right through */
493: return HT_OK;
2.1 timbl 494: }
495:
496:
2.18 frystyk 497: /*
498: ** Header is terminated by CRCR, LFLF, CRLFLF, CRLFCRLF
499: ** Folding is either of CF LWS, LF LWS, CRLF LWS
500: */
501: PRIVATE int HTMIME_put_block ARGS3(HTStream *, me, CONST char *, b, int, l)
502: {
503: while (!me->transparent && l-- > 0) {
504: if (me->EOLstate == EOL_FCR) {
2.27 frystyk 505: if (*b == CR) { /* End of header */
506: int status = parseheader(me, me->request, me->request->anchor);
2.29 frystyk 507: me->request->net_info->bytes_read = l;
2.27 frystyk 508: if (status != HT_OK)
509: return status;
510: } else if (*b == LF) /* CRLF */
2.18 frystyk 511: me->EOLstate = EOL_FLF;
512: else if (WHITE(*b)) { /* Folding: CR SP */
513: me->EOLstate = EOL_BEGIN;
514: HTChunkPutc(me->buffer, ' ');
515: } else { /* New line */
516: me->EOLstate = EOL_BEGIN;
517: HTChunkPutc(me->buffer, '\0');
518: HTChunkPutc(me->buffer, *b);
519: }
520: } else if (me->EOLstate == EOL_FLF) {
521: if (*b == CR) /* LF CR or CR LF CR */
522: me->EOLstate = EOL_SCR;
2.27 frystyk 523: else if (*b == LF) { /* End of header */
524: int status = parseheader(me, me->request, me->request->anchor);
2.29 frystyk 525: me->request->net_info->bytes_read = l;
2.27 frystyk 526: if (status != HT_OK)
527: return status;
528: } else if (WHITE(*b)) { /* Folding: LF SP or CR LF SP */
2.18 frystyk 529: me->EOLstate = EOL_BEGIN;
530: HTChunkPutc(me->buffer, ' ');
531: } else { /* New line */
532: me->EOLstate = EOL_BEGIN;
533: HTChunkPutc(me->buffer, '\0');
534: HTChunkPutc(me->buffer, *b);
535: }
536: } else if (me->EOLstate == EOL_SCR) {
2.27 frystyk 537: if (*b==CR || *b==LF) { /* End of header */
538: int status = parseheader(me, me->request, me->request->anchor);
2.29 frystyk 539: me->request->net_info->bytes_read = l;
2.27 frystyk 540: if (status != HT_OK)
541: return status;
542: } else if (WHITE(*b)) { /* Folding: LF CR SP or CR LF CR SP */
2.18 frystyk 543: me->EOLstate = EOL_BEGIN;
544: HTChunkPutc(me->buffer, ' ');
545: } else { /* New line */
546: me->EOLstate = EOL_BEGIN;
547: HTChunkPutc(me->buffer, '\0');
548: HTChunkPutc(me->buffer, *b);
549: }
550: } else if (*b == CR) {
551: me->EOLstate = EOL_FCR;
552: } else if (*b == LF) {
553: me->EOLstate = EOL_FLF; /* Line found */
554: } else
555: HTChunkPutc(me->buffer, *b);
556: b++;
557: }
2.30 ! frystyk 558: #if 0
2.26 frystyk 559: if (me->target) { /* Is the stream set up? */
560: if (l > 0) /* Anything left? */
561: return (*me->target->isa->put_block)(me->target, b, l);
562: return HT_OK;
563: }
564: return HT_WOULD_BLOCK;
2.30 ! frystyk 565: #endif
! 566: if (me->target && l > 0) /* Anything left? */
! 567: return (*me->target->isa->put_block)(me->target, b, l);
! 568: return HT_OK;
2.18 frystyk 569: }
570:
571:
572: /* Character handling
573: ** ------------------
574: */
2.21 frystyk 575: PRIVATE int HTMIME_put_character ARGS2(HTStream *, me, char, c)
2.18 frystyk 576: {
577: return HTMIME_put_block(me, &c, 1);
578: }
579:
2.1 timbl 580:
581: /* String handling
582: ** ---------------
583: */
2.18 frystyk 584: PRIVATE int HTMIME_put_string ARGS2(HTStream *, me, CONST char *, s)
2.1 timbl 585: {
2.18 frystyk 586: return HTMIME_put_block(me, s, (int) strlen(s));
2.1 timbl 587: }
588:
589:
2.18 frystyk 590: /* Flush an stream object
591: ** ---------------------
2.1 timbl 592: */
2.18 frystyk 593: PRIVATE int HTMIME_flush ARGS1(HTStream *, me)
2.1 timbl 594: {
2.18 frystyk 595: return (*me->target->isa->flush)(me->target);
2.1 timbl 596: }
597:
2.18 frystyk 598: /* Free a stream object
599: ** --------------------
2.1 timbl 600: */
2.14 frystyk 601: PRIVATE int HTMIME_free ARGS1(HTStream *, me)
2.1 timbl 602: {
2.18 frystyk 603: int status = HT_OK;
2.25 frystyk 604: if (me->target) {
605: if ((status = (*me->target->isa->_free)(me->target))==HT_WOULD_BLOCK)
606: return HT_WOULD_BLOCK;
607: }
2.26 frystyk 608: if (PROT_TRACE)
609: fprintf(TDEST, "MIME........ FREEING....\n");
2.19 frystyk 610: HTChunkFree(me->buffer);
2.1 timbl 611: free(me);
2.18 frystyk 612: return status;
2.1 timbl 613: }
614:
615: /* End writing
616: */
2.14 frystyk 617: PRIVATE int HTMIME_abort ARGS2(HTStream *, me, HTError, e)
2.1 timbl 618: {
2.18 frystyk 619: int status = HT_ERROR;
620: if (me->target)
621: status = (*me->target->isa->abort)(me->target, e);
2.26 frystyk 622: if (PROT_TRACE)
623: fprintf(TDEST, "MIME........ ABORTING...\n");
624: HTChunkFree(me->buffer);
2.6 timbl 625: free(me);
2.18 frystyk 626: return status;
2.1 timbl 627: }
628:
629:
630:
631: /* Structured Object Class
632: ** -----------------------
633: */
2.6 timbl 634: PRIVATE CONST HTStreamClass HTMIME =
2.1 timbl 635: {
636: "MIMEParser",
2.18 frystyk 637: HTMIME_flush,
2.1 timbl 638: HTMIME_free,
2.6 timbl 639: HTMIME_abort,
640: HTMIME_put_character,
641: HTMIME_put_string,
2.18 frystyk 642: HTMIME_put_block
2.1 timbl 643: };
644:
645:
646: /* Subclass-specific Methods
647: ** -------------------------
648: */
2.7 timbl 649: PUBLIC HTStream* HTMIMEConvert ARGS5(
650: HTRequest *, request,
651: void *, param,
652: HTFormat, input_format,
653: HTFormat, output_format,
654: HTStream *, output_stream)
2.1 timbl 655: {
656: HTStream* me;
2.18 frystyk 657: if ((me=(HTStream *) calloc(1, sizeof(* me))) == NULL)
658: outofmem(__FILE__, "HTMIMEConvert");
2.1 timbl 659: me->isa = &HTMIME;
2.18 frystyk 660: me->request = request;
661: me->target = output_stream;
662: me->target_format = output_format;
663: me->buffer = HTChunkCreate(512);
664: me->EOLstate = EOL_BEGIN;
2.1 timbl 665: return me;
666: }
Webmaster