Annotation of libwww/Library/src/HTMIME.c, revision 2.27
2.15 frystyk 1: /* HTMIME.c
2: ** MIME MESSAGE PARSE
3: **
2.22 frystyk 4: ** (c) COPYRIGHT MIT 1995.
2.15 frystyk 5: ** Please first read the full copyright statement in the file COPYRIGH.
2.1 timbl 6: **
7: ** This is RFC 1341-specific code.
8: ** The input stream pushed into this parser is assumed to be
9: ** stripped of CRs, i.e. lines end with LF, not CR LF.
10: ** (It is easy to change this except for the body part where
11: ** conversion can be slow.)
12: **
13: ** History:
14: ** Feb 92 Written Tim Berners-Lee, CERN
2.13 duns 15: ** 8 Jul 94 FM Insulate free() from _free structure element.
2.18 frystyk 16: ** 14 Mar 95 HFN Now using anchor for storing data. No more `\n',
17: ** static buffers etc.
2.1 timbl 18: */
2.17 frystyk 19:
20: /* Library include files */
21: #include "tcp.h"
22: #include "HTUtils.h"
23: #include "HTString.h"
2.9 luotonen 24: #include "HTFormat.h"
2.27 ! frystyk 25: #include "HTCache.h"
! 26: #include "HTAlert.h"
2.18 frystyk 27: #include "HTChunk.h"
2.26 frystyk 28: #include "HTMethod.h"
2.24 frystyk 29: #include "HTSocket.h"
2.17 frystyk 30: #include "HTFWrite.h"
2.14 frystyk 31: #include "HTMIME.h" /* Implemented here */
2.1 timbl 32:
33: /* MIME Object
34: ** -----------
35: */
/*
**  States of the header-field recogniser used by parseheader().
**  The enumerators are numbered consecutively starting at 0.
*/
typedef enum _MIME_state {
    /* Generic scanner states */
    BEGINNING_OF_LINE = 0,      /* About to look at the first letter of a field */
    CHECK,                      /* Compare the input against check_pointer */
    UNKNOWN,                    /* Field name was not recognised */
    JUNK_LINE,                  /* Skip whatever is left of this line */

    /* Intermediate states: the field name is still ambiguous */
    CONTENT,                    /* Matched the common "content-" prefix */
    FIRSTLETTER_D,              /* Field name starts with 'd' */
    FIRSTLETTER_L,              /* Field name starts with 'l' */
    CONTENTLETTER_L,            /* "content-l..." */
    CONTENTLETTER_T,            /* "content-t..." */

    /* One state per supported header field */
    ALLOW,
    AUTHENTICATE,
    CONTENT_ENCODING,
    CONTENT_LANGUAGE,
    CONTENT_LENGTH,
    CONTENT_TRANSFER_ENCODING,
    CONTENT_TYPE,
    MIME_DATE,
    DERIVED_FROM,
    EXPIRES,
    LAST_MODIFIED,
    LINK,
    LOCATION,
    PUBLIC_METHODS,
    RETRY_AFTER,
    TITLE,
    URI_HEADER,
    VERSION
} MIME_state;
67:
68: struct _HTStream {
2.18 frystyk 69: CONST HTStreamClass * isa;
70: HTRequest * request;
71: HTStream * target;
72: HTFormat target_format;
73: HTChunk * buffer;
74: HTSocketEOL EOLstate;
75: BOOL transparent;
2.1 timbl 76: };
77:
2.18 frystyk 78: /* ------------------------------------------------------------------------- */
2.1 timbl 79:
2.18 frystyk 80: /*
2.1 timbl 81: ** This is a FSM parser which is tolerant as it can be of all
82: ** syntax errors. It ignores field names it does not understand,
83: ** and resynchronises on line beginnings.
84: */
2.27 ! frystyk 85: PRIVATE int parseheader ARGS3(HTStream *, me, HTRequest *, request,
! 86: HTParentAnchor *, anchor)
2.18 frystyk 87: {
88: MIME_state state = BEGINNING_OF_LINE;
89: MIME_state ok_state; /* got this state if match */
90: char *ptr = me->buffer->data-1; /* We dont change the data in length */
91: char *stop = ptr+me->buffer->size; /* When to stop */
92: char *header = ptr; /* For diagnostics */
93: CONST char * check_pointer; /* checking input */
94: char *value;
2.27 ! frystyk 95:
! 96: /* In case we get an empty header consisting of a CRLF, we fall thru */
2.18 frystyk 97: while (ptr < stop) {
98: switch (state) {
99: case BEGINNING_OF_LINE:
100: header = ++ptr;
101: switch (TOLOWER(*ptr)) {
102: case 'a':
103: check_pointer = "llow";
104: ok_state = ALLOW;
105: state = CHECK;
106: break;
107:
108: case 'c':
109: check_pointer = "ontent-";
110: ok_state = CONTENT;
111: state = CHECK;
112: break;
113:
114: case 'd':
115: state = FIRSTLETTER_D;
116: break;
117:
118: case 'e':
119: check_pointer = "xpires";
120: ok_state = EXPIRES;
121: state = CHECK;
122: break;
123:
124: case 'l':
125: state = FIRSTLETTER_L;
126: break;
127:
128: case 'm':
129: check_pointer = "ime-version";
130: ok_state = JUNK_LINE; /* We don't use this but recognize it */
131: state = CHECK;
132: break;
133:
134: case 'p':
135: break;
136:
137: case 'r':
138: check_pointer = "etry-after";
139: ok_state = RETRY_AFTER;
140: state = CHECK;
141: break;
142:
143: case 's':
144: check_pointer = "erver";
145: ok_state = JUNK_LINE; /* We don't use this but recognize it */
146: state = CHECK;
147: break;
2.1 timbl 148:
2.18 frystyk 149: case 't':
150: check_pointer = "itle";
151: ok_state = TITLE;
152: state = CHECK;
153: break;
154:
155: case 'u':
156: check_pointer = "ri";
157: ok_state = URI_HEADER;
158: state = CHECK;
159: break;
160:
161: case 'v':
162: check_pointer = "ersion";
163: ok_state = VERSION;
164: state = CHECK;
165: break;
166:
167: case 'w':
168: check_pointer = "ww-authenticate";
169: ok_state = AUTHENTICATE;
170: state = CHECK;
171: break;
2.1 timbl 172:
2.18 frystyk 173: default:
174: state = UNKNOWN;
175: break;
176: }
177: ptr++;
2.1 timbl 178: break;
179:
2.18 frystyk 180: case FIRSTLETTER_D:
181: switch (TOLOWER(*ptr)) {
182: case 'a':
183: check_pointer = "te";
2.23 frystyk 184: ok_state = MIME_DATE;
2.18 frystyk 185: state = CHECK;
186: break;
187:
188: case 'e':
189: check_pointer = "rived-from";
190: ok_state = DERIVED_FROM;
191: state = CHECK;
192: break;
193:
194: default:
195: state = UNKNOWN;
196: break;
197: }
198: ptr++;
199: break;
200:
201: case FIRSTLETTER_L:
202: switch (TOLOWER(*ptr)) {
203: case 'a':
204: check_pointer = "st-modified";
205: ok_state = LAST_MODIFIED;
206: state = CHECK;
207: break;
208:
209: case 'i':
210: check_pointer = "nk";
211: ok_state = LINK;
212: state = CHECK;
213: break;
214:
215: case 'o':
216: check_pointer = "cation";
217: ok_state = LOCATION;
218: state = CHECK;
219: break;
220:
221: default:
222: state = UNKNOWN;
223: break;
224: }
225: ptr++;
226: break;
227:
228: case CONTENT:
229: switch (TOLOWER(*ptr)) {
230: case 'e':
231: check_pointer = "ncoding";
232: ok_state = CONTENT_ENCODING;
233: state = CHECK;
234: break;
235:
236: case 'l':
237: state = CONTENTLETTER_L;
238: break;
239:
240: case 't':
241: state = CONTENTLETTER_T;
242: break;
243:
244: default:
245: state = UNKNOWN;
246: break;
247: }
248: ptr++;
2.1 timbl 249: break;
2.14 frystyk 250:
2.18 frystyk 251: case CONTENTLETTER_L:
252: switch (TOLOWER(*ptr)) {
253: case 'a':
254: check_pointer = "nguage";
255: ok_state = CONTENT_LANGUAGE;
256: state = CHECK;
257: break;
258:
259: case 'e':
260: check_pointer = "ngth";
261: ok_state = CONTENT_LENGTH;
262: state = CHECK;
263: break;
264:
265: default:
266: state = UNKNOWN;
267: break;
268: }
269: ptr++;
2.14 frystyk 270: break;
271:
2.18 frystyk 272: case CONTENTLETTER_T:
273: switch (TOLOWER(*ptr)) {
274: case 'r':
275: check_pointer = "ansfer-encoding";
276: ok_state = CONTENT_TRANSFER_ENCODING;
277: state = CHECK;
278: break;
279:
280: case 'y':
281: check_pointer = "pe";
282: ok_state = CONTENT_TYPE;
283: state = CHECK;
284: break;
285:
286: default:
287: state = UNKNOWN;
288: break;
289: }
290: ptr++;
2.14 frystyk 291: break;
292:
2.18 frystyk 293: case CHECK: /* Check against string */
294: while (TOLOWER(*ptr) == *(check_pointer)++) ptr++;
295: if (!*--check_pointer) {
296: state = ok_state;
297: while (*ptr && (WHITE(*ptr) || *ptr==':')) /* Spool to value */
298: ptr++;
299: } else
300: state = UNKNOWN;
2.14 frystyk 301: break;
302:
2.18 frystyk 303: case ALLOW:
2.20 frystyk 304: while ((value = HTNextField(&ptr)) != NULL) {
305: HTMethod new_method;
2.26 frystyk 306: /* We treat them as case-insensitive! */
2.20 frystyk 307: if ((new_method = HTMethod_enum(value)) != METHOD_INVALID)
308: anchor->methods += new_method;
2.1 timbl 309: }
2.18 frystyk 310: if (STREAM_TRACE)
311: fprintf(TDEST, "MIMEParser.. Methods allowed: %d\n",
312: anchor->methods);
313: state = JUNK_LINE;
2.1 timbl 314: break;
2.18 frystyk 315:
316: case AUTHENTICATE:
317: if ((value = HTNextField(&ptr)) != NULL) {
318: StrAllocCopy(request->WWWAAScheme, value);
2.20 frystyk 319:
320: /* The parsing is done in HTSSUtils.c for the moment */
321: if (*ptr) StrAllocCopy(request->WWWAARealm, ptr);
2.1 timbl 322: }
2.18 frystyk 323: state = JUNK_LINE;
324: break;
325:
326: case CONTENT_ENCODING:
327: if ((value = HTNextField(&ptr)) != NULL) {
328: char *lc = value;
2.20 frystyk 329: while ((*lc = TOLOWER(*lc))) lc++;
2.18 frystyk 330: anchor->content_encoding = HTAtom_for(value);
331: }
332: state = JUNK_LINE;
333: break;
334:
2.21 frystyk 335: case CONTENT_LANGUAGE: /* @@@ SHOULD BE A LIST @@@ */
336: if ((value = HTNextField(&ptr)) != NULL) {
337: char *lc = value;
338: while ((*lc = TOLOWER(*lc))) lc++;
339: anchor->content_language = HTAtom_for(value);
340: }
341: state = JUNK_LINE;
2.18 frystyk 342: break;
343:
344: case CONTENT_LENGTH:
345: if ((value = HTNextField(&ptr)) != NULL)
346: anchor->content_length = atol(value);
347: state = JUNK_LINE;
348: break;
349:
350: case CONTENT_TRANSFER_ENCODING:
351: if ((value = HTNextField(&ptr)) != NULL) {
352: char *lc = value;
2.20 frystyk 353: while ((*lc = TOLOWER(*lc))) lc++;
2.18 frystyk 354: anchor->cte = HTAtom_for(value);
355: }
356: state = JUNK_LINE;
357: break;
358:
359: case CONTENT_TYPE:
360: if ((value = HTNextField(&ptr)) != NULL) {
361: char *lc = value;
362: while ((*lc = TOLOWER(*lc))) lc++;
363: anchor->content_type = HTAtom_for(value);
2.20 frystyk 364: while ((value = HTNextField(&ptr)) != NULL) { /* Charset */
365: if (!strcasecomp(value, "charset")) {
366: if ((value = HTNextField(&ptr)) != NULL) {
367: lc = value;
368: while ((*lc = TOLOWER(*lc))) lc++;
369: anchor->charset = HTAtom_for(value);
370: }
371: } else if (!strcasecomp(value, "level")) { /* Level */
372: if ((value = HTNextField(&ptr)) != NULL) {
373: lc = value;
374: while ((*lc = TOLOWER(*lc))) lc++;
375: anchor->level = HTAtom_for(value);
376: }
377: }
378: }
2.1 timbl 379: }
2.20 frystyk 380: state = JUNK_LINE;
2.18 frystyk 381: break;
382:
2.23 frystyk 383: case MIME_DATE:
2.18 frystyk 384: anchor->date = HTParseTime(ptr);
385: state = JUNK_LINE;
386: break;
387:
388: case DERIVED_FROM:
389: if ((value = HTNextField(&ptr)) != NULL)
390: StrAllocCopy(anchor->derived_from, value);
391: state = JUNK_LINE;
392: break;
393:
394: case EXPIRES:
395: anchor->expires = HTParseTime(ptr);
396: state = JUNK_LINE;
397: break;
398:
399: case LAST_MODIFIED:
400: anchor->last_modified = HTParseTime(ptr);
401: state = JUNK_LINE;
402: break;
403:
404: case LINK:
2.20 frystyk 405: state = UNKNOWN; /* @@@@@@@@@@@ */
2.18 frystyk 406: break;
407:
408: case LOCATION:
409: if ((value = HTNextField(&ptr)) != NULL)
410: StrAllocCopy(request->redirect, value);
411: state = JUNK_LINE;
412: break;
413:
414: case PUBLIC_METHODS:
2.20 frystyk 415: state = UNKNOWN; /* @@@@@@@@@@@ */
2.18 frystyk 416: break;
417:
418: case RETRY_AFTER:
2.19 frystyk 419: request->retry_after = HTParseTime(ptr);
420: state = JUNK_LINE;
2.18 frystyk 421: break;
422:
423: case TITLE: /* Can't reuse buffer as HTML version might differ */
424: if ((value = HTNextField(&ptr)) != NULL)
425: StrAllocCopy(anchor->title, value);
426: state = JUNK_LINE;
427: break;
428:
429: case URI_HEADER:
430: state = LOCATION; /* @@@ Need extended parsing */
431: break;
432:
433: case VERSION:
434: if ((value = HTNextField(&ptr)) != NULL)
435: StrAllocCopy(anchor->version, value);
436: state = JUNK_LINE;
437: break;
438:
439: case UNKNOWN:
440: if (STREAM_TRACE)
441: fprintf(TDEST,"MIMEParser.. Unknown header: `%s\'\n", header);
442: HTAnchor_addExtra(anchor, header);
443:
444: /* Fall through */
445:
446: case JUNK_LINE:
447: while (*ptr) ptr++;
448: state = BEGINNING_OF_LINE;
449: break;
2.1 timbl 450: }
2.18 frystyk 451: }
452:
2.27 ! frystyk 453: /*
! 454: ** If coming from cache then check if the document has expired. We can
! 455: ** either ignore this or attempt a reload
! 456: */
! 457: {
! 458: char *msg;
! 459: HTExpiresMode expire_mode = HTAccess_expiresMode(&msg);
! 460: if (expire_mode != HT_EXPIRES_IGNORE) {
! 461: time_t cur = time(NULL);
! 462: if (anchor->expires>0 && cur>0 && anchor->expires<cur) {
! 463: if (expire_mode == HT_EXPIRES_NOTIFY)
! 464: HTAlert(msg);
! 465: else if (request->reloads < HTAccess_maxReload()-1) {
! 466: if (PROT_TRACE)
! 467: fprintf(TDEST, "MIMEParser.. Expired - auto reload\n");
! 468: if (anchor->cacheHit) {
! 469: request->RequestMask |= HT_IMS;
! 470: request->reload = HT_FORCE_RELOAD;
! 471: anchor->cacheHit = NO; /* Don't want to loop */
! 472: }
! 473: return HT_RELOAD;
! 474: }
! 475: }
! 476: }
! 477: }
! 478:
2.18 frystyk 479: if (STREAM_TRACE)
480: fprintf(TDEST, "MIMEParser.. Media type %s is converted to %s\n",
481: HTAtom_name(anchor->content_type),
482: HTAtom_name(me->target_format));
483: if ((me->target = HTStreamStack(anchor->content_type,
484: me->target_format, me->target,
485: me->request, YES)) == NULL) {
486: if (STREAM_TRACE)
487: fprintf(TDEST, "MIMEParser.. Can't convert media type\n");
488: me->target = HTBlackHole();
489: }
490: anchor->header_parsed = YES;
2.27 ! frystyk 491: me->transparent = YES; /* Pump rest of data right through */
! 492: return HT_OK;
2.1 timbl 493: }
494:
495:
2.18 frystyk 496: /*
497: ** Header is terminated by CRCR, LFLF, CRLFLF, CRLFCRLF
498: ** Folding is either of CF LWS, LF LWS, CRLF LWS
499: */
500: PRIVATE int HTMIME_put_block ARGS3(HTStream *, me, CONST char *, b, int, l)
501: {
502: while (!me->transparent && l-- > 0) {
503: if (me->EOLstate == EOL_FCR) {
2.27 ! frystyk 504: if (*b == CR) { /* End of header */
! 505: int status = parseheader(me, me->request, me->request->anchor);
! 506: if (status != HT_OK)
! 507: return status;
! 508: } else if (*b == LF) /* CRLF */
2.18 frystyk 509: me->EOLstate = EOL_FLF;
510: else if (WHITE(*b)) { /* Folding: CR SP */
511: me->EOLstate = EOL_BEGIN;
512: HTChunkPutc(me->buffer, ' ');
513: } else { /* New line */
514: me->EOLstate = EOL_BEGIN;
515: HTChunkPutc(me->buffer, '\0');
516: HTChunkPutc(me->buffer, *b);
517: }
518: } else if (me->EOLstate == EOL_FLF) {
519: if (*b == CR) /* LF CR or CR LF CR */
520: me->EOLstate = EOL_SCR;
2.27 ! frystyk 521: else if (*b == LF) { /* End of header */
! 522: int status = parseheader(me, me->request, me->request->anchor);
! 523: if (status != HT_OK)
! 524: return status;
! 525: } else if (WHITE(*b)) { /* Folding: LF SP or CR LF SP */
2.18 frystyk 526: me->EOLstate = EOL_BEGIN;
527: HTChunkPutc(me->buffer, ' ');
528: } else { /* New line */
529: me->EOLstate = EOL_BEGIN;
530: HTChunkPutc(me->buffer, '\0');
531: HTChunkPutc(me->buffer, *b);
532: }
533: } else if (me->EOLstate == EOL_SCR) {
2.27 ! frystyk 534: if (*b==CR || *b==LF) { /* End of header */
! 535: int status = parseheader(me, me->request, me->request->anchor);
! 536: if (status != HT_OK)
! 537: return status;
! 538: } else if (WHITE(*b)) { /* Folding: LF CR SP or CR LF CR SP */
2.18 frystyk 539: me->EOLstate = EOL_BEGIN;
540: HTChunkPutc(me->buffer, ' ');
541: } else { /* New line */
542: me->EOLstate = EOL_BEGIN;
543: HTChunkPutc(me->buffer, '\0');
544: HTChunkPutc(me->buffer, *b);
545: }
546: } else if (*b == CR) {
547: me->EOLstate = EOL_FCR;
548: } else if (*b == LF) {
549: me->EOLstate = EOL_FLF; /* Line found */
550: } else
551: HTChunkPutc(me->buffer, *b);
552: b++;
553: }
2.26 frystyk 554: if (me->target) { /* Is the stream set up? */
555: if (l > 0) /* Anything left? */
556: return (*me->target->isa->put_block)(me->target, b, l);
557: return HT_OK;
558: }
559: return HT_WOULD_BLOCK;
2.18 frystyk 560: }
561:
562:
563: /* Character handling
564: ** ------------------
565: */
2.21 frystyk 566: PRIVATE int HTMIME_put_character ARGS2(HTStream *, me, char, c)
2.18 frystyk 567: {
568: return HTMIME_put_block(me, &c, 1);
569: }
570:
2.1 timbl 571:
572: /* String handling
573: ** ---------------
574: */
2.18 frystyk 575: PRIVATE int HTMIME_put_string ARGS2(HTStream *, me, CONST char *, s)
2.1 timbl 576: {
2.18 frystyk 577: return HTMIME_put_block(me, s, (int) strlen(s));
2.1 timbl 578: }
579:
580:
2.18 frystyk 581: /* Flush an stream object
582: ** ---------------------
2.1 timbl 583: */
2.18 frystyk 584: PRIVATE int HTMIME_flush ARGS1(HTStream *, me)
2.1 timbl 585: {
2.18 frystyk 586: return (*me->target->isa->flush)(me->target);
2.1 timbl 587: }
588:
2.18 frystyk 589: /* Free a stream object
590: ** --------------------
2.1 timbl 591: */
2.14 frystyk 592: PRIVATE int HTMIME_free ARGS1(HTStream *, me)
2.1 timbl 593: {
2.18 frystyk 594: int status = HT_OK;
2.25 frystyk 595: if (me->target) {
596: if ((status = (*me->target->isa->_free)(me->target))==HT_WOULD_BLOCK)
597: return HT_WOULD_BLOCK;
598: }
2.26 frystyk 599: if (PROT_TRACE)
600: fprintf(TDEST, "MIME........ FREEING....\n");
2.19 frystyk 601: HTChunkFree(me->buffer);
2.1 timbl 602: free(me);
2.18 frystyk 603: return status;
2.1 timbl 604: }
605:
606: /* End writing
607: */
2.14 frystyk 608: PRIVATE int HTMIME_abort ARGS2(HTStream *, me, HTError, e)
2.1 timbl 609: {
2.18 frystyk 610: int status = HT_ERROR;
611: if (me->target)
612: status = (*me->target->isa->abort)(me->target, e);
2.26 frystyk 613: if (PROT_TRACE)
614: fprintf(TDEST, "MIME........ ABORTING...\n");
615: HTChunkFree(me->buffer);
2.6 timbl 616: free(me);
2.18 frystyk 617: return status;
2.1 timbl 618: }
619:
620:
621:
622: /* Structured Object Class
623: ** -----------------------
624: */
2.6 timbl 625: PRIVATE CONST HTStreamClass HTMIME =
2.1 timbl 626: {
627: "MIMEParser",
2.18 frystyk 628: HTMIME_flush,
2.1 timbl 629: HTMIME_free,
2.6 timbl 630: HTMIME_abort,
631: HTMIME_put_character,
632: HTMIME_put_string,
2.18 frystyk 633: HTMIME_put_block
2.1 timbl 634: };
635:
636:
637: /* Subclass-specific Methods
638: ** -------------------------
639: */
2.7 timbl 640: PUBLIC HTStream* HTMIMEConvert ARGS5(
641: HTRequest *, request,
642: void *, param,
643: HTFormat, input_format,
644: HTFormat, output_format,
645: HTStream *, output_stream)
2.1 timbl 646: {
647: HTStream* me;
2.18 frystyk 648: if ((me=(HTStream *) calloc(1, sizeof(* me))) == NULL)
649: outofmem(__FILE__, "HTMIMEConvert");
2.1 timbl 650: me->isa = &HTMIME;
2.18 frystyk 651: me->request = request;
652: me->target = output_stream;
653: me->target_format = output_format;
654: me->buffer = HTChunkCreate(512);
655: me->EOLstate = EOL_BEGIN;
2.1 timbl 656: return me;
657: }
Webmaster