Annotation of libwww/Library/src/HTMIME.c, revision 2.23
2.15 frystyk 1: /* HTMIME.c
2: ** MIME MESSAGE PARSE
3: **
2.22 frystyk 4: ** (c) COPYRIGHT MIT 1995.
2.15 frystyk 5: ** Please first read the full copyright statement in the file COPYRIGH.
2.1 timbl 6: **
7: ** This is RFC 1341-specific code.
8: ** The input stream pushed into this parser is assumed to be
9: ** stripped of CRs, i.e. lines end with LF, not CR LF.
10: ** (It is easy to change this except for the body part where
11: ** conversion can be slow.)
12: **
13: ** History:
14: ** Feb 92 Written Tim Berners-Lee, CERN
2.13 duns 15: ** 8 Jul 94 FM Insulate free() from _free structure element.
2.18 frystyk 16: ** 14 Mar 95 HFN Now using anchor for storing data. No more `\n',
17: ** static buffers etc.
2.1 timbl 18: */
2.17 frystyk 19:
20: /* Library include files */
21: #include "tcp.h"
22: #include "HTUtils.h"
23: #include "HTString.h"
2.9 luotonen 24: #include "HTFormat.h"
2.18 frystyk 25: #include "HTChunk.h"
2.17 frystyk 26: #include "HTFWrite.h"
2.14 frystyk 27: #include "HTMIME.h" /* Implemented here */
2.1 timbl 28:
29: /* MIME Object
30: ** -----------
31: */
/*
**  States of the header-parsing FSM. The enumerators fall in three groups:
**  control states, intermediate states used while a field name is still
**  ambiguous, and one state per recognised header field.
*/
typedef enum _MIME_state {
    /* Control states */
    BEGINNING_OF_LINE = 0,		/* Positioned on first char of line */
    CHECK,				/* Check against check_pointer */
    UNKNOWN,				/* Unknown header */
    JUNK_LINE,				/* Ignore rest of header */

    /* Intermediate states: more letters needed to pick the field */
    CONTENT,				/* Seen "Content-" */
    FIRSTLETTER_D,			/* Seen "D" */
    FIRSTLETTER_L,			/* Seen "L" */
    CONTENTLETTER_L,			/* Seen "Content-L" */
    CONTENTLETTER_T,			/* Seen "Content-T" */

    /* Headers supported */
    ALLOW,
    AUTHENTICATE,
    CONTENT_ENCODING,
    CONTENT_LANGUAGE,
    CONTENT_LENGTH,
    CONTENT_TRANSFER_ENCODING,
    CONTENT_TYPE,
    MIME_DATE,
    DERIVED_FROM,
    EXPIRES,
    LAST_MODIFIED,
    LINK,
    LOCATION,
    PUBLIC_METHODS,
    RETRY_AFTER,
    TITLE,
    URI_HEADER,
    VERSION
} MIME_state;
63:
64: struct _HTStream {
2.18 frystyk 65: CONST HTStreamClass * isa;
66: HTRequest * request;
67: HTStream * target;
68: HTFormat target_format;
69: HTChunk * buffer;
70: HTSocketEOL EOLstate;
71: BOOL transparent;
2.1 timbl 72: };
73:
2.18 frystyk 74: /* ------------------------------------------------------------------------- */
2.1 timbl 75:
2.18 frystyk 76: /*
2.1 timbl 77: ** This is a FSM parser which is tolerant as it can be of all
78: ** syntax errors. It ignores field names it does not understand,
79: ** and resynchronises on line beginnings.
80: */
2.18 frystyk 81: PRIVATE void parseheader ARGS3(HTStream *, me, HTRequest *, request,
82: HTParentAnchor *, anchor)
83: {
84: MIME_state state = BEGINNING_OF_LINE;
85: MIME_state ok_state; /* got this state if match */
86: char *ptr = me->buffer->data-1; /* We dont change the data in length */
87: char *stop = ptr+me->buffer->size; /* When to stop */
88: char *header = ptr; /* For diagnostics */
89: CONST char * check_pointer; /* checking input */
90: char *value;
91: me->transparent = YES; /* Pump rest of data right through */
92: if (!ptr) /* No header to parse */
93: return;
94: while (ptr < stop) {
95: switch (state) {
96: case BEGINNING_OF_LINE:
97: header = ++ptr;
98: switch (TOLOWER(*ptr)) {
99: case 'a':
100: check_pointer = "llow";
101: ok_state = ALLOW;
102: state = CHECK;
103: break;
104:
105: case 'c':
106: check_pointer = "ontent-";
107: ok_state = CONTENT;
108: state = CHECK;
109: break;
110:
111: case 'd':
112: state = FIRSTLETTER_D;
113: break;
114:
115: case 'e':
116: check_pointer = "xpires";
117: ok_state = EXPIRES;
118: state = CHECK;
119: break;
120:
121: case 'l':
122: state = FIRSTLETTER_L;
123: break;
124:
125: case 'm':
126: check_pointer = "ime-version";
127: ok_state = JUNK_LINE; /* We don't use this but recognize it */
128: state = CHECK;
129: break;
130:
131: case 'p':
132: break;
133:
134: case 'r':
135: check_pointer = "etry-after";
136: ok_state = RETRY_AFTER;
137: state = CHECK;
138: break;
139:
140: case 's':
141: check_pointer = "erver";
142: ok_state = JUNK_LINE; /* We don't use this but recognize it */
143: state = CHECK;
144: break;
2.1 timbl 145:
2.18 frystyk 146: case 't':
147: check_pointer = "itle";
148: ok_state = TITLE;
149: state = CHECK;
150: break;
151:
152: case 'u':
153: check_pointer = "ri";
154: ok_state = URI_HEADER;
155: state = CHECK;
156: break;
157:
158: case 'v':
159: check_pointer = "ersion";
160: ok_state = VERSION;
161: state = CHECK;
162: break;
163:
164: case 'w':
165: check_pointer = "ww-authenticate";
166: ok_state = AUTHENTICATE;
167: state = CHECK;
168: break;
2.1 timbl 169:
2.18 frystyk 170: default:
171: state = UNKNOWN;
172: break;
173: }
174: ptr++;
2.1 timbl 175: break;
176:
2.18 frystyk 177: case FIRSTLETTER_D:
178: switch (TOLOWER(*ptr)) {
179: case 'a':
180: check_pointer = "te";
2.23 ! frystyk 181: ok_state = MIME_DATE;
2.18 frystyk 182: state = CHECK;
183: break;
184:
185: case 'e':
186: check_pointer = "rived-from";
187: ok_state = DERIVED_FROM;
188: state = CHECK;
189: break;
190:
191: default:
192: state = UNKNOWN;
193: break;
194: }
195: ptr++;
196: break;
197:
198: case FIRSTLETTER_L:
199: switch (TOLOWER(*ptr)) {
200: case 'a':
201: check_pointer = "st-modified";
202: ok_state = LAST_MODIFIED;
203: state = CHECK;
204: break;
205:
206: case 'i':
207: check_pointer = "nk";
208: ok_state = LINK;
209: state = CHECK;
210: break;
211:
212: case 'o':
213: check_pointer = "cation";
214: ok_state = LOCATION;
215: state = CHECK;
216: break;
217:
218: default:
219: state = UNKNOWN;
220: break;
221: }
222: ptr++;
223: break;
224:
225: case CONTENT:
226: switch (TOLOWER(*ptr)) {
227: case 'e':
228: check_pointer = "ncoding";
229: ok_state = CONTENT_ENCODING;
230: state = CHECK;
231: break;
232:
233: case 'l':
234: state = CONTENTLETTER_L;
235: break;
236:
237: case 't':
238: state = CONTENTLETTER_T;
239: break;
240:
241: default:
242: state = UNKNOWN;
243: break;
244: }
245: ptr++;
2.1 timbl 246: break;
2.14 frystyk 247:
2.18 frystyk 248: case CONTENTLETTER_L:
249: switch (TOLOWER(*ptr)) {
250: case 'a':
251: check_pointer = "nguage";
252: ok_state = CONTENT_LANGUAGE;
253: state = CHECK;
254: break;
255:
256: case 'e':
257: check_pointer = "ngth";
258: ok_state = CONTENT_LENGTH;
259: state = CHECK;
260: break;
261:
262: default:
263: state = UNKNOWN;
264: break;
265: }
266: ptr++;
2.14 frystyk 267: break;
268:
2.18 frystyk 269: case CONTENTLETTER_T:
270: switch (TOLOWER(*ptr)) {
271: case 'r':
272: check_pointer = "ansfer-encoding";
273: ok_state = CONTENT_TRANSFER_ENCODING;
274: state = CHECK;
275: break;
276:
277: case 'y':
278: check_pointer = "pe";
279: ok_state = CONTENT_TYPE;
280: state = CHECK;
281: break;
282:
283: default:
284: state = UNKNOWN;
285: break;
286: }
287: ptr++;
2.14 frystyk 288: break;
289:
2.18 frystyk 290: case CHECK: /* Check against string */
291: while (TOLOWER(*ptr) == *(check_pointer)++) ptr++;
292: if (!*--check_pointer) {
293: state = ok_state;
294: while (*ptr && (WHITE(*ptr) || *ptr==':')) /* Spool to value */
295: ptr++;
296: } else
297: state = UNKNOWN;
2.14 frystyk 298: break;
299:
2.18 frystyk 300: case ALLOW:
2.20 frystyk 301: while ((value = HTNextField(&ptr)) != NULL) {
302: char *lc = value;
303: HTMethod new_method;
304: while ((*lc = TOUPPER(*lc))) lc++;
305: if ((new_method = HTMethod_enum(value)) != METHOD_INVALID)
306: anchor->methods += new_method;
2.1 timbl 307: }
2.18 frystyk 308: if (STREAM_TRACE)
309: fprintf(TDEST, "MIMEParser.. Methods allowed: %d\n",
310: anchor->methods);
311: state = JUNK_LINE;
2.1 timbl 312: break;
2.18 frystyk 313:
314: case AUTHENTICATE:
315: if ((value = HTNextField(&ptr)) != NULL) {
316: StrAllocCopy(request->WWWAAScheme, value);
2.20 frystyk 317:
318: /* The parsing is done in HTSSUtils.c for the moment */
319: if (*ptr) StrAllocCopy(request->WWWAARealm, ptr);
2.1 timbl 320: }
2.18 frystyk 321: state = JUNK_LINE;
322: break;
323:
324: case CONTENT_ENCODING:
325: if ((value = HTNextField(&ptr)) != NULL) {
326: char *lc = value;
2.20 frystyk 327: while ((*lc = TOLOWER(*lc))) lc++;
2.18 frystyk 328: anchor->content_encoding = HTAtom_for(value);
329: }
330: state = JUNK_LINE;
331: break;
332:
2.21 frystyk 333: case CONTENT_LANGUAGE: /* @@@ SHOULD BE A LIST @@@ */
334: if ((value = HTNextField(&ptr)) != NULL) {
335: char *lc = value;
336: while ((*lc = TOLOWER(*lc))) lc++;
337: anchor->content_language = HTAtom_for(value);
338: }
339: state = JUNK_LINE;
2.18 frystyk 340: break;
341:
342: case CONTENT_LENGTH:
343: if ((value = HTNextField(&ptr)) != NULL)
344: anchor->content_length = atol(value);
345: state = JUNK_LINE;
346: break;
347:
348: case CONTENT_TRANSFER_ENCODING:
349: if ((value = HTNextField(&ptr)) != NULL) {
350: char *lc = value;
2.20 frystyk 351: while ((*lc = TOLOWER(*lc))) lc++;
2.18 frystyk 352: anchor->cte = HTAtom_for(value);
353: }
354: state = JUNK_LINE;
355: break;
356:
357: case CONTENT_TYPE:
358: if ((value = HTNextField(&ptr)) != NULL) {
359: char *lc = value;
360: while ((*lc = TOLOWER(*lc))) lc++;
361: anchor->content_type = HTAtom_for(value);
2.20 frystyk 362: while ((value = HTNextField(&ptr)) != NULL) { /* Charset */
363: if (!strcasecomp(value, "charset")) {
364: if ((value = HTNextField(&ptr)) != NULL) {
365: lc = value;
366: while ((*lc = TOLOWER(*lc))) lc++;
367: anchor->charset = HTAtom_for(value);
368: }
369: } else if (!strcasecomp(value, "level")) { /* Level */
370: if ((value = HTNextField(&ptr)) != NULL) {
371: lc = value;
372: while ((*lc = TOLOWER(*lc))) lc++;
373: anchor->level = HTAtom_for(value);
374: }
375: }
376: }
2.1 timbl 377: }
2.20 frystyk 378: state = JUNK_LINE;
2.18 frystyk 379: break;
380:
2.23 ! frystyk 381: case MIME_DATE:
2.18 frystyk 382: anchor->date = HTParseTime(ptr);
383: state = JUNK_LINE;
384: break;
385:
386: case DERIVED_FROM:
387: if ((value = HTNextField(&ptr)) != NULL)
388: StrAllocCopy(anchor->derived_from, value);
389: state = JUNK_LINE;
390: break;
391:
392: case EXPIRES:
393: anchor->expires = HTParseTime(ptr);
394: state = JUNK_LINE;
395: break;
396:
397: case LAST_MODIFIED:
398: anchor->last_modified = HTParseTime(ptr);
399: state = JUNK_LINE;
400: break;
401:
402: case LINK:
2.20 frystyk 403: state = UNKNOWN; /* @@@@@@@@@@@ */
2.18 frystyk 404: break;
405:
406: case LOCATION:
407: if ((value = HTNextField(&ptr)) != NULL)
408: StrAllocCopy(request->redirect, value);
409: state = JUNK_LINE;
410: break;
411:
412: case PUBLIC_METHODS:
2.20 frystyk 413: state = UNKNOWN; /* @@@@@@@@@@@ */
2.18 frystyk 414: break;
415:
416: case RETRY_AFTER:
2.19 frystyk 417: request->retry_after = HTParseTime(ptr);
418: state = JUNK_LINE;
2.18 frystyk 419: break;
420:
421: case TITLE: /* Can't reuse buffer as HTML version might differ */
422: if ((value = HTNextField(&ptr)) != NULL)
423: StrAllocCopy(anchor->title, value);
424: state = JUNK_LINE;
425: break;
426:
427: case URI_HEADER:
428: state = LOCATION; /* @@@ Need extended parsing */
429: break;
430:
431: case VERSION:
432: if ((value = HTNextField(&ptr)) != NULL)
433: StrAllocCopy(anchor->version, value);
434: state = JUNK_LINE;
435: break;
436:
437: case UNKNOWN:
438: if (STREAM_TRACE)
439: fprintf(TDEST,"MIMEParser.. Unknown header: `%s\'\n", header);
440: HTAnchor_addExtra(anchor, header);
441:
442: /* Fall through */
443:
444: case JUNK_LINE:
445: while (*ptr) ptr++;
446: state = BEGINNING_OF_LINE;
447: break;
2.1 timbl 448: }
2.18 frystyk 449: }
450:
451: if (STREAM_TRACE)
452: fprintf(TDEST, "MIMEParser.. Media type %s is converted to %s\n",
453: HTAtom_name(anchor->content_type),
454: HTAtom_name(me->target_format));
455: if ((me->target = HTStreamStack(anchor->content_type,
456: me->target_format, me->target,
457: me->request, YES)) == NULL) {
458: if (STREAM_TRACE)
459: fprintf(TDEST, "MIMEParser.. Can't convert media type\n");
460: me->target = HTBlackHole();
461: }
462: anchor->header_parsed = YES;
2.1 timbl 463: }
464:
465:
2.18 frystyk 466: /*
467: ** Header is terminated by CRCR, LFLF, CRLFLF, CRLFCRLF
468: ** Folding is either of CF LWS, LF LWS, CRLF LWS
469: */
470: PRIVATE int HTMIME_put_block ARGS3(HTStream *, me, CONST char *, b, int, l)
471: {
472: while (!me->transparent && l-- > 0) {
473: if (me->EOLstate == EOL_FCR) {
474: if (*b == CR) /* End of header */
475: parseheader(me, me->request, me->request->anchor);
476: else if (*b == LF) /* CRLF */
477: me->EOLstate = EOL_FLF;
478: else if (WHITE(*b)) { /* Folding: CR SP */
479: me->EOLstate = EOL_BEGIN;
480: HTChunkPutc(me->buffer, ' ');
481: } else { /* New line */
482: me->EOLstate = EOL_BEGIN;
483: HTChunkPutc(me->buffer, '\0');
484: HTChunkPutc(me->buffer, *b);
485: }
486: } else if (me->EOLstate == EOL_FLF) {
487: if (*b == CR) /* LF CR or CR LF CR */
488: me->EOLstate = EOL_SCR;
489: else if (*b == LF) /* End of header */
490: parseheader(me, me->request, me->request->anchor);
491: else if (WHITE(*b)) { /* Folding: LF SP or CR LF SP */
492: me->EOLstate = EOL_BEGIN;
493: HTChunkPutc(me->buffer, ' ');
494: } else { /* New line */
495: me->EOLstate = EOL_BEGIN;
496: HTChunkPutc(me->buffer, '\0');
497: HTChunkPutc(me->buffer, *b);
498: }
499: } else if (me->EOLstate == EOL_SCR) {
500: if (*b==CR || *b==LF) /* End of header */
501: parseheader(me, me->request, me->request->anchor);
502: else if (WHITE(*b)) { /* Folding: LF CR SP or CR LF CR SP */
503: me->EOLstate = EOL_BEGIN;
504: HTChunkPutc(me->buffer, ' ');
505: } else { /* New line */
506: me->EOLstate = EOL_BEGIN;
507: HTChunkPutc(me->buffer, '\0');
508: HTChunkPutc(me->buffer, *b);
509: }
510: } else if (*b == CR) {
511: me->EOLstate = EOL_FCR;
512: } else if (*b == LF) {
513: me->EOLstate = EOL_FLF; /* Line found */
514: } else
515: HTChunkPutc(me->buffer, *b);
516: b++;
517: }
518: if (l > 0) /* Anything left? */
519: return (*me->target->isa->put_block)(me->target, b, l);
520: return HT_OK;
521: }
522:
523:
524: /* Character handling
525: ** ------------------
526: */
2.21 frystyk 527: PRIVATE int HTMIME_put_character ARGS2(HTStream *, me, char, c)
2.18 frystyk 528: {
529: return HTMIME_put_block(me, &c, 1);
530: }
531:
2.1 timbl 532:
533: /* String handling
534: ** ---------------
535: */
2.18 frystyk 536: PRIVATE int HTMIME_put_string ARGS2(HTStream *, me, CONST char *, s)
2.1 timbl 537: {
2.18 frystyk 538: return HTMIME_put_block(me, s, (int) strlen(s));
2.1 timbl 539: }
540:
541:
2.18 frystyk 542: /* Flush an stream object
543: ** ---------------------
2.1 timbl 544: */
2.18 frystyk 545: PRIVATE int HTMIME_flush ARGS1(HTStream *, me)
2.1 timbl 546: {
2.18 frystyk 547: return (*me->target->isa->flush)(me->target);
2.1 timbl 548: }
549:
2.18 frystyk 550: /* Free a stream object
551: ** --------------------
2.1 timbl 552: */
2.14 frystyk 553: PRIVATE int HTMIME_free ARGS1(HTStream *, me)
2.1 timbl 554: {
2.18 frystyk 555: int status = HT_OK;
556: if (me->target)
557: status = (*me->target->isa->_free)(me->target);
2.19 frystyk 558: HTChunkFree(me->buffer);
2.1 timbl 559: free(me);
2.18 frystyk 560: return status;
2.1 timbl 561: }
562:
563: /* End writing
564: */
2.14 frystyk 565: PRIVATE int HTMIME_abort ARGS2(HTStream *, me, HTError, e)
2.1 timbl 566: {
2.18 frystyk 567: int status = HT_ERROR;
568: if (me->target)
569: status = (*me->target->isa->abort)(me->target, e);
2.6 timbl 570: free(me);
2.18 frystyk 571: return status;
2.1 timbl 572: }
573:
574:
575:
576: /* Structured Object Class
577: ** -----------------------
578: */
2.6 timbl 579: PRIVATE CONST HTStreamClass HTMIME =
2.1 timbl 580: {
581: "MIMEParser",
2.18 frystyk 582: HTMIME_flush,
2.1 timbl 583: HTMIME_free,
2.6 timbl 584: HTMIME_abort,
585: HTMIME_put_character,
586: HTMIME_put_string,
2.18 frystyk 587: HTMIME_put_block
2.1 timbl 588: };
589:
590:
591: /* Subclass-specific Methods
592: ** -------------------------
593: */
2.7 timbl 594: PUBLIC HTStream* HTMIMEConvert ARGS5(
595: HTRequest *, request,
596: void *, param,
597: HTFormat, input_format,
598: HTFormat, output_format,
599: HTStream *, output_stream)
2.1 timbl 600: {
601: HTStream* me;
2.18 frystyk 602: if ((me=(HTStream *) calloc(1, sizeof(* me))) == NULL)
603: outofmem(__FILE__, "HTMIMEConvert");
2.1 timbl 604: me->isa = &HTMIME;
2.18 frystyk 605: me->request = request;
606: me->target = output_stream;
607: me->target_format = output_format;
608: me->buffer = HTChunkCreate(512);
609: me->EOLstate = EOL_BEGIN;
2.1 timbl 610: return me;
611: }
Webmaster