Return to HTMIME.c CVS log | Up to [Public] / libwww / Library / src |
2.15 frystyk 1: /* HTMIME.c
2: ** MIME MESSAGE PARSE
3: **
2.22 frystyk 4: ** (c) COPYRIGHT MIT 1995.
2.15 frystyk 5: ** Please first read the full copyright statement in the file COPYRIGH.
2.1 timbl 6: **
7: ** This is RFC 1341-specific code.
8: ** The input stream pushed into this parser is assumed to be
9: ** stripped of CRs, i.e. lines end with LF, not CR LF.
10: ** (It is easy to change this except for the body part where
11: ** conversion can be slow.)
12: **
13: ** History:
14: ** Feb 92 Written Tim Berners-Lee, CERN
2.13 duns 15: ** 8 Jul 94 FM Insulate free() from _free structure element.
2.18 frystyk 16: ** 14 Mar 95 HFN Now using anchor for storing data. No more `\n',
17: ** static buffers etc.
2.1 timbl 18: */
2.17 frystyk 19:
20: /* Library include files */
21: #include "tcp.h"
22: #include "HTUtils.h"
23: #include "HTString.h"
2.9 luotonen 24: #include "HTFormat.h"
2.18 frystyk 25: #include "HTChunk.h"
2.24 ! frystyk 26: #include "HTSocket.h"
2.17 frystyk 27: #include "HTFWrite.h"
2.14 frystyk 28: #include "HTMIME.h" /* Implemented here */
2.1 timbl 29:
30: /* MIME Object
31: ** -----------
32: */
/*
**	States of the header-parsing state machine in parseheader().
**	The enumerator order (and therefore every numeric value) is kept
**	exactly as before; only the layout and comments are new.
*/
typedef enum _MIME_state {
    /* Scanner control states */
    BEGINNING_OF_LINE = 0,		/* At the first character of a line */
    CHECK,				/* Compare input with check_pointer */
    UNKNOWN,				/* Header name was not recognized */
    JUNK_LINE,				/* Skip whatever is left of the line */

    /* Intermediate states used while disambiguating a header name */
    CONTENT,				/* Saw "Content-" */
    FIRSTLETTER_D,			/* Saw "D" */
    FIRSTLETTER_L,			/* Saw "L" */
    CONTENTLETTER_L,			/* Saw "Content-L" */
    CONTENTLETTER_T,			/* Saw "Content-T" */

    /* One state per header whose value is handled */
    ALLOW,
    AUTHENTICATE,
    CONTENT_ENCODING,
    CONTENT_LANGUAGE,
    CONTENT_LENGTH,
    CONTENT_TRANSFER_ENCODING,
    CONTENT_TYPE,
    MIME_DATE,
    DERIVED_FROM,
    EXPIRES,
    LAST_MODIFIED,
    LINK,
    LOCATION,
    PUBLIC_METHODS,
    RETRY_AFTER,
    TITLE,
    URI_HEADER,
    VERSION
} MIME_state;
64:
65: struct _HTStream {
2.18 frystyk 66: CONST HTStreamClass * isa;
67: HTRequest * request;
68: HTStream * target;
69: HTFormat target_format;
70: HTChunk * buffer;
71: HTSocketEOL EOLstate;
72: BOOL transparent;
2.1 timbl 73: };
74:
2.18 frystyk 75: /* ------------------------------------------------------------------------- */
2.1 timbl 76:
2.18 frystyk 77: /*
2.1 timbl 78: ** This is a FSM parser which is tolerant as it can be of all
79: ** syntax errors. It ignores field names it does not understand,
80: ** and resynchronises on line beginnings.
81: */
2.18 frystyk 82: PRIVATE void parseheader ARGS3(HTStream *, me, HTRequest *, request,
83: HTParentAnchor *, anchor)
84: {
85: MIME_state state = BEGINNING_OF_LINE;
86: MIME_state ok_state; /* got this state if match */
87: char *ptr = me->buffer->data-1; /* We dont change the data in length */
88: char *stop = ptr+me->buffer->size; /* When to stop */
89: char *header = ptr; /* For diagnostics */
90: CONST char * check_pointer; /* checking input */
91: char *value;
92: me->transparent = YES; /* Pump rest of data right through */
93: if (!ptr) /* No header to parse */
94: return;
95: while (ptr < stop) {
96: switch (state) {
97: case BEGINNING_OF_LINE:
98: header = ++ptr;
99: switch (TOLOWER(*ptr)) {
100: case 'a':
101: check_pointer = "llow";
102: ok_state = ALLOW;
103: state = CHECK;
104: break;
105:
106: case 'c':
107: check_pointer = "ontent-";
108: ok_state = CONTENT;
109: state = CHECK;
110: break;
111:
112: case 'd':
113: state = FIRSTLETTER_D;
114: break;
115:
116: case 'e':
117: check_pointer = "xpires";
118: ok_state = EXPIRES;
119: state = CHECK;
120: break;
121:
122: case 'l':
123: state = FIRSTLETTER_L;
124: break;
125:
126: case 'm':
127: check_pointer = "ime-version";
128: ok_state = JUNK_LINE; /* We don't use this but recognize it */
129: state = CHECK;
130: break;
131:
132: case 'p':
133: break;
134:
135: case 'r':
136: check_pointer = "etry-after";
137: ok_state = RETRY_AFTER;
138: state = CHECK;
139: break;
140:
141: case 's':
142: check_pointer = "erver";
143: ok_state = JUNK_LINE; /* We don't use this but recognize it */
144: state = CHECK;
145: break;
2.1 timbl 146:
2.18 frystyk 147: case 't':
148: check_pointer = "itle";
149: ok_state = TITLE;
150: state = CHECK;
151: break;
152:
153: case 'u':
154: check_pointer = "ri";
155: ok_state = URI_HEADER;
156: state = CHECK;
157: break;
158:
159: case 'v':
160: check_pointer = "ersion";
161: ok_state = VERSION;
162: state = CHECK;
163: break;
164:
165: case 'w':
166: check_pointer = "ww-authenticate";
167: ok_state = AUTHENTICATE;
168: state = CHECK;
169: break;
2.1 timbl 170:
2.18 frystyk 171: default:
172: state = UNKNOWN;
173: break;
174: }
175: ptr++;
2.1 timbl 176: break;
177:
2.18 frystyk 178: case FIRSTLETTER_D:
179: switch (TOLOWER(*ptr)) {
180: case 'a':
181: check_pointer = "te";
2.23 frystyk 182: ok_state = MIME_DATE;
2.18 frystyk 183: state = CHECK;
184: break;
185:
186: case 'e':
187: check_pointer = "rived-from";
188: ok_state = DERIVED_FROM;
189: state = CHECK;
190: break;
191:
192: default:
193: state = UNKNOWN;
194: break;
195: }
196: ptr++;
197: break;
198:
199: case FIRSTLETTER_L:
200: switch (TOLOWER(*ptr)) {
201: case 'a':
202: check_pointer = "st-modified";
203: ok_state = LAST_MODIFIED;
204: state = CHECK;
205: break;
206:
207: case 'i':
208: check_pointer = "nk";
209: ok_state = LINK;
210: state = CHECK;
211: break;
212:
213: case 'o':
214: check_pointer = "cation";
215: ok_state = LOCATION;
216: state = CHECK;
217: break;
218:
219: default:
220: state = UNKNOWN;
221: break;
222: }
223: ptr++;
224: break;
225:
226: case CONTENT:
227: switch (TOLOWER(*ptr)) {
228: case 'e':
229: check_pointer = "ncoding";
230: ok_state = CONTENT_ENCODING;
231: state = CHECK;
232: break;
233:
234: case 'l':
235: state = CONTENTLETTER_L;
236: break;
237:
238: case 't':
239: state = CONTENTLETTER_T;
240: break;
241:
242: default:
243: state = UNKNOWN;
244: break;
245: }
246: ptr++;
2.1 timbl 247: break;
2.14 frystyk 248:
2.18 frystyk 249: case CONTENTLETTER_L:
250: switch (TOLOWER(*ptr)) {
251: case 'a':
252: check_pointer = "nguage";
253: ok_state = CONTENT_LANGUAGE;
254: state = CHECK;
255: break;
256:
257: case 'e':
258: check_pointer = "ngth";
259: ok_state = CONTENT_LENGTH;
260: state = CHECK;
261: break;
262:
263: default:
264: state = UNKNOWN;
265: break;
266: }
267: ptr++;
2.14 frystyk 268: break;
269:
2.18 frystyk 270: case CONTENTLETTER_T:
271: switch (TOLOWER(*ptr)) {
272: case 'r':
273: check_pointer = "ansfer-encoding";
274: ok_state = CONTENT_TRANSFER_ENCODING;
275: state = CHECK;
276: break;
277:
278: case 'y':
279: check_pointer = "pe";
280: ok_state = CONTENT_TYPE;
281: state = CHECK;
282: break;
283:
284: default:
285: state = UNKNOWN;
286: break;
287: }
288: ptr++;
2.14 frystyk 289: break;
290:
2.18 frystyk 291: case CHECK: /* Check against string */
292: while (TOLOWER(*ptr) == *(check_pointer)++) ptr++;
293: if (!*--check_pointer) {
294: state = ok_state;
295: while (*ptr && (WHITE(*ptr) || *ptr==':')) /* Spool to value */
296: ptr++;
297: } else
298: state = UNKNOWN;
2.14 frystyk 299: break;
300:
2.18 frystyk 301: case ALLOW:
2.20 frystyk 302: while ((value = HTNextField(&ptr)) != NULL) {
303: char *lc = value;
304: HTMethod new_method;
305: while ((*lc = TOUPPER(*lc))) lc++;
306: if ((new_method = HTMethod_enum(value)) != METHOD_INVALID)
307: anchor->methods += new_method;
2.1 timbl 308: }
2.18 frystyk 309: if (STREAM_TRACE)
310: fprintf(TDEST, "MIMEParser.. Methods allowed: %d\n",
311: anchor->methods);
312: state = JUNK_LINE;
2.1 timbl 313: break;
2.18 frystyk 314:
315: case AUTHENTICATE:
316: if ((value = HTNextField(&ptr)) != NULL) {
317: StrAllocCopy(request->WWWAAScheme, value);
2.20 frystyk 318:
319: /* The parsing is done in HTSSUtils.c for the moment */
320: if (*ptr) StrAllocCopy(request->WWWAARealm, ptr);
2.1 timbl 321: }
2.18 frystyk 322: state = JUNK_LINE;
323: break;
324:
325: case CONTENT_ENCODING:
326: if ((value = HTNextField(&ptr)) != NULL) {
327: char *lc = value;
2.20 frystyk 328: while ((*lc = TOLOWER(*lc))) lc++;
2.18 frystyk 329: anchor->content_encoding = HTAtom_for(value);
330: }
331: state = JUNK_LINE;
332: break;
333:
2.21 frystyk 334: case CONTENT_LANGUAGE: /* @@@ SHOULD BE A LIST @@@ */
335: if ((value = HTNextField(&ptr)) != NULL) {
336: char *lc = value;
337: while ((*lc = TOLOWER(*lc))) lc++;
338: anchor->content_language = HTAtom_for(value);
339: }
340: state = JUNK_LINE;
2.18 frystyk 341: break;
342:
343: case CONTENT_LENGTH:
344: if ((value = HTNextField(&ptr)) != NULL)
345: anchor->content_length = atol(value);
346: state = JUNK_LINE;
347: break;
348:
349: case CONTENT_TRANSFER_ENCODING:
350: if ((value = HTNextField(&ptr)) != NULL) {
351: char *lc = value;
2.20 frystyk 352: while ((*lc = TOLOWER(*lc))) lc++;
2.18 frystyk 353: anchor->cte = HTAtom_for(value);
354: }
355: state = JUNK_LINE;
356: break;
357:
358: case CONTENT_TYPE:
359: if ((value = HTNextField(&ptr)) != NULL) {
360: char *lc = value;
361: while ((*lc = TOLOWER(*lc))) lc++;
362: anchor->content_type = HTAtom_for(value);
2.20 frystyk 363: while ((value = HTNextField(&ptr)) != NULL) { /* Charset */
364: if (!strcasecomp(value, "charset")) {
365: if ((value = HTNextField(&ptr)) != NULL) {
366: lc = value;
367: while ((*lc = TOLOWER(*lc))) lc++;
368: anchor->charset = HTAtom_for(value);
369: }
370: } else if (!strcasecomp(value, "level")) { /* Level */
371: if ((value = HTNextField(&ptr)) != NULL) {
372: lc = value;
373: while ((*lc = TOLOWER(*lc))) lc++;
374: anchor->level = HTAtom_for(value);
375: }
376: }
377: }
2.1 timbl 378: }
2.20 frystyk 379: state = JUNK_LINE;
2.18 frystyk 380: break;
381:
2.23 frystyk 382: case MIME_DATE:
2.18 frystyk 383: anchor->date = HTParseTime(ptr);
384: state = JUNK_LINE;
385: break;
386:
387: case DERIVED_FROM:
388: if ((value = HTNextField(&ptr)) != NULL)
389: StrAllocCopy(anchor->derived_from, value);
390: state = JUNK_LINE;
391: break;
392:
393: case EXPIRES:
394: anchor->expires = HTParseTime(ptr);
395: state = JUNK_LINE;
396: break;
397:
398: case LAST_MODIFIED:
399: anchor->last_modified = HTParseTime(ptr);
400: state = JUNK_LINE;
401: break;
402:
403: case LINK:
2.20 frystyk 404: state = UNKNOWN; /* @@@@@@@@@@@ */
2.18 frystyk 405: break;
406:
407: case LOCATION:
408: if ((value = HTNextField(&ptr)) != NULL)
409: StrAllocCopy(request->redirect, value);
410: state = JUNK_LINE;
411: break;
412:
413: case PUBLIC_METHODS:
2.20 frystyk 414: state = UNKNOWN; /* @@@@@@@@@@@ */
2.18 frystyk 415: break;
416:
417: case RETRY_AFTER:
2.19 frystyk 418: request->retry_after = HTParseTime(ptr);
419: state = JUNK_LINE;
2.18 frystyk 420: break;
421:
422: case TITLE: /* Can't reuse buffer as HTML version might differ */
423: if ((value = HTNextField(&ptr)) != NULL)
424: StrAllocCopy(anchor->title, value);
425: state = JUNK_LINE;
426: break;
427:
428: case URI_HEADER:
429: state = LOCATION; /* @@@ Need extended parsing */
430: break;
431:
432: case VERSION:
433: if ((value = HTNextField(&ptr)) != NULL)
434: StrAllocCopy(anchor->version, value);
435: state = JUNK_LINE;
436: break;
437:
438: case UNKNOWN:
439: if (STREAM_TRACE)
440: fprintf(TDEST,"MIMEParser.. Unknown header: `%s\'\n", header);
441: HTAnchor_addExtra(anchor, header);
442:
443: /* Fall through */
444:
445: case JUNK_LINE:
446: while (*ptr) ptr++;
447: state = BEGINNING_OF_LINE;
448: break;
2.1 timbl 449: }
2.18 frystyk 450: }
451:
452: if (STREAM_TRACE)
453: fprintf(TDEST, "MIMEParser.. Media type %s is converted to %s\n",
454: HTAtom_name(anchor->content_type),
455: HTAtom_name(me->target_format));
456: if ((me->target = HTStreamStack(anchor->content_type,
457: me->target_format, me->target,
458: me->request, YES)) == NULL) {
459: if (STREAM_TRACE)
460: fprintf(TDEST, "MIMEParser.. Can't convert media type\n");
461: me->target = HTBlackHole();
462: }
463: anchor->header_parsed = YES;
2.1 timbl 464: }
465:
466:
2.18 frystyk 467: /*
468: ** Header is terminated by CRCR, LFLF, CRLFLF, CRLFCRLF
469: ** Folding is either of CF LWS, LF LWS, CRLF LWS
470: */
471: PRIVATE int HTMIME_put_block ARGS3(HTStream *, me, CONST char *, b, int, l)
472: {
473: while (!me->transparent && l-- > 0) {
474: if (me->EOLstate == EOL_FCR) {
475: if (*b == CR) /* End of header */
476: parseheader(me, me->request, me->request->anchor);
477: else if (*b == LF) /* CRLF */
478: me->EOLstate = EOL_FLF;
479: else if (WHITE(*b)) { /* Folding: CR SP */
480: me->EOLstate = EOL_BEGIN;
481: HTChunkPutc(me->buffer, ' ');
482: } else { /* New line */
483: me->EOLstate = EOL_BEGIN;
484: HTChunkPutc(me->buffer, '\0');
485: HTChunkPutc(me->buffer, *b);
486: }
487: } else if (me->EOLstate == EOL_FLF) {
488: if (*b == CR) /* LF CR or CR LF CR */
489: me->EOLstate = EOL_SCR;
490: else if (*b == LF) /* End of header */
491: parseheader(me, me->request, me->request->anchor);
492: else if (WHITE(*b)) { /* Folding: LF SP or CR LF SP */
493: me->EOLstate = EOL_BEGIN;
494: HTChunkPutc(me->buffer, ' ');
495: } else { /* New line */
496: me->EOLstate = EOL_BEGIN;
497: HTChunkPutc(me->buffer, '\0');
498: HTChunkPutc(me->buffer, *b);
499: }
500: } else if (me->EOLstate == EOL_SCR) {
501: if (*b==CR || *b==LF) /* End of header */
502: parseheader(me, me->request, me->request->anchor);
503: else if (WHITE(*b)) { /* Folding: LF CR SP or CR LF CR SP */
504: me->EOLstate = EOL_BEGIN;
505: HTChunkPutc(me->buffer, ' ');
506: } else { /* New line */
507: me->EOLstate = EOL_BEGIN;
508: HTChunkPutc(me->buffer, '\0');
509: HTChunkPutc(me->buffer, *b);
510: }
511: } else if (*b == CR) {
512: me->EOLstate = EOL_FCR;
513: } else if (*b == LF) {
514: me->EOLstate = EOL_FLF; /* Line found */
515: } else
516: HTChunkPutc(me->buffer, *b);
517: b++;
518: }
519: if (l > 0) /* Anything left? */
520: return (*me->target->isa->put_block)(me->target, b, l);
521: return HT_OK;
522: }
523:
524:
525: /* Character handling
526: ** ------------------
527: */
2.21 frystyk 528: PRIVATE int HTMIME_put_character ARGS2(HTStream *, me, char, c)
2.18 frystyk 529: {
530: return HTMIME_put_block(me, &c, 1);
531: }
532:
2.1 timbl 533:
534: /* String handling
535: ** ---------------
536: */
2.18 frystyk 537: PRIVATE int HTMIME_put_string ARGS2(HTStream *, me, CONST char *, s)
2.1 timbl 538: {
2.18 frystyk 539: return HTMIME_put_block(me, s, (int) strlen(s));
2.1 timbl 540: }
541:
542:
2.18 frystyk 543: /* Flush an stream object
544: ** ---------------------
2.1 timbl 545: */
2.18 frystyk 546: PRIVATE int HTMIME_flush ARGS1(HTStream *, me)
2.1 timbl 547: {
2.18 frystyk 548: return (*me->target->isa->flush)(me->target);
2.1 timbl 549: }
550:
2.18 frystyk 551: /* Free a stream object
552: ** --------------------
2.1 timbl 553: */
2.14 frystyk 554: PRIVATE int HTMIME_free ARGS1(HTStream *, me)
2.1 timbl 555: {
2.18 frystyk 556: int status = HT_OK;
557: if (me->target)
558: status = (*me->target->isa->_free)(me->target);
2.19 frystyk 559: HTChunkFree(me->buffer);
2.1 timbl 560: free(me);
2.18 frystyk 561: return status;
2.1 timbl 562: }
563:
564: /* End writing
565: */
2.14 frystyk 566: PRIVATE int HTMIME_abort ARGS2(HTStream *, me, HTError, e)
2.1 timbl 567: {
2.18 frystyk 568: int status = HT_ERROR;
569: if (me->target)
570: status = (*me->target->isa->abort)(me->target, e);
2.6 timbl 571: free(me);
2.18 frystyk 572: return status;
2.1 timbl 573: }
574:
575:
576:
577: /* Structured Object Class
578: ** -----------------------
579: */
2.6 timbl 580: PRIVATE CONST HTStreamClass HTMIME =
2.1 timbl 581: {
582: "MIMEParser",
2.18 frystyk 583: HTMIME_flush,
2.1 timbl 584: HTMIME_free,
2.6 timbl 585: HTMIME_abort,
586: HTMIME_put_character,
587: HTMIME_put_string,
2.18 frystyk 588: HTMIME_put_block
2.1 timbl 589: };
590:
591:
592: /* Subclass-specific Methods
593: ** -------------------------
594: */
2.7 timbl 595: PUBLIC HTStream* HTMIMEConvert ARGS5(
596: HTRequest *, request,
597: void *, param,
598: HTFormat, input_format,
599: HTFormat, output_format,
600: HTStream *, output_stream)
2.1 timbl 601: {
602: HTStream* me;
2.18 frystyk 603: if ((me=(HTStream *) calloc(1, sizeof(* me))) == NULL)
604: outofmem(__FILE__, "HTMIMEConvert");
2.1 timbl 605: me->isa = &HTMIME;
2.18 frystyk 606: me->request = request;
607: me->target = output_stream;
608: me->target_format = output_format;
609: me->buffer = HTChunkCreate(512);
610: me->EOLstate = EOL_BEGIN;
2.1 timbl 611: return me;
612: }