Annotation of libwww/Library/src/HTMIME.c, revision 2.19
2.15 frystyk 1: /* HTMIME.c
2: ** MIME MESSAGE PARSE
3: **
4: ** (c) COPYRIGHT CERN 1994.
5: ** Please first read the full copyright statement in the file COPYRIGH.
2.1 timbl 6: **
7: ** This is RFC 1341-specific code.
8: ** The input stream pushed into this parser is assumed to be
9: ** stripped of CRs, i.e. lines end with LF, not CR LF.
10: ** (It is easy to change this except for the body part where
11: ** conversion can be slow.)
12: **
13: ** History:
14: ** Feb 92 Written Tim Berners-Lee, CERN
2.13 duns 15: ** 8 Jul 94 FM Insulate free() from _free structure element.
2.18 frystyk 16: ** 14 Mar 95 HFN Now using anchor for storing data. No more `\n',
17: ** static buffers etc.
2.1 timbl 18: */
2.17 frystyk 19:
20: /* Library include files */
21: #include "tcp.h"
22: #include "HTUtils.h"
23: #include "HTString.h"
2.9 luotonen 24: #include "HTFormat.h"
2.18 frystyk 25: #include "HTChunk.h"
2.17 frystyk 26: #include "HTFWrite.h"
2.14 frystyk 27: #include "HTMIME.h" /* Implemented here */
2.1 timbl 28:
29: /* MIME Object
30: ** -----------
31: */
/*
**  States of the header scanner in parseheader().  The enumerators keep
**  their original order, so their numeric values are unchanged.
*/
typedef enum _MIME_state {
    /* Generic scanner states */
    BEGINNING_OF_LINE = 0,      /* About to look at the first letter of a line */
    CHECK,                      /* Compare input against check_pointer */
    UNKNOWN,                    /* Header name not recognized */
    JUNK_LINE,                  /* Skip whatever is left of the line */

    /* Intermediate states: more letters needed to pick a header */
    CONTENT,                    /* Seen "content-" */
    FIRSTLETTER_D,              /* Seen "d" */
    FIRSTLETTER_L,              /* Seen "l" */
    CONTENTLETTER_L,            /* Seen "content-l" */
    CONTENTLETTER_T,            /* Seen "content-t" */

    /* One state per header whose name has been fully matched */
    ALLOW,
    AUTHENTICATE,
    CONTENT_ENCODING,
    CONTENT_LANGUAGE,
    CONTENT_LENGTH,
    CONTENT_TRANSFER_ENCODING,
    CONTENT_TYPE,
    DATE,
    DERIVED_FROM,
    EXPIRES,
    LAST_MODIFIED,
    LINK,
    LOCATION,
    PUBLIC_METHODS,
    RETRY_AFTER,
    TITLE,
    URI_HEADER,
    VERSION
} MIME_state;
63:
64: struct _HTStream {
2.18 frystyk 65: CONST HTStreamClass * isa;
66: HTRequest * request;
67: HTStream * target;
68: HTFormat target_format;
69: HTChunk * buffer;
70: HTSocketEOL EOLstate;
71: BOOL transparent;
2.1 timbl 72: };
73:
2.18 frystyk 74: /* ------------------------------------------------------------------------- */
2.1 timbl 75:
2.18 frystyk 76: /*
2.1 timbl 77: ** This is a FSM parser which is tolerant as it can be of all
78: ** syntax errors. It ignores field names it does not understand,
79: ** and resynchronises on line beginnings.
80: */
2.18 frystyk 81: PRIVATE void parseheader ARGS3(HTStream *, me, HTRequest *, request,
82: HTParentAnchor *, anchor)
83: {
84: MIME_state state = BEGINNING_OF_LINE;
85: MIME_state ok_state; /* got this state if match */
86: char *ptr = me->buffer->data-1; /* We dont change the data in length */
87: char *stop = ptr+me->buffer->size; /* When to stop */
88: char *header = ptr; /* For diagnostics */
89: CONST char * check_pointer; /* checking input */
90: char *value;
91: me->transparent = YES; /* Pump rest of data right through */
92: if (!ptr) /* No header to parse */
93: return;
94: while (ptr < stop) {
95: switch (state) {
96: case BEGINNING_OF_LINE:
97: header = ++ptr;
98: switch (TOLOWER(*ptr)) {
99: case 'a':
100: check_pointer = "llow";
101: ok_state = ALLOW;
102: state = CHECK;
103: break;
104:
105: case 'c':
106: check_pointer = "ontent-";
107: ok_state = CONTENT;
108: state = CHECK;
109: break;
110:
111: case 'd':
112: state = FIRSTLETTER_D;
113: break;
114:
115: case 'e':
116: check_pointer = "xpires";
117: ok_state = EXPIRES;
118: state = CHECK;
119: break;
120:
121: case 'l':
122: state = FIRSTLETTER_L;
123: break;
124:
125: case 'm':
126: check_pointer = "ime-version";
127: ok_state = JUNK_LINE; /* We don't use this but recognize it */
128: state = CHECK;
129: break;
130:
131: case 'p':
132: break;
133:
134: case 'r':
135: check_pointer = "etry-after";
136: ok_state = RETRY_AFTER;
137: state = CHECK;
138: break;
139:
140: case 's':
141: check_pointer = "erver";
142: ok_state = JUNK_LINE; /* We don't use this but recognize it */
143: state = CHECK;
144: break;
2.1 timbl 145:
2.18 frystyk 146: case 't':
147: check_pointer = "itle";
148: ok_state = TITLE;
149: state = CHECK;
150: break;
151:
152: case 'u':
153: check_pointer = "ri";
154: ok_state = URI_HEADER;
155: state = CHECK;
156: break;
157:
158: case 'v':
159: check_pointer = "ersion";
160: ok_state = VERSION;
161: state = CHECK;
162: break;
163:
164: case 'w':
165: check_pointer = "ww-authenticate";
166: ok_state = AUTHENTICATE;
167: state = CHECK;
168: break;
2.1 timbl 169:
2.18 frystyk 170: default:
171: state = UNKNOWN;
172: break;
173: }
174: ptr++;
2.1 timbl 175: break;
176:
2.18 frystyk 177: case FIRSTLETTER_D:
178: switch (TOLOWER(*ptr)) {
179: case 'a':
180: check_pointer = "te";
181: ok_state = DATE;
182: state = CHECK;
183: break;
184:
185: case 'e':
186: check_pointer = "rived-from";
187: ok_state = DERIVED_FROM;
188: state = CHECK;
189: break;
190:
191: default:
192: state = UNKNOWN;
193: break;
194: }
195: ptr++;
196: break;
197:
198: case FIRSTLETTER_L:
199: switch (TOLOWER(*ptr)) {
200: case 'a':
201: check_pointer = "st-modified";
202: ok_state = LAST_MODIFIED;
203: state = CHECK;
204: break;
205:
206: case 'i':
207: check_pointer = "nk";
208: ok_state = LINK;
209: state = CHECK;
210: break;
211:
212: case 'o':
213: check_pointer = "cation";
214: ok_state = LOCATION;
215: state = CHECK;
216: break;
217:
218: default:
219: state = UNKNOWN;
220: break;
221: }
222: ptr++;
223: break;
224:
225: case CONTENT:
226: switch (TOLOWER(*ptr)) {
227: case 'e':
228: check_pointer = "ncoding";
229: ok_state = CONTENT_ENCODING;
230: state = CHECK;
231: break;
232:
233: case 'l':
234: state = CONTENTLETTER_L;
235: break;
236:
237: case 't':
238: state = CONTENTLETTER_T;
239: break;
240:
241: default:
242: state = UNKNOWN;
243: break;
244: }
245: ptr++;
2.1 timbl 246: break;
2.14 frystyk 247:
2.18 frystyk 248: case CONTENTLETTER_L:
249: switch (TOLOWER(*ptr)) {
250: case 'a':
251: check_pointer = "nguage";
252: ok_state = CONTENT_LANGUAGE;
253: state = CHECK;
254: break;
255:
256: case 'e':
257: check_pointer = "ngth";
258: ok_state = CONTENT_LENGTH;
259: state = CHECK;
260: break;
261:
262: default:
263: state = UNKNOWN;
264: break;
265: }
266: ptr++;
2.14 frystyk 267: break;
268:
2.18 frystyk 269: case CONTENTLETTER_T:
270: switch (TOLOWER(*ptr)) {
271: case 'r':
272: check_pointer = "ansfer-encoding";
273: ok_state = CONTENT_TRANSFER_ENCODING;
274: state = CHECK;
275: break;
276:
277: case 'y':
278: check_pointer = "pe";
279: ok_state = CONTENT_TYPE;
280: state = CHECK;
281: break;
282:
283: default:
284: state = UNKNOWN;
285: break;
286: }
287: ptr++;
2.14 frystyk 288: break;
289:
2.18 frystyk 290: case CHECK: /* Check against string */
291: while (TOLOWER(*ptr) == *(check_pointer)++) ptr++;
292: if (!*--check_pointer) {
293: state = ok_state;
294: while (*ptr && (WHITE(*ptr) || *ptr==':')) /* Spool to value */
295: ptr++;
296: } else
297: state = UNKNOWN;
2.14 frystyk 298: break;
299:
2.18 frystyk 300: case ALLOW:
2.1 timbl 301: {
2.18 frystyk 302: while ((value = HTNextField(&ptr)) != NULL) {
303: char *lc = value;
304: HTMethod new_method;
305: while ((*lc = TOUPPER(*lc))) lc++;;
306: if ((new_method = HTMethod_enum(value)) != METHOD_INVALID)
307: anchor->methods += new_method;
2.2 timbl 308: }
2.1 timbl 309: }
2.18 frystyk 310: if (STREAM_TRACE)
311: fprintf(TDEST, "MIMEParser.. Methods allowed: %d\n",
312: anchor->methods);
313: state = JUNK_LINE;
2.1 timbl 314: break;
2.18 frystyk 315:
316: case AUTHENTICATE:
317: if ((value = HTNextField(&ptr)) != NULL) {
318: StrAllocCopy(request->WWWAAScheme, value);
319: if ((value = HTNextField(&ptr)) != NULL) {
320: StrAllocCopy(request->WWWAARealm, value);
2.14 frystyk 321: }
2.1 timbl 322: }
2.18 frystyk 323: state = JUNK_LINE;
324: break;
325:
326: case CONTENT_ENCODING:
327: if ((value = HTNextField(&ptr)) != NULL) {
328: char *lc = value;
329: while ((*lc = TOLOWER(*lc))) lc++;;
330: anchor->content_encoding = HTAtom_for(value);
331: }
332: state = JUNK_LINE;
333: break;
334:
335: case CONTENT_LANGUAGE:
336: state = UNKNOWN;
337: break;
338:
339: case CONTENT_LENGTH:
340: if ((value = HTNextField(&ptr)) != NULL)
341: anchor->content_length = atol(value);
342: state = JUNK_LINE;
343: break;
344:
345: case CONTENT_TRANSFER_ENCODING:
346: if ((value = HTNextField(&ptr)) != NULL) {
347: char *lc = value;
348: while ((*lc = TOLOWER(*lc))) lc++;;
349: anchor->cte = HTAtom_for(value);
350: }
351: state = JUNK_LINE;
352: break;
353:
354: case CONTENT_TYPE:
355: if ((value = HTNextField(&ptr)) != NULL) {
356: char *lc = value;
357: while ((*lc = TOLOWER(*lc))) lc++;
358: anchor->content_type = HTAtom_for(value);
2.1 timbl 359: }
2.18 frystyk 360: state = JUNK_LINE; /* Skip charset :-( */
361: break;
362:
363: case DATE:
364: anchor->date = HTParseTime(ptr);
365: state = JUNK_LINE;
366: break;
367:
368: case DERIVED_FROM:
369: if ((value = HTNextField(&ptr)) != NULL)
370: StrAllocCopy(anchor->derived_from, value);
371: state = JUNK_LINE;
372: break;
373:
374: case EXPIRES:
375: anchor->expires = HTParseTime(ptr);
376: state = JUNK_LINE;
377: break;
378:
379: case LAST_MODIFIED:
380: anchor->last_modified = HTParseTime(ptr);
381: state = JUNK_LINE;
382: break;
383:
384: case LINK:
385: state = UNKNOWN;
386: break;
387:
388: case LOCATION:
389: if ((value = HTNextField(&ptr)) != NULL)
390: StrAllocCopy(request->redirect, value);
391: state = JUNK_LINE;
392: break;
393:
394: case PUBLIC_METHODS:
395: state = UNKNOWN;
396: break;
397:
398: case RETRY_AFTER:
2.19 ! frystyk 399: request->retry_after = HTParseTime(ptr);
! 400: state = JUNK_LINE;
2.18 frystyk 401: break;
402:
403: case TITLE: /* Can't reuse buffer as HTML version might differ */
404: if ((value = HTNextField(&ptr)) != NULL)
405: StrAllocCopy(anchor->title, value);
406: state = JUNK_LINE;
407: break;
408:
409: case URI_HEADER:
410: state = LOCATION; /* @@@ Need extended parsing */
411: break;
412:
413: case VERSION:
414: if ((value = HTNextField(&ptr)) != NULL)
415: StrAllocCopy(anchor->version, value);
416: state = JUNK_LINE;
417: break;
418:
419: case UNKNOWN:
420: if (STREAM_TRACE)
421: fprintf(TDEST,"MIMEParser.. Unknown header: `%s\'\n", header);
422: HTAnchor_addExtra(anchor, header);
423:
424: /* Fall through */
425:
426: case JUNK_LINE:
427: while (*ptr) ptr++;
428: state = BEGINNING_OF_LINE;
429: break;
2.1 timbl 430: }
2.18 frystyk 431: }
432:
433: if (STREAM_TRACE)
434: fprintf(TDEST, "MIMEParser.. Media type %s is converted to %s\n",
435: HTAtom_name(anchor->content_type),
436: HTAtom_name(me->target_format));
437: if ((me->target = HTStreamStack(anchor->content_type,
438: me->target_format, me->target,
439: me->request, YES)) == NULL) {
440: if (STREAM_TRACE)
441: fprintf(TDEST, "MIMEParser.. Can't convert media type\n");
442: me->target = HTBlackHole();
443: }
444: anchor->header_parsed = YES;
2.1 timbl 445: }
446:
447:
2.18 frystyk 448: /*
449: ** Header is terminated by CRCR, LFLF, CRLFLF, CRLFCRLF
450: ** Folding is either of CF LWS, LF LWS, CRLF LWS
451: */
452: PRIVATE int HTMIME_put_block ARGS3(HTStream *, me, CONST char *, b, int, l)
453: {
454: while (!me->transparent && l-- > 0) {
455: if (me->EOLstate == EOL_FCR) {
456: if (*b == CR) /* End of header */
457: parseheader(me, me->request, me->request->anchor);
458: else if (*b == LF) /* CRLF */
459: me->EOLstate = EOL_FLF;
460: else if (WHITE(*b)) { /* Folding: CR SP */
461: me->EOLstate = EOL_BEGIN;
462: HTChunkPutc(me->buffer, ' ');
463: } else { /* New line */
464: me->EOLstate = EOL_BEGIN;
465: HTChunkPutc(me->buffer, '\0');
466: HTChunkPutc(me->buffer, *b);
467: }
468: } else if (me->EOLstate == EOL_FLF) {
469: if (*b == CR) /* LF CR or CR LF CR */
470: me->EOLstate = EOL_SCR;
471: else if (*b == LF) /* End of header */
472: parseheader(me, me->request, me->request->anchor);
473: else if (WHITE(*b)) { /* Folding: LF SP or CR LF SP */
474: me->EOLstate = EOL_BEGIN;
475: HTChunkPutc(me->buffer, ' ');
476: } else { /* New line */
477: me->EOLstate = EOL_BEGIN;
478: HTChunkPutc(me->buffer, '\0');
479: HTChunkPutc(me->buffer, *b);
480: }
481: } else if (me->EOLstate == EOL_SCR) {
482: if (*b==CR || *b==LF) /* End of header */
483: parseheader(me, me->request, me->request->anchor);
484: else if (WHITE(*b)) { /* Folding: LF CR SP or CR LF CR SP */
485: me->EOLstate = EOL_BEGIN;
486: HTChunkPutc(me->buffer, ' ');
487: } else { /* New line */
488: me->EOLstate = EOL_BEGIN;
489: HTChunkPutc(me->buffer, '\0');
490: HTChunkPutc(me->buffer, *b);
491: }
492: } else if (*b == CR) {
493: me->EOLstate = EOL_FCR;
494: } else if (*b == LF) {
495: me->EOLstate = EOL_FLF; /* Line found */
496: } else
497: HTChunkPutc(me->buffer, *b);
498: b++;
499: }
500: if (l > 0) /* Anything left? */
501: return (*me->target->isa->put_block)(me->target, b, l);
502: return HT_OK;
503: }
504:
505:
506: /* Character handling
507: ** ------------------
508: */
509: PRIVATE int HTMIME_put_character ARGS2(HTStream *, me, CONST char, c)
510: {
511: return HTMIME_put_block(me, &c, 1);
512: }
513:
2.1 timbl 514:
515: /* String handling
516: ** ---------------
517: */
2.18 frystyk 518: PRIVATE int HTMIME_put_string ARGS2(HTStream *, me, CONST char *, s)
2.1 timbl 519: {
2.18 frystyk 520: return HTMIME_put_block(me, s, (int) strlen(s));
2.1 timbl 521: }
522:
523:
2.18 frystyk 524: /* Flush an stream object
525: ** ---------------------
2.1 timbl 526: */
2.18 frystyk 527: PRIVATE int HTMIME_flush ARGS1(HTStream *, me)
2.1 timbl 528: {
2.18 frystyk 529: return (*me->target->isa->flush)(me->target);
2.1 timbl 530: }
531:
2.18 frystyk 532: /* Free a stream object
533: ** --------------------
2.1 timbl 534: */
2.14 frystyk 535: PRIVATE int HTMIME_free ARGS1(HTStream *, me)
2.1 timbl 536: {
2.18 frystyk 537: int status = HT_OK;
538: if (me->target)
539: status = (*me->target->isa->_free)(me->target);
2.19 ! frystyk 540: HTChunkFree(me->buffer);
2.1 timbl 541: free(me);
2.18 frystyk 542: return status;
2.1 timbl 543: }
544:
545: /* End writing
546: */
2.14 frystyk 547: PRIVATE int HTMIME_abort ARGS2(HTStream *, me, HTError, e)
2.1 timbl 548: {
2.18 frystyk 549: int status = HT_ERROR;
550: if (me->target)
551: status = (*me->target->isa->abort)(me->target, e);
2.6 timbl 552: free(me);
2.18 frystyk 553: return status;
2.1 timbl 554: }
555:
556:
557:
558: /* Structured Object Class
559: ** -----------------------
560: */
2.6 timbl 561: PRIVATE CONST HTStreamClass HTMIME =
2.1 timbl 562: {
563: "MIMEParser",
2.18 frystyk 564: HTMIME_flush,
2.1 timbl 565: HTMIME_free,
2.6 timbl 566: HTMIME_abort,
567: HTMIME_put_character,
568: HTMIME_put_string,
2.18 frystyk 569: HTMIME_put_block
2.1 timbl 570: };
571:
572:
573: /* Subclass-specific Methods
574: ** -------------------------
575: */
2.7 timbl 576: PUBLIC HTStream* HTMIMEConvert ARGS5(
577: HTRequest *, request,
578: void *, param,
579: HTFormat, input_format,
580: HTFormat, output_format,
581: HTStream *, output_stream)
2.1 timbl 582: {
583: HTStream* me;
2.18 frystyk 584: if ((me=(HTStream *) calloc(1, sizeof(* me))) == NULL)
585: outofmem(__FILE__, "HTMIMEConvert");
2.1 timbl 586: me->isa = &HTMIME;
2.18 frystyk 587: me->request = request;
588: me->target = output_stream;
589: me->target_format = output_format;
590: me->buffer = HTChunkCreate(512);
591: me->EOLstate = EOL_BEGIN;
2.1 timbl 592: return me;
593: }
Webmaster