Annotation of libwww/Library/src/HTMIME.c, revision 2.16
2.15 frystyk 1: /* HTMIME.c
2: ** MIME MESSAGE PARSE
3: **
4: ** (c) COPYRIGHT CERN 1994.
5: ** Please first read the full copyright statement in the file COPYRIGH.
2.1 timbl 6: **
7: ** This is RFC 1341-specific code.
8: ** The input stream pushed into this parser is assumed to be
9: ** stripped of CRs, i.e. lines end with LF, not CR LF.
10: ** (It is easy to change this except for the body part where
11: ** conversion can be slow.)
12: **
13: ** History:
14: ** Feb 92 Written Tim Berners-Lee, CERN
2.13 duns 15: ** 8 Jul 94 FM Insulate free() from _free structure element.
2.1 timbl 16: **
17: */
2.16 ! roeber 18: #include "sysdep.h"
2.9 luotonen 19: #include "HTFormat.h"
2.1 timbl 20: #include "HTAlert.h"
2.14 frystyk 21: #include "HTFWriter.h"
22: #include "HTMIME.h" /* Implemented here */
2.1 timbl 23:
2.14 frystyk 24: #define VALUE_SIZE 128 /* @@@@@@@ Arbitrary? */
2.1 timbl 25:
26: /* MIME Object
27: ** -----------
28: */
29:
/*
**	States of the MIME header parser. The order of the enumerators
**	is kept as it always was, so their numeric values do not change.
*/
enum _MIME_state {
    MIME_TRANSPARENT,		/* Header done: pass input straight to target */
    BEGINNING_OF_LINE,		/* Expecting the first letter of a field name */
    CONTENT_T,			/* Matched "content-t", type or transfer-encoding? */
    CONTENT_TRANSFER_ENCODING,	/* Matched "content-transfer-encoding:" */
    CONTENT_TYPE,		/* Matched "content-type:" */
    AA,				/* Matched "www-", authenticate or protection? */
    AUTHENTICATE,		/* Matched "www-authenticate:" */
    PROTECTION,			/* Matched "www-protection-template:" */
    LOCATION,			/* Matched "location:" or "uri:" */
    SKIP_GET_VALUE,		/* Skip white space, then collect a value */
    GET_VALUE,			/* Collect a value up to the next white space */
    JUNK_LINE,			/* Ignore the rest of this folded line */
    NEWLINE,			/* Just saw a LF, a continuation may follow */
    CHECK			/* Compare the input against check_pointer */
};

typedef enum _MIME_state MIME_state;
46:
47: struct _HTStream {
48: CONST HTStreamClass * isa;
49:
2.6 timbl 50: BOOL net_ascii; /* Is input net ascii? */
2.1 timbl 51: MIME_state state; /* current state */
52: MIME_state if_ok; /* got this state if match */
53: MIME_state field; /* remember which field */
54: MIME_state fold_state; /* state on a fold */
55: CONST char * check_pointer; /* checking input */
56:
57: char * value_pointer; /* storing values */
58: char value[VALUE_SIZE];
2.14 frystyk 59: int value_num; /* What token are we reading */
2.1 timbl 60:
61: HTStream * sink; /* Given on creation */
2.7 timbl 62: HTRequest * request; /* Given on creation */
2.1 timbl 63:
64: char * boundary; /* For multipart */
65:
66: HTFormat encoding; /* Content-Transfer-Encoding */
67: HTFormat format; /* Content-Type */
68: HTStream * target; /* While writing out */
69: HTAtom * targetRep; /* Converting into? */
70: };
71:
72:
73: /*_________________________________________________________________________
74: **
75: ** A C T I O N R O U T I N E S
76: */
77:
78: /* Character handling
79: ** ------------------
80: **
81: ** This is a FSM parser which is tolerant as it can be of all
82: ** syntax errors. It ignores field names it does not understand,
83: ** and resynchronises on line beginnings.
84: */
85:
86: PRIVATE void HTMIME_put_character ARGS2(HTStream *, me, char, c)
87: {
2.6 timbl 88: /* This slightly simple conversion just strips CR and turns LF to
89: ** newline. On unix LF is \n but on Mac \n is CR for example.
90: ** See NetToText for an implementation which preserves single CR or LF.
91: */
92: if (me->net_ascii) {
93: c = FROMASCII(c);
94: if (c == CR) return;
95: else if (c == LF) c = '\n';
96: }
97:
2.1 timbl 98: switch(me->state) {
99:
2.14 frystyk 100: case MIME_TRANSPARENT:
101: (*me->target->isa->put_character)(me->target, c);
102: break;
2.1 timbl 103:
104: case NEWLINE:
105: if (c != '\n' && WHITE(c)) { /* Folded line */
106: me->state = me->fold_state; /* pop state before newline */
107: break;
108: }
2.14 frystyk 109: me->value_num = 0;
2.1 timbl 110:
111: /* else Falls through */
112:
113: case BEGINNING_OF_LINE:
114: switch(c) {
2.14 frystyk 115: case 'c':
116: case 'C':
2.1 timbl 117: me->check_pointer = "ontent-t";
118: me->if_ok = CONTENT_T;
119: me->state = CHECK;
120: break;
2.14 frystyk 121:
122: case 'l':
123: case 'L':
124: me->check_pointer = "ocation:";
125: me->if_ok = LOCATION;
126: me->state = CHECK;
127: break;
128:
129: case 'u':
130: case 'U':
131: me->check_pointer = "ri:";
132: me->if_ok = LOCATION;
133: me->state = CHECK;
134: break;
135:
136: case 'w':
137: case 'W':
138: me->check_pointer = "ww-";
139: me->if_ok = AA;
140: me->state = CHECK;
141: break;
142:
143: case '\n': /* Blank line: End of Header! */
2.1 timbl 144: {
145: if (TRACE) fprintf(stderr,
2.2 timbl 146: "HTMIME: MIME content type is %s, converting to %s\n",
147: HTAtom_name(me->format), HTAtom_name(me->targetRep));
2.14 frystyk 148: me->target = HTStreamStack(me->format, me->targetRep,
149: me->sink, me->request, NO);
2.2 timbl 150: if (me->target) {
2.14 frystyk 151: me->state = MIME_TRANSPARENT;
2.2 timbl 152: } else {
2.14 frystyk 153: if (TRACE)
154: fprintf(stderr, "MIMEParser.. Can't convert to output format\n");
155: me->target = me->sink; /* Cheat */
2.2 timbl 156: }
2.1 timbl 157: }
158: break;
159:
160: default:
161: goto bad_field_name;
162: break;
163:
164: } /* switch on character */
165: break;
166:
167: case CHECK: /* Check against string */
168: if (TOLOWER(c) == *(me->check_pointer)++) {
169: if (!*me->check_pointer) me->state = me->if_ok;
170: } else { /* Error */
171: if (TRACE) fprintf(stderr,
2.5 timbl 172: "HTMIME: Bad character `%c' found where `%s' expected\n",
2.1 timbl 173: c, me->check_pointer - 1);
174: goto bad_field_name;
175: }
176: break;
177:
178: case CONTENT_T:
179: switch(c) {
180: case 'r':
181: case 'R':
182: me->check_pointer = "ansfer-encoding:";
183: me->if_ok = CONTENT_TRANSFER_ENCODING;
184: me->state = CHECK;
185: break;
186:
187: case 'y':
188: case 'Y':
189: me->check_pointer = "pe:";
190: me->if_ok = CONTENT_TYPE;
191: me->state = CHECK;
192: break;
193:
194: default:
195: goto bad_field_name;
196:
197: } /* switch on character */
198: break;
2.14 frystyk 199:
200: case AA:
201: switch(c) {
202: case 'a':
203: case 'A':
204: me->check_pointer = "uthenticate:";
205: me->if_ok = AUTHENTICATE;
206: me->state = CHECK;
207: break;
208:
209: case 'p':
210: case 'P':
211: me->check_pointer = "rotection-template:";
212: me->if_ok = PROTECTION;
213: me->state = CHECK;
214: break;
215:
216: default:
217: goto bad_field_name;
218: }
219: break;
220:
221: case AUTHENTICATE:
222: me->field = me->state; /* remember it */
223: me->value_pointer = me->value;
224: me->state = GET_VALUE;
225: break;
226:
2.1 timbl 227: case CONTENT_TYPE:
228: case CONTENT_TRANSFER_ENCODING:
2.14 frystyk 229: case LOCATION:
230: case PROTECTION:
2.1 timbl 231: me->field = me->state; /* remember it */
232: me->state = SKIP_GET_VALUE;
2.14 frystyk 233:
2.1 timbl 234: /* Fall through! */
235: case SKIP_GET_VALUE:
236: if (c == '\n') {
237: me->fold_state = me->state;
238: me->state = NEWLINE;
239: break;
240: }
241: if (WHITE(c)) break; /* Skip white space */
242: me->value_pointer = me->value;
243: me->state = GET_VALUE;
244: /* Fall through to store first character */
245:
246: case GET_VALUE:
247: if (WHITE(c)) { /* End of field */
248: *me->value_pointer = 0;
2.14 frystyk 249: me->value_num++;
250: if (!*me->value) /* Ignore empty field */
251: break;
2.1 timbl 252: switch (me->field) {
253: case CONTENT_TYPE:
254: me->format = HTAtom_for(me->value);
255: break;
256: case CONTENT_TRANSFER_ENCODING:
257: me->encoding = HTAtom_for(me->value);
258: break;
2.14 frystyk 259: case LOCATION:
260: StrAllocCopy(me->request->redirect, me->value);
261: break;
262: case AUTHENTICATE:
263: if (me->value_num == 1) {
264: StrAllocCopy(me->request->WWWAAScheme, me->value);
265: me->value_pointer = me->value;
266: } else if (me->value_num == 2) {
267: StrAllocCopy(me->request->WWWAARealm, me->value);
268: }
269: break;
270: case PROTECTION:
271: StrAllocCopy(me->request->WWWprotection, me->value);
272: break;
2.1 timbl 273: default: /* Should never get here */
274: break;
275: }
276: } else {
277: if (me->value_pointer < me->value + VALUE_SIZE - 1) {
278: *me->value_pointer++ = c;
279: break;
280: } else {
281: goto value_too_long;
282: }
283: }
284: /* Fall through */
285:
286: case JUNK_LINE:
287: if (c == '\n') {
288: me->state = NEWLINE;
289: me->fold_state = me->state;
290: }
291: break;
292:
293:
294: } /* switch on state*/
295:
296: return;
297:
298: value_too_long:
299: if (TRACE) fprintf(stderr,
300: "HTMIME: *** Syntax error. (string too long)\n");
301:
302: bad_field_name: /* Ignore it */
303: me->state = JUNK_LINE;
304: return;
305:
306: }
307:
308:
309:
310: /* String handling
311: ** ---------------
312: **
313: ** Strings must be smaller than this buffer size.
314: */
315: PRIVATE void HTMIME_put_string ARGS2(HTStream *, me, CONST char*, s)
316: {
2.14 frystyk 317: while (me->state != MIME_TRANSPARENT && *s)
318: HTMIME_put_character(me, *s++);
319: if (*s)
320: (*me->target->isa->put_string)(me->target, s);
2.1 timbl 321: }
322:
323:
324: /* Buffer write. Buffers can (and should!) be big.
325: ** ------------
326: */
2.14 frystyk 327: PRIVATE void HTMIME_write ARGS3(HTStream *, me, CONST char *, b, int, l)
2.1 timbl 328: {
2.14 frystyk 329: while (me->state != MIME_TRANSPARENT && l-- > 0)
330: HTMIME_put_character(me, *b++);
331: if (l > 0)
332: (*me->target->isa->put_block)(me->target, b, l);
2.1 timbl 333: }
334:
335:
336: /* Free an HTML object
337: ** -------------------
338: **
339: */
2.14 frystyk 340: PRIVATE int HTMIME_free ARGS1(HTStream *, me)
2.1 timbl 341: {
2.14 frystyk 342: if (me->target) (*me->target->isa->_free)(me->target);
2.1 timbl 343: free(me);
2.14 frystyk 344: return 0;
2.1 timbl 345: }
346:
347: /* End writing
348: */
349:
2.14 frystyk 350: PRIVATE int HTMIME_abort ARGS2(HTStream *, me, HTError, e)
2.1 timbl 351: {
2.14 frystyk 352: if (me->target) (*me->target->isa->abort)(me->target, e);
2.6 timbl 353: free(me);
2.14 frystyk 354: return EOF;
2.1 timbl 355: }
356:
357:
358:
359: /* Structured Object Class
360: ** -----------------------
361: */
2.6 timbl 362: PRIVATE CONST HTStreamClass HTMIME =
2.1 timbl 363: {
364: "MIMEParser",
365: HTMIME_free,
2.6 timbl 366: HTMIME_abort,
367: HTMIME_put_character,
368: HTMIME_put_string,
2.1 timbl 369: HTMIME_write
370: };
371:
372:
373: /* Subclass-specific Methods
374: ** -------------------------
375: */
376:
2.7 timbl 377: PUBLIC HTStream* HTMIMEConvert ARGS5(
378: HTRequest *, request,
379: void *, param,
380: HTFormat, input_format,
381: HTFormat, output_format,
382: HTStream *, output_stream)
2.1 timbl 383: {
384: HTStream* me;
385:
2.12 frystyk 386: me = (HTStream*)calloc(1, sizeof(*me));
2.14 frystyk 387: if (me == NULL) outofmem(__FILE__, "HTMIMEConvert");
2.1 timbl 388: me->isa = &HTMIME;
389:
2.7 timbl 390: me->sink = output_stream;
391: me->request = request;
2.6 timbl 392: me->state = BEGINNING_OF_LINE;
393: me->format = WWW_PLAINTEXT;
2.7 timbl 394: me->targetRep = output_format;
2.6 timbl 395: return me;
396: }
397:
2.7 timbl 398: PUBLIC HTStream* HTNetMIME ARGS5(
399: HTRequest *, request,
400: void *, param,
401: HTFormat, input_format,
402: HTFormat, output_format,
403: HTStream *, output_stream)
2.6 timbl 404: {
2.7 timbl 405: HTStream* me = HTMIMEConvert(
406: request, param, input_format, output_format, output_stream);
2.6 timbl 407: if (!me) return NULL;
408:
409: me->net_ascii = YES;
2.1 timbl 410: return me;
411: }
412:
413:
Webmaster