Annotation of libwww/Library/src/HTMIME.c, revision 2.15
2.15 ! frystyk 1: /* HTMIME.c
! 2: ** MIME MESSAGE PARSE
! 3: **
! 4: ** (c) COPYRIGHT CERN 1994.
! 5: ** Please first read the full copyright statement in the file COPYRIGH.
2.1 timbl 6: **
7: ** This is RFC 1341-specific code.
8: ** The input stream pushed into this parser is assumed to be
9: ** stripped of CRs, i.e. lines end with LF, not CR LF.
10: ** (It is easy to change this except for the body part where
11: ** conversion can be slow.)
12: **
13: ** History:
14: ** Feb 92 Written Tim Berners-Lee, CERN
2.13 duns 15: ** 8 Jul 94 FM Insulate free() from _free structure element.
2.1 timbl 16: **
17: */
2.9 luotonen 18: #include "HTFormat.h"
2.1 timbl 19: #include "HTAlert.h"
2.14 frystyk 20: #include "HTFWriter.h"
21: #include "HTMIME.h" /* Implemented here */
2.1 timbl 22:
2.14 frystyk 23: #define VALUE_SIZE 128 /* @@@@@@@ Arbitrary? */
2.1 timbl 24:
25: /* MIME Object
26: ** -----------
27: */
28:
/* States of the header-parsing finite state machine driven by
** HTMIME_put_character(). The parser is in exactly one of these at a time;
** some of them double as "which field was recognised" markers stored in the
** stream's `field` member.
*/
29: typedef enum _MIME_state {
2.14 frystyk 30: MIME_TRANSPARENT, /* header finished: pass every character straight to target */
31: BEGINNING_OF_LINE, /* first character of a header line selects the candidate field */
32: CONTENT_T, /* "content-t" matched; next char picks transfer-encoding or type */
33: CONTENT_TRANSFER_ENCODING, /* "content-transfer-encoding:" matched */
34: CONTENT_TYPE, /* "content-type:" matched */
35: AA, /* "www-" matched; next char picks authenticate or protection-template */
36: AUTHENTICATE, /* "www-authenticate:" matched */
37: PROTECTION, /* "www-protection-template:" matched */
38: LOCATION, /* "location:" or "uri:" matched */
39: SKIP_GET_VALUE, /* Skip space then get value */
40: GET_VALUE, /* Get value till white space */
41: JUNK_LINE, /* Ignore the rest of this (possibly folded) line */
42: NEWLINE, /* Just found a LF .. maybe continuation */
43: CHECK /* compare input against check_pointer, then go to if_ok */
2.1 timbl 44: } MIME_state;
45:
/* Private state of one MIME header parser stream. Instances are allocated
** zero-filled by HTMIMEConvert() and released by HTMIME_free()/HTMIME_abort().
*/
46: struct _HTStream {
47: CONST HTStreamClass * isa; /* method table; set to &HTMIME on creation */
48:
2.6 timbl 49: BOOL net_ascii; /* Is input net ascii? If so CR is dropped and LF becomes '\n' */
2.1 timbl 50: MIME_state state; /* current state */
51: MIME_state if_ok; /* state to enter when a CHECK comparison succeeds */
52: MIME_state field; /* which header field the value being read belongs to */
53: MIME_state fold_state; /* state to resume if the next line is a continuation */
54: CONST char * check_pointer; /* next expected (lower-case) character while in CHECK */
55:
56: char * value_pointer; /* write position inside value[] */
57: char value[VALUE_SIZE]; /* NUL-terminated text of the field value being collected */
2.14 frystyk 58: int value_num; /* What token are we reading; reset at each new header line */
2.1 timbl 59:
60: HTStream * sink; /* Given on creation */
2.7 timbl 61: HTRequest * request; /* Given on creation; receives redirect and WWW-AA values */
2.1 timbl 62:
63: char * boundary; /* For multipart (not referenced by the code visible here) */
64:
65: HTFormat encoding; /* Content-Transfer-Encoding */
66: HTFormat format; /* Content-Type; starts out as WWW_PLAINTEXT */
67: HTStream * target; /* While writing out: stream that receives the body */
68: HTAtom * targetRep; /* Converting into? (the requested output format) */
69: };
70:
71:
72: /*_________________________________________________________________________
73: **
74: ** A C T I O N R O U T I N E S
75: */
76:
77: /* Character handling
78: ** ------------------
79: **
80: ** This is a FSM parser which is tolerant as it can be of all
81: ** syntax errors. It ignores field names it does not understand,
82: ** and resynchronises on line beginnings.
83: */
84:
85: PRIVATE void HTMIME_put_character ARGS2(HTStream *, me, char, c)
86: {
2.6 timbl 87: /* This slightly simple conversion just strips CR and turns LF to
88: ** newline. On unix LF is \n but on Mac \n is CR for example.
89: ** See NetToText for an implementation which preserves single CR or LF.
90: */
91: if (me->net_ascii) {
92: c = FROMASCII(c);
93: if (c == CR) return;
94: else if (c == LF) c = '\n';
95: }
96:
2.1 timbl 97: switch(me->state) {
98:
2.14 frystyk 99: case MIME_TRANSPARENT:
100: (*me->target->isa->put_character)(me->target, c);
101: break;
2.1 timbl 102:
103: case NEWLINE:
104: if (c != '\n' && WHITE(c)) { /* Folded line */
105: me->state = me->fold_state; /* pop state before newline */
106: break;
107: }
2.14 frystyk 108: me->value_num = 0;
2.1 timbl 109:
110: /* else Falls through */
111:
112: case BEGINNING_OF_LINE:
113: switch(c) {
2.14 frystyk 114: case 'c':
115: case 'C':
2.1 timbl 116: me->check_pointer = "ontent-t";
117: me->if_ok = CONTENT_T;
118: me->state = CHECK;
119: break;
2.14 frystyk 120:
121: case 'l':
122: case 'L':
123: me->check_pointer = "ocation:";
124: me->if_ok = LOCATION;
125: me->state = CHECK;
126: break;
127:
128: case 'u':
129: case 'U':
130: me->check_pointer = "ri:";
131: me->if_ok = LOCATION;
132: me->state = CHECK;
133: break;
134:
135: case 'w':
136: case 'W':
137: me->check_pointer = "ww-";
138: me->if_ok = AA;
139: me->state = CHECK;
140: break;
141:
142: case '\n': /* Blank line: End of Header! */
2.1 timbl 143: {
144: if (TRACE) fprintf(stderr,
2.2 timbl 145: "HTMIME: MIME content type is %s, converting to %s\n",
146: HTAtom_name(me->format), HTAtom_name(me->targetRep));
2.14 frystyk 147: me->target = HTStreamStack(me->format, me->targetRep,
148: me->sink, me->request, NO);
2.2 timbl 149: if (me->target) {
2.14 frystyk 150: me->state = MIME_TRANSPARENT;
2.2 timbl 151: } else {
2.14 frystyk 152: if (TRACE)
153: fprintf(stderr, "MIMEParser.. Can't convert to output format\n");
154: me->target = me->sink; /* Cheat */
2.2 timbl 155: }
2.1 timbl 156: }
157: break;
158:
159: default:
160: goto bad_field_name;
161: break;
162:
163: } /* switch on character */
164: break;
165:
166: case CHECK: /* Check against string */
167: if (TOLOWER(c) == *(me->check_pointer)++) {
168: if (!*me->check_pointer) me->state = me->if_ok;
169: } else { /* Error */
170: if (TRACE) fprintf(stderr,
2.5 timbl 171: "HTMIME: Bad character `%c' found where `%s' expected\n",
2.1 timbl 172: c, me->check_pointer - 1);
173: goto bad_field_name;
174: }
175: break;
176:
177: case CONTENT_T:
178: switch(c) {
179: case 'r':
180: case 'R':
181: me->check_pointer = "ansfer-encoding:";
182: me->if_ok = CONTENT_TRANSFER_ENCODING;
183: me->state = CHECK;
184: break;
185:
186: case 'y':
187: case 'Y':
188: me->check_pointer = "pe:";
189: me->if_ok = CONTENT_TYPE;
190: me->state = CHECK;
191: break;
192:
193: default:
194: goto bad_field_name;
195:
196: } /* switch on character */
197: break;
2.14 frystyk 198:
199: case AA:
200: switch(c) {
201: case 'a':
202: case 'A':
203: me->check_pointer = "uthenticate:";
204: me->if_ok = AUTHENTICATE;
205: me->state = CHECK;
206: break;
207:
208: case 'p':
209: case 'P':
210: me->check_pointer = "rotection-template:";
211: me->if_ok = PROTECTION;
212: me->state = CHECK;
213: break;
214:
215: default:
216: goto bad_field_name;
217: }
218: break;
219:
220: case AUTHENTICATE:
221: me->field = me->state; /* remember it */
222: me->value_pointer = me->value;
223: me->state = GET_VALUE;
224: break;
225:
2.1 timbl 226: case CONTENT_TYPE:
227: case CONTENT_TRANSFER_ENCODING:
2.14 frystyk 228: case LOCATION:
229: case PROTECTION:
2.1 timbl 230: me->field = me->state; /* remember it */
231: me->state = SKIP_GET_VALUE;
2.14 frystyk 232:
2.1 timbl 233: /* Fall through! */
234: case SKIP_GET_VALUE:
235: if (c == '\n') {
236: me->fold_state = me->state;
237: me->state = NEWLINE;
238: break;
239: }
240: if (WHITE(c)) break; /* Skip white space */
241: me->value_pointer = me->value;
242: me->state = GET_VALUE;
243: /* Fall through to store first character */
244:
245: case GET_VALUE:
246: if (WHITE(c)) { /* End of field */
247: *me->value_pointer = 0;
2.14 frystyk 248: me->value_num++;
249: if (!*me->value) /* Ignore empty field */
250: break;
2.1 timbl 251: switch (me->field) {
252: case CONTENT_TYPE:
253: me->format = HTAtom_for(me->value);
254: break;
255: case CONTENT_TRANSFER_ENCODING:
256: me->encoding = HTAtom_for(me->value);
257: break;
2.14 frystyk 258: case LOCATION:
259: StrAllocCopy(me->request->redirect, me->value);
260: break;
261: case AUTHENTICATE:
262: if (me->value_num == 1) {
263: StrAllocCopy(me->request->WWWAAScheme, me->value);
264: me->value_pointer = me->value;
265: } else if (me->value_num == 2) {
266: StrAllocCopy(me->request->WWWAARealm, me->value);
267: }
268: break;
269: case PROTECTION:
270: StrAllocCopy(me->request->WWWprotection, me->value);
271: break;
2.1 timbl 272: default: /* Should never get here */
273: break;
274: }
275: } else {
276: if (me->value_pointer < me->value + VALUE_SIZE - 1) {
277: *me->value_pointer++ = c;
278: break;
279: } else {
280: goto value_too_long;
281: }
282: }
283: /* Fall through */
284:
285: case JUNK_LINE:
286: if (c == '\n') {
287: me->state = NEWLINE;
288: me->fold_state = me->state;
289: }
290: break;
291:
292:
293: } /* switch on state*/
294:
295: return;
296:
297: value_too_long:
298: if (TRACE) fprintf(stderr,
299: "HTMIME: *** Syntax error. (string too long)\n");
300:
301: bad_field_name: /* Ignore it */
302: me->state = JUNK_LINE;
303: return;
304:
305: }
306:
307:
308:
309: /* String handling
310: ** ---------------
311: **
312: ** Strings must be smaller than this buffer size.
313: */
314: PRIVATE void HTMIME_put_string ARGS2(HTStream *, me, CONST char*, s)
315: {
2.14 frystyk 316: while (me->state != MIME_TRANSPARENT && *s)
317: HTMIME_put_character(me, *s++);
318: if (*s)
319: (*me->target->isa->put_string)(me->target, s);
2.1 timbl 320: }
321:
322:
323: /* Buffer write. Buffers can (and should!) be big.
324: ** ------------
325: */
2.14 frystyk 326: PRIVATE void HTMIME_write ARGS3(HTStream *, me, CONST char *, b, int, l)
2.1 timbl 327: {
2.14 frystyk 328: while (me->state != MIME_TRANSPARENT && l-- > 0)
329: HTMIME_put_character(me, *b++);
330: if (l > 0)
331: (*me->target->isa->put_block)(me->target, b, l);
2.1 timbl 332: }
333:
334:
335: /* Free an HTML object
336: ** -------------------
337: **
338: */
2.14 frystyk 339: PRIVATE int HTMIME_free ARGS1(HTStream *, me)
2.1 timbl 340: {
2.14 frystyk 341: if (me->target) (*me->target->isa->_free)(me->target);
2.1 timbl 342: free(me);
2.14 frystyk 343: return 0;
2.1 timbl 344: }
345:
346: /* End writing
347: */
348:
2.14 frystyk 349: PRIVATE int HTMIME_abort ARGS2(HTStream *, me, HTError, e)
2.1 timbl 350: {
2.14 frystyk 351: if (me->target) (*me->target->isa->abort)(me->target, e);
2.6 timbl 352: free(me);
2.14 frystyk 353: return EOF;
2.1 timbl 354: }
355:
356:
357:
358: /* Structured Object Class
359: ** -----------------------
**
** Method table shared by every MIME parser stream; HTMIMEConvert() stores
** its address in the stream's `isa` member. The initializer is positional,
** so the entries must stay in the order HTStreamClass declares its fields.
360: */
2.6 timbl 361: PRIVATE CONST HTStreamClass HTMIME =
2.1 timbl 362: {
363: "MIMEParser", /* class name */
364: HTMIME_free, /* release the parser and its target */
2.6 timbl 365: HTMIME_abort, /* abort the target, then release the parser */
366: HTMIME_put_character, /* one character through the header FSM */
367: HTMIME_put_string, /* NUL-terminated string */
2.1 timbl 368: HTMIME_write /* counted block; presumably the put_block slot */
369: };
370:
371:
372: /* Subclass-specific Methods
373: ** -------------------------
374: */
375:
2.7 timbl 376: PUBLIC HTStream* HTMIMEConvert ARGS5(
377: HTRequest *, request,
378: void *, param,
379: HTFormat, input_format,
380: HTFormat, output_format,
381: HTStream *, output_stream)
2.1 timbl 382: {
383: HTStream* me;
384:
2.12 frystyk 385: me = (HTStream*)calloc(1, sizeof(*me));
2.14 frystyk 386: if (me == NULL) outofmem(__FILE__, "HTMIMEConvert");
2.1 timbl 387: me->isa = &HTMIME;
388:
2.7 timbl 389: me->sink = output_stream;
390: me->request = request;
2.6 timbl 391: me->state = BEGINNING_OF_LINE;
392: me->format = WWW_PLAINTEXT;
2.7 timbl 393: me->targetRep = output_format;
2.6 timbl 394: return me;
395: }
396:
2.7 timbl 397: PUBLIC HTStream* HTNetMIME ARGS5(
398: HTRequest *, request,
399: void *, param,
400: HTFormat, input_format,
401: HTFormat, output_format,
402: HTStream *, output_stream)
2.6 timbl 403: {
2.7 timbl 404: HTStream* me = HTMIMEConvert(
405: request, param, input_format, output_format, output_stream);
2.6 timbl 406: if (!me) return NULL;
407:
408: me->net_ascii = YES;
2.1 timbl 409: return me;
410: }
411:
412:
Webmaster