Annotation of libwww/Library/src/HTMIME.c, revision 2.15.2.1
2.15 frystyk 1: /* HTMIME.c
2: ** MIME MESSAGE PARSE
3: **
4: ** (c) COPYRIGHT CERN 1994.
5: ** Please first read the full copyright statement in the file COPYRIGH.
2.1 timbl 6: **
7: ** This is RFC 1341-specific code.
8: ** The input stream pushed into this parser is assumed to be
9: ** stripped of CRs, i.e. lines end with LF, not CR LF.
10: ** (It is easy to change this except for the body part where
11: ** conversion can be slow.)
12: **
13: ** History:
14: ** Feb 92 Written Tim Berners-Lee, CERN
2.13 duns 15: ** 8 Jul 94 FM Insulate free() from _free structure element.
2.1 timbl 16: **
17: */
2.15.2.1! frystyk 18:
! 19: /* Library include files */
! 20: #include "tcp.h"
! 21: #include "HTUtils.h"
! 22: #include "HTString.h"
2.9 luotonen 23: #include "HTFormat.h"
2.1 timbl 24: #include "HTAlert.h"
2.15.2.1! frystyk 25: #include "HTFWrite.h"
2.14 frystyk 26: #include "HTMIME.h" /* Implemented here */
2.1 timbl 27:
/* Capacity in bytes of the header value buffer, terminating NUL included.
** The figure itself is arbitrary. */
#define VALUE_SIZE	128
2.1 timbl 29:
30: /* MIME Object
31: ** -----------
32: */
33:
/*
**	States of the header parser.  The numeric order is kept as it was;
**	only the per-state descriptions are new.
*/
typedef enum _MIME_state {
    MIME_TRANSPARENT = 0,	/* Header finished: hand input straight to
				   the target stream */
    BEGINNING_OF_LINE,		/* Expecting the first letter of a field */
    CONTENT_T,			/* Matched "content-t"; next letter picks
				   -type or -transfer-encoding */
    CONTENT_TRANSFER_ENCODING,	/* Matched "content-transfer-encoding:" */
    CONTENT_TYPE,		/* Matched "content-type:" */
    AA,				/* Matched "www-"; next letter picks
				   authenticate or protection-template */
    AUTHENTICATE,		/* Matched "www-authenticate:" */
    PROTECTION,			/* Matched "www-protection-template:" */
    LOCATION,			/* Matched "location:" or "uri:" */
    SKIP_GET_VALUE,		/* Skip white space, then collect a value */
    GET_VALUE,			/* Collect a value up to white space */
    JUNK_LINE,			/* Discard the remainder of this line */
    NEWLINE,			/* Just saw a line end; the next character
				   tells whether the line is continued */
    CHECK			/* Comparing input against check_pointer */
} MIME_state;
50:
51: struct _HTStream {
52: CONST HTStreamClass * isa;
53:
2.6 timbl 54: BOOL net_ascii; /* Is input net ascii? */
2.1 timbl 55: MIME_state state; /* current state */
56: MIME_state if_ok; /* got this state if match */
57: MIME_state field; /* remember which field */
58: MIME_state fold_state; /* state on a fold */
59: CONST char * check_pointer; /* checking input */
60:
61: char * value_pointer; /* storing values */
62: char value[VALUE_SIZE];
2.14 frystyk 63: int value_num; /* What token are we reading */
2.1 timbl 64:
65: HTStream * sink; /* Given on creation */
2.7 timbl 66: HTRequest * request; /* Given on creation */
2.1 timbl 67:
68: char * boundary; /* For multipart */
69:
70: HTFormat encoding; /* Content-Transfer-Encoding */
71: HTFormat format; /* Content-Type */
72: HTStream * target; /* While writing out */
73: HTAtom * targetRep; /* Converting into? */
74: };
75:
76:
77: /*_________________________________________________________________________
78: **
79: ** A C T I O N R O U T I N E S
80: */
81:
82: /* Character handling
83: ** ------------------
84: **
85: ** This is a FSM parser which is tolerant as it can be of all
86: ** syntax errors. It ignores field names it does not understand,
87: ** and resynchronises on line beginnings.
88: */
89:
90: PRIVATE void HTMIME_put_character ARGS2(HTStream *, me, char, c)
91: {
2.6 timbl 92: /* This slightly simple conversion just strips CR and turns LF to
93: ** newline. On unix LF is \n but on Mac \n is CR for example.
94: ** See NetToText for an implementation which preserves single CR or LF.
95: */
96: if (me->net_ascii) {
97: c = FROMASCII(c);
98: if (c == CR) return;
99: else if (c == LF) c = '\n';
100: }
101:
2.1 timbl 102: switch(me->state) {
103:
2.14 frystyk 104: case MIME_TRANSPARENT:
105: (*me->target->isa->put_character)(me->target, c);
106: break;
2.1 timbl 107:
108: case NEWLINE:
109: if (c != '\n' && WHITE(c)) { /* Folded line */
110: me->state = me->fold_state; /* pop state before newline */
111: break;
112: }
2.14 frystyk 113: me->value_num = 0;
2.1 timbl 114:
115: /* else Falls through */
116:
117: case BEGINNING_OF_LINE:
118: switch(c) {
2.14 frystyk 119: case 'c':
120: case 'C':
2.1 timbl 121: me->check_pointer = "ontent-t";
122: me->if_ok = CONTENT_T;
123: me->state = CHECK;
124: break;
2.14 frystyk 125:
126: case 'l':
127: case 'L':
128: me->check_pointer = "ocation:";
129: me->if_ok = LOCATION;
130: me->state = CHECK;
131: break;
132:
133: case 'u':
134: case 'U':
135: me->check_pointer = "ri:";
136: me->if_ok = LOCATION;
137: me->state = CHECK;
138: break;
139:
140: case 'w':
141: case 'W':
142: me->check_pointer = "ww-";
143: me->if_ok = AA;
144: me->state = CHECK;
145: break;
146:
147: case '\n': /* Blank line: End of Header! */
2.1 timbl 148: {
2.15.2.1! frystyk 149: if (TRACE) fprintf(TDEST,
2.2 timbl 150: "HTMIME: MIME content type is %s, converting to %s\n",
151: HTAtom_name(me->format), HTAtom_name(me->targetRep));
2.14 frystyk 152: me->target = HTStreamStack(me->format, me->targetRep,
153: me->sink, me->request, NO);
2.2 timbl 154: if (me->target) {
2.14 frystyk 155: me->state = MIME_TRANSPARENT;
2.2 timbl 156: } else {
2.14 frystyk 157: if (TRACE)
2.15.2.1! frystyk 158: fprintf(TDEST, "MIMEParser.. Can't convert to output format\n");
2.14 frystyk 159: me->target = me->sink; /* Cheat */
2.2 timbl 160: }
2.1 timbl 161: }
162: break;
163:
164: default:
165: goto bad_field_name;
166: break;
167:
168: } /* switch on character */
169: break;
170:
171: case CHECK: /* Check against string */
172: if (TOLOWER(c) == *(me->check_pointer)++) {
173: if (!*me->check_pointer) me->state = me->if_ok;
174: } else { /* Error */
2.15.2.1! frystyk 175: if (TRACE) fprintf(TDEST,
2.5 timbl 176: "HTMIME: Bad character `%c' found where `%s' expected\n",
2.1 timbl 177: c, me->check_pointer - 1);
178: goto bad_field_name;
179: }
180: break;
181:
182: case CONTENT_T:
183: switch(c) {
184: case 'r':
185: case 'R':
186: me->check_pointer = "ansfer-encoding:";
187: me->if_ok = CONTENT_TRANSFER_ENCODING;
188: me->state = CHECK;
189: break;
190:
191: case 'y':
192: case 'Y':
193: me->check_pointer = "pe:";
194: me->if_ok = CONTENT_TYPE;
195: me->state = CHECK;
196: break;
197:
198: default:
199: goto bad_field_name;
200:
201: } /* switch on character */
202: break;
2.14 frystyk 203:
204: case AA:
205: switch(c) {
206: case 'a':
207: case 'A':
208: me->check_pointer = "uthenticate:";
209: me->if_ok = AUTHENTICATE;
210: me->state = CHECK;
211: break;
212:
213: case 'p':
214: case 'P':
215: me->check_pointer = "rotection-template:";
216: me->if_ok = PROTECTION;
217: me->state = CHECK;
218: break;
219:
220: default:
221: goto bad_field_name;
222: }
223: break;
224:
225: case AUTHENTICATE:
226: me->field = me->state; /* remember it */
227: me->value_pointer = me->value;
228: me->state = GET_VALUE;
229: break;
230:
2.1 timbl 231: case CONTENT_TYPE:
232: case CONTENT_TRANSFER_ENCODING:
2.14 frystyk 233: case LOCATION:
234: case PROTECTION:
2.1 timbl 235: me->field = me->state; /* remember it */
236: me->state = SKIP_GET_VALUE;
2.14 frystyk 237:
2.1 timbl 238: /* Fall through! */
239: case SKIP_GET_VALUE:
240: if (c == '\n') {
241: me->fold_state = me->state;
242: me->state = NEWLINE;
243: break;
244: }
245: if (WHITE(c)) break; /* Skip white space */
246: me->value_pointer = me->value;
247: me->state = GET_VALUE;
248: /* Fall through to store first character */
249:
250: case GET_VALUE:
251: if (WHITE(c)) { /* End of field */
252: *me->value_pointer = 0;
2.14 frystyk 253: me->value_num++;
254: if (!*me->value) /* Ignore empty field */
255: break;
2.1 timbl 256: switch (me->field) {
257: case CONTENT_TYPE:
258: me->format = HTAtom_for(me->value);
259: break;
260: case CONTENT_TRANSFER_ENCODING:
261: me->encoding = HTAtom_for(me->value);
262: break;
2.14 frystyk 263: case LOCATION:
264: StrAllocCopy(me->request->redirect, me->value);
265: break;
266: case AUTHENTICATE:
267: if (me->value_num == 1) {
268: StrAllocCopy(me->request->WWWAAScheme, me->value);
269: me->value_pointer = me->value;
270: } else if (me->value_num == 2) {
271: StrAllocCopy(me->request->WWWAARealm, me->value);
272: }
273: break;
274: case PROTECTION:
275: StrAllocCopy(me->request->WWWprotection, me->value);
276: break;
2.1 timbl 277: default: /* Should never get here */
278: break;
279: }
280: } else {
281: if (me->value_pointer < me->value + VALUE_SIZE - 1) {
282: *me->value_pointer++ = c;
283: break;
284: } else {
285: goto value_too_long;
286: }
287: }
288: /* Fall through */
289:
290: case JUNK_LINE:
291: if (c == '\n') {
292: me->state = NEWLINE;
293: me->fold_state = me->state;
294: }
295: break;
296:
297:
298: } /* switch on state*/
299:
300: return;
301:
302: value_too_long:
2.15.2.1! frystyk 303: if (TRACE) fprintf(TDEST,
2.1 timbl 304: "HTMIME: *** Syntax error. (string too long)\n");
305:
306: bad_field_name: /* Ignore it */
307: me->state = JUNK_LINE;
308: return;
309:
310: }
311:
312:
313:
314: /* String handling
315: ** ---------------
316: **
317: ** Strings must be smaller than this buffer size.
318: */
319: PRIVATE void HTMIME_put_string ARGS2(HTStream *, me, CONST char*, s)
320: {
2.14 frystyk 321: while (me->state != MIME_TRANSPARENT && *s)
322: HTMIME_put_character(me, *s++);
323: if (*s)
324: (*me->target->isa->put_string)(me->target, s);
2.1 timbl 325: }
326:
327:
328: /* Buffer write. Buffers can (and should!) be big.
329: ** ------------
330: */
2.14 frystyk 331: PRIVATE void HTMIME_write ARGS3(HTStream *, me, CONST char *, b, int, l)
2.1 timbl 332: {
2.14 frystyk 333: while (me->state != MIME_TRANSPARENT && l-- > 0)
334: HTMIME_put_character(me, *b++);
335: if (l > 0)
336: (*me->target->isa->put_block)(me->target, b, l);
2.1 timbl 337: }
338:
339:
340: /* Free an HTML object
341: ** -------------------
342: **
343: */
2.14 frystyk 344: PRIVATE int HTMIME_free ARGS1(HTStream *, me)
2.1 timbl 345: {
2.14 frystyk 346: if (me->target) (*me->target->isa->_free)(me->target);
2.1 timbl 347: free(me);
2.14 frystyk 348: return 0;
2.1 timbl 349: }
350:
351: /* End writing
352: */
353:
2.14 frystyk 354: PRIVATE int HTMIME_abort ARGS2(HTStream *, me, HTError, e)
2.1 timbl 355: {
2.14 frystyk 356: if (me->target) (*me->target->isa->abort)(me->target, e);
2.6 timbl 357: free(me);
2.14 frystyk 358: return EOF;
2.1 timbl 359: }
360:
361:
362:
363: /* Structured Object Class
364: ** -----------------------
365: */
2.6 timbl 366: PRIVATE CONST HTStreamClass HTMIME =
2.1 timbl 367: {
368: "MIMEParser",
369: HTMIME_free,
2.6 timbl 370: HTMIME_abort,
371: HTMIME_put_character,
372: HTMIME_put_string,
2.1 timbl 373: HTMIME_write
374: };
375:
376:
377: /* Subclass-specific Methods
378: ** -------------------------
379: */
380:
2.7 timbl 381: PUBLIC HTStream* HTMIMEConvert ARGS5(
382: HTRequest *, request,
383: void *, param,
384: HTFormat, input_format,
385: HTFormat, output_format,
386: HTStream *, output_stream)
2.1 timbl 387: {
388: HTStream* me;
389:
2.12 frystyk 390: me = (HTStream*)calloc(1, sizeof(*me));
2.14 frystyk 391: if (me == NULL) outofmem(__FILE__, "HTMIMEConvert");
2.1 timbl 392: me->isa = &HTMIME;
393:
2.7 timbl 394: me->sink = output_stream;
395: me->request = request;
2.6 timbl 396: me->state = BEGINNING_OF_LINE;
397: me->format = WWW_PLAINTEXT;
2.7 timbl 398: me->targetRep = output_format;
2.6 timbl 399: return me;
400: }
401:
2.7 timbl 402: PUBLIC HTStream* HTNetMIME ARGS5(
403: HTRequest *, request,
404: void *, param,
405: HTFormat, input_format,
406: HTFormat, output_format,
407: HTStream *, output_stream)
2.6 timbl 408: {
2.7 timbl 409: HTStream* me = HTMIMEConvert(
410: request, param, input_format, output_format, output_stream);
2.6 timbl 411: if (!me) return NULL;
412:
413: me->net_ascii = YES;
2.1 timbl 414: return me;
415: }
416:
417:
Webmaster