Annotation of libwww/Library/src/HTMIME.c, revision 2.14
2.1 timbl 1: /* MIME Message Parse HTMIME.c
2: ** ==================
3: **
4: ** This is RFC 1341-specific code.
5: ** The input stream pushed into this parser is assumed to be
6: ** stripped of CRs, i.e. lines end with LF, not CR LF.
7: ** (It is easy to change this except for the body part where
8: ** conversion can be slow.)
9: **
10: ** History:
11: ** Feb 92 Written Tim Berners-Lee, CERN
2.13 duns 12: ** 8 Jul 94 FM Insulate free() from _free structure element.
2.1 timbl 13: **
14: */
2.9 luotonen 15: #include "HTFormat.h"
2.1 timbl 16: #include "HTAlert.h"
2.14 ! frystyk 17: #include "HTFWriter.h"
! 18: #include "HTMIME.h" /* Implemented here */
2.1 timbl 19:
/* Capacity in bytes, including the terminating NUL, of the buffer that
** collects one header value.  The figure itself is arbitrary.
*/
#define VALUE_SIZE 128
2.1 timbl 21:
22: /* MIME Object
23: ** -----------
24: */
25:
/* States of the header parser.  Some of them double as field identifiers
** (they are stored in the `field' member while a value is collected).
*/
typedef enum _MIME_state {
    MIME_TRANSPARENT,           /* Header done: pass input straight to target */
    BEGINNING_OF_LINE,          /* Expecting the first character of a field name */
    CONTENT_T,                  /* Matched "content-t"; next char picks the field */
    CONTENT_TRANSFER_ENCODING,  /* Matched "content-transfer-encoding:" */
    CONTENT_TYPE,               /* Matched "content-type:" */
    AA,                         /* Matched "www-"; next char picks the field */
    AUTHENTICATE,               /* Matched "www-authenticate:" */
    PROTECTION,                 /* Matched "www-protection-template:" */
    LOCATION,                   /* Matched "location:" or "uri:" */
    SKIP_GET_VALUE,             /* Skip white space, then collect the value */
    GET_VALUE,                  /* Collect the value up to white space */
    JUNK_LINE,                  /* Discard the remainder of this (folded) line */
    NEWLINE,                    /* Just saw a line end; a continuation may follow */
    CHECK                       /* Compare input against check_pointer */
} MIME_state;
42:
43: struct _HTStream {
44: CONST HTStreamClass * isa;
45:
2.6 timbl 46: BOOL net_ascii; /* Is input net ascii? */
2.1 timbl 47: MIME_state state; /* current state */
48: MIME_state if_ok; /* got this state if match */
49: MIME_state field; /* remember which field */
50: MIME_state fold_state; /* state on a fold */
51: CONST char * check_pointer; /* checking input */
52:
53: char * value_pointer; /* storing values */
54: char value[VALUE_SIZE];
2.14 ! frystyk 55: int value_num; /* What token are we reading */
2.1 timbl 56:
57: HTStream * sink; /* Given on creation */
2.7 timbl 58: HTRequest * request; /* Given on creation */
2.1 timbl 59:
60: char * boundary; /* For multipart */
61:
62: HTFormat encoding; /* Content-Transfer-Encoding */
63: HTFormat format; /* Content-Type */
64: HTStream * target; /* While writing out */
65: HTAtom * targetRep; /* Converting into? */
66: };
67:
68:
69: /*_________________________________________________________________________
70: **
71: ** A C T I O N R O U T I N E S
72: */
73:
74: /* Character handling
75: ** ------------------
76: **
77: ** This is a FSM parser which is tolerant as it can be of all
78: ** syntax errors. It ignores field names it does not understand,
79: ** and resynchronises on line beginnings.
80: */
81:
82: PRIVATE void HTMIME_put_character ARGS2(HTStream *, me, char, c)
83: {
2.6 timbl 84: /* This slightly simple conversion just strips CR and turns LF to
85: ** newline. On unix LF is \n but on Mac \n is CR for example.
86: ** See NetToText for an implementation which preserves single CR or LF.
87: */
88: if (me->net_ascii) {
89: c = FROMASCII(c);
90: if (c == CR) return;
91: else if (c == LF) c = '\n';
92: }
93:
2.1 timbl 94: switch(me->state) {
95:
2.14 ! frystyk 96: case MIME_TRANSPARENT:
! 97: (*me->target->isa->put_character)(me->target, c);
! 98: break;
2.1 timbl 99:
100: case NEWLINE:
101: if (c != '\n' && WHITE(c)) { /* Folded line */
102: me->state = me->fold_state; /* pop state before newline */
103: break;
104: }
2.14 ! frystyk 105: me->value_num = 0;
2.1 timbl 106:
107: /* else Falls through */
108:
109: case BEGINNING_OF_LINE:
110: switch(c) {
2.14 ! frystyk 111: case 'c':
! 112: case 'C':
2.1 timbl 113: me->check_pointer = "ontent-t";
114: me->if_ok = CONTENT_T;
115: me->state = CHECK;
116: break;
2.14 ! frystyk 117:
! 118: case 'l':
! 119: case 'L':
! 120: me->check_pointer = "ocation:";
! 121: me->if_ok = LOCATION;
! 122: me->state = CHECK;
! 123: break;
! 124:
! 125: case 'u':
! 126: case 'U':
! 127: me->check_pointer = "ri:";
! 128: me->if_ok = LOCATION;
! 129: me->state = CHECK;
! 130: break;
! 131:
! 132: case 'w':
! 133: case 'W':
! 134: me->check_pointer = "ww-";
! 135: me->if_ok = AA;
! 136: me->state = CHECK;
! 137: break;
! 138:
! 139: case '\n': /* Blank line: End of Header! */
2.1 timbl 140: {
141: if (TRACE) fprintf(stderr,
2.2 timbl 142: "HTMIME: MIME content type is %s, converting to %s\n",
143: HTAtom_name(me->format), HTAtom_name(me->targetRep));
2.14 ! frystyk 144: me->target = HTStreamStack(me->format, me->targetRep,
! 145: me->sink, me->request, NO);
2.2 timbl 146: if (me->target) {
2.14 ! frystyk 147: me->state = MIME_TRANSPARENT;
2.2 timbl 148: } else {
2.14 ! frystyk 149: if (TRACE)
! 150: fprintf(stderr, "MIMEParser.. Can't convert to output format\n");
! 151: me->target = me->sink; /* Cheat */
2.2 timbl 152: }
2.1 timbl 153: }
154: break;
155:
156: default:
157: goto bad_field_name;
158: break;
159:
160: } /* switch on character */
161: break;
162:
163: case CHECK: /* Check against string */
164: if (TOLOWER(c) == *(me->check_pointer)++) {
165: if (!*me->check_pointer) me->state = me->if_ok;
166: } else { /* Error */
167: if (TRACE) fprintf(stderr,
2.5 timbl 168: "HTMIME: Bad character `%c' found where `%s' expected\n",
2.1 timbl 169: c, me->check_pointer - 1);
170: goto bad_field_name;
171: }
172: break;
173:
174: case CONTENT_T:
175: switch(c) {
176: case 'r':
177: case 'R':
178: me->check_pointer = "ansfer-encoding:";
179: me->if_ok = CONTENT_TRANSFER_ENCODING;
180: me->state = CHECK;
181: break;
182:
183: case 'y':
184: case 'Y':
185: me->check_pointer = "pe:";
186: me->if_ok = CONTENT_TYPE;
187: me->state = CHECK;
188: break;
189:
190: default:
191: goto bad_field_name;
192:
193: } /* switch on character */
194: break;
2.14 ! frystyk 195:
! 196: case AA:
! 197: switch(c) {
! 198: case 'a':
! 199: case 'A':
! 200: me->check_pointer = "uthenticate:";
! 201: me->if_ok = AUTHENTICATE;
! 202: me->state = CHECK;
! 203: break;
! 204:
! 205: case 'p':
! 206: case 'P':
! 207: me->check_pointer = "rotection-template:";
! 208: me->if_ok = PROTECTION;
! 209: me->state = CHECK;
! 210: break;
! 211:
! 212: default:
! 213: goto bad_field_name;
! 214: }
! 215: break;
! 216:
! 217: case AUTHENTICATE:
! 218: me->field = me->state; /* remember it */
! 219: me->value_pointer = me->value;
! 220: me->state = GET_VALUE;
! 221: break;
! 222:
2.1 timbl 223: case CONTENT_TYPE:
224: case CONTENT_TRANSFER_ENCODING:
2.14 ! frystyk 225: case LOCATION:
! 226: case PROTECTION:
2.1 timbl 227: me->field = me->state; /* remember it */
228: me->state = SKIP_GET_VALUE;
2.14 ! frystyk 229:
2.1 timbl 230: /* Fall through! */
231: case SKIP_GET_VALUE:
232: if (c == '\n') {
233: me->fold_state = me->state;
234: me->state = NEWLINE;
235: break;
236: }
237: if (WHITE(c)) break; /* Skip white space */
238: me->value_pointer = me->value;
239: me->state = GET_VALUE;
240: /* Fall through to store first character */
241:
242: case GET_VALUE:
243: if (WHITE(c)) { /* End of field */
244: *me->value_pointer = 0;
2.14 ! frystyk 245: me->value_num++;
! 246: if (!*me->value) /* Ignore empty field */
! 247: break;
2.1 timbl 248: switch (me->field) {
249: case CONTENT_TYPE:
250: me->format = HTAtom_for(me->value);
251: break;
252: case CONTENT_TRANSFER_ENCODING:
253: me->encoding = HTAtom_for(me->value);
254: break;
2.14 ! frystyk 255: case LOCATION:
! 256: StrAllocCopy(me->request->redirect, me->value);
! 257: break;
! 258: case AUTHENTICATE:
! 259: if (me->value_num == 1) {
! 260: StrAllocCopy(me->request->WWWAAScheme, me->value);
! 261: me->value_pointer = me->value;
! 262: } else if (me->value_num == 2) {
! 263: StrAllocCopy(me->request->WWWAARealm, me->value);
! 264: }
! 265: break;
! 266: case PROTECTION:
! 267: StrAllocCopy(me->request->WWWprotection, me->value);
! 268: break;
2.1 timbl 269: default: /* Should never get here */
270: break;
271: }
272: } else {
273: if (me->value_pointer < me->value + VALUE_SIZE - 1) {
274: *me->value_pointer++ = c;
275: break;
276: } else {
277: goto value_too_long;
278: }
279: }
280: /* Fall through */
281:
282: case JUNK_LINE:
283: if (c == '\n') {
284: me->state = NEWLINE;
285: me->fold_state = me->state;
286: }
287: break;
288:
289:
290: } /* switch on state*/
291:
292: return;
293:
294: value_too_long:
295: if (TRACE) fprintf(stderr,
296: "HTMIME: *** Syntax error. (string too long)\n");
297:
298: bad_field_name: /* Ignore it */
299: me->state = JUNK_LINE;
300: return;
301:
302: }
303:
304:
305:
306: /* String handling
307: ** ---------------
308: **
309: ** Strings must be smaller than this buffer size.
310: */
311: PRIVATE void HTMIME_put_string ARGS2(HTStream *, me, CONST char*, s)
312: {
2.14 ! frystyk 313: while (me->state != MIME_TRANSPARENT && *s)
! 314: HTMIME_put_character(me, *s++);
! 315: if (*s)
! 316: (*me->target->isa->put_string)(me->target, s);
2.1 timbl 317: }
318:
319:
320: /* Buffer write. Buffers can (and should!) be big.
321: ** ------------
322: */
2.14 ! frystyk 323: PRIVATE void HTMIME_write ARGS3(HTStream *, me, CONST char *, b, int, l)
2.1 timbl 324: {
2.14 ! frystyk 325: while (me->state != MIME_TRANSPARENT && l-- > 0)
! 326: HTMIME_put_character(me, *b++);
! 327: if (l > 0)
! 328: (*me->target->isa->put_block)(me->target, b, l);
2.1 timbl 329: }
330:
331:
332: /* Free an HTML object
333: ** -------------------
334: **
335: */
2.14 ! frystyk 336: PRIVATE int HTMIME_free ARGS1(HTStream *, me)
2.1 timbl 337: {
2.14 ! frystyk 338: if (me->target) (*me->target->isa->_free)(me->target);
2.1 timbl 339: free(me);
2.14 ! frystyk 340: return 0;
2.1 timbl 341: }
342:
343: /* End writing
344: */
345:
2.14 ! frystyk 346: PRIVATE int HTMIME_abort ARGS2(HTStream *, me, HTError, e)
2.1 timbl 347: {
2.14 ! frystyk 348: if (me->target) (*me->target->isa->abort)(me->target, e);
2.6 timbl 349: free(me);
2.14 ! frystyk 350: return EOF;
2.1 timbl 351: }
352:
353:
354:
355: /* Structured Object Class
356: ** -----------------------
357: */
2.6 timbl 358: PRIVATE CONST HTStreamClass HTMIME =
2.1 timbl 359: {
360: "MIMEParser",
361: HTMIME_free,
2.6 timbl 362: HTMIME_abort,
363: HTMIME_put_character,
364: HTMIME_put_string,
2.1 timbl 365: HTMIME_write
366: };
367:
368:
369: /* Subclass-specific Methods
370: ** -------------------------
371: */
372:
2.7 timbl 373: PUBLIC HTStream* HTMIMEConvert ARGS5(
374: HTRequest *, request,
375: void *, param,
376: HTFormat, input_format,
377: HTFormat, output_format,
378: HTStream *, output_stream)
2.1 timbl 379: {
380: HTStream* me;
381:
2.12 frystyk 382: me = (HTStream*)calloc(1, sizeof(*me));
2.14 ! frystyk 383: if (me == NULL) outofmem(__FILE__, "HTMIMEConvert");
2.1 timbl 384: me->isa = &HTMIME;
385:
2.7 timbl 386: me->sink = output_stream;
387: me->request = request;
2.6 timbl 388: me->state = BEGINNING_OF_LINE;
389: me->format = WWW_PLAINTEXT;
2.7 timbl 390: me->targetRep = output_format;
2.6 timbl 391: return me;
392: }
393:
2.7 timbl 394: PUBLIC HTStream* HTNetMIME ARGS5(
395: HTRequest *, request,
396: void *, param,
397: HTFormat, input_format,
398: HTFormat, output_format,
399: HTStream *, output_stream)
2.6 timbl 400: {
2.7 timbl 401: HTStream* me = HTMIMEConvert(
402: request, param, input_format, output_format, output_stream);
2.6 timbl 403: if (!me) return NULL;
404:
405: me->net_ascii = YES;
2.1 timbl 406: return me;
407: }
408:
409:
Webmaster