Annotation of libwww/Library/src/HTMIME.c, revision 2.7
2.1 timbl 1: /* MIME Message Parse HTMIME.c
2: ** ==================
3: **
4: ** This is RFC 1341-specific code.
5: ** The input stream pushed into this parser is assumed to be
6: ** stripped of CRs, i.e. lines end with LF, not CR LF.
7: ** (It is easy to change this except for the body part where
8: ** conversion can be slow.)
9: **
10: ** History:
11: ** Feb 92 Written Tim Berners-Lee, CERN
12: **
13: */
14: #include "HTMIME.h" /* Implemented here */
15: #include "HTAlert.h"
16:
17:
18: /* MIME Object
19: ** -----------
20: */
21:
/* States of the header-parsing state machine driven by HTMIME_put_character.
** The enumerator order is kept as-is so the numeric values do not change.
*/
typedef enum _MIME_state {
    MIME_TRANSPARENT,           /* pass input straight through to target */
    BEGINNING_OF_LINE,          /* at the first character of a header line */
    CONTENT_T,                  /* "content-t" matched; next char picks field */
    CONTENT_TRANSFER_ENCODING,  /* "content-transfer-encoding:" matched */
    CONTENT_TYPE,               /* "content-type:" matched */
    SKIP_GET_VALUE,             /* skip white space, then collect the value */
    GET_VALUE,                  /* collect the value up to white space */
    JUNK_LINE,                  /* discard the rest of this folded line */
    NEWLINE,                    /* just saw a LF; may be a continuation */
    CHECK,                      /* match input against check_pointer */
    MIME_NET_ASCII,             /* translate from net ascii */
    MIME_IGNORE                 /* discard the entire input */
    /* TRANSPARENT and IGNORE are defined as something else in _WINDOWS,
    ** hence the MIME_ prefix on those names. */
} MIME_state;
37:
38: #define VALUE_SIZE 128 /* @@@@@@@ Arbitrary? */
39: struct _HTStream {
40: CONST HTStreamClass * isa;
41:
2.6 timbl 42: BOOL net_ascii; /* Is input net ascii? */
2.1 timbl 43: MIME_state state; /* current state */
44: MIME_state if_ok; /* got this state if match */
45: MIME_state field; /* remember which field */
46: MIME_state fold_state; /* state on a fold */
47: CONST char * check_pointer; /* checking input */
48:
49: char * value_pointer; /* storing values */
50: char value[VALUE_SIZE];
51:
52: HTStream * sink; /* Given on creation */
2.7 ! timbl 53: HTRequest * request; /* Given on creation */
2.1 timbl 54:
55: char * boundary; /* For multipart */
56:
57: HTFormat encoding; /* Content-Transfer-Encoding */
58: HTFormat format; /* Content-Type */
59: HTStream * target; /* While writing out */
60: HTStreamClass targetClass;
61:
62: HTAtom * targetRep; /* Converting into? */
63: };
64:
65:
66: /*_________________________________________________________________________
67: **
68: ** A C T I O N R O U T I N E S
69: */
70:
71: /* Character handling
72: ** ------------------
73: **
74: ** This is a FSM parser which is tolerant as it can be of all
75: ** syntax errors. It ignores field names it does not understand,
76: ** and resynchronises on line beginnings.
77: */
78:
79: PRIVATE void HTMIME_put_character ARGS2(HTStream *, me, char, c)
80: {
2.6 timbl 81: if (me->state == MIME_TRANSPARENT) {
82: (*me->targetClass.put_character)(me->target, c);/* MUST BE FAST */
83: return;
84: }
85:
86: /* This slightly simple conversion just strips CR and turns LF to
87: ** newline. On unix LF is \n but on Mac \n is CR for example.
88: ** See NetToText for an implementation which preserves single CR or LF.
89: */
90: if (me->net_ascii) {
91: c = FROMASCII(c);
92: if (c == CR) return;
93: else if (c == LF) c = '\n';
94: }
95:
2.1 timbl 96: switch(me->state) {
97:
2.3 timbl 98: case MIME_IGNORE:
2.2 timbl 99: return;
2.6 timbl 100:
101: case MIME_TRANSPARENT: /* Not reached see above */
102: (*me->targetClass.put_character)(me->target, c);
103: return;
2.2 timbl 104:
2.6 timbl 105: case MIME_NET_ASCII:
106: (*me->targetClass.put_character)(me->target, c); /* MUST BE FAST */
2.1 timbl 107: return;
108:
109: case NEWLINE:
110: if (c != '\n' && WHITE(c)) { /* Folded line */
111: me->state = me->fold_state; /* pop state before newline */
112: break;
113: }
114:
115: /* else Falls through */
116:
117: case BEGINNING_OF_LINE:
118: switch(c) {
119: case 'c':
120: case 'C':
121: me->check_pointer = "ontent-t";
122: me->if_ok = CONTENT_T;
123: me->state = CHECK;
124: break;
125: case '\n': /* Blank line: End of Header! */
126: {
127: if (TRACE) fprintf(stderr,
2.2 timbl 128: "HTMIME: MIME content type is %s, converting to %s\n",
129: HTAtom_name(me->format), HTAtom_name(me->targetRep));
2.7 ! timbl 130: me->target = HTStreamStack(me->targetRep, me->request);
2.2 timbl 131: if (!me->target) {
132: if (TRACE) fprintf(stderr, "MIME: Can't translate! ** \n");
133: me->target = me->sink; /* Cheat */
134: }
135: if (me->target) {
136: me->targetClass = *me->target->isa;
2.1 timbl 137: /* Check for encoding and select state from there @@ */
138:
2.3 timbl 139: me->state = MIME_TRANSPARENT; /* From now push straigh through */
2.2 timbl 140: } else {
2.3 timbl 141: me->state = MIME_IGNORE; /* What else to do? */
2.2 timbl 142: }
2.1 timbl 143: }
144: break;
145:
146: default:
147: goto bad_field_name;
148: break;
149:
150: } /* switch on character */
151: break;
152:
153: case CHECK: /* Check against string */
154: if (TOLOWER(c) == *(me->check_pointer)++) {
155: if (!*me->check_pointer) me->state = me->if_ok;
156: } else { /* Error */
157: if (TRACE) fprintf(stderr,
2.5 timbl 158: "HTMIME: Bad character `%c' found where `%s' expected\n",
2.1 timbl 159: c, me->check_pointer - 1);
160: goto bad_field_name;
161: }
162: break;
163:
164: case CONTENT_T:
165: switch(c) {
166: case 'r':
167: case 'R':
168: me->check_pointer = "ansfer-encoding:";
169: me->if_ok = CONTENT_TRANSFER_ENCODING;
170: me->state = CHECK;
171: break;
172:
173: case 'y':
174: case 'Y':
175: me->check_pointer = "pe:";
176: me->if_ok = CONTENT_TYPE;
177: me->state = CHECK;
178: break;
179:
180: default:
181: goto bad_field_name;
182:
183: } /* switch on character */
184: break;
185:
186: case CONTENT_TYPE:
187: case CONTENT_TRANSFER_ENCODING:
188: me->field = me->state; /* remember it */
189: me->state = SKIP_GET_VALUE;
190: /* Fall through! */
191: case SKIP_GET_VALUE:
192: if (c == '\n') {
193: me->fold_state = me->state;
194: me->state = NEWLINE;
195: break;
196: }
197: if (WHITE(c)) break; /* Skip white space */
198:
199: me->value_pointer = me->value;
200: me->state = GET_VALUE;
201: /* Fall through to store first character */
202:
203: case GET_VALUE:
204: if (WHITE(c)) { /* End of field */
205: *me->value_pointer = 0;
206: switch (me->field) {
207: case CONTENT_TYPE:
208: me->format = HTAtom_for(me->value);
209: break;
210: case CONTENT_TRANSFER_ENCODING:
211: me->encoding = HTAtom_for(me->value);
212: break;
213: default: /* Should never get here */
214: break;
215: }
216: } else {
217: if (me->value_pointer < me->value + VALUE_SIZE - 1) {
218: *me->value_pointer++ = c;
219: break;
220: } else {
221: goto value_too_long;
222: }
223: }
224: /* Fall through */
225:
226: case JUNK_LINE:
227: if (c == '\n') {
228: me->state = NEWLINE;
229: me->fold_state = me->state;
230: }
231: break;
232:
233:
234: } /* switch on state*/
235:
236: return;
237:
238: value_too_long:
239: if (TRACE) fprintf(stderr,
240: "HTMIME: *** Syntax error. (string too long)\n");
241:
242: bad_field_name: /* Ignore it */
243: me->state = JUNK_LINE;
244: return;
245:
246: }
247:
248:
249:
250: /* String handling
251: ** ---------------
252: **
253: ** Strings must be smaller than this buffer size.
254: */
255: PRIVATE void HTMIME_put_string ARGS2(HTStream *, me, CONST char*, s)
256: {
257: CONST char * p;
2.3 timbl 258: if (me->state == MIME_TRANSPARENT) /* Optimisation */
2.1 timbl 259: (*me->targetClass.put_string)(me->target,s);
2.3 timbl 260: else if (me->state != MIME_IGNORE)
2.1 timbl 261: for (p=s; *p; p++) HTMIME_put_character(me, *p);
262: }
263:
264:
265: /* Buffer write. Buffers can (and should!) be big.
266: ** ------------
267: */
268: PRIVATE void HTMIME_write ARGS3(HTStream *, me, CONST char*, s, int, l)
269: {
270: CONST char * p;
2.3 timbl 271: if (me->state == MIME_TRANSPARENT) /* Optimisation */
272: (*me->targetClass.put_block)(me->target, s, l);
2.1 timbl 273: else
274: for (p=s; p < s+l; p++) HTMIME_put_character(me, *p);
275: }
276:
277:
278:
279:
280: /* Free an HTML object
281: ** -------------------
282: **
283: */
284: PRIVATE void HTMIME_free ARGS1(HTStream *, me)
285: {
286: if (me->target) (*me->targetClass.free)(me->target);
287: free(me);
288: }
289:
290: /* End writing
291: */
292:
2.6 timbl 293: PRIVATE void HTMIME_abort ARGS2(HTStream *, me, HTError, e)
2.1 timbl 294: {
2.6 timbl 295: if (me->target) (*me->targetClass.abort)(me->target, e);
296: free(me);
2.1 timbl 297: }
298:
299:
300:
301: /* Structured Object Class
302: ** -----------------------
303: */
2.6 timbl 304: PRIVATE CONST HTStreamClass HTMIME =
2.1 timbl 305: {
306: "MIMEParser",
307: HTMIME_free,
2.6 timbl 308: HTMIME_abort,
309: HTMIME_put_character,
310: HTMIME_put_string,
2.1 timbl 311: HTMIME_write
312: };
313:
314:
315: /* Subclass-specific Methods
316: ** -------------------------
317: */
318:
2.7 ! timbl 319: PUBLIC HTStream* HTMIMEConvert ARGS5(
! 320: HTRequest *, request,
! 321: void *, param,
! 322: HTFormat, input_format,
! 323: HTFormat, output_format,
! 324: HTStream *, output_stream)
2.1 timbl 325: {
326: HTStream* me;
327:
328: me = malloc(sizeof(*me));
329: if (me == NULL) outofmem(__FILE__, "HTML_new");
330: me->isa = &HTMIME;
331:
2.7 ! timbl 332: me->sink = output_stream;
! 333: me->request = request;
2.6 timbl 334: me->target = NULL;
335: me->state = BEGINNING_OF_LINE;
336: me->format = WWW_PLAINTEXT;
2.7 ! timbl 337: me->targetRep = output_format;
2.6 timbl 338: me->boundary = 0; /* Not set yet */
339: me->net_ascii = NO; /* Local character set */
340: return me;
341: }
342:
2.7 ! timbl 343: PUBLIC HTStream* HTNetMIME ARGS5(
! 344: HTRequest *, request,
! 345: void *, param,
! 346: HTFormat, input_format,
! 347: HTFormat, output_format,
! 348: HTStream *, output_stream)
2.6 timbl 349: {
2.7 ! timbl 350: HTStream* me = HTMIMEConvert(
! 351: request, param, input_format, output_format, output_stream);
2.6 timbl 352: if (!me) return NULL;
353:
354: me->net_ascii = YES;
2.1 timbl 355: return me;
356: }
357:
358:
Webmaster