Annotation of libwww/Library/src/HTMIME.c, revision 2.13
2.1 timbl 1: /* MIME Message Parse HTMIME.c
2: ** ==================
3: **
4: ** This is RFC 1341-specific code.
5: ** The input stream pushed into this parser is assumed to be
6: ** stripped of CRs, i.e. lines end with LF, not CR LF.
7: ** (It is easy to change this except for the body part where
8: ** conversion can be slow.)
9: **
10: ** History:
11: ** Feb 92 Written Tim Berners-Lee, CERN
2.13 ! duns 12: ** 8 Jul 94 FM Insulate free() from _free structure element.
2.1 timbl 13: **
14: */
2.9 luotonen 15: #include "HTFormat.h"
2.1 timbl 16: #include "HTMIME.h" /* Implemented here */
17: #include "HTAlert.h"
18:
19:
20: /* MIME Object
21: ** -----------
22: */
23:
/*
**  States of the header-parsing state machine.  The parser is in exactly
**  one of these at any time; `if_ok', `field' and `fold_state' in the
**  stream object also hold values of this type.
*/
enum _MIME_state {
    MIME_TRANSPARENT,           /* Header done: pass input straight to target */
    BEGINNING_OF_LINE,          /* Initial state: expecting a field name */
    CONTENT_T,                  /* Matched "content-t", deciding which field */
    CONTENT_TRANSFER_ENCODING,  /* Matched "Content-Transfer-Encoding:" */
    CONTENT_TYPE,               /* Matched "Content-Type:" */
    SKIP_GET_VALUE,             /* Skip white space, then collect the value */
    GET_VALUE,                  /* Collect the value up to white space */
    JUNK_LINE,                  /* Ignore the rest of this (folded) line */
    NEWLINE,                    /* Just saw a newline; may be a continuation */
    CHECK,                      /* Matching input against check_pointer */
    MIME_NET_ASCII,             /* Translate from net ascii */
    MIME_IGNORE                 /* Ignore the entire input */
    /* The MIME_ prefix is used because plain TRANSPARENT and IGNORE
    ** are defined as something else under _WINDOWS. */
};

typedef enum _MIME_state MIME_state;
39:
40: #define VALUE_SIZE 128 /* @@@@@@@ Arbitrary? */
41: struct _HTStream {
42: CONST HTStreamClass * isa;
43:
2.6 timbl 44: BOOL net_ascii; /* Is input net ascii? */
2.1 timbl 45: MIME_state state; /* current state */
46: MIME_state if_ok; /* got this state if match */
47: MIME_state field; /* remember which field */
48: MIME_state fold_state; /* state on a fold */
49: CONST char * check_pointer; /* checking input */
50:
51: char * value_pointer; /* storing values */
52: char value[VALUE_SIZE];
53:
54: HTStream * sink; /* Given on creation */
2.7 timbl 55: HTRequest * request; /* Given on creation */
2.1 timbl 56:
57: char * boundary; /* For multipart */
58:
59: HTFormat encoding; /* Content-Transfer-Encoding */
60: HTFormat format; /* Content-Type */
61: HTStream * target; /* While writing out */
62: HTStreamClass targetClass;
63:
64: HTAtom * targetRep; /* Converting into? */
65: };
66:
67:
68: /*_________________________________________________________________________
69: **
70: ** A C T I O N R O U T I N E S
71: */
72:
73: /* Character handling
74: ** ------------------
75: **
76: ** This is a FSM parser which is tolerant as it can be of all
77: ** syntax errors. It ignores field names it does not understand,
78: ** and resynchronises on line beginnings.
79: */
80:
81: PRIVATE void HTMIME_put_character ARGS2(HTStream *, me, char, c)
82: {
2.6 timbl 83: if (me->state == MIME_TRANSPARENT) {
84: (*me->targetClass.put_character)(me->target, c);/* MUST BE FAST */
85: return;
86: }
87:
88: /* This slightly simple conversion just strips CR and turns LF to
89: ** newline. On unix LF is \n but on Mac \n is CR for example.
90: ** See NetToText for an implementation which preserves single CR or LF.
91: */
92: if (me->net_ascii) {
93: c = FROMASCII(c);
94: if (c == CR) return;
95: else if (c == LF) c = '\n';
96: }
97:
2.1 timbl 98: switch(me->state) {
99:
2.3 timbl 100: case MIME_IGNORE:
2.2 timbl 101: return;
2.6 timbl 102:
103: case MIME_TRANSPARENT: /* Not reached see above */
104: (*me->targetClass.put_character)(me->target, c);
105: return;
2.2 timbl 106:
2.6 timbl 107: case MIME_NET_ASCII:
108: (*me->targetClass.put_character)(me->target, c); /* MUST BE FAST */
2.1 timbl 109: return;
110:
111: case NEWLINE:
112: if (c != '\n' && WHITE(c)) { /* Folded line */
113: me->state = me->fold_state; /* pop state before newline */
114: break;
115: }
116:
117: /* else Falls through */
118:
119: case BEGINNING_OF_LINE:
120: switch(c) {
121: case 'c':
122: case 'C':
123: me->check_pointer = "ontent-t";
124: me->if_ok = CONTENT_T;
125: me->state = CHECK;
126: break;
127: case '\n': /* Blank line: End of Header! */
128: {
129: if (TRACE) fprintf(stderr,
2.2 timbl 130: "HTMIME: MIME content type is %s, converting to %s\n",
131: HTAtom_name(me->format), HTAtom_name(me->targetRep));
2.11 luotonen 132: me->target = HTStreamStack(me->format, me->request, NO);
2.2 timbl 133: if (!me->target) {
134: if (TRACE) fprintf(stderr, "MIME: Can't translate! ** \n");
135: me->target = me->sink; /* Cheat */
136: }
137: if (me->target) {
138: me->targetClass = *me->target->isa;
2.1 timbl 139: /* Check for encoding and select state from there @@ */
140:
2.3 timbl 141: me->state = MIME_TRANSPARENT; /* From now push straigh through */
2.2 timbl 142: } else {
2.3 timbl 143: me->state = MIME_IGNORE; /* What else to do? */
2.2 timbl 144: }
2.1 timbl 145: }
146: break;
147:
148: default:
149: goto bad_field_name;
150: break;
151:
152: } /* switch on character */
153: break;
154:
155: case CHECK: /* Check against string */
156: if (TOLOWER(c) == *(me->check_pointer)++) {
157: if (!*me->check_pointer) me->state = me->if_ok;
158: } else { /* Error */
159: if (TRACE) fprintf(stderr,
2.5 timbl 160: "HTMIME: Bad character `%c' found where `%s' expected\n",
2.1 timbl 161: c, me->check_pointer - 1);
162: goto bad_field_name;
163: }
164: break;
165:
166: case CONTENT_T:
167: switch(c) {
168: case 'r':
169: case 'R':
170: me->check_pointer = "ansfer-encoding:";
171: me->if_ok = CONTENT_TRANSFER_ENCODING;
172: me->state = CHECK;
173: break;
174:
175: case 'y':
176: case 'Y':
177: me->check_pointer = "pe:";
178: me->if_ok = CONTENT_TYPE;
179: me->state = CHECK;
180: break;
181:
182: default:
183: goto bad_field_name;
184:
185: } /* switch on character */
186: break;
187:
188: case CONTENT_TYPE:
189: case CONTENT_TRANSFER_ENCODING:
190: me->field = me->state; /* remember it */
191: me->state = SKIP_GET_VALUE;
192: /* Fall through! */
193: case SKIP_GET_VALUE:
194: if (c == '\n') {
195: me->fold_state = me->state;
196: me->state = NEWLINE;
197: break;
198: }
199: if (WHITE(c)) break; /* Skip white space */
200:
201: me->value_pointer = me->value;
202: me->state = GET_VALUE;
203: /* Fall through to store first character */
204:
205: case GET_VALUE:
206: if (WHITE(c)) { /* End of field */
207: *me->value_pointer = 0;
208: switch (me->field) {
209: case CONTENT_TYPE:
210: me->format = HTAtom_for(me->value);
211: break;
212: case CONTENT_TRANSFER_ENCODING:
213: me->encoding = HTAtom_for(me->value);
214: break;
215: default: /* Should never get here */
216: break;
217: }
218: } else {
219: if (me->value_pointer < me->value + VALUE_SIZE - 1) {
220: *me->value_pointer++ = c;
221: break;
222: } else {
223: goto value_too_long;
224: }
225: }
226: /* Fall through */
227:
228: case JUNK_LINE:
229: if (c == '\n') {
230: me->state = NEWLINE;
231: me->fold_state = me->state;
232: }
233: break;
234:
235:
236: } /* switch on state*/
237:
238: return;
239:
240: value_too_long:
241: if (TRACE) fprintf(stderr,
242: "HTMIME: *** Syntax error. (string too long)\n");
243:
244: bad_field_name: /* Ignore it */
245: me->state = JUNK_LINE;
246: return;
247:
248: }
249:
250:
251:
252: /* String handling
253: ** ---------------
254: **
255: ** Strings must be smaller than this buffer size.
256: */
257: PRIVATE void HTMIME_put_string ARGS2(HTStream *, me, CONST char*, s)
258: {
259: CONST char * p;
2.3 timbl 260: if (me->state == MIME_TRANSPARENT) /* Optimisation */
2.1 timbl 261: (*me->targetClass.put_string)(me->target,s);
2.3 timbl 262: else if (me->state != MIME_IGNORE)
2.1 timbl 263: for (p=s; *p; p++) HTMIME_put_character(me, *p);
264: }
265:
266:
267: /* Buffer write. Buffers can (and should!) be big.
268: ** ------------
269: */
270: PRIVATE void HTMIME_write ARGS3(HTStream *, me, CONST char*, s, int, l)
271: {
272: CONST char * p;
2.3 timbl 273: if (me->state == MIME_TRANSPARENT) /* Optimisation */
274: (*me->targetClass.put_block)(me->target, s, l);
2.1 timbl 275: else
276: for (p=s; p < s+l; p++) HTMIME_put_character(me, *p);
277: }
278:
279:
280:
281:
282: /* Free a MIME parser object
283: ** -------------------
284: **
285: */
286: PRIVATE void HTMIME_free ARGS1(HTStream *, me)
287: {
2.13 ! duns 288: if (me->target) (*me->targetClass._free)(me->target);
2.1 timbl 289: free(me);
290: }
291:
292: /* Abort writing
293: */
294:
2.6 timbl 295: PRIVATE void HTMIME_abort ARGS2(HTStream *, me, HTError, e)
2.1 timbl 296: {
2.6 timbl 297: if (me->target) (*me->targetClass.abort)(me->target, e);
298: free(me);
2.1 timbl 299: }
300:
301:
302:
303: /* Structured Object Class
304: ** -----------------------
305: */
2.6 timbl 306: PRIVATE CONST HTStreamClass HTMIME =
2.1 timbl 307: {
308: "MIMEParser",
309: HTMIME_free,
2.6 timbl 310: HTMIME_abort,
311: HTMIME_put_character,
312: HTMIME_put_string,
2.1 timbl 313: HTMIME_write
314: };
315:
316:
317: /* Subclass-specific Methods
318: ** -------------------------
319: */
320:
2.7 timbl 321: PUBLIC HTStream* HTMIMEConvert ARGS5(
322: HTRequest *, request,
323: void *, param,
324: HTFormat, input_format,
325: HTFormat, output_format,
326: HTStream *, output_stream)
2.1 timbl 327: {
328: HTStream* me;
329:
2.12 frystyk 330: me = (HTStream*)calloc(1, sizeof(*me));
2.1 timbl 331: if (me == NULL) outofmem(__FILE__, "HTML_new");
332: me->isa = &HTMIME;
333:
2.7 timbl 334: me->sink = output_stream;
335: me->request = request;
2.6 timbl 336: me->state = BEGINNING_OF_LINE;
337: me->format = WWW_PLAINTEXT;
2.7 timbl 338: me->targetRep = output_format;
2.6 timbl 339: return me;
340: }
341:
2.7 timbl 342: PUBLIC HTStream* HTNetMIME ARGS5(
343: HTRequest *, request,
344: void *, param,
345: HTFormat, input_format,
346: HTFormat, output_format,
347: HTStream *, output_stream)
2.6 timbl 348: {
2.7 timbl 349: HTStream* me = HTMIMEConvert(
350: request, param, input_format, output_format, output_stream);
2.6 timbl 351: if (!me) return NULL;
352:
353: me->net_ascii = YES;
2.1 timbl 354: return me;
355: }
356:
357:
Webmaster