Annotation of libwww/Library/src/HTMIME.c, revision 2.11
2.1 timbl 1: /* MIME Message Parse HTMIME.c
2: ** ==================
3: **
4: ** This is RFC 1341-specific code.
5: ** The input stream pushed into this parser is assumed to be
6: ** stripped of CRs, i.e. lines end with LF, not CR LF.
7: ** (It is easy to change this except for the body part where
8: ** conversion can be slow.)
9: **
10: ** History:
11: ** Feb 92 Written Tim Berners-Lee, CERN
12: **
13: */
2.9 luotonen 14: #include "HTFormat.h"
2.1 timbl 15: #include "HTMIME.h" /* Implemented here */
16: #include "HTAlert.h"
17:
18:
19: /* MIME Object
20: ** -----------
21: */
22:
/*
**	States of the header parser.  The enumerator order is kept exactly as
**	it was, so the numeric values are unchanged.
**
**	The pass-through and ignore states carry a MIME_ prefix because the
**	plain names TRANSPARENT and IGNORE are defined as something else
**	under _WINDOWS.
*/
typedef enum _MIME_state {
    MIME_TRANSPARENT,           /* header done: pass input straight to target */
    BEGINNING_OF_LINE,          /* at the first character of a header line */
    CONTENT_T,                  /* matched "content-t", choosing the field */
    CONTENT_TRANSFER_ENCODING,  /* matched "content-transfer-encoding:" */
    CONTENT_TYPE,               /* matched "content-type:" */
    SKIP_GET_VALUE,             /* skip white space, then collect the value */
    GET_VALUE,                  /* collect the value up to white space */
    JUNK_LINE,                  /* ignore the rest of this (folded) line */
    NEWLINE,                    /* just saw a newline: maybe a continuation */
    CHECK,                      /* compare input against check_pointer */
    MIME_NET_ASCII,             /* translate from net ascii */
    MIME_IGNORE                 /* ignore the entire remaining input */
} MIME_state;
38:
39: #define VALUE_SIZE 128 /* @@@@@@@ Arbitrary? */
40: struct _HTStream {
41: CONST HTStreamClass * isa;
42:
2.6 timbl 43: BOOL net_ascii; /* Is input net ascii? */
2.1 timbl 44: MIME_state state; /* current state */
45: MIME_state if_ok; /* got this state if match */
46: MIME_state field; /* remember which field */
47: MIME_state fold_state; /* state on a fold */
48: CONST char * check_pointer; /* checking input */
49:
50: char * value_pointer; /* storing values */
51: char value[VALUE_SIZE];
52:
53: HTStream * sink; /* Given on creation */
2.7 timbl 54: HTRequest * request; /* Given on creation */
2.1 timbl 55:
56: char * boundary; /* For multipart */
57:
58: HTFormat encoding; /* Content-Transfer-Encoding */
59: HTFormat format; /* Content-Type */
60: HTStream * target; /* While writing out */
61: HTStreamClass targetClass;
62:
63: HTAtom * targetRep; /* Converting into? */
64: };
65:
66:
67: /*_________________________________________________________________________
68: **
69: ** A C T I O N R O U T I N E S
70: */
71:
72: /* Character handling
73: ** ------------------
74: **
75: ** This is a FSM parser which is tolerant as it can be of all
76: ** syntax errors. It ignores field names it does not understand,
77: ** and resynchronises on line beginnings.
78: */
79:
80: PRIVATE void HTMIME_put_character ARGS2(HTStream *, me, char, c)
81: {
2.6 timbl 82: if (me->state == MIME_TRANSPARENT) {
83: (*me->targetClass.put_character)(me->target, c);/* MUST BE FAST */
84: return;
85: }
86:
87: /* This slightly simple conversion just strips CR and turns LF to
88: ** newline. On unix LF is \n but on Mac \n is CR for example.
89: ** See NetToText for an implementation which preserves single CR or LF.
90: */
91: if (me->net_ascii) {
92: c = FROMASCII(c);
93: if (c == CR) return;
94: else if (c == LF) c = '\n';
95: }
96:
2.1 timbl 97: switch(me->state) {
98:
2.3 timbl 99: case MIME_IGNORE:
2.2 timbl 100: return;
2.6 timbl 101:
102: case MIME_TRANSPARENT: /* Not reached see above */
103: (*me->targetClass.put_character)(me->target, c);
104: return;
2.2 timbl 105:
2.6 timbl 106: case MIME_NET_ASCII:
107: (*me->targetClass.put_character)(me->target, c); /* MUST BE FAST */
2.1 timbl 108: return;
109:
110: case NEWLINE:
111: if (c != '\n' && WHITE(c)) { /* Folded line */
112: me->state = me->fold_state; /* pop state before newline */
113: break;
114: }
115:
116: /* else Falls through */
117:
118: case BEGINNING_OF_LINE:
119: switch(c) {
120: case 'c':
121: case 'C':
122: me->check_pointer = "ontent-t";
123: me->if_ok = CONTENT_T;
124: me->state = CHECK;
125: break;
126: case '\n': /* Blank line: End of Header! */
127: {
128: if (TRACE) fprintf(stderr,
2.2 timbl 129: "HTMIME: MIME content type is %s, converting to %s\n",
130: HTAtom_name(me->format), HTAtom_name(me->targetRep));
2.11 ! luotonen 131: me->target = HTStreamStack(me->format, me->request, NO);
2.2 timbl 132: if (!me->target) {
133: if (TRACE) fprintf(stderr, "MIME: Can't translate! ** \n");
134: me->target = me->sink; /* Cheat */
135: }
136: if (me->target) {
137: me->targetClass = *me->target->isa;
2.1 timbl 138: /* Check for encoding and select state from there @@ */
139:
2.3 timbl 140: me->state = MIME_TRANSPARENT; /* From now push straigh through */
2.2 timbl 141: } else {
2.3 timbl 142: me->state = MIME_IGNORE; /* What else to do? */
2.2 timbl 143: }
2.1 timbl 144: }
145: break;
146:
147: default:
148: goto bad_field_name;
149: break;
150:
151: } /* switch on character */
152: break;
153:
154: case CHECK: /* Check against string */
155: if (TOLOWER(c) == *(me->check_pointer)++) {
156: if (!*me->check_pointer) me->state = me->if_ok;
157: } else { /* Error */
158: if (TRACE) fprintf(stderr,
2.5 timbl 159: "HTMIME: Bad character `%c' found where `%s' expected\n",
2.1 timbl 160: c, me->check_pointer - 1);
161: goto bad_field_name;
162: }
163: break;
164:
165: case CONTENT_T:
166: switch(c) {
167: case 'r':
168: case 'R':
169: me->check_pointer = "ansfer-encoding:";
170: me->if_ok = CONTENT_TRANSFER_ENCODING;
171: me->state = CHECK;
172: break;
173:
174: case 'y':
175: case 'Y':
176: me->check_pointer = "pe:";
177: me->if_ok = CONTENT_TYPE;
178: me->state = CHECK;
179: break;
180:
181: default:
182: goto bad_field_name;
183:
184: } /* switch on character */
185: break;
186:
187: case CONTENT_TYPE:
188: case CONTENT_TRANSFER_ENCODING:
189: me->field = me->state; /* remember it */
190: me->state = SKIP_GET_VALUE;
191: /* Fall through! */
192: case SKIP_GET_VALUE:
193: if (c == '\n') {
194: me->fold_state = me->state;
195: me->state = NEWLINE;
196: break;
197: }
198: if (WHITE(c)) break; /* Skip white space */
199:
200: me->value_pointer = me->value;
201: me->state = GET_VALUE;
202: /* Fall through to store first character */
203:
204: case GET_VALUE:
205: if (WHITE(c)) { /* End of field */
206: *me->value_pointer = 0;
207: switch (me->field) {
208: case CONTENT_TYPE:
209: me->format = HTAtom_for(me->value);
210: break;
211: case CONTENT_TRANSFER_ENCODING:
212: me->encoding = HTAtom_for(me->value);
213: break;
214: default: /* Should never get here */
215: break;
216: }
217: } else {
218: if (me->value_pointer < me->value + VALUE_SIZE - 1) {
219: *me->value_pointer++ = c;
220: break;
221: } else {
222: goto value_too_long;
223: }
224: }
225: /* Fall through */
226:
227: case JUNK_LINE:
228: if (c == '\n') {
229: me->state = NEWLINE;
230: me->fold_state = me->state;
231: }
232: break;
233:
234:
235: } /* switch on state*/
236:
237: return;
238:
239: value_too_long:
240: if (TRACE) fprintf(stderr,
241: "HTMIME: *** Syntax error. (string too long)\n");
242:
243: bad_field_name: /* Ignore it */
244: me->state = JUNK_LINE;
245: return;
246:
247: }
248:
249:
250:
251: /* String handling
252: ** ---------------
253: **
254: ** Strings must be smaller than this buffer size.
255: */
256: PRIVATE void HTMIME_put_string ARGS2(HTStream *, me, CONST char*, s)
257: {
258: CONST char * p;
2.3 timbl 259: if (me->state == MIME_TRANSPARENT) /* Optimisation */
2.1 timbl 260: (*me->targetClass.put_string)(me->target,s);
2.3 timbl 261: else if (me->state != MIME_IGNORE)
2.1 timbl 262: for (p=s; *p; p++) HTMIME_put_character(me, *p);
263: }
264:
265:
266: /* Buffer write. Buffers can (and should!) be big.
267: ** ------------
268: */
269: PRIVATE void HTMIME_write ARGS3(HTStream *, me, CONST char*, s, int, l)
270: {
271: CONST char * p;
2.3 timbl 272: if (me->state == MIME_TRANSPARENT) /* Optimisation */
273: (*me->targetClass.put_block)(me->target, s, l);
2.1 timbl 274: else
275: for (p=s; p < s+l; p++) HTMIME_put_character(me, *p);
276: }
277:
278:
279:
280:
281: /* Free an HTML object
282: ** -------------------
283: **
284: */
285: PRIVATE void HTMIME_free ARGS1(HTStream *, me)
286: {
287: if (me->target) (*me->targetClass.free)(me->target);
288: free(me);
289: }
290:
291: /* End writing
292: */
293:
2.6 timbl 294: PRIVATE void HTMIME_abort ARGS2(HTStream *, me, HTError, e)
2.1 timbl 295: {
2.6 timbl 296: if (me->target) (*me->targetClass.abort)(me->target, e);
297: free(me);
2.1 timbl 298: }
299:
300:
301:
302: /* Structured Object Class
303: ** -----------------------
304: */
2.6 timbl 305: PRIVATE CONST HTStreamClass HTMIME =
2.1 timbl 306: {
307: "MIMEParser",
308: HTMIME_free,
2.6 timbl 309: HTMIME_abort,
310: HTMIME_put_character,
311: HTMIME_put_string,
2.1 timbl 312: HTMIME_write
313: };
314:
315:
316: /* Subclass-specific Methods
317: ** -------------------------
318: */
319:
2.7 timbl 320: PUBLIC HTStream* HTMIMEConvert ARGS5(
321: HTRequest *, request,
322: void *, param,
323: HTFormat, input_format,
324: HTFormat, output_format,
325: HTStream *, output_stream)
2.1 timbl 326: {
327: HTStream* me;
328:
2.10 luotonen 329: me = (HTStream*)malloc(sizeof(*me));
2.1 timbl 330: if (me == NULL) outofmem(__FILE__, "HTML_new");
331: me->isa = &HTMIME;
332:
2.7 timbl 333: me->sink = output_stream;
334: me->request = request;
2.6 timbl 335: me->target = NULL;
336: me->state = BEGINNING_OF_LINE;
337: me->format = WWW_PLAINTEXT;
2.7 timbl 338: me->targetRep = output_format;
2.6 timbl 339: me->boundary = 0; /* Not set yet */
340: me->net_ascii = NO; /* Local character set */
341: return me;
342: }
343:
2.7 timbl 344: PUBLIC HTStream* HTNetMIME ARGS5(
345: HTRequest *, request,
346: void *, param,
347: HTFormat, input_format,
348: HTFormat, output_format,
349: HTStream *, output_stream)
2.6 timbl 350: {
2.7 timbl 351: HTStream* me = HTMIMEConvert(
352: request, param, input_format, output_format, output_stream);
2.6 timbl 353: if (!me) return NULL;
354:
355: me->net_ascii = YES;
2.1 timbl 356: return me;
357: }
358:
359:
Webmaster