Annotation of libwww/Library/src/HTMIME.c, revision 2.6
2.1 timbl 1: /* MIME Message Parse HTMIME.c
2: ** ==================
3: **
4: ** This is RFC 1341-specific code.
5: ** The input stream pushed into this parser is assumed to be
6: ** stripped of CRs, i.e. lines end with LF, not CR LF.
7: ** (It is easy to change this except for the body part where
8: ** conversion can be slow.)
9: **
10: ** History:
11: ** Feb 92 Written Tim Berners-Lee, CERN
12: **
13: */
14: #include "HTMIME.h" /* Implemented here */
15: #include "HTAlert.h"
16:
17:
18: /* MIME Object
19: ** -----------
20: */
21:
/*
**	States of the MIME header parser.  The numeric order of the
**	enumerators is unchanged.
*/
typedef enum _MIME_state {
    MIME_TRANSPARENT,           /* Header done: pass input straight to target */
    BEGINNING_OF_LINE,          /* At the first character of a header line */
    CONTENT_T,                  /* Matched "content-t"; next char picks field */
    CONTENT_TRANSFER_ENCODING,  /* Matched "content-transfer-encoding:" */
    CONTENT_TYPE,               /* Matched "content-type:" */
    SKIP_GET_VALUE,             /* Skip white space, then collect the value */
    GET_VALUE,                  /* Collect the value up to white space */
    JUNK_LINE,                  /* Discard the rest of this (folded) line */
    NEWLINE,                    /* Just saw a newline; may be a continuation */
    CHECK,                      /* Compare input against check_pointer */
    MIME_NET_ASCII,             /* Translate from net ascii */
    MIME_IGNORE                 /* Discard the entire input */
    /* TRANSPARENT and IGNORE are prefixed with MIME_ because the plain
    ** names are defined as something else under _WINDOWS.
    */
} MIME_state;
37:
38: #define VALUE_SIZE 128 /* @@@@@@@ Arbitrary? */
39: struct _HTStream {
40: CONST HTStreamClass * isa;
41:
2.6 ! timbl 42: BOOL net_ascii; /* Is input net ascii? */
2.1 timbl 43: MIME_state state; /* current state */
44: MIME_state if_ok; /* got this state if match */
45: MIME_state field; /* remember which field */
46: MIME_state fold_state; /* state on a fold */
47: CONST char * check_pointer; /* checking input */
48:
49: char * value_pointer; /* storing values */
50: char value[VALUE_SIZE];
51:
52: HTParentAnchor * anchor; /* Given on creation */
53: HTStream * sink; /* Given on creation */
54:
55: char * boundary; /* For multipart */
56:
57: HTFormat encoding; /* Content-Transfer-Encoding */
58: HTFormat format; /* Content-Type */
59: HTStream * target; /* While writing out */
60: HTStreamClass targetClass;
61:
62: HTAtom * targetRep; /* Converting into? */
63: };
64:
65:
66: /*_________________________________________________________________________
67: **
68: ** A C T I O N R O U T I N E S
69: */
70:
71: /* Character handling
72: ** ------------------
73: **
74: ** This is a FSM parser which is tolerant as it can be of all
75: ** syntax errors. It ignores field names it does not understand,
76: ** and resynchronises on line beginnings.
77: */
78:
79: PRIVATE void HTMIME_put_character ARGS2(HTStream *, me, char, c)
80: {
2.6 ! timbl 81: if (me->state == MIME_TRANSPARENT) {
! 82: (*me->targetClass.put_character)(me->target, c);/* MUST BE FAST */
! 83: return;
! 84: }
! 85:
! 86: /* This slightly simple conversion just strips CR and turns LF to
! 87: ** newline. On unix LF is \n but on Mac \n is CR for example.
! 88: ** See NetToText for an implementation which preserves single CR or LF.
! 89: */
! 90: if (me->net_ascii) {
! 91: c = FROMASCII(c);
! 92: if (c == CR) return;
! 93: else if (c == LF) c = '\n';
! 94: }
! 95:
2.1 timbl 96: switch(me->state) {
97:
2.3 timbl 98: case MIME_IGNORE:
2.2 timbl 99: return;
2.6 ! timbl 100:
! 101: case MIME_TRANSPARENT: /* Not reached see above */
! 102: (*me->targetClass.put_character)(me->target, c);
! 103: return;
2.2 timbl 104:
2.6 ! timbl 105: case MIME_NET_ASCII:
! 106: (*me->targetClass.put_character)(me->target, c); /* MUST BE FAST */
2.1 timbl 107: return;
108:
109: case NEWLINE:
110: if (c != '\n' && WHITE(c)) { /* Folded line */
111: me->state = me->fold_state; /* pop state before newline */
112: break;
113: }
114:
115: /* else Falls through */
116:
117: case BEGINNING_OF_LINE:
118: switch(c) {
119: case 'c':
120: case 'C':
121: me->check_pointer = "ontent-t";
122: me->if_ok = CONTENT_T;
123: me->state = CHECK;
124: break;
125: case '\n': /* Blank line: End of Header! */
126: {
127: if (TRACE) fprintf(stderr,
2.2 timbl 128: "HTMIME: MIME content type is %s, converting to %s\n",
129: HTAtom_name(me->format), HTAtom_name(me->targetRep));
2.1 timbl 130: me->target = HTStreamStack(me->format, me->targetRep,
131: me->sink , me->anchor);
2.2 timbl 132: if (!me->target) {
133: if (TRACE) fprintf(stderr, "MIME: Can't translate! ** \n");
134: me->target = me->sink; /* Cheat */
135: }
136: if (me->target) {
137: me->targetClass = *me->target->isa;
2.1 timbl 138: /* Check for encoding and select state from there @@ */
139:
2.3 timbl 140: me->state = MIME_TRANSPARENT; /* From now push straigh through */
2.2 timbl 141: } else {
2.3 timbl 142: me->state = MIME_IGNORE; /* What else to do? */
2.2 timbl 143: }
2.1 timbl 144: }
145: break;
146:
147: default:
148: goto bad_field_name;
149: break;
150:
151: } /* switch on character */
152: break;
153:
154: case CHECK: /* Check against string */
155: if (TOLOWER(c) == *(me->check_pointer)++) {
156: if (!*me->check_pointer) me->state = me->if_ok;
157: } else { /* Error */
158: if (TRACE) fprintf(stderr,
2.5 timbl 159: "HTMIME: Bad character `%c' found where `%s' expected\n",
2.1 timbl 160: c, me->check_pointer - 1);
161: goto bad_field_name;
162: }
163: break;
164:
165: case CONTENT_T:
166: switch(c) {
167: case 'r':
168: case 'R':
169: me->check_pointer = "ansfer-encoding:";
170: me->if_ok = CONTENT_TRANSFER_ENCODING;
171: me->state = CHECK;
172: break;
173:
174: case 'y':
175: case 'Y':
176: me->check_pointer = "pe:";
177: me->if_ok = CONTENT_TYPE;
178: me->state = CHECK;
179: break;
180:
181: default:
182: goto bad_field_name;
183:
184: } /* switch on character */
185: break;
186:
187: case CONTENT_TYPE:
188: case CONTENT_TRANSFER_ENCODING:
189: me->field = me->state; /* remember it */
190: me->state = SKIP_GET_VALUE;
191: /* Fall through! */
192: case SKIP_GET_VALUE:
193: if (c == '\n') {
194: me->fold_state = me->state;
195: me->state = NEWLINE;
196: break;
197: }
198: if (WHITE(c)) break; /* Skip white space */
199:
200: me->value_pointer = me->value;
201: me->state = GET_VALUE;
202: /* Fall through to store first character */
203:
204: case GET_VALUE:
205: if (WHITE(c)) { /* End of field */
206: *me->value_pointer = 0;
207: switch (me->field) {
208: case CONTENT_TYPE:
209: me->format = HTAtom_for(me->value);
210: break;
211: case CONTENT_TRANSFER_ENCODING:
212: me->encoding = HTAtom_for(me->value);
213: break;
214: default: /* Should never get here */
215: break;
216: }
217: } else {
218: if (me->value_pointer < me->value + VALUE_SIZE - 1) {
219: *me->value_pointer++ = c;
220: break;
221: } else {
222: goto value_too_long;
223: }
224: }
225: /* Fall through */
226:
227: case JUNK_LINE:
228: if (c == '\n') {
229: me->state = NEWLINE;
230: me->fold_state = me->state;
231: }
232: break;
233:
234:
235: } /* switch on state*/
236:
237: return;
238:
239: value_too_long:
240: if (TRACE) fprintf(stderr,
241: "HTMIME: *** Syntax error. (string too long)\n");
242:
243: bad_field_name: /* Ignore it */
244: me->state = JUNK_LINE;
245: return;
246:
247: }
248:
249:
250:
251: /* String handling
252: ** ---------------
253: **
254: ** Strings must be smaller than this buffer size.
255: */
256: PRIVATE void HTMIME_put_string ARGS2(HTStream *, me, CONST char*, s)
257: {
258: CONST char * p;
2.3 timbl 259: if (me->state == MIME_TRANSPARENT) /* Optimisation */
2.1 timbl 260: (*me->targetClass.put_string)(me->target,s);
2.3 timbl 261: else if (me->state != MIME_IGNORE)
2.1 timbl 262: for (p=s; *p; p++) HTMIME_put_character(me, *p);
263: }
264:
265:
266: /* Buffer write. Buffers can (and should!) be big.
267: ** ------------
268: */
269: PRIVATE void HTMIME_write ARGS3(HTStream *, me, CONST char*, s, int, l)
270: {
271: CONST char * p;
2.3 timbl 272: if (me->state == MIME_TRANSPARENT) /* Optimisation */
273: (*me->targetClass.put_block)(me->target, s, l);
2.1 timbl 274: else
275: for (p=s; p < s+l; p++) HTMIME_put_character(me, *p);
276: }
277:
278:
279:
280:
281: /* Free an HTML object
282: ** -------------------
283: **
284: */
285: PRIVATE void HTMIME_free ARGS1(HTStream *, me)
286: {
287: if (me->target) (*me->targetClass.free)(me->target);
288: free(me);
289: }
290:
291: /* End writing
292: */
293:
2.6 ! timbl 294: PRIVATE void HTMIME_abort ARGS2(HTStream *, me, HTError, e)
2.1 timbl 295: {
2.6 ! timbl 296: if (me->target) (*me->targetClass.abort)(me->target, e);
! 297: free(me);
2.1 timbl 298: }
299:
300:
301:
302: /* Structured Object Class
303: ** -----------------------
304: */
2.6 ! timbl 305: PRIVATE CONST HTStreamClass HTMIME =
2.1 timbl 306: {
307: "MIMEParser",
308: HTMIME_free,
2.6 ! timbl 309: HTMIME_abort,
! 310: HTMIME_put_character,
! 311: HTMIME_put_string,
2.1 timbl 312: HTMIME_write
313: };
314:
315:
316: /* Subclass-specific Methods
317: ** -------------------------
318: */
319:
320: PUBLIC HTStream* HTMIMEConvert ARGS3(
321: HTPresentation *, pres,
322: HTParentAnchor *, anchor,
323: HTStream *, sink)
324: {
325: HTStream* me;
326:
327: me = malloc(sizeof(*me));
328: if (me == NULL) outofmem(__FILE__, "HTML_new");
329: me->isa = &HTMIME;
330:
2.6 ! timbl 331: me->sink = sink;
! 332: me->anchor = anchor;
! 333: me->target = NULL;
! 334: me->state = BEGINNING_OF_LINE;
! 335: me->format = WWW_PLAINTEXT;
! 336: me->targetRep = pres->rep_out;
! 337: me->boundary = 0; /* Not set yet */
! 338: me->net_ascii = NO; /* Local character set */
! 339: return me;
! 340: }
! 341:
! 342: PUBLIC HTStream* HTNetMIME ARGS3(
! 343: HTPresentation *, pres,
! 344: HTParentAnchor *, anchor,
! 345: HTStream *, sink)
! 346: {
! 347: HTStream* me = HTMIMEConvert(pres,anchor, sink);
! 348: if (!me) return NULL;
! 349:
! 350: me->net_ascii = YES;
2.1 timbl 351: return me;
352: }
353:
354:
Webmaster