Annotation of libwww/Library/src/HTMIME.c, revision 2.3
2.1 timbl 1: /* MIME Message Parse HTMIME.c
2: ** ==================
3: **
4: ** This is RFC 1341-specific code.
5: ** The input stream pushed into this parser is assumed to be
6: ** stripped of CRs, i.e. lines end with LF, not CR LF.
7: ** (It is easy to change this except for the body part where
8: ** conversion can be slow.)
9: **
10: ** History:
11: ** Feb 92 Written Tim Berners-Lee, CERN
12: **
13: */
14: #include "HTMIME.h" /* Implemented here */
15: #include "HTAlert.h"
16:
17:
18: /* MIME Object
19: ** -----------
20: */
21:
/*
**  States of the MIME header parser.
**
**  The first group is used while the header is being read; the two MIME_*
**  states are used once the blank line ending the header has been seen.
**  The MIME_ prefix is there because TRANSPARENT and IGNORE are defined
**  as something else under _WINDOWS.
*/
typedef enum _MIME_state {
    BEGINNING_OF_LINE         = 0,
    CONTENT_T                 = 1,
    CONTENT_TRANSFER_ENCODING = 2,
    CONTENT_TYPE              = 3,
    SKIP_GET_VALUE            = 4,  /* Skip white space, then collect a value */
    GET_VALUE                 = 5,  /* Collect a value up to white space */
    JUNK_LINE                 = 6,  /* Discard the rest of this folded line */
    NEWLINE                   = 7,  /* LF just seen: continuation may follow */
    CHECK                     = 8,  /* Compare input against check_pointer */
    MIME_TRANSPARENT          = 9,  /* Pass input straight to the target */
    MIME_IGNORE               = 10  /* Discard all further input */
} MIME_state;
36:
37: #define VALUE_SIZE 128 /* @@@@@@@ Arbitrary? */
38: struct _HTStream {
39: CONST HTStreamClass * isa;
40:
41: MIME_state state; /* current state */
42: MIME_state if_ok; /* got this state if match */
43: MIME_state field; /* remember which field */
44: MIME_state fold_state; /* state on a fold */
45: CONST char * check_pointer; /* checking input */
46:
47: char * value_pointer; /* storing values */
48: char value[VALUE_SIZE];
49:
50: HTParentAnchor * anchor; /* Given on creation */
51: HTStream * sink; /* Given on creation */
52:
53: char * boundary; /* For multipart */
54:
55: HTFormat encoding; /* Content-Transfer-Encoding */
56: HTFormat format; /* Content-Type */
57: HTStream * target; /* While writing out */
58: HTStreamClass targetClass;
59:
60: HTAtom * targetRep; /* Converting into? */
61: };
62:
63:
64: /*_________________________________________________________________________
65: **
66: ** A C T I O N R O U T I N E S
67: */
68:
69: /* Character handling
70: ** ------------------
71: **
72: ** This is a FSM parser which is tolerant as it can be of all
73: ** syntax errors. It ignores field names it does not understand,
74: ** and resynchronises on line beginnings.
75: */
76:
77: PRIVATE void HTMIME_put_character ARGS2(HTStream *, me, char, c)
78: {
79: switch(me->state) {
80:
2.3 ! timbl 81: case MIME_IGNORE:
2.2 timbl 82: return;
83:
2.3 ! timbl 84: case MIME_TRANSPARENT:
2.1 timbl 85: (*me->targetClass.put_character)(me->target, c); /* MUST BE FAST */
86: return;
87:
88: case NEWLINE:
89: if (c != '\n' && WHITE(c)) { /* Folded line */
90: me->state = me->fold_state; /* pop state before newline */
91: break;
92: }
93:
94: /* else Falls through */
95:
96: case BEGINNING_OF_LINE:
97: switch(c) {
98: case 'c':
99: case 'C':
100: me->check_pointer = "ontent-t";
101: me->if_ok = CONTENT_T;
102: me->state = CHECK;
103: break;
104: case '\n': /* Blank line: End of Header! */
105: {
106: if (TRACE) fprintf(stderr,
2.2 timbl 107: "HTMIME: MIME content type is %s, converting to %s\n",
108: HTAtom_name(me->format), HTAtom_name(me->targetRep));
2.1 timbl 109: me->target = HTStreamStack(me->format, me->targetRep,
110: me->sink , me->anchor);
2.2 timbl 111: if (!me->target) {
112: if (TRACE) fprintf(stderr, "MIME: Can't translate! ** \n");
113: me->target = me->sink; /* Cheat */
114: }
115: if (me->target) {
116: me->targetClass = *me->target->isa;
2.1 timbl 117: /* Check for encoding and select state from there @@ */
118:
2.3 ! timbl 119: me->state = MIME_TRANSPARENT; /* From now push straigh through */
2.2 timbl 120: } else {
2.3 ! timbl 121: me->state = MIME_IGNORE; /* What else to do? */
2.2 timbl 122: }
2.1 timbl 123: }
124: break;
125:
126: default:
127: goto bad_field_name;
128: break;
129:
130: } /* switch on character */
131: break;
132:
133: case CHECK: /* Check against string */
134: if (TOLOWER(c) == *(me->check_pointer)++) {
135: if (!*me->check_pointer) me->state = me->if_ok;
136: } else { /* Error */
137: if (TRACE) fprintf(stderr,
138: "HTMIME: Bad character `%c' found where `%s' expected",
139: c, me->check_pointer - 1);
140: goto bad_field_name;
141: }
142: break;
143:
144: case CONTENT_T:
145: switch(c) {
146: case 'r':
147: case 'R':
148: me->check_pointer = "ansfer-encoding:";
149: me->if_ok = CONTENT_TRANSFER_ENCODING;
150: me->state = CHECK;
151: break;
152:
153: case 'y':
154: case 'Y':
155: me->check_pointer = "pe:";
156: me->if_ok = CONTENT_TYPE;
157: me->state = CHECK;
158: break;
159:
160: default:
161: goto bad_field_name;
162:
163: } /* switch on character */
164: break;
165:
166: case CONTENT_TYPE:
167: case CONTENT_TRANSFER_ENCODING:
168: me->field = me->state; /* remember it */
169: me->state = SKIP_GET_VALUE;
170: /* Fall through! */
171: case SKIP_GET_VALUE:
172: if (c == '\n') {
173: me->fold_state = me->state;
174: me->state = NEWLINE;
175: break;
176: }
177: if (WHITE(c)) break; /* Skip white space */
178:
179: me->value_pointer = me->value;
180: me->state = GET_VALUE;
181: /* Fall through to store first character */
182:
183: case GET_VALUE:
184: if (WHITE(c)) { /* End of field */
185: *me->value_pointer = 0;
186: switch (me->field) {
187: case CONTENT_TYPE:
188: me->format = HTAtom_for(me->value);
189: break;
190: case CONTENT_TRANSFER_ENCODING:
191: me->encoding = HTAtom_for(me->value);
192: break;
193: default: /* Should never get here */
194: break;
195: }
196: } else {
197: if (me->value_pointer < me->value + VALUE_SIZE - 1) {
198: *me->value_pointer++ = c;
199: break;
200: } else {
201: goto value_too_long;
202: }
203: }
204: /* Fall through */
205:
206: case JUNK_LINE:
207: if (c == '\n') {
208: me->state = NEWLINE;
209: me->fold_state = me->state;
210: }
211: break;
212:
213:
214: } /* switch on state*/
215:
216: return;
217:
218: value_too_long:
219: if (TRACE) fprintf(stderr,
220: "HTMIME: *** Syntax error. (string too long)\n");
221:
222: bad_field_name: /* Ignore it */
223: me->state = JUNK_LINE;
224: return;
225:
226: }
227:
228:
229:
230: /* String handling
231: ** ---------------
232: **
233: ** Strings must be smaller than this buffer size.
234: */
235: PRIVATE void HTMIME_put_string ARGS2(HTStream *, me, CONST char*, s)
236: {
237: CONST char * p;
2.3 ! timbl 238: if (me->state == MIME_TRANSPARENT) /* Optimisation */
2.1 timbl 239: (*me->targetClass.put_string)(me->target,s);
2.3 ! timbl 240: else if (me->state != MIME_IGNORE)
2.1 timbl 241: for (p=s; *p; p++) HTMIME_put_character(me, *p);
242: }
243:
244:
245: /* Buffer write. Buffers can (and should!) be big.
246: ** ------------
247: */
248: PRIVATE void HTMIME_write ARGS3(HTStream *, me, CONST char*, s, int, l)
249: {
250: CONST char * p;
2.3 ! timbl 251: if (me->state == MIME_TRANSPARENT) /* Optimisation */
! 252: (*me->targetClass.put_block)(me->target, s, l);
2.1 timbl 253: else
254: for (p=s; p < s+l; p++) HTMIME_put_character(me, *p);
255: }
256:
257:
258:
259:
260: /* Free an HTML object
261: ** -------------------
262: **
263: */
264: PRIVATE void HTMIME_free ARGS1(HTStream *, me)
265: {
266: if (me->target) (*me->targetClass.free)(me->target);
267: free(me);
268: }
269:
270: /* End writing
271: */
272:
273: PRIVATE void HTMIME_end_document ARGS1(HTStream *, me)
274: {
275: (*me->targetClass.end_document)(me->target);
276: }
277:
278:
279:
280: /* Structured Object Class
281: ** -----------------------
282: */
283: PUBLIC CONST HTStreamClass HTMIME =
284: {
285: "MIMEParser",
286: HTMIME_free,
287: HTMIME_end_document,
288: HTMIME_put_character, HTMIME_put_string,
289: HTMIME_write
290: };
291:
292:
293: /* Subclass-specific Methods
294: ** -------------------------
295: */
296:
297: PUBLIC HTStream* HTMIMEConvert ARGS3(
298: HTPresentation *, pres,
299: HTParentAnchor *, anchor,
300: HTStream *, sink)
301: {
302: HTStream* me;
303:
304: me = malloc(sizeof(*me));
305: if (me == NULL) outofmem(__FILE__, "HTML_new");
306: me->isa = &HTMIME;
307:
308: me->sink = sink;
309: me->anchor = anchor;
310: me->target = NULL;
311: me->state = BEGINNING_OF_LINE;
312: me->format = WWW_PLAINTEXT;
313: me->targetRep = pres->rep_out;
314: me->boundary = 0; /* Not set yet */
315: return me;
316: }
317:
318:
Webmaster