Annotation of libwww/Library/src/HTMIME.c, revision 2.2
2.1 timbl 1: /* MIME Message Parse HTMIME.c
2: ** ==================
3: **
4: ** This is RFC 1341-specific code.
5: ** The input stream pushed into this parser is assumed to be
6: ** stripped of CRs, i.e. lines end with LF, not CR LF.
7: ** (It is easy to change this except for the body part where
8: ** conversion can be slow.)
9: **
10: ** History:
11: ** Feb 92 Written Tim Berners-Lee, CERN
12: **
13: */
14: #include "HTMIME.h" /* Implemented here */
15: #include "HTAlert.h"
16:
17:
18: /* MIME Object
19: ** -----------
20: */
21:
/*	States of the header-parsing finite state machine.
**
**	The enumerator order is significant only in that it fixes the
**	numeric values; the parser itself only compares for equality.
*/
typedef enum _MIME_state {
    BEGINNING_OF_LINE,		/* At the start of a header line */
    CONTENT_T,			/* Matched "content-t"; next char picks the field */
    CONTENT_TRANSFER_ENCODING,	/* Matched "content-transfer-encoding:" */
    CONTENT_TYPE,		/* Matched "content-type:" */
    SKIP_GET_VALUE,		/* Skip white space, then collect the value */
    GET_VALUE,			/* Collect the value up to white space */
    JUNK_LINE,			/* Discard the remainder of this (folded) line */
    NEWLINE,			/* Just saw LF: next line may be a continuation */
    CHECK,			/* Comparing input against check_pointer */
    TRANSPARENT,		/* Header done: pass input straight to target */
    IGNORE			/* No usable target: drop all input */
} MIME_state;
35:
36: #define VALUE_SIZE 128 /* @@@@@@@ Arbitrary? */
37: struct _HTStream {
38: CONST HTStreamClass * isa;
39:
40: MIME_state state; /* current state */
41: MIME_state if_ok; /* got this state if match */
42: MIME_state field; /* remember which field */
43: MIME_state fold_state; /* state on a fold */
44: CONST char * check_pointer; /* checking input */
45:
46: char * value_pointer; /* storing values */
47: char value[VALUE_SIZE];
48:
49: HTParentAnchor * anchor; /* Given on creation */
50: HTStream * sink; /* Given on creation */
51:
52: char * boundary; /* For multipart */
53:
54: HTFormat encoding; /* Content-Transfer-Encoding */
55: HTFormat format; /* Content-Type */
56: HTStream * target; /* While writing out */
57: HTStreamClass targetClass;
58:
59: HTAtom * targetRep; /* Converting into? */
60: };
61:
62:
63: /*_________________________________________________________________________
64: **
65: ** A C T I O N R O U T I N E S
66: */
67:
68: /* Character handling
69: ** ------------------
70: **
71: ** This is a FSM parser which is tolerant as it can be of all
72: ** syntax errors. It ignores field names it does not understand,
73: ** and resynchronises on line beginnings.
74: */
75:
76: PRIVATE void HTMIME_put_character ARGS2(HTStream *, me, char, c)
77: {
78: switch(me->state) {
79:
2.2 ! timbl 80: case IGNORE:
! 81: return;
! 82:
2.1 timbl 83: case TRANSPARENT:
84: (*me->targetClass.put_character)(me->target, c); /* MUST BE FAST */
85: return;
86:
87: case NEWLINE:
88: if (c != '\n' && WHITE(c)) { /* Folded line */
89: me->state = me->fold_state; /* pop state before newline */
90: break;
91: }
92:
93: /* else Falls through */
94:
95: case BEGINNING_OF_LINE:
96: switch(c) {
97: case 'c':
98: case 'C':
99: me->check_pointer = "ontent-t";
100: me->if_ok = CONTENT_T;
101: me->state = CHECK;
102: break;
103: case '\n': /* Blank line: End of Header! */
104: {
105: if (TRACE) fprintf(stderr,
2.2 ! timbl 106: "HTMIME: MIME content type is %s, converting to %s\n",
! 107: HTAtom_name(me->format), HTAtom_name(me->targetRep));
2.1 timbl 108: me->target = HTStreamStack(me->format, me->targetRep,
109: me->sink , me->anchor);
2.2 ! timbl 110: if (!me->target) {
! 111: if (TRACE) fprintf(stderr, "MIME: Can't translate! ** \n");
! 112: me->target = me->sink; /* Cheat */
! 113: }
! 114: if (me->target) {
! 115: me->targetClass = *me->target->isa;
2.1 timbl 116: /* Check for encoding and select state from there @@ */
117:
2.2 ! timbl 118: me->state = TRANSPARENT; /* From now push straigh through */
! 119: } else {
! 120: me->state = IGNORE; /* What else to do? */
! 121: }
2.1 timbl 122: }
123: break;
124:
125: default:
126: goto bad_field_name;
127: break;
128:
129: } /* switch on character */
130: break;
131:
132: case CHECK: /* Check against string */
133: if (TOLOWER(c) == *(me->check_pointer)++) {
134: if (!*me->check_pointer) me->state = me->if_ok;
135: } else { /* Error */
136: if (TRACE) fprintf(stderr,
137: "HTMIME: Bad character `%c' found where `%s' expected",
138: c, me->check_pointer - 1);
139: goto bad_field_name;
140: }
141: break;
142:
143: case CONTENT_T:
144: switch(c) {
145: case 'r':
146: case 'R':
147: me->check_pointer = "ansfer-encoding:";
148: me->if_ok = CONTENT_TRANSFER_ENCODING;
149: me->state = CHECK;
150: break;
151:
152: case 'y':
153: case 'Y':
154: me->check_pointer = "pe:";
155: me->if_ok = CONTENT_TYPE;
156: me->state = CHECK;
157: break;
158:
159: default:
160: goto bad_field_name;
161:
162: } /* switch on character */
163: break;
164:
165: case CONTENT_TYPE:
166: case CONTENT_TRANSFER_ENCODING:
167: me->field = me->state; /* remember it */
168: me->state = SKIP_GET_VALUE;
169: /* Fall through! */
170: case SKIP_GET_VALUE:
171: if (c == '\n') {
172: me->fold_state = me->state;
173: me->state = NEWLINE;
174: break;
175: }
176: if (WHITE(c)) break; /* Skip white space */
177:
178: me->value_pointer = me->value;
179: me->state = GET_VALUE;
180: /* Fall through to store first character */
181:
182: case GET_VALUE:
183: if (WHITE(c)) { /* End of field */
184: *me->value_pointer = 0;
185: switch (me->field) {
186: case CONTENT_TYPE:
187: me->format = HTAtom_for(me->value);
188: break;
189: case CONTENT_TRANSFER_ENCODING:
190: me->encoding = HTAtom_for(me->value);
191: break;
192: default: /* Should never get here */
193: break;
194: }
195: } else {
196: if (me->value_pointer < me->value + VALUE_SIZE - 1) {
197: *me->value_pointer++ = c;
198: break;
199: } else {
200: goto value_too_long;
201: }
202: }
203: /* Fall through */
204:
205: case JUNK_LINE:
206: if (c == '\n') {
207: me->state = NEWLINE;
208: me->fold_state = me->state;
209: }
210: break;
211:
212:
213: } /* switch on state*/
214:
215: return;
216:
217: value_too_long:
218: if (TRACE) fprintf(stderr,
219: "HTMIME: *** Syntax error. (string too long)\n");
220:
221: bad_field_name: /* Ignore it */
222: me->state = JUNK_LINE;
223: return;
224:
225: }
226:
227:
228:
229: /* String handling
230: ** ---------------
231: **
232: ** Strings must be smaller than this buffer size.
233: */
234: PRIVATE void HTMIME_put_string ARGS2(HTStream *, me, CONST char*, s)
235: {
236: CONST char * p;
237: if (me->state == TRANSPARENT) /* Optimisation */
238: (*me->targetClass.put_string)(me->target,s);
2.2 ! timbl 239: else if (me->state != IGNORE)
2.1 timbl 240: for (p=s; *p; p++) HTMIME_put_character(me, *p);
241: }
242:
243:
244: /* Buffer write. Buffers can (and should!) be big.
245: ** ------------
246: */
247: PRIVATE void HTMIME_write ARGS3(HTStream *, me, CONST char*, s, int, l)
248: {
249: CONST char * p;
250: if (me->state == TRANSPARENT) /* Optimisation */
251: (*me->targetClass.write)(me->target, s, l);
252: else
253: for (p=s; p < s+l; p++) HTMIME_put_character(me, *p);
254: }
255:
256:
257:
258:
259: /* Free an HTML object
260: ** -------------------
261: **
262: */
263: PRIVATE void HTMIME_free ARGS1(HTStream *, me)
264: {
265: if (me->target) (*me->targetClass.free)(me->target);
266: free(me);
267: }
268:
269: /* End writing
270: */
271:
272: PRIVATE void HTMIME_end_document ARGS1(HTStream *, me)
273: {
274: (*me->targetClass.end_document)(me->target);
275: }
276:
277:
278:
279: /* Structured Object Class
280: ** -----------------------
281: */
282: PUBLIC CONST HTStreamClass HTMIME =
283: {
284: "MIMEParser",
285: HTMIME_free,
286: HTMIME_end_document,
287: HTMIME_put_character, HTMIME_put_string,
288: HTMIME_write
289: };
290:
291:
292: /* Subclass-specific Methods
293: ** -------------------------
294: */
295:
296: PUBLIC HTStream* HTMIMEConvert ARGS3(
297: HTPresentation *, pres,
298: HTParentAnchor *, anchor,
299: HTStream *, sink)
300: {
301: HTStream* me;
302:
303: me = malloc(sizeof(*me));
304: if (me == NULL) outofmem(__FILE__, "HTML_new");
305: me->isa = &HTMIME;
306:
307: me->sink = sink;
308: me->anchor = anchor;
309: me->target = NULL;
310: me->state = BEGINNING_OF_LINE;
311: me->format = WWW_PLAINTEXT;
312: me->targetRep = pres->rep_out;
313: me->boundary = 0; /* Not set yet */
314: return me;
315: }
316:
317:
Webmaster