Annotation of libwww/Library/src/HTBound.c, revision 2.16
2.12 frystyk 1: /* HTBound.c
2: ** MIME MULTIPART PARSER STREAM
3: **
4: ** (c) COPYRIGHT MIT 1995.
5: ** Please first read the full copyright statement in the file COPYRIGH.
2.16 ! vbancrof 6: ** @(#) $Id: HTBound.c,v 2.15 2005/10/16 13:12:23 vbancrof Exp $
2.12 frystyk 7: **
8: ** This stream parses a MIME multipart stream and builds a set of new
9: ** streams via the stream stack each time we encounter a boundary start.
10: ** We get the boundary from the normal MIME parser via the Request object
11: **
12: ** Authors
13: ** HF Henrik Frystyk <frystyk@w3.org>
14: **
15: ** History:
16: ** Nov 95 Written from scratch
2.15 vbancrof 17: ** SV Jun 05 Rewrote HTBoundary_put_block. Fixed many bugs+segfaults.
18: ** SV Jul 05 Fix double-counting of processed bytes.
2.12 frystyk 19: **
20: */
21:
22: /* Library include files */
23: #include "wwwsys.h"
24: #include "WWWUtil.h"
25: #include "WWWCore.h"
2.13 frystyk 26: #include "HTMerge.h"
2.12 frystyk 27: #include "HTReqMan.h"
2.15 vbancrof 28: #include "HTNetMan.h"
29: #include "HTChannl.h"
2.12 frystyk 30: #include "HTBound.h" /* Implemented here */
31:
/*
** Stream helper macros.  Each one expects a variable named `me' (the
** HTStream being processed) to be in scope at the point of use.
*/

/* Hand a block to the current target stream; evaluates to HT_OK when no
** target is attached. */
#define PUTBLOCK(b, l) \
    (me->target ? (*me->target->isa->put_block)(me->target, (b), (l)) : HT_OK)

/* Hand a block to the debug stream (no NULL check on me->debug). */
#define PUTDEBUG(b, l) \
    (*me->debug->isa->put_block)(me->debug, (b), (l))

/* Invoke the target stream's _free method (no NULL check on me->target). */
#define FREE_TARGET \
    (*me->target->isa->_free)(me->target)
36:
37: struct _HTStream {
38: const HTStreamClass * isa;
2.15 vbancrof 39: HTNet * net;
2.12 frystyk 40: HTStream * target;
41: HTStream * orig_target;
42: HTFormat format;
43: HTStream * debug; /* For preamble and epilog */
44: HTRequest * request;
45: char * boundary;
2.15 vbancrof 46:
47: BOOL keptcrlf;
48: int (*state)(HTStream *, const char *, int);
49:
50: char *boundary_ptr;
51:
2.12 frystyk 52: };
53:
2.15 vbancrof 54: PRIVATE int HTBoundary_flush (HTStream * me);
55:
2.12 frystyk 56: /* ------------------------------------------------------------------------- */
57:
2.15 vbancrof 58: PRIVATE int start_of_line (HTStream * me, const char * b, int l);
59: PRIVATE int seen_dash (HTStream * me, const char * b, int l);
60: PRIVATE int seen_doubledash (HTStream * me, const char * b, int l);
61: PRIVATE int seen_delimiter_nonterminal(HTStream * me, const char * b, int l);
62: PRIVATE int seen_delimiter_nonterminal_CR(HTStream * me, const char * b, int l);
63: PRIVATE int seen_delimiter_dash(HTStream * me, const char * b, int l);
64: PRIVATE int seen_delimiter_terminal(HTStream * me, const char * b, int l);
65: PRIVATE int seen_delimiter_terminal_CR(HTStream * me, const char * b, int l);
66: PRIVATE int not_delimiter(HTStream * me, const char * b, int l, int extra);
67: PRIVATE int seen_nothing(HTStream * me, const char * b, int l);
68: PRIVATE int seen_cr(HTStream * me, const char * b, int l);
69: PRIVATE void process_boundary(HTStream *me, int isterminal);
70:
/*
** Mark a parameter or variable as intentionally unused.  A cast to void
** is used rather than a self-assignment so the macro also works on
** const-qualified names and does not trigger self-assignment warnings.
*/
#define UNUSED(l) ((void) (l))	/* Shut up about unused variables */
72:
2.12 frystyk 73: PRIVATE int HTBoundary_put_block (HTStream * me, const char * b, int l)
74: {
2.15 vbancrof 75: /*
76: ** The HTBoundary object gets attached downstream of HTMime.
77: ** The HTBoundary object creates another HTMime object downstream of
78: ** the HTBoundary object.
79: **
80: ** When we push data downstream to the second HTBoundary object, it
81: ** updates the bytes read count in the HTNet object.
82: **
83: ** When we return to the parent HTMime object, itupdates the
84: ** bytes read count in the HTNet object again. Oops.
85: **
86: ** Same thing happens with the consumed byte count. We can prevent
87: ** the consumed byte counts from being updated by temporary setting
88: ** the input channel stream pointer to NULL, but for the byte counts
89: ** we have to save them and restore them before existing.
90: **
91: ** This bug was discovered by chance when a multipart/partial response
92: ** was partially received, and as a result of double-counting the
93: ** real response got cut off (because HTMime thought that more bytes
94: ** were processed than actually were, thus it processed only the
95: ** partial count of the remaining bytes in the response). When the
96: ** multipart/partial response was received all at once this bug did
97: ** not get triggered.
98: */
99:
100: HTHost *host=HTNet_host(me->net);
101: HTChannel *c=HTHost_channel(host);
102: HTInputStream *i=HTChannel_input(c);
103:
104: long saveBytesRead=HTNet_bytesRead(me->net);
105: long saveHeaderBytesRead=HTNet_headerBytesRead(me->net);
106:
107: if (i)
108: HTChannel_setInput(c, NULL);
109:
110: HTTRACE(STREAM_TRACE, "Boundary: processing %d bytes\n" _ l);
111: /* Main loop consumes all input */
112:
113: while (l)
114: {
115: int n= (*me->state)(me, b, l);
116:
117: if (n == 0)
118: return HT_ERROR;
119: b += n;
120: l -= n;
121: }
122:
123: if (i)
124: HTChannel_setInput(c, i);
125: HTNet_setBytesRead(me->net, saveBytesRead);
126: HTNet_setHeaderBytesRead(me->net, saveHeaderBytesRead);
127:
128: return HT_OK;
129: }
130:
131: /*
132: ** Start of line, keptcrlf=YES if we've kept the preceding CRLF from downstream
133: ** and we'll pass it along if we decide that this is not a boundary delimiter.
134: */
135:
136: PRIVATE int start_of_line (HTStream * me, const char * b, int l)
137: {
138: if (*b != '-')
139: return not_delimiter(me, b, l, 0);
140:
141: HTTRACE(STREAM_TRACE, "Boundary: start of line: input '-'\n");
142:
143: me->state= seen_dash;
144:
145: return 1;
146: }
147:
148: /*
149: ** Line: -
150: */
151:
152: PRIVATE int seen_dash (HTStream * me, const char * b, int l)
153: {
154: if (*b != '-')
155: return not_delimiter(me, b, l, 1);
156:
157: HTTRACE(STREAM_TRACE, "Boundary: start of line: input '--'\n");
158:
159: me->state= seen_doubledash;
160: me->boundary_ptr=me->boundary;
161: return 1;
162: }
163:
164: /*
165: ** Line: --
166: */
167:
168: PRIVATE int seen_doubledash (HTStream * me, const char * b, int l)
169: {
170: me->state=seen_doubledash;
171:
172: if (*me->boundary_ptr)
173: {
174: if (*b != *me->boundary_ptr)
175: {
176: return not_delimiter(me, b, l,
177: me->boundary_ptr - me->boundary
178: + 2);
2.12 frystyk 179: }
2.15 vbancrof 180: ++me->boundary_ptr;
181: return 1;
182: }
183:
184: /*
185: ** Line: --delimiter
186: */
187:
188: if (*b == '-')
189: {
190: HTTRACE(STREAM_TRACE,
191: "Boundary: start of line: input '--%s-'\n"
192: _ me->boundary);
193:
194: me->state=seen_delimiter_dash;
195: return 1;
196: }
197:
198: HTTRACE(STREAM_TRACE,
199: "Boundary: Found: '--%s'\n" _ me->boundary);
200:
201: return seen_delimiter_nonterminal(me, b, l);
202: }
203:
204: /*
205: ** Line: --delimiter
206: **
207: ** Waiting for CRLF.
208: */
209:
210:
211: PRIVATE int seen_delimiter_nonterminal(HTStream * me, const char * b, int l)
212: {
213: UNUSED(l);
214:
215: me->state=seen_delimiter_nonterminal;
216: if (*b == CR)
217: me->state=seen_delimiter_nonterminal_CR;
218:
219: return 1;
220: }
221:
222: /*
223: ** Line: --delimiter<CR>
224: */
225:
226: PRIVATE int seen_delimiter_nonterminal_CR(HTStream * me, const char * b, int l)
227: {
228: HTTRACE(STREAM_TRACE,
229: "Boundary: Found: '--%s<CR>'\n" _ me->boundary);
230:
231: if (*b != LF)
232: return seen_delimiter_nonterminal(me, b, l);
233:
234: HTTRACE(STREAM_TRACE,
235: "Boundary: Found: '--%s<CR><LF>'\n" _ me->boundary);
236:
237: process_boundary(me, NO);
238: return 1;
239: }
240:
241: /*
242: ** Line: --delimiter-
243: */
244:
245: PRIVATE int seen_delimiter_dash(HTStream * me, const char * b, int l)
246: {
247: if (*b != '-')
248: return seen_delimiter_nonterminal(me, b, l);
249:
250: HTTRACE(STREAM_TRACE,
251: "Boundary: start of line: input '--%s--'\n"
252: _ me->boundary);
253:
254: me->state=seen_delimiter_terminal;
255: return 1;
256: }
257:
258: /*
259: ** Line: --delimiter--
260: */
261:
262: PRIVATE int seen_delimiter_terminal(HTStream * me, const char * b, int l)
263: {
264: UNUSED(l);
265:
266: me->state=seen_delimiter_terminal;
267:
268: if (*b == CR)
269: me->state=seen_delimiter_terminal_CR;
270: return 1;
271: }
272: /*
273: ** Line: --delimiter--<CR>
274: */
275:
276: PRIVATE int seen_delimiter_terminal_CR(HTStream * me, const char * b, int l)
277: {
278: HTTRACE(STREAM_TRACE,
279: "Boundary: Found '--%s--<CR>'\n"
280: _ me->boundary);
281:
282: if (*b != LF)
283: return seen_delimiter_terminal(me, b, l);
284: HTTRACE(STREAM_TRACE,
285: "Boundary: Found '--%s--<CR><LF>'\n"
286: _ me->boundary);
287:
288: process_boundary(me, YES);
289: return 1;
290: }
291:
292: /*
293: ** Beginning of the line does not contain a delimiter.
294: **
295: **
296: ** extra: Count of characters in a partially matched delimiter. Since it's
297: ** not a delimiter this is content that needs to go downstream.
298: */
299:
300: PRIVATE int not_delimiter(HTStream * me, const char * b, int l, int extra)
301: {
302: HTTRACE(STREAM_TRACE, "Boundary: not a delimiter line\n");
303:
304: if (me->keptcrlf)
305: {
306: HTTRACE(STREAM_TRACE, "Boundary: Sending previous line's <CR><LF>\n");
307: /*
308: ** Did not process CRLF from previous line, because prev CRLF
309: ** is considered a part of the delimiter. See MIME RFC.
310: */
311:
312: me->keptcrlf=NO;
313: if (PUTBLOCK("\r\n", 2) != HT_OK)
314: return 0;
315: }
316:
317: /*
318: ** Potentially matched some of: --DELIMITER
319: */
320:
321: if (extra)
322: {
323: HTTRACE(STREAM_TRACE, "Boundary: Sending partially-matched %d characters\n" _ extra);
324:
325: if (PUTBLOCK("--", extra > 2 ? 2:extra) != HT_OK)
326: return 0;
327:
328: if (extra > 2)
329: if (PUTBLOCK(me->boundary, extra-2) != HT_OK)
330: return 0;
331: }
332: return seen_nothing(me, b, l);
333: }
334:
335: /*
336: ** We're not looking for a delimiter. Look for the next line of input
337: ** in the data that could potentially be a delimiter.
338: */
339:
340: PRIVATE int seen_nothing(HTStream * me, const char * b, int l)
341: {
342: int i;
343:
344: me->state=seen_nothing;
345:
346: for (i=0; i<l; i++)
347: {
348: if (b[i] != CR)
349: continue;
350:
351: /*
352: ** If we have at least four more characters in unconsumed
353: ** input, and they're not \r\n--, we can safely skip over
354: ** them.
355: */
356:
357: if (l-i > 4 &&
358: strncmp(b+i, "\r\n--", 4))
359: continue;
360: break;
361: }
362:
363: if (i == 0)
364: {
365: /* Could only be a CR here. */
366:
367: me->state=seen_cr;
368: return 1;
369: }
370:
371: HTTRACE(STREAM_TRACE, "Boundary: Processed %d (out of %d) bytes\n"
372: _ i _ l);
373:
374: if (PUTBLOCK(b, i) != HT_OK)
375: return 0;
376:
377: return i;
378: }
379:
380: /*
381: ** State: seen a CR
382: */
383:
384: PRIVATE int seen_cr(HTStream * me, const char * b, int l)
385: {
386: HTTRACE(STREAM_TRACE, "Boundary: Processed <CR>\n");
387:
388: if (*b != LF)
389: {
390: HTTRACE(STREAM_TRACE, "Boundary: ... <LF> didn't follow\n");
391: if (PUTBLOCK("\r", 1) != HT_OK)
392: return 0;
393: return seen_nothing(me, b, l);
394: }
395:
396: HTTRACE(STREAM_TRACE, "Boundary: Processed <CR><LF>\n");
397: me->state=start_of_line;
398: me->keptcrlf=YES;
399: return 1;
400: }
401:
402: PRIVATE void process_boundary(HTStream *me, int isterminal)
403: {
404: HTBoundary_flush(me);
405: if (me->target) FREE_TARGET;
406: me->target=NULL;
407: me->state=start_of_line;
408: me->keptcrlf=NO;
409:
410: if (!isterminal)
2.12 frystyk 411: me->target = HTStreamStack(WWW_MIME,me->format,
2.16 ! vbancrof 412: HTMerge(me->orig_target, 1),
2.12 frystyk 413: me->request, YES);
414: }
415:
2.15 vbancrof 416:
2.12 frystyk 417: PRIVATE int HTBoundary_put_string (HTStream * me, const char * s)
418: {
419: return HTBoundary_put_block(me, s, (int) strlen(s));
420: }
421:
422: PRIVATE int HTBoundary_put_character (HTStream * me, char c)
423: {
424: return HTBoundary_put_block(me, &c, 1);
425: }
426:
427: PRIVATE int HTBoundary_flush (HTStream * me)
428: {
2.15 vbancrof 429: if (me->target == NULL)
430: return HT_OK;
431: return (*me->target->isa->flush)(me->target);
2.12 frystyk 432: }
433:
434: PRIVATE int HTBoundary_free (HTStream * me)
435: {
436: int status = HT_OK;
437: if (me->target) {
438: if ((status = (*me->target->isa->_free)(me->target)) == HT_WOULD_BLOCK)
439: return HT_WOULD_BLOCK;
440: }
2.14 frystyk 441: HTTRACE(PROT_TRACE, "Boundary.... FREEING....\n");
2.12 frystyk 442: HT_FREE(me->boundary);
443: HT_FREE(me);
444: return status;
445: }
446:
447: PRIVATE int HTBoundary_abort (HTStream * me, HTList * e)
448: {
449: int status = HT_ERROR;
450: if (me->target) status = (*me->target->isa->abort)(me->target, e);
2.14 frystyk 451: HTTRACE(PROT_TRACE, "Boundary.... ABORTING...\n");
2.12 frystyk 452: HT_FREE(me->boundary);
453: HT_FREE(me);
454: return status;
455: }
456:
457: PRIVATE const HTStreamClass HTBoundaryClass =
458: {
459: "HTBoundary",
460: HTBoundary_flush,
461: HTBoundary_free,
462: HTBoundary_abort,
463: HTBoundary_put_character,
464: HTBoundary_put_string,
465: HTBoundary_put_block
466: };
467:
468: PUBLIC HTStream * HTBoundary (HTRequest * request,
469: void * param,
470: HTFormat input_format,
471: HTFormat output_format,
472: HTStream * output_stream)
473: {
474: HTResponse * response = HTRequest_response(request);
475: HTParentAnchor * anchor = HTRequest_anchor(request);
476: HTAssocList * type_param = response ?
477: HTResponse_formatParam(response) :
478: HTAnchor_formatParam(anchor);
479: char * boundary = HTAssocList_findObject(type_param, "boundary");
2.15 vbancrof 480:
481: UNUSED(param);
482: UNUSED(input_format);
483:
2.12 frystyk 484: if (boundary) {
485: HTStream * me;
486: if ((me = (HTStream *) HT_CALLOC(1, sizeof(HTStream))) == NULL)
487: HT_OUTOFMEM("HTBoundary");
488: me->isa = &HTBoundaryClass;
2.15 vbancrof 489: me->net = HTRequest_net(request);
490: me->request = request;
2.12 frystyk 491: me->format = output_format;
492: me->orig_target = output_stream;
493: me->debug = HTRequest_debugStream(request);
2.15 vbancrof 494:
495: me->state = start_of_line;
496: me->keptcrlf=NO;
497:
2.12 frystyk 498: StrAllocCopy(me->boundary, boundary); /* Local copy */
2.15 vbancrof 499:
2.14 frystyk 500: HTTRACE(STREAM_TRACE, "Boundary.... Stream created with boundary '%s\'\n" _ me->boundary);
2.12 frystyk 501: return me;
502: } else {
2.14 frystyk 503: HTTRACE(STREAM_TRACE, "Boundary.... UNKNOWN boundary!\n");
2.12 frystyk 504: return HTErrorStream();
505: }
506: }
Webmaster