Annotation of libwww/Library/src/HTBound.c, revision 2.15
2.12 frystyk 1: /* HTBound.c
2: ** MIME MULTIPART PARSER STREAM
3: **
4: ** (c) COPYRIGHT MIT 1995.
5: ** Please first read the full copyright statement in the file COPYRIGH.
2.15 ! vbancrof 6: ** @(#) $Id: HTBound.c,v 2.14 1999/02/22 22:10:10 frystyk Exp $
2.12 frystyk 7: **
8: ** This stream parses a MIME multipart stream and builds a set of new
9: ** streams via the stream stack each time we encounter a boundary start.
10: ** We get the boundary from the normal MIME parser via the Request object
11: **
12: ** Authors
13: ** HF Henrik Frystyk <frystyk@w3.org>
14: **
15: ** History:
16: ** Nov 95 Written from scratch
2.15 ! vbancrof 17: ** SV Jun 05 Rewrote HTBoundary_put_block. Fixed many bugs+segfaults.
! 18: ** SV Jul 05 Fix double-counting of processed bytes.
2.12 frystyk 19: **
20: */
21:
22: /* Library include files */
23: #include "wwwsys.h"
24: #include "WWWUtil.h"
25: #include "WWWCore.h"
2.13 frystyk 26: #include "HTMerge.h"
2.12 frystyk 27: #include "HTReqMan.h"
2.15 ! vbancrof 28: #include "HTNetMan.h"
! 29: #include "HTChannl.h"
2.12 frystyk 30: #include "HTBound.h" /* Implemented here */
31:
/*
** Push l bytes starting at b to the stream of the current body part.
** When no body part is open (me->target is NULL) nothing is sent and
** HT_OK is reported.
*/
#define PUTBLOCK(b, l) \
    (me->target ? (*me->target->isa->put_block)(me->target, (b), (l)) : HT_OK)

/* Push l bytes starting at b to the debug stream (preamble and epilog) */
#define PUTDEBUG(b, l)  (*me->debug->isa->put_block)(me->debug, (b), (l))

/* Free the current body part stream; callers must check me->target first */
#define FREE_TARGET     (*me->target->isa->_free)(me->target)
36:
37: struct _HTStream {
38: const HTStreamClass * isa;
2.15 ! vbancrof 39: HTNet * net;
2.12 frystyk 40: HTStream * target;
41: HTStream * orig_target;
42: HTFormat format;
43: HTStream * debug; /* For preamble and epilog */
44: HTRequest * request;
45: char * boundary;
2.15 ! vbancrof 46:
! 47: BOOL keptcrlf;
! 48: int (*state)(HTStream *, const char *, int);
! 49:
! 50: char *boundary_ptr;
! 51:
2.12 frystyk 52: };
53:
2.15 ! vbancrof 54: PRIVATE int HTBoundary_flush (HTStream * me);
! 55:
2.12 frystyk 56: /* ------------------------------------------------------------------------- */
57:
2.15 ! vbancrof 58: PRIVATE int start_of_line (HTStream * me, const char * b, int l);
! 59: PRIVATE int seen_dash (HTStream * me, const char * b, int l);
! 60: PRIVATE int seen_doubledash (HTStream * me, const char * b, int l);
! 61: PRIVATE int seen_delimiter_nonterminal(HTStream * me, const char * b, int l);
! 62: PRIVATE int seen_delimiter_nonterminal_CR(HTStream * me, const char * b, int l);
! 63: PRIVATE int seen_delimiter_dash(HTStream * me, const char * b, int l);
! 64: PRIVATE int seen_delimiter_terminal(HTStream * me, const char * b, int l);
! 65: PRIVATE int seen_delimiter_terminal_CR(HTStream * me, const char * b, int l);
! 66: PRIVATE int not_delimiter(HTStream * me, const char * b, int l, int extra);
! 67: PRIVATE int seen_nothing(HTStream * me, const char * b, int l);
! 68: PRIVATE int seen_cr(HTStream * me, const char * b, int l);
! 69: PRIVATE void process_boundary(HTStream *me, int isterminal);
! 70:
/*
** Shut up about unused variables. A cast to void evaluates the name without
** writing to it, so it also works for const-qualified objects and does not
** trigger self-assignment warnings.
*/
#define UNUSED(l) ((void)(l))
! 72:
2.12 frystyk 73: PRIVATE int HTBoundary_put_block (HTStream * me, const char * b, int l)
74: {
2.15 ! vbancrof 75: /*
! 76: ** The HTBoundary object gets attached downstream of HTMime.
! 77: ** The HTBoundary object creates another HTMime object downstream of
! 78: ** the HTBoundary object.
! 79: **
! 80: ** When we push data downstream to the second HTBoundary object, it
! 81: ** updates the bytes read count in the HTNet object.
! 82: **
! 83: ** When we return to the parent HTMime object, itupdates the
! 84: ** bytes read count in the HTNet object again. Oops.
! 85: **
! 86: ** Same thing happens with the consumed byte count. We can prevent
! 87: ** the consumed byte counts from being updated by temporary setting
! 88: ** the input channel stream pointer to NULL, but for the byte counts
! 89: ** we have to save them and restore them before existing.
! 90: **
! 91: ** This bug was discovered by chance when a multipart/partial response
! 92: ** was partially received, and as a result of double-counting the
! 93: ** real response got cut off (because HTMime thought that more bytes
! 94: ** were processed than actually were, thus it processed only the
! 95: ** partial count of the remaining bytes in the response). When the
! 96: ** multipart/partial response was received all at once this bug did
! 97: ** not get triggered.
! 98: */
! 99:
! 100: HTHost *host=HTNet_host(me->net);
! 101: HTChannel *c=HTHost_channel(host);
! 102: HTInputStream *i=HTChannel_input(c);
! 103:
! 104: long saveBytesRead=HTNet_bytesRead(me->net);
! 105: long saveHeaderBytesRead=HTNet_headerBytesRead(me->net);
! 106:
! 107: if (i)
! 108: HTChannel_setInput(c, NULL);
! 109:
! 110: HTTRACE(STREAM_TRACE, "Boundary: processing %d bytes\n" _ l);
! 111: /* Main loop consumes all input */
! 112:
! 113: while (l)
! 114: {
! 115: int n= (*me->state)(me, b, l);
! 116:
! 117: if (n == 0)
! 118: return HT_ERROR;
! 119: b += n;
! 120: l -= n;
! 121: }
! 122:
! 123: if (i)
! 124: HTChannel_setInput(c, i);
! 125: HTNet_setBytesRead(me->net, saveBytesRead);
! 126: HTNet_setHeaderBytesRead(me->net, saveHeaderBytesRead);
! 127:
! 128: return HT_OK;
! 129: }
! 130:
! 131: /*
! 132: ** Start of line, keptcrlf=YES if we've kept the preceding CRLF from downstream
! 133: ** and we'll pass it along if we decide that this is not a boundary delimiter.
! 134: */
! 135:
! 136: PRIVATE int start_of_line (HTStream * me, const char * b, int l)
! 137: {
! 138: if (*b != '-')
! 139: return not_delimiter(me, b, l, 0);
! 140:
! 141: HTTRACE(STREAM_TRACE, "Boundary: start of line: input '-'\n");
! 142:
! 143: me->state= seen_dash;
! 144:
! 145: return 1;
! 146: }
! 147:
! 148: /*
! 149: ** Line: -
! 150: */
! 151:
! 152: PRIVATE int seen_dash (HTStream * me, const char * b, int l)
! 153: {
! 154: if (*b != '-')
! 155: return not_delimiter(me, b, l, 1);
! 156:
! 157: HTTRACE(STREAM_TRACE, "Boundary: start of line: input '--'\n");
! 158:
! 159: me->state= seen_doubledash;
! 160: me->boundary_ptr=me->boundary;
! 161: return 1;
! 162: }
! 163:
! 164: /*
! 165: ** Line: --
! 166: */
! 167:
! 168: PRIVATE int seen_doubledash (HTStream * me, const char * b, int l)
! 169: {
! 170: me->state=seen_doubledash;
! 171:
! 172: if (*me->boundary_ptr)
! 173: {
! 174: if (*b != *me->boundary_ptr)
! 175: {
! 176: return not_delimiter(me, b, l,
! 177: me->boundary_ptr - me->boundary
! 178: + 2);
2.12 frystyk 179: }
2.15 ! vbancrof 180: ++me->boundary_ptr;
! 181: return 1;
! 182: }
! 183:
! 184: /*
! 185: ** Line: --delimiter
! 186: */
! 187:
! 188: if (*b == '-')
! 189: {
! 190: HTTRACE(STREAM_TRACE,
! 191: "Boundary: start of line: input '--%s-'\n"
! 192: _ me->boundary);
! 193:
! 194: me->state=seen_delimiter_dash;
! 195: return 1;
! 196: }
! 197:
! 198: HTTRACE(STREAM_TRACE,
! 199: "Boundary: Found: '--%s'\n" _ me->boundary);
! 200:
! 201: return seen_delimiter_nonterminal(me, b, l);
! 202: }
! 203:
! 204: /*
! 205: ** Line: --delimiter
! 206: **
! 207: ** Waiting for CRLF.
! 208: */
! 209:
! 210:
! 211: PRIVATE int seen_delimiter_nonterminal(HTStream * me, const char * b, int l)
! 212: {
! 213: UNUSED(l);
! 214:
! 215: me->state=seen_delimiter_nonterminal;
! 216: if (*b == CR)
! 217: me->state=seen_delimiter_nonterminal_CR;
! 218:
! 219: return 1;
! 220: }
! 221:
! 222: /*
! 223: ** Line: --delimiter<CR>
! 224: */
! 225:
! 226: PRIVATE int seen_delimiter_nonterminal_CR(HTStream * me, const char * b, int l)
! 227: {
! 228: HTTRACE(STREAM_TRACE,
! 229: "Boundary: Found: '--%s<CR>'\n" _ me->boundary);
! 230:
! 231: if (*b != LF)
! 232: return seen_delimiter_nonterminal(me, b, l);
! 233:
! 234: HTTRACE(STREAM_TRACE,
! 235: "Boundary: Found: '--%s<CR><LF>'\n" _ me->boundary);
! 236:
! 237: process_boundary(me, NO);
! 238: return 1;
! 239: }
! 240:
! 241: /*
! 242: ** Line: --delimiter-
! 243: */
! 244:
! 245: PRIVATE int seen_delimiter_dash(HTStream * me, const char * b, int l)
! 246: {
! 247: if (*b != '-')
! 248: return seen_delimiter_nonterminal(me, b, l);
! 249:
! 250: HTTRACE(STREAM_TRACE,
! 251: "Boundary: start of line: input '--%s--'\n"
! 252: _ me->boundary);
! 253:
! 254: me->state=seen_delimiter_terminal;
! 255: return 1;
! 256: }
! 257:
! 258: /*
! 259: ** Line: --delimiter--
! 260: */
! 261:
! 262: PRIVATE int seen_delimiter_terminal(HTStream * me, const char * b, int l)
! 263: {
! 264: UNUSED(l);
! 265:
! 266: me->state=seen_delimiter_terminal;
! 267:
! 268: if (*b == CR)
! 269: me->state=seen_delimiter_terminal_CR;
! 270: return 1;
! 271: }
! 272: /*
! 273: ** Line: --delimiter--<CR>
! 274: */
! 275:
! 276: PRIVATE int seen_delimiter_terminal_CR(HTStream * me, const char * b, int l)
! 277: {
! 278: HTTRACE(STREAM_TRACE,
! 279: "Boundary: Found '--%s--<CR>'\n"
! 280: _ me->boundary);
! 281:
! 282: if (*b != LF)
! 283: return seen_delimiter_terminal(me, b, l);
! 284: HTTRACE(STREAM_TRACE,
! 285: "Boundary: Found '--%s--<CR><LF>'\n"
! 286: _ me->boundary);
! 287:
! 288: process_boundary(me, YES);
! 289: return 1;
! 290: }
! 291:
! 292: /*
! 293: ** Beginning of the line does not contain a delimiter.
! 294: **
! 295: **
! 296: ** extra: Count of characters in a partially matched delimiter. Since it's
! 297: ** not a delimiter this is content that needs to go downstream.
! 298: */
! 299:
! 300: PRIVATE int not_delimiter(HTStream * me, const char * b, int l, int extra)
! 301: {
! 302: HTTRACE(STREAM_TRACE, "Boundary: not a delimiter line\n");
! 303:
! 304: if (me->keptcrlf)
! 305: {
! 306: HTTRACE(STREAM_TRACE, "Boundary: Sending previous line's <CR><LF>\n");
! 307: /*
! 308: ** Did not process CRLF from previous line, because prev CRLF
! 309: ** is considered a part of the delimiter. See MIME RFC.
! 310: */
! 311:
! 312: me->keptcrlf=NO;
! 313: if (PUTBLOCK("\r\n", 2) != HT_OK)
! 314: return 0;
! 315: }
! 316:
! 317: /*
! 318: ** Potentially matched some of: --DELIMITER
! 319: */
! 320:
! 321: if (extra)
! 322: {
! 323: HTTRACE(STREAM_TRACE, "Boundary: Sending partially-matched %d characters\n" _ extra);
! 324:
! 325: if (PUTBLOCK("--", extra > 2 ? 2:extra) != HT_OK)
! 326: return 0;
! 327:
! 328: if (extra > 2)
! 329: if (PUTBLOCK(me->boundary, extra-2) != HT_OK)
! 330: return 0;
! 331: }
! 332: return seen_nothing(me, b, l);
! 333: }
! 334:
! 335: /*
! 336: ** We're not looking for a delimiter. Look for the next line of input
! 337: ** in the data that could potentially be a delimiter.
! 338: */
! 339:
! 340: PRIVATE int seen_nothing(HTStream * me, const char * b, int l)
! 341: {
! 342: int i;
! 343:
! 344: me->state=seen_nothing;
! 345:
! 346: for (i=0; i<l; i++)
! 347: {
! 348: if (b[i] != CR)
! 349: continue;
! 350:
! 351: /*
! 352: ** If we have at least four more characters in unconsumed
! 353: ** input, and they're not \r\n--, we can safely skip over
! 354: ** them.
! 355: */
! 356:
! 357: if (l-i > 4 &&
! 358: strncmp(b+i, "\r\n--", 4))
! 359: continue;
! 360: break;
! 361: }
! 362:
! 363: if (i == 0)
! 364: {
! 365: /* Could only be a CR here. */
! 366:
! 367: me->state=seen_cr;
! 368: return 1;
! 369: }
! 370:
! 371: HTTRACE(STREAM_TRACE, "Boundary: Processed %d (out of %d) bytes\n"
! 372: _ i _ l);
! 373:
! 374: if (PUTBLOCK(b, i) != HT_OK)
! 375: return 0;
! 376:
! 377: return i;
! 378: }
! 379:
! 380: /*
! 381: ** State: seen a CR
! 382: */
! 383:
! 384: PRIVATE int seen_cr(HTStream * me, const char * b, int l)
! 385: {
! 386: HTTRACE(STREAM_TRACE, "Boundary: Processed <CR>\n");
! 387:
! 388: if (*b != LF)
! 389: {
! 390: HTTRACE(STREAM_TRACE, "Boundary: ... <LF> didn't follow\n");
! 391: if (PUTBLOCK("\r", 1) != HT_OK)
! 392: return 0;
! 393: return seen_nothing(me, b, l);
! 394: }
! 395:
! 396: HTTRACE(STREAM_TRACE, "Boundary: Processed <CR><LF>\n");
! 397: me->state=start_of_line;
! 398: me->keptcrlf=YES;
! 399: return 1;
! 400: }
! 401:
! 402: PRIVATE void process_boundary(HTStream *me, int isterminal)
! 403: {
! 404: HTBoundary_flush(me);
! 405: if (me->target) FREE_TARGET;
! 406: me->target=NULL;
! 407: me->state=start_of_line;
! 408: me->keptcrlf=NO;
! 409:
! 410: if (!isterminal)
2.12 frystyk 411: me->target = HTStreamStack(WWW_MIME,me->format,
412: HTMerge(me->orig_target, 2),
413: me->request, YES);
414: }
415:
2.15 ! vbancrof 416:
2.12 frystyk 417: PRIVATE int HTBoundary_put_string (HTStream * me, const char * s)
418: {
419: return HTBoundary_put_block(me, s, (int) strlen(s));
420: }
421:
422: PRIVATE int HTBoundary_put_character (HTStream * me, char c)
423: {
424: return HTBoundary_put_block(me, &c, 1);
425: }
426:
427: PRIVATE int HTBoundary_flush (HTStream * me)
428: {
2.15 ! vbancrof 429: if (me->target == NULL)
! 430: return HT_OK;
! 431: return (*me->target->isa->flush)(me->target);
2.12 frystyk 432: }
433:
434: PRIVATE int HTBoundary_free (HTStream * me)
435: {
436: int status = HT_OK;
437: if (me->target) {
438: if ((status = (*me->target->isa->_free)(me->target)) == HT_WOULD_BLOCK)
439: return HT_WOULD_BLOCK;
440: }
2.14 frystyk 441: HTTRACE(PROT_TRACE, "Boundary.... FREEING....\n");
2.12 frystyk 442: HT_FREE(me->boundary);
443: HT_FREE(me);
444: return status;
445: }
446:
447: PRIVATE int HTBoundary_abort (HTStream * me, HTList * e)
448: {
449: int status = HT_ERROR;
450: if (me->target) status = (*me->target->isa->abort)(me->target, e);
2.14 frystyk 451: HTTRACE(PROT_TRACE, "Boundary.... ABORTING...\n");
2.12 frystyk 452: HT_FREE(me->boundary);
453: HT_FREE(me);
454: return status;
455: }
456:
457: PRIVATE const HTStreamClass HTBoundaryClass =
458: {
459: "HTBoundary",
460: HTBoundary_flush,
461: HTBoundary_free,
462: HTBoundary_abort,
463: HTBoundary_put_character,
464: HTBoundary_put_string,
465: HTBoundary_put_block
466: };
467:
468: PUBLIC HTStream * HTBoundary (HTRequest * request,
469: void * param,
470: HTFormat input_format,
471: HTFormat output_format,
472: HTStream * output_stream)
473: {
474: HTResponse * response = HTRequest_response(request);
475: HTParentAnchor * anchor = HTRequest_anchor(request);
476: HTAssocList * type_param = response ?
477: HTResponse_formatParam(response) :
478: HTAnchor_formatParam(anchor);
479: char * boundary = HTAssocList_findObject(type_param, "boundary");
2.15 ! vbancrof 480:
! 481: UNUSED(param);
! 482: UNUSED(input_format);
! 483:
2.12 frystyk 484: if (boundary) {
485: HTStream * me;
486: if ((me = (HTStream *) HT_CALLOC(1, sizeof(HTStream))) == NULL)
487: HT_OUTOFMEM("HTBoundary");
488: me->isa = &HTBoundaryClass;
2.15 ! vbancrof 489: me->net = HTRequest_net(request);
! 490: me->request = request;
2.12 frystyk 491: me->format = output_format;
492: me->orig_target = output_stream;
493: me->debug = HTRequest_debugStream(request);
2.15 ! vbancrof 494:
! 495: me->state = start_of_line;
! 496: me->keptcrlf=NO;
! 497:
2.12 frystyk 498: StrAllocCopy(me->boundary, boundary); /* Local copy */
2.15 ! vbancrof 499:
2.14 frystyk 500: HTTRACE(STREAM_TRACE, "Boundary.... Stream created with boundary '%s\'\n" _ me->boundary);
2.12 frystyk 501: return me;
502: } else {
2.14 frystyk 503: HTTRACE(STREAM_TRACE, "Boundary.... UNKNOWN boundary!\n");
2.12 frystyk 504: return HTErrorStream();
505: }
506: }
Webmaster