Annotation of libwww/Library/src/HTBound.c, revision 2.16

2.12      frystyk     1: /*                                                                  HTBound.c
                      2: **     MIME MULTIPART PARSER STREAM
                      3: **
                      4: **     (c) COPYRIGHT MIT 1995.
                      5: **     Please first read the full copyright statement in the file COPYRIGH.
2.16    ! vbancrof    6: **     @(#) $Id: HTBound.c,v 2.15 2005/10/16 13:12:23 vbancrof Exp $
2.12      frystyk     7: **
                      8: **     This stream parses a MIME multipart stream and builds a set of new
                      9: **     streams via the stream stack each time we encounter a boundary start.
                     10: **     We get the boundary from the normal MIME parser via the Request object
                     11: **
                     12: ** Authors
                     13: **     HF      Henrik Frystyk <frystyk@w3.org>
                     14: **
                     15: ** History:
                     16: **     Nov 95  Written from scratch
2.15      vbancrof   17: **   SV Jun 05  Rewrote HTBoundary_put_block.  Fixed many bugs+segfaults.
                     18: **   SV Jul 05  Fix double-counting of processed bytes.
2.12      frystyk    19: **
                     20: */
                     21: 
                     22: /* Library include files */
                     23: #include "wwwsys.h"
                     24: #include "WWWUtil.h"
                     25: #include "WWWCore.h"
2.13      frystyk    26: #include "HTMerge.h"
2.12      frystyk    27: #include "HTReqMan.h"
2.15      vbancrof   28: #include "HTNetMan.h"
                     29: #include "HTChannl.h"
2.12      frystyk    30: #include "HTBound.h"                                    /* Implemented here */
                     31: 
/*
** Stream helper macros.  All of them expect a local `me' (HTStream *) to be
** in scope at the point of use.
*/

/* Hand a block to the current body-part stream.  When there is no target
** the target's put_block is not called and HT_OK is the result. */
#define PUTBLOCK(b, l) \
    (me->target ? (*me->target->isa->put_block)(me->target, (b), (l)) : HT_OK)

/* Hand a block to the debug stream; me->debug is dereferenced unchecked. */
#define PUTDEBUG(b, l) \
    (*me->debug->isa->put_block)(me->debug, (b), (l))

/* Free the current body-part stream; me->target is dereferenced unchecked. */
#define FREE_TARGET \
    (*me->target->isa->_free)(me->target)
                     36: 
                     37: struct _HTStream {
                     38:     const HTStreamClass *      isa;
2.15      vbancrof   39:     HTNet *                      net;
2.12      frystyk    40:     HTStream *                 target;
                     41:     HTStream *                 orig_target;
                     42:     HTFormat                   format;
                     43:     HTStream *                 debug;            /* For preamble and epilog */
                     44:     HTRequest *                        request;
                     45:     char *                     boundary;
2.15      vbancrof   46: 
                     47:     BOOL                        keptcrlf;
                     48:     int                         (*state)(HTStream *, const char *, int);
                     49: 
                     50:     char                        *boundary_ptr;
                     51: 
2.12      frystyk    52: };
                     53: 
2.15      vbancrof   54: PRIVATE int HTBoundary_flush (HTStream * me);
                     55: 
2.12      frystyk    56: /* ------------------------------------------------------------------------- */
                     57: 
2.15      vbancrof   58: PRIVATE int start_of_line (HTStream * me, const char * b, int l);
                     59: PRIVATE int seen_dash (HTStream * me, const char * b, int l);
                     60: PRIVATE int seen_doubledash (HTStream * me, const char * b, int l);
                     61: PRIVATE int seen_delimiter_nonterminal(HTStream * me, const char * b, int l);
                     62: PRIVATE int seen_delimiter_nonterminal_CR(HTStream * me, const char * b, int l);
                     63: PRIVATE int seen_delimiter_dash(HTStream * me, const char * b, int l);
                     64: PRIVATE int seen_delimiter_terminal(HTStream * me, const char * b, int l);
                     65: PRIVATE int seen_delimiter_terminal_CR(HTStream * me, const char * b, int l);
                     66: PRIVATE int not_delimiter(HTStream * me, const char * b, int l, int extra);
                     67: PRIVATE int seen_nothing(HTStream * me, const char * b, int l);
                     68: PRIVATE int seen_cr(HTStream * me, const char * b, int l);
                     69: PRIVATE void process_boundary(HTStream *me, int isterminal);
                     70: 
                     71: #define UNUSED(l) (l=l)    /* Shut up about unused variables */
                     72: 
2.12      frystyk    73: PRIVATE int HTBoundary_put_block (HTStream * me, const char * b, int l)
                     74: {
2.15      vbancrof   75:        /*
                     76:        ** The HTBoundary object gets attached downstream of HTMime.
                     77:        ** The HTBoundary object creates another HTMime object downstream of
                     78:        ** the HTBoundary object.
                     79:        **
                     80:        ** When we push data downstream to the second HTBoundary object, it
                     81:        ** updates the bytes read count in the HTNet object.
                     82:        **
                     83:        ** When we return to the parent HTMime object, itupdates the
                     84:        ** bytes read count in the HTNet object again.  Oops.
                     85:        **
                     86:        ** Same thing happens with the consumed byte count.  We can prevent
                     87:        ** the consumed byte counts from being updated by temporary setting
                     88:        ** the input channel stream pointer to NULL, but for the byte counts
                     89:        ** we have to save them and restore them before existing.
                     90:        **
                     91:        ** This bug was discovered by chance when a multipart/partial response
                     92:        ** was partially received, and as a result of double-counting the
                     93:        ** real response got cut off (because HTMime thought that more bytes
                     94:        ** were processed than actually were, thus it processed only the
                     95:        ** partial count of the remaining bytes in the response).  When the
                     96:        ** multipart/partial response was received all at once this bug did
                     97:        ** not get triggered.
                     98:        */
                     99: 
                    100:        HTHost *host=HTNet_host(me->net);
                    101:        HTChannel *c=HTHost_channel(host);
                    102:        HTInputStream *i=HTChannel_input(c);
                    103: 
                    104:        long saveBytesRead=HTNet_bytesRead(me->net);
                    105:        long saveHeaderBytesRead=HTNet_headerBytesRead(me->net);
                    106: 
                    107:        if (i)
                    108:                HTChannel_setInput(c, NULL);
                    109: 
                    110:        HTTRACE(STREAM_TRACE, "Boundary: processing %d bytes\n" _ l);
                    111:        /* Main loop consumes all input */
                    112: 
                    113:        while (l)
                    114:        {
                    115:                int n= (*me->state)(me, b, l);
                    116: 
                    117:                if (n == 0)
                    118:                        return HT_ERROR;
                    119:                b += n;
                    120:                l -= n;
                    121:        }
                    122: 
                    123:        if (i)
                    124:                HTChannel_setInput(c, i);
                    125:        HTNet_setBytesRead(me->net, saveBytesRead);
                    126:        HTNet_setHeaderBytesRead(me->net, saveHeaderBytesRead);
                    127: 
                    128:        return HT_OK;
                    129: }
                    130: 
                    131: /*
                    132: ** Start of line, keptcrlf=YES if we've kept the preceding CRLF from downstream
                    133: ** and we'll pass it along if we decide that this is not a boundary delimiter.
                    134: */
                    135: 
                    136: PRIVATE int start_of_line (HTStream * me, const char * b, int l)
                    137: {
                    138:        if (*b != '-')
                    139:                return not_delimiter(me, b, l, 0);
                    140: 
                    141:        HTTRACE(STREAM_TRACE, "Boundary: start of line: input '-'\n");
                    142: 
                    143:        me->state= seen_dash;
                    144: 
                    145:        return 1;
                    146: }
                    147: 
                    148: /*
                    149: ** Line: -
                    150: */
                    151: 
                    152: PRIVATE int seen_dash (HTStream * me, const char * b, int l)
                    153: {
                    154:        if (*b != '-')
                    155:                return not_delimiter(me, b, l, 1);
                    156: 
                    157:        HTTRACE(STREAM_TRACE, "Boundary: start of line: input '--'\n");
                    158: 
                    159:        me->state= seen_doubledash;
                    160:        me->boundary_ptr=me->boundary;
                    161:        return 1;
                    162: }
                    163: 
                    164: /*
                    165: ** Line: --
                    166: */
                    167: 
                    168: PRIVATE int seen_doubledash (HTStream * me, const char * b, int l)
                    169: {
                    170:        me->state=seen_doubledash;
                    171: 
                    172:        if (*me->boundary_ptr)
                    173:        {
                    174:                if (*b != *me->boundary_ptr)
                    175:                {
                    176:                        return not_delimiter(me, b, l,
                    177:                                             me->boundary_ptr - me->boundary
                    178:                                             + 2);
2.12      frystyk   179:                }
2.15      vbancrof  180:                ++me->boundary_ptr;
                    181:                return 1;
                    182:        }
                    183: 
                    184:        /*
                    185:        ** Line: --delimiter
                    186:        */
                    187: 
                    188:        if (*b == '-')
                    189:        {
                    190:                HTTRACE(STREAM_TRACE,
                    191:                        "Boundary: start of line: input '--%s-'\n"
                    192:                        _ me->boundary);
                    193: 
                    194:                me->state=seen_delimiter_dash;
                    195:                return 1;
                    196:        }
                    197: 
                    198:        HTTRACE(STREAM_TRACE,
                    199:                "Boundary: Found: '--%s'\n" _ me->boundary);
                    200:        
                    201:        return seen_delimiter_nonterminal(me, b, l);
                    202: }
                    203: 
                    204: /*
                    205: ** Line: --delimiter
                    206: **
                    207: ** Waiting for CRLF.
                    208: */
                    209: 
                    210: 
                    211: PRIVATE int seen_delimiter_nonterminal(HTStream * me, const char * b, int l)
                    212: {
                    213:        UNUSED(l);
                    214: 
                    215:        me->state=seen_delimiter_nonterminal;
                    216:        if (*b == CR)
                    217:                me->state=seen_delimiter_nonterminal_CR;
                    218: 
                    219:        return 1;
                    220: }
                    221: 
                    222: /*
                    223: ** Line: --delimiter<CR>
                    224: */
                    225: 
                    226: PRIVATE int seen_delimiter_nonterminal_CR(HTStream * me, const char * b, int l)
                    227: {
                    228:        HTTRACE(STREAM_TRACE,
                    229:                "Boundary: Found: '--%s<CR>'\n" _ me->boundary);
                    230:        
                    231:        if (*b != LF)
                    232:                return seen_delimiter_nonterminal(me, b, l);
                    233: 
                    234:        HTTRACE(STREAM_TRACE,
                    235:                "Boundary: Found: '--%s<CR><LF>'\n" _ me->boundary);
                    236:        
                    237:        process_boundary(me, NO);
                    238:        return 1;
                    239: }
                    240: 
                    241: /*
                    242: ** Line: --delimiter-
                    243: */
                    244: 
                    245: PRIVATE int seen_delimiter_dash(HTStream * me, const char * b, int l)
                    246: {
                    247:        if (*b != '-')
                    248:                return seen_delimiter_nonterminal(me, b, l);
                    249: 
                    250:        HTTRACE(STREAM_TRACE,
                    251:                "Boundary: start of line: input '--%s--'\n"
                    252:                _ me->boundary);
                    253:        
                    254:        me->state=seen_delimiter_terminal;
                    255:        return 1;
                    256: }
                    257: 
                    258: /*
                    259: ** Line: --delimiter--
                    260: */
                    261: 
                    262: PRIVATE int seen_delimiter_terminal(HTStream * me, const char * b, int l)
                    263: {
                    264:        UNUSED(l);
                    265: 
                    266:        me->state=seen_delimiter_terminal;
                    267: 
                    268:        if (*b == CR)
                    269:                me->state=seen_delimiter_terminal_CR;
                    270:        return 1;
                    271: }
                    272: /*
                    273: ** Line: --delimiter--<CR>
                    274: */
                    275: 
                    276: PRIVATE int seen_delimiter_terminal_CR(HTStream * me, const char * b, int l)
                    277: {
                    278:        HTTRACE(STREAM_TRACE,
                    279:                "Boundary: Found '--%s--<CR>'\n"
                    280:                _ me->boundary);
                    281:        
                    282:        if (*b != LF)
                    283:                return seen_delimiter_terminal(me, b, l);
                    284:        HTTRACE(STREAM_TRACE,
                    285:                "Boundary: Found '--%s--<CR><LF>'\n"
                    286:                _ me->boundary);
                    287:        
                    288:        process_boundary(me, YES);
                    289:        return 1;
                    290: }
                    291: 
                    292: /*
                    293: ** Beginning of the line does not contain a delimiter.
                    294: **
                    295: **
                    296: ** extra: Count of characters in a partially matched delimiter.  Since it's
                    297: ** not a delimiter this is content that needs to go downstream.
                    298: */
                    299: 
                    300: PRIVATE int not_delimiter(HTStream * me, const char * b, int l, int extra)
                    301: {
                    302:        HTTRACE(STREAM_TRACE, "Boundary: not a delimiter line\n");
                    303:        
                    304:        if (me->keptcrlf)
                    305:        {
                    306:                HTTRACE(STREAM_TRACE, "Boundary: Sending previous line's <CR><LF>\n");
                    307:                /*
                    308:                ** Did not process CRLF from previous line, because prev CRLF
                    309:                ** is considered a part of the delimiter.  See MIME RFC.
                    310:                */
                    311: 
                    312:                me->keptcrlf=NO;
                    313:                if (PUTBLOCK("\r\n", 2) != HT_OK)
                    314:                        return 0;
                    315:        }
                    316: 
                    317:        /*
                    318:        ** Potentially matched some of: --DELIMITER
                    319:        */
                    320: 
                    321:        if (extra)
                    322:        {
                    323:                HTTRACE(STREAM_TRACE, "Boundary: Sending partially-matched %d characters\n" _ extra);
                    324: 
                    325:                if (PUTBLOCK("--", extra > 2 ? 2:extra) != HT_OK)
                    326:                        return 0;
                    327: 
                    328:                if (extra > 2)
                    329:                        if (PUTBLOCK(me->boundary, extra-2) != HT_OK)
                    330:                                return 0;
                    331:        }
                    332:        return seen_nothing(me, b, l);
                    333: }
                    334: 
                    335: /*
                    336: ** We're not looking for a delimiter.  Look for the next line of input
                    337: ** in the data that could potentially be a delimiter.
                    338: */
                    339: 
                    340: PRIVATE int seen_nothing(HTStream * me, const char * b, int l)
                    341: {
                    342:        int i;
                    343: 
                    344:        me->state=seen_nothing;
                    345: 
                    346:        for (i=0; i<l; i++)
                    347:        {
                    348:                if (b[i] != CR)
                    349:                        continue;
                    350: 
                    351:                /*
                    352:                ** If we have at least four more characters in unconsumed
                    353:                ** input, and they're not \r\n--, we can safely skip over
                    354:                ** them.
                    355:                */
                    356: 
                    357:                if (l-i > 4 &&
                    358:                    strncmp(b+i, "\r\n--", 4))
                    359:                        continue;
                    360:                break;
                    361:        }
                    362: 
                    363:        if (i == 0)
                    364:        {
                    365:                /* Could only be a CR here. */
                    366: 
                    367:                me->state=seen_cr;
                    368:                return 1;
                    369:        }
                    370: 
                    371:        HTTRACE(STREAM_TRACE, "Boundary: Processed %d (out of %d) bytes\n"
                    372:                _ i _ l);
                    373: 
                    374:        if (PUTBLOCK(b, i) != HT_OK)
                    375:                return 0;
                    376: 
                    377:        return i;
                    378: }
                    379: 
                    380: /*
                    381: ** State: seen a CR
                    382: */
                    383: 
                    384: PRIVATE int seen_cr(HTStream * me, const char * b, int l)
                    385: {
                    386:        HTTRACE(STREAM_TRACE, "Boundary: Processed <CR>\n");
                    387: 
                    388:        if (*b != LF)
                    389:        {
                    390:                HTTRACE(STREAM_TRACE, "Boundary: ... <LF> didn't follow\n");
                    391:                if (PUTBLOCK("\r", 1) != HT_OK)
                    392:                        return 0;
                    393:                return seen_nothing(me, b, l);
                    394:        }
                    395: 
                    396:        HTTRACE(STREAM_TRACE, "Boundary: Processed <CR><LF>\n");
                    397:        me->state=start_of_line;
                    398:        me->keptcrlf=YES;
                    399:        return 1;
                    400: }
                    401: 
                    402: PRIVATE void process_boundary(HTStream *me, int isterminal)
                    403: {
                    404:        HTBoundary_flush(me);
                    405:        if (me->target) FREE_TARGET;
                    406:        me->target=NULL;
                    407:        me->state=start_of_line;
                    408:        me->keptcrlf=NO;
                    409: 
                    410:        if (!isterminal)
2.12      frystyk   411:                me->target = HTStreamStack(WWW_MIME,me->format,
2.16    ! vbancrof  412:                                           HTMerge(me->orig_target, 1),
2.12      frystyk   413:                                           me->request, YES);
                    414: }
                    415: 
2.15      vbancrof  416: 
2.12      frystyk   417: PRIVATE int HTBoundary_put_string (HTStream * me, const char * s)
                    418: {
                    419:     return HTBoundary_put_block(me, s, (int) strlen(s));
                    420: }
                    421: 
                    422: PRIVATE int HTBoundary_put_character (HTStream * me, char c)
                    423: {
                    424:     return HTBoundary_put_block(me, &c, 1);
                    425: }
                    426: 
                    427: PRIVATE int HTBoundary_flush (HTStream * me)
                    428: {
2.15      vbancrof  429:        if (me->target == NULL)
                    430:                return HT_OK;
                    431:        return (*me->target->isa->flush)(me->target);
2.12      frystyk   432: }
                    433: 
                    434: PRIVATE int HTBoundary_free (HTStream * me)
                    435: {
                    436:     int status = HT_OK;
                    437:     if (me->target) {
                    438:        if ((status = (*me->target->isa->_free)(me->target)) == HT_WOULD_BLOCK)
                    439:            return HT_WOULD_BLOCK;
                    440:     }
2.14      frystyk   441:     HTTRACE(PROT_TRACE, "Boundary.... FREEING....\n");
2.12      frystyk   442:     HT_FREE(me->boundary);
                    443:     HT_FREE(me);
                    444:     return status;
                    445: }
                    446: 
                    447: PRIVATE int HTBoundary_abort (HTStream * me, HTList * e)
                    448: {
                    449:     int status = HT_ERROR;
                    450:     if (me->target) status = (*me->target->isa->abort)(me->target, e);
2.14      frystyk   451:     HTTRACE(PROT_TRACE, "Boundary.... ABORTING...\n");
2.12      frystyk   452:     HT_FREE(me->boundary);
                    453:     HT_FREE(me);
                    454:     return status;
                    455: }
                    456: 
                    457: PRIVATE const HTStreamClass HTBoundaryClass =
                    458: {              
                    459:     "HTBoundary",
                    460:     HTBoundary_flush,
                    461:     HTBoundary_free,
                    462:     HTBoundary_abort,
                    463:     HTBoundary_put_character,
                    464:     HTBoundary_put_string,
                    465:     HTBoundary_put_block
                    466: };
                    467: 
                    468: PUBLIC HTStream * HTBoundary   (HTRequest *    request,
                    469:                                void *          param,
                    470:                                HTFormat        input_format,
                    471:                                HTFormat        output_format,
                    472:                                HTStream *      output_stream)
                    473: {
                    474:     HTResponse * response = HTRequest_response(request);
                    475:     HTParentAnchor * anchor = HTRequest_anchor(request);
                    476:     HTAssocList * type_param = response ?
                    477:        HTResponse_formatParam(response) :
                    478:        HTAnchor_formatParam(anchor);
                    479:     char * boundary = HTAssocList_findObject(type_param, "boundary");
2.15      vbancrof  480: 
                    481:     UNUSED(param);
                    482:     UNUSED(input_format);
                    483: 
2.12      frystyk   484:     if (boundary) {
                    485:        HTStream * me;
                    486:        if ((me = (HTStream  *) HT_CALLOC(1, sizeof(HTStream))) == NULL)
                    487:            HT_OUTOFMEM("HTBoundary");
                    488:        me->isa = &HTBoundaryClass;
2.15      vbancrof  489:        me->net = HTRequest_net(request);
                    490:        me->request = request;
2.12      frystyk   491:        me->format = output_format;
                    492:        me->orig_target = output_stream;
                    493:        me->debug = HTRequest_debugStream(request);
2.15      vbancrof  494: 
                    495:        me->state = start_of_line;
                    496:        me->keptcrlf=NO;
                    497: 
2.12      frystyk   498:        StrAllocCopy(me->boundary, boundary);                  /* Local copy */
2.15      vbancrof  499: 
2.14      frystyk   500:        HTTRACE(STREAM_TRACE, "Boundary.... Stream created with boundary '%s\'\n" _ me->boundary);
2.12      frystyk   501:        return me;
                    502:     } else {
2.14      frystyk   503:        HTTRACE(STREAM_TRACE, "Boundary.... UNKNOWN boundary!\n");
2.12      frystyk   504:        return HTErrorStream();
                    505:     }
                    506: }

Webmaster