Annotation of libwww/Library/src/HTBound.c, revision 2.15

2.12      frystyk     1: /*                                                                  HTBound.c
                      2: **     MIME MULTIPART PARSER STREAM
                      3: **
                      4: **     (c) COPYRIGHT MIT 1995.
                      5: **     Please first read the full copyright statement in the file COPYRIGH.
2.15    ! vbancrof    6: **     @(#) $Id: HTBound.c,v 2.14 1999/02/22 22:10:10 frystyk Exp $
2.12      frystyk     7: **
                      8: **     This stream parses a MIME multipart stream and builds a set of new
                      9: **     streams via the stream stack each time we encounter a boundary start.
                     10: **     We get the boundary from the normal MIME parser via the Request object
                     11: **
                     12: ** Authors
                     13: **     HF      Henrik Frystyk <frystyk@w3.org>
                     14: **
                     15: ** History:
                     16: **     Nov 95  Written from scratch
2.15    ! vbancrof   17: **   SV Jun 05  Rewrote HTBoundary_put_block.  Fixed many bugs+segfaults.
        !            18: **   SV Jul 05  Fix double-counting of processed bytes.
2.12      frystyk    19: **
                     20: */
                     21: 
                     22: /* Library include files */
                     23: #include "wwwsys.h"
                     24: #include "WWWUtil.h"
                     25: #include "WWWCore.h"
2.13      frystyk    26: #include "HTMerge.h"
2.12      frystyk    27: #include "HTReqMan.h"
2.15    ! vbancrof   28: #include "HTNetMan.h"
        !            29: #include "HTChannl.h"
2.12      frystyk    30: #include "HTBound.h"                                    /* Implemented here */
                     31: 
/*
** Helper macros.  All of them expect a variable named `me' (the boundary
** stream) to be in scope at the point of use.
*/

/*
** Hand a block to the current target stream.  Evaluates to HT_OK without
** calling anything when me->target is NULL.
*/
#define PUTBLOCK(b, l) \
    (me->target \
     ? (*me->target->isa->put_block)(me->target, (b), (l)) \
     : HT_OK)

/* Hand a block to the debug stream.  me->debug is NOT checked for NULL. */
#define PUTDEBUG(b, l) \
    (*me->debug->isa->put_block)(me->debug, (b), (l))

/* Free the current target stream.  me->target is NOT checked for NULL. */
#define FREE_TARGET \
    (*me->target->isa->_free)(me->target)
                     36: 
                     37: struct _HTStream {
                     38:     const HTStreamClass *      isa;
2.15    ! vbancrof   39:     HTNet *                      net;
2.12      frystyk    40:     HTStream *                 target;
                     41:     HTStream *                 orig_target;
                     42:     HTFormat                   format;
                     43:     HTStream *                 debug;            /* For preamble and epilog */
                     44:     HTRequest *                        request;
                     45:     char *                     boundary;
2.15    ! vbancrof   46: 
        !            47:     BOOL                        keptcrlf;
        !            48:     int                         (*state)(HTStream *, const char *, int);
        !            49: 
        !            50:     char                        *boundary_ptr;
        !            51: 
2.12      frystyk    52: };
                     53: 
2.15    ! vbancrof   54: PRIVATE int HTBoundary_flush (HTStream * me);
        !            55: 
2.12      frystyk    56: /* ------------------------------------------------------------------------- */
                     57: 
2.15    ! vbancrof   58: PRIVATE int start_of_line (HTStream * me, const char * b, int l);
        !            59: PRIVATE int seen_dash (HTStream * me, const char * b, int l);
        !            60: PRIVATE int seen_doubledash (HTStream * me, const char * b, int l);
        !            61: PRIVATE int seen_delimiter_nonterminal(HTStream * me, const char * b, int l);
        !            62: PRIVATE int seen_delimiter_nonterminal_CR(HTStream * me, const char * b, int l);
        !            63: PRIVATE int seen_delimiter_dash(HTStream * me, const char * b, int l);
        !            64: PRIVATE int seen_delimiter_terminal(HTStream * me, const char * b, int l);
        !            65: PRIVATE int seen_delimiter_terminal_CR(HTStream * me, const char * b, int l);
        !            66: PRIVATE int not_delimiter(HTStream * me, const char * b, int l, int extra);
        !            67: PRIVATE int seen_nothing(HTStream * me, const char * b, int l);
        !            68: PRIVATE int seen_cr(HTStream * me, const char * b, int l);
        !            69: PRIVATE void process_boundary(HTStream *me, int isterminal);
        !            70: 
        !            71: #define UNUSED(l) (l=l)    /* Shut up about unused variables */
        !            72: 
2.12      frystyk    73: PRIVATE int HTBoundary_put_block (HTStream * me, const char * b, int l)
                     74: {
2.15    ! vbancrof   75:        /*
        !            76:        ** The HTBoundary object gets attached downstream of HTMime.
        !            77:        ** The HTBoundary object creates another HTMime object downstream of
        !            78:        ** the HTBoundary object.
        !            79:        **
        !            80:        ** When we push data downstream to the second HTBoundary object, it
        !            81:        ** updates the bytes read count in the HTNet object.
        !            82:        **
        !            83:        ** When we return to the parent HTMime object, itupdates the
        !            84:        ** bytes read count in the HTNet object again.  Oops.
        !            85:        **
        !            86:        ** Same thing happens with the consumed byte count.  We can prevent
        !            87:        ** the consumed byte counts from being updated by temporary setting
        !            88:        ** the input channel stream pointer to NULL, but for the byte counts
        !            89:        ** we have to save them and restore them before existing.
        !            90:        **
        !            91:        ** This bug was discovered by chance when a multipart/partial response
        !            92:        ** was partially received, and as a result of double-counting the
        !            93:        ** real response got cut off (because HTMime thought that more bytes
        !            94:        ** were processed than actually were, thus it processed only the
        !            95:        ** partial count of the remaining bytes in the response).  When the
        !            96:        ** multipart/partial response was received all at once this bug did
        !            97:        ** not get triggered.
        !            98:        */
        !            99: 
        !           100:        HTHost *host=HTNet_host(me->net);
        !           101:        HTChannel *c=HTHost_channel(host);
        !           102:        HTInputStream *i=HTChannel_input(c);
        !           103: 
        !           104:        long saveBytesRead=HTNet_bytesRead(me->net);
        !           105:        long saveHeaderBytesRead=HTNet_headerBytesRead(me->net);
        !           106: 
        !           107:        if (i)
        !           108:                HTChannel_setInput(c, NULL);
        !           109: 
        !           110:        HTTRACE(STREAM_TRACE, "Boundary: processing %d bytes\n" _ l);
        !           111:        /* Main loop consumes all input */
        !           112: 
        !           113:        while (l)
        !           114:        {
        !           115:                int n= (*me->state)(me, b, l);
        !           116: 
        !           117:                if (n == 0)
        !           118:                        return HT_ERROR;
        !           119:                b += n;
        !           120:                l -= n;
        !           121:        }
        !           122: 
        !           123:        if (i)
        !           124:                HTChannel_setInput(c, i);
        !           125:        HTNet_setBytesRead(me->net, saveBytesRead);
        !           126:        HTNet_setHeaderBytesRead(me->net, saveHeaderBytesRead);
        !           127: 
        !           128:        return HT_OK;
        !           129: }
        !           130: 
        !           131: /*
        !           132: ** Start of line, keptcrlf=YES if we've kept the preceding CRLF from downstream
        !           133: ** and we'll pass it along if we decide that this is not a boundary delimiter.
        !           134: */
        !           135: 
        !           136: PRIVATE int start_of_line (HTStream * me, const char * b, int l)
        !           137: {
        !           138:        if (*b != '-')
        !           139:                return not_delimiter(me, b, l, 0);
        !           140: 
        !           141:        HTTRACE(STREAM_TRACE, "Boundary: start of line: input '-'\n");
        !           142: 
        !           143:        me->state= seen_dash;
        !           144: 
        !           145:        return 1;
        !           146: }
        !           147: 
        !           148: /*
        !           149: ** Line: -
        !           150: */
        !           151: 
        !           152: PRIVATE int seen_dash (HTStream * me, const char * b, int l)
        !           153: {
        !           154:        if (*b != '-')
        !           155:                return not_delimiter(me, b, l, 1);
        !           156: 
        !           157:        HTTRACE(STREAM_TRACE, "Boundary: start of line: input '--'\n");
        !           158: 
        !           159:        me->state= seen_doubledash;
        !           160:        me->boundary_ptr=me->boundary;
        !           161:        return 1;
        !           162: }
        !           163: 
        !           164: /*
        !           165: ** Line: --
        !           166: */
        !           167: 
        !           168: PRIVATE int seen_doubledash (HTStream * me, const char * b, int l)
        !           169: {
        !           170:        me->state=seen_doubledash;
        !           171: 
        !           172:        if (*me->boundary_ptr)
        !           173:        {
        !           174:                if (*b != *me->boundary_ptr)
        !           175:                {
        !           176:                        return not_delimiter(me, b, l,
        !           177:                                             me->boundary_ptr - me->boundary
        !           178:                                             + 2);
2.12      frystyk   179:                }
2.15    ! vbancrof  180:                ++me->boundary_ptr;
        !           181:                return 1;
        !           182:        }
        !           183: 
        !           184:        /*
        !           185:        ** Line: --delimiter
        !           186:        */
        !           187: 
        !           188:        if (*b == '-')
        !           189:        {
        !           190:                HTTRACE(STREAM_TRACE,
        !           191:                        "Boundary: start of line: input '--%s-'\n"
        !           192:                        _ me->boundary);
        !           193: 
        !           194:                me->state=seen_delimiter_dash;
        !           195:                return 1;
        !           196:        }
        !           197: 
        !           198:        HTTRACE(STREAM_TRACE,
        !           199:                "Boundary: Found: '--%s'\n" _ me->boundary);
        !           200:        
        !           201:        return seen_delimiter_nonterminal(me, b, l);
        !           202: }
        !           203: 
        !           204: /*
        !           205: ** Line: --delimiter
        !           206: **
        !           207: ** Waiting for CRLF.
        !           208: */
        !           209: 
        !           210: 
        !           211: PRIVATE int seen_delimiter_nonterminal(HTStream * me, const char * b, int l)
        !           212: {
        !           213:        UNUSED(l);
        !           214: 
        !           215:        me->state=seen_delimiter_nonterminal;
        !           216:        if (*b == CR)
        !           217:                me->state=seen_delimiter_nonterminal_CR;
        !           218: 
        !           219:        return 1;
        !           220: }
        !           221: 
        !           222: /*
        !           223: ** Line: --delimiter<CR>
        !           224: */
        !           225: 
        !           226: PRIVATE int seen_delimiter_nonterminal_CR(HTStream * me, const char * b, int l)
        !           227: {
        !           228:        HTTRACE(STREAM_TRACE,
        !           229:                "Boundary: Found: '--%s<CR>'\n" _ me->boundary);
        !           230:        
        !           231:        if (*b != LF)
        !           232:                return seen_delimiter_nonterminal(me, b, l);
        !           233: 
        !           234:        HTTRACE(STREAM_TRACE,
        !           235:                "Boundary: Found: '--%s<CR><LF>'\n" _ me->boundary);
        !           236:        
        !           237:        process_boundary(me, NO);
        !           238:        return 1;
        !           239: }
        !           240: 
        !           241: /*
        !           242: ** Line: --delimiter-
        !           243: */
        !           244: 
        !           245: PRIVATE int seen_delimiter_dash(HTStream * me, const char * b, int l)
        !           246: {
        !           247:        if (*b != '-')
        !           248:                return seen_delimiter_nonterminal(me, b, l);
        !           249: 
        !           250:        HTTRACE(STREAM_TRACE,
        !           251:                "Boundary: start of line: input '--%s--'\n"
        !           252:                _ me->boundary);
        !           253:        
        !           254:        me->state=seen_delimiter_terminal;
        !           255:        return 1;
        !           256: }
        !           257: 
        !           258: /*
        !           259: ** Line: --delimiter--
        !           260: */
        !           261: 
        !           262: PRIVATE int seen_delimiter_terminal(HTStream * me, const char * b, int l)
        !           263: {
        !           264:        UNUSED(l);
        !           265: 
        !           266:        me->state=seen_delimiter_terminal;
        !           267: 
        !           268:        if (*b == CR)
        !           269:                me->state=seen_delimiter_terminal_CR;
        !           270:        return 1;
        !           271: }
        !           272: /*
        !           273: ** Line: --delimiter--<CR>
        !           274: */
        !           275: 
        !           276: PRIVATE int seen_delimiter_terminal_CR(HTStream * me, const char * b, int l)
        !           277: {
        !           278:        HTTRACE(STREAM_TRACE,
        !           279:                "Boundary: Found '--%s--<CR>'\n"
        !           280:                _ me->boundary);
        !           281:        
        !           282:        if (*b != LF)
        !           283:                return seen_delimiter_terminal(me, b, l);
        !           284:        HTTRACE(STREAM_TRACE,
        !           285:                "Boundary: Found '--%s--<CR><LF>'\n"
        !           286:                _ me->boundary);
        !           287:        
        !           288:        process_boundary(me, YES);
        !           289:        return 1;
        !           290: }
        !           291: 
        !           292: /*
        !           293: ** Beginning of the line does not contain a delimiter.
        !           294: **
        !           295: **
        !           296: ** extra: Count of characters in a partially matched delimiter.  Since it's
        !           297: ** not a delimiter this is content that needs to go downstream.
        !           298: */
        !           299: 
        !           300: PRIVATE int not_delimiter(HTStream * me, const char * b, int l, int extra)
        !           301: {
        !           302:        HTTRACE(STREAM_TRACE, "Boundary: not a delimiter line\n");
        !           303:        
        !           304:        if (me->keptcrlf)
        !           305:        {
        !           306:                HTTRACE(STREAM_TRACE, "Boundary: Sending previous line's <CR><LF>\n");
        !           307:                /*
        !           308:                ** Did not process CRLF from previous line, because prev CRLF
        !           309:                ** is considered a part of the delimiter.  See MIME RFC.
        !           310:                */
        !           311: 
        !           312:                me->keptcrlf=NO;
        !           313:                if (PUTBLOCK("\r\n", 2) != HT_OK)
        !           314:                        return 0;
        !           315:        }
        !           316: 
        !           317:        /*
        !           318:        ** Potentially matched some of: --DELIMITER
        !           319:        */
        !           320: 
        !           321:        if (extra)
        !           322:        {
        !           323:                HTTRACE(STREAM_TRACE, "Boundary: Sending partially-matched %d characters\n" _ extra);
        !           324: 
        !           325:                if (PUTBLOCK("--", extra > 2 ? 2:extra) != HT_OK)
        !           326:                        return 0;
        !           327: 
        !           328:                if (extra > 2)
        !           329:                        if (PUTBLOCK(me->boundary, extra-2) != HT_OK)
        !           330:                                return 0;
        !           331:        }
        !           332:        return seen_nothing(me, b, l);
        !           333: }
        !           334: 
        !           335: /*
        !           336: ** We're not looking for a delimiter.  Look for the next line of input
        !           337: ** in the data that could potentially be a delimiter.
        !           338: */
        !           339: 
        !           340: PRIVATE int seen_nothing(HTStream * me, const char * b, int l)
        !           341: {
        !           342:        int i;
        !           343: 
        !           344:        me->state=seen_nothing;
        !           345: 
        !           346:        for (i=0; i<l; i++)
        !           347:        {
        !           348:                if (b[i] != CR)
        !           349:                        continue;
        !           350: 
        !           351:                /*
        !           352:                ** If we have at least four more characters in unconsumed
        !           353:                ** input, and they're not \r\n--, we can safely skip over
        !           354:                ** them.
        !           355:                */
        !           356: 
        !           357:                if (l-i > 4 &&
        !           358:                    strncmp(b+i, "\r\n--", 4))
        !           359:                        continue;
        !           360:                break;
        !           361:        }
        !           362: 
        !           363:        if (i == 0)
        !           364:        {
        !           365:                /* Could only be a CR here. */
        !           366: 
        !           367:                me->state=seen_cr;
        !           368:                return 1;
        !           369:        }
        !           370: 
        !           371:        HTTRACE(STREAM_TRACE, "Boundary: Processed %d (out of %d) bytes\n"
        !           372:                _ i _ l);
        !           373: 
        !           374:        if (PUTBLOCK(b, i) != HT_OK)
        !           375:                return 0;
        !           376: 
        !           377:        return i;
        !           378: }
        !           379: 
        !           380: /*
        !           381: ** State: seen a CR
        !           382: */
        !           383: 
        !           384: PRIVATE int seen_cr(HTStream * me, const char * b, int l)
        !           385: {
        !           386:        HTTRACE(STREAM_TRACE, "Boundary: Processed <CR>\n");
        !           387: 
        !           388:        if (*b != LF)
        !           389:        {
        !           390:                HTTRACE(STREAM_TRACE, "Boundary: ... <LF> didn't follow\n");
        !           391:                if (PUTBLOCK("\r", 1) != HT_OK)
        !           392:                        return 0;
        !           393:                return seen_nothing(me, b, l);
        !           394:        }
        !           395: 
        !           396:        HTTRACE(STREAM_TRACE, "Boundary: Processed <CR><LF>\n");
        !           397:        me->state=start_of_line;
        !           398:        me->keptcrlf=YES;
        !           399:        return 1;
        !           400: }
        !           401: 
        !           402: PRIVATE void process_boundary(HTStream *me, int isterminal)
        !           403: {
        !           404:        HTBoundary_flush(me);
        !           405:        if (me->target) FREE_TARGET;
        !           406:        me->target=NULL;
        !           407:        me->state=start_of_line;
        !           408:        me->keptcrlf=NO;
        !           409: 
        !           410:        if (!isterminal)
2.12      frystyk   411:                me->target = HTStreamStack(WWW_MIME,me->format,
                    412:                                           HTMerge(me->orig_target, 2),
                    413:                                           me->request, YES);
                    414: }
                    415: 
2.15    ! vbancrof  416: 
2.12      frystyk   417: PRIVATE int HTBoundary_put_string (HTStream * me, const char * s)
                    418: {
                    419:     return HTBoundary_put_block(me, s, (int) strlen(s));
                    420: }
                    421: 
                    422: PRIVATE int HTBoundary_put_character (HTStream * me, char c)
                    423: {
                    424:     return HTBoundary_put_block(me, &c, 1);
                    425: }
                    426: 
                    427: PRIVATE int HTBoundary_flush (HTStream * me)
                    428: {
2.15    ! vbancrof  429:        if (me->target == NULL)
        !           430:                return HT_OK;
        !           431:        return (*me->target->isa->flush)(me->target);
2.12      frystyk   432: }
                    433: 
                    434: PRIVATE int HTBoundary_free (HTStream * me)
                    435: {
                    436:     int status = HT_OK;
                    437:     if (me->target) {
                    438:        if ((status = (*me->target->isa->_free)(me->target)) == HT_WOULD_BLOCK)
                    439:            return HT_WOULD_BLOCK;
                    440:     }
2.14      frystyk   441:     HTTRACE(PROT_TRACE, "Boundary.... FREEING....\n");
2.12      frystyk   442:     HT_FREE(me->boundary);
                    443:     HT_FREE(me);
                    444:     return status;
                    445: }
                    446: 
                    447: PRIVATE int HTBoundary_abort (HTStream * me, HTList * e)
                    448: {
                    449:     int status = HT_ERROR;
                    450:     if (me->target) status = (*me->target->isa->abort)(me->target, e);
2.14      frystyk   451:     HTTRACE(PROT_TRACE, "Boundary.... ABORTING...\n");
2.12      frystyk   452:     HT_FREE(me->boundary);
                    453:     HT_FREE(me);
                    454:     return status;
                    455: }
                    456: 
                    457: PRIVATE const HTStreamClass HTBoundaryClass =
                    458: {              
                    459:     "HTBoundary",
                    460:     HTBoundary_flush,
                    461:     HTBoundary_free,
                    462:     HTBoundary_abort,
                    463:     HTBoundary_put_character,
                    464:     HTBoundary_put_string,
                    465:     HTBoundary_put_block
                    466: };
                    467: 
                    468: PUBLIC HTStream * HTBoundary   (HTRequest *    request,
                    469:                                void *          param,
                    470:                                HTFormat        input_format,
                    471:                                HTFormat        output_format,
                    472:                                HTStream *      output_stream)
                    473: {
                    474:     HTResponse * response = HTRequest_response(request);
                    475:     HTParentAnchor * anchor = HTRequest_anchor(request);
                    476:     HTAssocList * type_param = response ?
                    477:        HTResponse_formatParam(response) :
                    478:        HTAnchor_formatParam(anchor);
                    479:     char * boundary = HTAssocList_findObject(type_param, "boundary");
2.15    ! vbancrof  480: 
        !           481:     UNUSED(param);
        !           482:     UNUSED(input_format);
        !           483: 
2.12      frystyk   484:     if (boundary) {
                    485:        HTStream * me;
                    486:        if ((me = (HTStream  *) HT_CALLOC(1, sizeof(HTStream))) == NULL)
                    487:            HT_OUTOFMEM("HTBoundary");
                    488:        me->isa = &HTBoundaryClass;
2.15    ! vbancrof  489:        me->net = HTRequest_net(request);
        !           490:        me->request = request;
2.12      frystyk   491:        me->format = output_format;
                    492:        me->orig_target = output_stream;
                    493:        me->debug = HTRequest_debugStream(request);
2.15    ! vbancrof  494: 
        !           495:        me->state = start_of_line;
        !           496:        me->keptcrlf=NO;
        !           497: 
2.12      frystyk   498:        StrAllocCopy(me->boundary, boundary);                  /* Local copy */
2.15    ! vbancrof  499: 
2.14      frystyk   500:        HTTRACE(STREAM_TRACE, "Boundary.... Stream created with boundary '%s\'\n" _ me->boundary);
2.12      frystyk   501:        return me;
                    502:     } else {
2.14      frystyk   503:        HTTRACE(STREAM_TRACE, "Boundary.... UNKNOWN boundary!\n");
2.12      frystyk   504:        return HTErrorStream();
                    505:     }
                    506: }

Webmaster