Annotation of libwww/Library/src/HTFormat.c, revision 1.30

1.7       secret      1: 
1.1       timbl       2: /*             Manage different file formats                   HTFormat.c
                      3: **             =============================
                      4: **
                      5: ** Bugs:
                      6: **     Not reentrant.
                      7: **
                      8: **     Assumes the incoming stream is ASCII, rather than a local file
                      9: **     format, and so ALWAYS converts from ASCII on non-ASCII machines.
                     10: **     Therefore, non-ASCII machines can't read local files.
1.2       timbl      11: **
                     12: */
                     13: 
1.10      timbl      14: 
1.2       timbl      15: /* Implements:
1.1       timbl      16: */
1.2       timbl      17: #include "HTFormat.h"
                     18: 
                     19: PUBLIC float HTMaxSecs = 1e10;         /* No effective limit */
                     20: PUBLIC float HTMaxLength = 1e10;       /* No effective limit */
                     21: 
                     22: #ifdef unix
                     23: #ifdef NeXT
                     24: #define PRESENT_POSTSCRIPT "open %s; /bin/rm -f %s\n"
                     25: #else
                     26: #define PRESENT_POSTSCRIPT "(ghostview %s ; /bin/rm -f %s)&\n" 
                     27:        /* Full pathname would be better! */
                     28: #endif
                     29: #endif
                     30: 
1.1       timbl      31: 
                     32: #include "HTUtils.h"
                     33: #include "tcp.h"
                     34: 
                     35: #include "HTML.h"
1.12      timbl      36: #include "HTMLPDTD.h"
1.1       timbl      37: #include "HText.h"
1.2       timbl      38: #include "HTAlert.h"
                     39: #include "HTList.h"
                     40: #include "HTInit.h"
                     41: /*     Streams and structured streams which we use:
                     42: */
                     43: #include "HTFWriter.h"
                     44: #include "HTPlain.h"
                     45: #include "SGML.h"
                     46: #include "HTML.h"
                     47: #include "HTMLGen.h"
                     48: 
                     49: PUBLIC BOOL HTOutputSource = NO;       /* Flag: shortcut parser to stdout */
                     50: 
1.10      timbl      51: #ifdef ORIGINAL
1.2       timbl      52: struct _HTStream {
                     53:       CONST HTStreamClass*     isa;
                     54:       /* ... */
                     55: };
1.10      timbl      56: #endif
                     57: 
                     58: /* this version used by the NetToText stream */
                     59: struct _HTStream {
                     60:        CONST HTStreamClass *           isa;
                     61:        BOOL                    had_cr;
                     62:        HTStream *              sink;
                     63: };
1.2       timbl      64: 
                     65: 
1.17      luotonen   66: /*
                     67: ** Accept-Encoding and Accept-Language
                     68: */
                     69: typedef struct _HTAcceptNode {
                     70:     HTAtom *   atom;
                     71:     float      quality;
                     72: } HTAcceptNode;
                     73: 
                     74: 
                     75: 
                     76: 
1.2       timbl      77: /*     Presentation methods
                     78: **     --------------------
                     79: */
                     80: 
1.14      timbl      81: PUBLIC HTList * HTConversions = NULL;
1.2       timbl      82: 
                     83: 
                     84: /*     Define a presentation system command for a content-type
                     85: **     -------------------------------------------------------
                     86: */
1.12      timbl      87: PUBLIC void HTSetPresentation ARGS6(
                     88:        HTList *,       conversions,
                     89:        CONST char *,   representation,
                     90:        CONST char *,   command,
                     91:        float,          quality,
                     92:        float,          secs, 
                     93:        float,          secs_per_byte
1.2       timbl      94: ){
                     95: 
                     96:     HTPresentation * pres = (HTPresentation *)malloc(sizeof(HTPresentation));
                     97:     if (pres == NULL) outofmem(__FILE__, "HTSetPresentation");
                     98:     
                     99:     pres->rep = HTAtom_for(representation);
                    100:     pres->rep_out = WWW_PRESENT;               /* Fixed for now ... :-) */
                    101:     pres->converter = HTSaveAndExecute;                /* Fixed for now ...     */
                    102:     pres->quality = quality;
                    103:     pres->secs = secs;
                    104:     pres->secs_per_byte = secs_per_byte;
                    105:     pres->rep = HTAtom_for(representation);
                    106:     pres->command = 0;
                    107:     StrAllocCopy(pres->command, command);
                    108:     
1.12      timbl     109: /*    if (!HTPresentations) HTPresentations = HTList_new(); */
1.2       timbl     110:     
1.15      luotonen  111: #ifdef OLD_CODE
                    112:     if (strcmp(representation, "*")==0) {
1.2       timbl     113:         if (default_presentation) free(default_presentation);
                    114:        default_presentation = pres;
1.12      timbl     115:     } else 
                    116: #endif
                    117:     HTList_addObject(conversions, pres);
1.2       timbl     118: }
                    119: 
                    120: 
                    121: /*     Define a built-in function for a content-type
                    122: **     ---------------------------------------------
                    123: */
1.12      timbl     124: PUBLIC void HTSetConversion ARGS7(
                    125:        HTList *,       conversions,
                    126:        CONST char *,   representation_in,
                    127:        CONST char *,   representation_out,
1.6       timbl     128:        HTConverter*,   converter,
1.12      timbl     129:        float,          quality,
                    130:        float,          secs, 
                    131:        float,          secs_per_byte
1.2       timbl     132: ){
1.1       timbl     133: 
1.2       timbl     134:     HTPresentation * pres = (HTPresentation *)malloc(sizeof(HTPresentation));
                    135:     if (pres == NULL) outofmem(__FILE__, "HTSetPresentation");
                    136:     
                    137:     pres->rep = HTAtom_for(representation_in);
                    138:     pres->rep_out = HTAtom_for(representation_out);
                    139:     pres->converter = converter;
                    140:     pres->command = NULL;              /* Fixed */
                    141:     pres->quality = quality;
                    142:     pres->secs = secs;
                    143:     pres->secs_per_byte = secs_per_byte;
                    144:     pres->command = 0;
                    145:     
1.12      timbl     146: /*    if (!HTPresentations) HTPresentations = HTList_new();  */
1.2       timbl     147:     
1.12      timbl     148: #ifdef OLD_CODE
1.2       timbl     149:     if (strcmp(representation_in, "*")==0) {
                    150:         if (default_presentation) free(default_presentation);
                    151:        default_presentation = pres;
1.12      timbl     152:     } else 
                    153: #endif
                    154:     HTList_addObject(conversions, pres);
1.2       timbl     155: }
1.1       timbl     156: 
                    157: 
                    158: 
1.17      luotonen  159: PUBLIC void HTAcceptEncoding ARGS3(HTList *,   list,
                    160:                                   char *,      enc,
                    161:                                   float,       quality)
                    162: {
                    163:     HTAcceptNode * node;
                    164:     char * cur;
                    165: 
                    166:     if (!list || !enc || !*enc) return;
                    167: 
                    168:     for(cur=enc; *cur; cur++) *cur=TOLOWER(*cur);
                    169: 
                    170:     node = (HTAcceptNode*)calloc(1, sizeof(HTAcceptNode));
                    171:     if (!node) outofmem(__FILE__, "HTAcceptEncoding");
                    172:     HTList_addObject(list, (void*)node);
                    173: 
                    174:     node->atom = HTAtom_for(enc);
                    175:     node->quality = quality;
                    176: }
                    177: 
                    178: 
                    179: PUBLIC void HTAcceptLanguage ARGS3(HTList *,   list,
                    180:                                   char *,      lang,
                    181:                                   float,       quality)
                    182: {
                    183:     HTAcceptNode * node;
                    184: 
                    185:     if (!list || !lang || !*lang) return;
                    186: 
                    187:     node = (HTAcceptNode*)calloc(1, sizeof(HTAcceptNode));
                    188:     if (!node) outofmem(__FILE__, "HTAcceptLanguage");
                    189: 
                    190:     HTList_addObject(list, (void*)node);
                    191:     node->atom = HTAtom_for(lang);
                    192:     node->quality = quality;
                    193: }
                    194: 
                    195: 
                    196: PRIVATE BOOL wild_match ARGS2(HTAtom *,        template,
                    197:                              HTAtom *, actual)
                    198: {
                    199:     char *t, *a, *st, *sa;
                    200:     BOOL match = NO;
                    201: 
1.22      luotonen  202:     if (template && actual && (t = HTAtom_name(template))) {
                    203:        if (!strcmp(t, "*"))
                    204:            return YES;
1.17      luotonen  205: 
1.22      luotonen  206:        if (strchr(t, '*') &&
                    207:            (a = HTAtom_name(actual)) &&
                    208:            (st = strchr(t, '/')) && (sa = strchr(a,'/'))) {
1.17      luotonen  209: 
1.22      luotonen  210:            *sa = 0;
                    211:            *st = 0;
                    212: 
                    213:            if ((*(st-1)=='*' &&
                    214:                 (*(st+1)=='*' || !strcasecomp(st+1, sa+1))) ||
                    215:                (*(st+1)=='*' && !strcasecomp(t,a)))
                    216:                match = YES;
                    217: 
                    218:            *sa = '/';
                    219:            *st = '/';
                    220:        }    
                    221:     }
1.23      luotonen  222:     return match;
1.17      luotonen  223: }
                    224: 
                    225: 
                    226: PRIVATE float type_value ARGS2(HTAtom *,       content_type,
                    227:                               HTList *,        accepted)
                    228: {
                    229:     HTList * cur = accepted;
                    230:     HTPresentation * pres;
                    231:     HTPresentation * wild = NULL;
                    232: 
                    233:     if (!content_type || !accepted) return -1;
                    234: 
                    235:     while ((pres = (HTPresentation*)HTList_nextObject(cur))) {
                    236:        if (pres->rep == content_type)
                    237:            return pres->quality;
                    238:        else if (wild_match(pres->rep, content_type))
                    239:            wild = pres;
                    240:     }
                    241:     if (wild) return wild->quality;
                    242:     else return -1;
                    243: }
                    244: 
                    245: 
                    246: PRIVATE float lang_value ARGS2(HTAtom *,       language,
                    247:                               HTList *,        accepted)
                    248: {
                    249:     HTList * cur = accepted;
                    250:     HTAcceptNode * node;
                    251:     HTAcceptNode * wild = NULL;
                    252: 
                    253:     if (!language || !accepted || HTList_isEmpty(accepted)) {
                    254:        return 0.1;
                    255:     }
                    256: 
                    257:     while ((node = (HTAcceptNode*)HTList_nextObject(cur))) {
                    258:        if (node->atom == language) {
                    259:            return node->quality;
                    260:        }
                    261:        else if (wild_match(node->atom, language)) {
                    262:            wild = node;
                    263:        }
                    264:     }
                    265: 
                    266:     if (wild) {
                    267:        return wild->quality;
                    268:     }
                    269:     else {
                    270:        return 0.1;
                    271:     }
                    272: }
                    273: 
                    274: 
                    275: PRIVATE float encoding_value ARGS2(HTAtom *,   encoding,
                    276:                                   HTList *,    accepted)
                    277: {
                    278:     HTList * cur = accepted;
                    279:     HTAcceptNode * node;
                    280:     HTAcceptNode * wild = NULL;
                    281:     char * e;
                    282: 
                    283:     if (!encoding || !accepted || HTList_isEmpty(accepted))
                    284:        return 1;
                    285: 
                    286:     e = HTAtom_name(encoding);
                    287:     if (!strcmp(e, "7bit") || !strcmp(e, "8bit") || !strcmp(e, "binary"))
                    288:        return 1;
                    289: 
                    290:     while ((node = (HTAcceptNode*)HTList_nextObject(cur))) {
                    291:        if (node->atom == encoding)
                    292:            return node->quality;
                    293:        else if (wild_match(node->atom, encoding))
                    294:            wild = node;
                    295:     }
                    296:     if (wild) return wild->quality;
                    297:     else return 1;
                    298: }
                    299: 
                    300: 
                    301: PUBLIC BOOL HTRank ARGS4(HTList *, possibilities,
                    302:                         HTList *, accepted_content_types,
                    303:                         HTList *, accepted_languages,
                    304:                         HTList *, accepted_encodings)
                    305: {
                    306:     int accepted_cnt = 0;
                    307:     HTList * accepted;
                    308:     HTList * sorted;
                    309:     HTList * cur;
                    310:     HTContentDescription * d;
                    311: 
                    312:     if (!possibilities) return NO;
                    313: 
                    314:     accepted = HTList_new();
                    315:     cur = possibilities;
                    316:     while ((d = (HTContentDescription*)HTList_nextObject(cur))) {
                    317:        float tv = type_value(d->content_type, accepted_content_types);
                    318:        float lv = lang_value(d->content_language, accepted_languages);
                    319:        float ev = encoding_value(d->content_encoding, accepted_encodings);
                    320: 
                    321: #ifdef ARI_DEBUG
                    322:        CTRACE(stderr,
                    323:               " ## FOR FILE \"%s\" (%.3f) VALUES type %.3f enc %.3f lang %.3f\n",
                    324:               d->filename, d->quality, tv, ev, lv);
                    325: #endif
                    326: 
                    327:        if (tv > 0) {
                    328:            d->quality *= tv * lv * ev;
                    329:            HTList_addObject(accepted, d);
                    330:            accepted_cnt++;
                    331:        }
1.18      luotonen  332:        else {
                    333:            if (d->filename) free(d->filename);
                    334:            free(d);
                    335:        }
1.17      luotonen  336:     }
                    337: 
1.18      luotonen  338:     CTRACE(stderr, "Ranking.....\n");
1.17      luotonen  339:     CTRACE(stderr,
1.18      luotonen  340:           "\nRANK QUALITY CONTENT-TYPE         LANGUAGE ENCODING    FILE\n");
1.17      luotonen  341: 
                    342:     sorted = HTList_new();
                    343:     while (accepted_cnt-- > 0) {
                    344:        HTContentDescription * worst = NULL;
                    345:        cur = accepted;
                    346:        while ((d = (HTContentDescription*)HTList_nextObject(cur))) {
                    347:            if (!worst || d->quality < worst->quality)
                    348:                worst = d;
                    349:        }
                    350:        if (worst) {
                    351:            CTRACE(stderr, "%d.   %.4f  %-20.20s %-8.8s %-10.10s %s\n",
                    352:                   accepted_cnt+1,
                    353:                   worst->quality,
                    354:                   (worst->content_type
                    355:                         ? HTAtom_name(worst->content_type)      : "-"),
                    356:                   (worst->content_language
                    357:                         ? HTAtom_name(worst->content_language)  :"-"),
                    358:                   (worst->content_encoding
                    359:                         ? HTAtom_name(worst->content_encoding)  :"-"),
                    360:                   (worst->filename
                    361:                         ? worst->filename                       :"-"));
                    362:            HTList_removeObject(accepted, (void*)worst);
                    363:            HTList_addObject(sorted, (void*)worst);
                    364:        }
                    365:     }
1.18      luotonen  366:     CTRACE(stderr, "\n");
1.17      luotonen  367:     HTList_delete(accepted);
                    368:     HTList_delete(possibilities->next);
                    369:     possibilities->next = sorted->next;
                    370:     sorted->next = NULL;
                    371:     HTList_delete(sorted);
                    372: 
                    373:     if (!HTList_isEmpty(possibilities)) return YES;
                    374:     else return NO;
                    375: }
                    376: 
                    377: 
                    378: 
                    379: 
                    380: 
1.13      timbl     381: /*                     Socket Input Buffering
                    382: **                     ----------------------
1.1       timbl     383: **
1.13      timbl     384: **     This code is used because one cannot in general open a
                    385: **     file descriptor for a socket.
                    386: **
1.1       timbl     387: **     The input file is read using the macro which can read from
1.13      timbl     388: **     a socket or a file, but this should not be used for files
                    389: **     as fopen() etc is more portable of course.
                    390: **
1.1       timbl     391: **     The input buffer size, if large will give greater efficiency and
                    392: **     release the server faster, and if small will save space on PCs etc.
                    393: */
                    394: 
                    395: 
                    396: /*     Set up the buffering
                    397: **
                    398: **     These routines are public because they are in fact needed by
                    399: **     many parsers, and on PCs and Macs we should not duplicate
                    400: **     the static buffer area.
                    401: */
1.13      timbl     402: PUBLIC HTInputSocket * HTInputSocket_new ARGS1 (int,file_number)
1.1       timbl     403: {
1.28      frystyk   404:     HTInputSocket *isoc = (HTInputSocket *)calloc(1, sizeof(*isoc));
1.13      timbl     405:     if (!isoc) outofmem(__FILE__, "HTInputSocket_new");
                    406:     isoc->input_file_number = file_number;
                    407:     isoc->input_pointer = isoc->input_limit = isoc->input_buffer;
                    408:     return isoc;
1.1       timbl     409: }
                    410: 
                    411: 
1.13      timbl     412: PUBLIC char HTInputSocket_getCharacter ARGS1(HTInputSocket*, isoc)
1.1       timbl     413: {
                    414:     char ch;
                    415:     do {
1.13      timbl     416:        if (isoc-> input_pointer >= isoc->input_limit) {
1.1       timbl     417:            int status = NETREAD(
1.13      timbl     418:                   isoc->input_file_number,
                    419:                   isoc->input_buffer, INPUT_BUFFER_SIZE);
1.1       timbl     420:            if (status <= 0) {
                    421:                if (status == 0) return (char)EOF;
                    422:                if (TRACE) fprintf(stderr,
                    423:                    "HTFormat: File read error %d\n", status);
                    424:                return (char)EOF; /* -1 is returned by UCX at end of HTTP link */
                    425:            }
1.13      timbl     426:            isoc-> input_pointer = isoc->input_buffer;
                    427:            isoc->input_limit = isoc->input_buffer + status;
1.1       timbl     428:        }
1.13      timbl     429:        ch = *isoc-> input_pointer++;
1.1       timbl     430:     } while (ch == (char) 13); /* Ignore ASCII carriage return */
                    431:     
                    432:     return FROMASCII(ch);
                    433: }
                    434: 
1.17      luotonen  435: PUBLIC void HTInputSocket_free ARGS1(HTInputSocket *, me)
1.13      timbl     436: {
                    437:     if (me) free(me);
                    438: }
                    439: 
                    440: 
1.16      luotonen  441: PUBLIC char * HTInputSocket_getBlock ARGS2(HTInputSocket*,     isoc,
                    442:                                           int *,               len)
                    443: {
                    444:     if (isoc->input_pointer >= isoc->input_limit) {
                    445:        int status = NETREAD(isoc->input_file_number,
                    446:                             isoc->input_buffer,
                    447:                             ((*len < INPUT_BUFFER_SIZE) ?
                    448:                              *len : INPUT_BUFFER_SIZE));
                    449:        if (status <= 0) {
                    450:            isoc->input_limit = isoc->input_buffer;
                    451:            if (status < 0)
                    452:                CTRACE(stderr, "HTInputSocket: File read error %d\n", status);
                    453:            *len = 0;
                    454:            return NULL;
                    455:        }
                    456:        else {
                    457:            *len = status;
                    458:            return isoc->input_buffer;
                    459:        }
                    460:     }
                    461:     else {
                    462:        char * ret = isoc->input_pointer;
                    463:        *len = isoc->input_limit - isoc->input_pointer;
                    464:        isoc->input_pointer = isoc->input_limit;
                    465:        return ret;
                    466:     }
                    467: }
                    468: 
                    469: 
1.15      luotonen  470: PRIVATE int fill_in_buffer ARGS1(HTInputSocket *, isoc)
                    471: {
                    472:     if (isoc) {
                    473:        int status;
                    474: 
                    475:        isoc->input_pointer = isoc->input_buffer;
                    476:        status = NETREAD(isoc->input_file_number,
                    477:                         isoc->input_buffer,
                    478:                         INPUT_BUFFER_SIZE);
                    479:        if (status <= 0) {
                    480:            isoc->input_limit = isoc->input_buffer;
                    481:            if (status < 0)
                    482:                if (TRACE) fprintf(stderr,
                    483:                                   "HTInputSocket: File read error %d\n",
                    484:                                   status);
                    485:        }
                    486:        else 
                    487:            isoc->input_limit = isoc->input_buffer + status;
                    488:        return status;
                    489:     }
                    490:     return -1;
                    491: }
                    492: 
                    493: 
                    494: PRIVATE void ascii_cat ARGS3(char **,  linep,
                    495:                             char *,    start,
                    496:                             char *,    end)
                    497: {
                    498:     if (linep && start && end && start <= end) {
                    499:        char *ptr;
                    500: 
                    501:        if (*linep) {
                    502:            int len = strlen(*linep);
                    503:            *linep = (char*)realloc(*linep, len + end-start + 1);
                    504:            ptr = *linep + len;
                    505:        }
                    506:        else {
                    507:            ptr = *linep = (char*)malloc(end-start + 1);
                    508:        }
                    509: 
                    510:        while (start < end) {
                    511:            *ptr = FROMASCII(*start);
                    512:            ptr++;
                    513:            start++;
                    514:        }
                    515:        *ptr = 0;
                    516:     }
                    517: }
                    518: 
                    519: 
                    520: PRIVATE char * get_some_line ARGS2(HTInputSocket *,    isoc,
                    521:                                   BOOL,                unfold)
                    522: {
                    523:     if (!isoc)
                    524:        return NULL;
                    525:     else {
                    526:        BOOL check_unfold = NO;
                    527:        int prev_cr = 0;
                    528:        char *start = isoc->input_pointer;
                    529:        char *cur = isoc->input_pointer;
                    530:        char * line = NULL;
                    531: 
                    532:        for(;;) {
                    533:            /*
                    534:            ** Get more if needed to complete line
                    535:            */
                    536:            if (cur >= isoc->input_limit) { /* Need more data */
                    537:                ascii_cat(&line, start, cur);
                    538:                if (fill_in_buffer(isoc) <= 0)
                    539:                    return line;
                    540:                start = cur = isoc->input_pointer;
                    541:            } /* if need more data */
                    542: 
                    543:            /*
                    544:            ** Find a line feed if there is one
                    545:            */
                    546:            for(; cur < isoc->input_limit; cur++) {
                    547:                char c = FROMASCII(*cur);
                    548:                if (!c) {
1.18      luotonen  549:                    if (line) free(line);       /* Leak fixed AL 6 Feb 94 */
1.15      luotonen  550:                    return NULL;        /* Panic! read a 0! */
                    551:                }
                    552:                if (check_unfold  &&  c != ' '  &&  c != '\t') {
                    553:                    return line;  /* Note: didn't update isoc->input_pointer */
                    554:                }
                    555:                else {
                    556:                    check_unfold = NO;
                    557:                }
                    558: 
                    559:                if (c=='\r') {
                    560:                    prev_cr = 1;
                    561:                }
                    562:                else {
                    563:                    if (c=='\n') {              /* Found a line feed */
                    564:                        ascii_cat(&line, start, cur-prev_cr);
                    565:                        start = isoc->input_pointer = cur+1;
                    566: 
                    567:                        if (line && strlen(line) > 0 && unfold) {
                    568:                            check_unfold = YES;
                    569:                        }
                    570:                        else {
                    571:                            return line;
                    572:                        }
                    573:                    } /* if NL */
                    574:                    /* else just a regular character */
                    575:                    prev_cr = 0;
                    576:                } /* if not CR */
                    577:            } /* while characters in buffer remain */
                    578:        } /* until line read or end-of-file */
                    579:     } /* valid parameters to function */
                    580: }
                    581: 
                    582: 
                    583: PUBLIC char * HTInputSocket_getLine ARGS1(HTInputSocket *, isoc)
                    584: {
                    585:     return get_some_line(isoc, NO);
                    586: }
                    587: 
                    588: PUBLIC char * HTInputSocket_getUnfoldedLine ARGS1(HTInputSocket *, isoc)
                    589: {
                    590:     return get_some_line(isoc, YES);
                    591: }
                    592: 
                    593: 
                    594: /*
                    595: ** Read HTTP status line (if there is one).
                    596: **
                    597: ** Kludge to trap binary responses from illegal HTTP0.9 servers.
                    598: ** First look at the stub in ASCII and check if it starts "HTTP/".
                    599: **
                    600: ** Bugs: A HTTP0.9 server returning a document starting "HTTP/"
                    601: **      will be taken as a HTTP 1.0 server.  Failure.
                    602: */
                    603: #define STUB_LENGTH 20
                    604: PUBLIC char * HTInputSocket_getStatusLine ARGS1(HTInputSocket *, isoc)
                    605: {
                    606:     if (!isoc) {
                    607:        return NULL;
                    608:     }
                    609:     else {
                    610:        char buf[STUB_LENGTH + 1];
                    611:        int i;
                    612:        char server_version[STUB_LENGTH+1];
                    613:        int server_status;
                    614: 
                    615:        /*
                    616:        ** Read initial buffer
                    617:        */
                    618:        if (isoc->input_pointer >= isoc->input_limit &&
                    619:            fill_in_buffer(isoc) <= 0) {
                    620:            return NULL;
                    621:         }
                    622: 
                    623:        for (i=0; i < STUB_LENGTH; i++)
                    624:            buf[i] = FROMASCII(isoc->input_buffer[i]);
                    625:        buf[STUB_LENGTH] = 0;
                    626: 
                    627:        if (0 != strncmp(buf, "HTTP/", 5) ||
                    628:            sscanf(buf, "%20s%d", server_version, &server_status) < 2)
                    629:            return NULL;
                    630:        else
                    631:            return get_some_line(isoc, NO);
                    632:     }
                    633: }
                    634: 
                    635: 
                    636: /*
                    637: ** Do heuristic test to see if this is binary.
                    638: **
                    639: ** We check for characters above 128 in the first few bytes, and
                    640: ** if we find them we forget the html default.
                    641: ** Kludge to trap binary responses from illegal HTTP0.9 servers.
                    642: **
                    643: ** Bugs: An HTTP 0.9 server returning a binary document with
                    644: **      characters < 128 will be read as ASCII.
                    645: */
                    646: PUBLIC BOOL HTInputSocket_seemsBinary ARGS1(HTInputSocket *, isoc)
                    647: {
                    648:     if (isoc &&
                    649:        (isoc->input_pointer < isoc->input_limit ||
                    650:         fill_in_buffer(isoc) > 0)) {
                    651:        char *p = isoc->input_buffer;
                    652:        int i = STUB_LENGTH;
                    653: 
                    654:        for( ; i && p < isoc->input_limit; p++, i++)
                    655:            if (((int)*p)&128)
                    656:                return YES;
                    657:     }
                    658:     return NO;
                    659: }
                    660: 
                    661: 
                    662: 
1.1       timbl     663: /*     Stream the data to an ouput file as binary
                    664: */
1.13      timbl     665: PUBLIC int HTOutputBinary ARGS3( HTInputSocket *, isoc,
                    666:                                int,            input,
                    667:                                FILE *,         output)
1.1       timbl     668: {
                    669:     do {
                    670:            int status = NETREAD(
1.13      timbl     671:                    input, isoc->input_buffer, INPUT_BUFFER_SIZE);
1.1       timbl     672:            if (status <= 0) {
                    673:                if (status == 0) return 0;
                    674:                if (TRACE) fprintf(stderr,
                    675:                    "HTFormat: File read error %d\n", status);
                    676:                return 2;                       /* Error */
                    677:            }
1.13      timbl     678:            fwrite(isoc->input_buffer, sizeof(char), status, output);
1.1       timbl     679:     } while (YES);
                    680: }
                    681: 
                    682: 
1.17      luotonen  683: 
1.2       timbl     684: /*             Create a filter stack
                    685: **             ---------------------
                    686: **
1.7       secret    687: **     If a wildcard match is made, a temporary HTPresentation
1.2       timbl     688: **     structure is made to hold the destination format while the
                    689: **     new stack is generated. This is just to pass the out format to
                    690: **     MIME so far.  Storing the format of a stream in the stream might
                    691: **     be a lot neater.
1.10      timbl     692: **
1.29      frystyk   693: **     The star/star format is special, in that if you can take
1.10      timbl     694: **     that you can take anything. However, we
1.2       timbl     695: */
1.12      timbl     696: PUBLIC HTStream * HTStreamStack ARGS2(
1.10      timbl     697:        HTFormat,               rep_in,
1.12      timbl     698:        HTRequest *,            request)
1.2       timbl     699: {
1.12      timbl     700:     HTFormat rep_out = request->output_format; /* Could be a param */
1.14      timbl     701:     HTList * conversion[2];
                    702:     int which_list;
1.25      frystyk   703:     float best_quality = -1e30;                /* Pretty bad! */
1.29      frystyk   704:     HTPresentation *pres, *match, *best_match=0;
1.14      timbl     705:     
1.2       timbl     706:     if (TRACE) fprintf(stderr,
                    707:        "HTFormat: Constructing stream stack for %s to %s\n",
1.10      timbl     708:        HTAtom_name(rep_in),    
1.2       timbl     709:        HTAtom_name(rep_out));
                    710:                
1.21      luotonen  711:     if (rep_out == WWW_SOURCE || rep_out == rep_in)
                    712:        return request->output_stream;
1.2       timbl     713: 
1.14      timbl     714:     conversion[0] = request->conversions;
                    715:     conversion[1] = HTConversions;
1.17      luotonen  716: 
1.15      luotonen  717:     for(which_list = 0; which_list<2; which_list++) {
                    718:        HTList * cur = conversion[which_list];
                    719:        
                    720:        while ((pres = (HTPresentation*)HTList_nextObject(cur))) {
1.25      frystyk   721:            if  ((pres->rep == rep_in || wild_match(pres->rep, rep_in)) &&
                    722:                (pres->rep_out == rep_out || wild_match(pres->rep_out, rep_out))) {
                    723:                if (pres->quality > best_quality) {
                    724:                    best_match = pres;
                    725:                    best_quality = pres->quality;
1.10      timbl     726:                }
                    727:            }
1.25      frystyk   728:            
1.29      frystyk   729: #ifdef OLD_CODE
                    730:            /* This case is now included in the best_match loop */
1.25      frystyk   731:            /* Special case when input format is 'www/source' */ 
1.10      timbl     732:            if (pres->rep == source) {
1.29      frystyk   733:                if (pres->rep_out == rep_out ||
                    734:                    wild_match(pres->rep_out, rep_out))
1.10      timbl     735:                    source_match = pres;
1.2       timbl     736:            }
1.29      frystyk   737: #endif
1.2       timbl     738:        }
                    739:     }
1.29      frystyk   740:     match = best_match ? best_match : NULL;
                    741:     if (match) {
                    742:        if (match->rep == WWW_SOURCE) {
                    743:            if (TRACE) fprintf(stderr,
                    744:            "HTFormat: Don't know how to handle this, so put out %s to %s\n",
                    745:                               HTAtom_name(match->rep), 
                    746:                               HTAtom_name(rep_out));
                    747:        }
                    748:        return (*match->converter)(
1.25      frystyk   749:        request, match->command, rep_in, rep_out,
                    750:        request->output_stream);
1.29      frystyk   751:     }
1.2       timbl     752:     return NULL;
                    753: }
                    754:        
                    755: 
                    756: /*             Find the cost of a filter stack
                    757: **             -------------------------------
                    758: **
                    759: **     Must return the cost of the same stack which StreamStack would set up.
                    760: **
                    761: ** On entry,
                    762: **     length  The size of the data to be converted
                    763: */
1.12      timbl     764: PUBLIC float HTStackValue ARGS5(
1.14      timbl     765:        HTList *,               theseConversions,
1.10      timbl     766:        HTFormat,               rep_in,
1.2       timbl     767:        HTFormat,               rep_out,
                    768:        float,                  initial_value,
                    769:        long int,               length)
                    770: {
1.14      timbl     771:     int which_list;
                    772:     HTList* conversion[2];
                    773:     
1.2       timbl     774:     if (TRACE) fprintf(stderr,
                    775:        "HTFormat: Evaluating stream stack for %s worth %.3f to %s\n",
1.10      timbl     776:        HTAtom_name(rep_in),    initial_value,
1.2       timbl     777:        HTAtom_name(rep_out));
                    778:                
                    779:     if (rep_out == WWW_SOURCE ||
1.10      timbl     780:        rep_out == rep_in) return 0.0;
1.2       timbl     781: 
1.12      timbl     782:  /*   if (!HTPresentations) HTFormatInit();     set up the list */
1.2       timbl     783:     
1.14      timbl     784:     conversion[0] = theseConversions;
                    785:     conversion[1] = HTConversions;
                    786:     
                    787:     for(which_list = 0; which_list<2; which_list++)
                    788:      if (conversion[which_list]) {
1.15      luotonen  789:         HTList * cur = conversion[which_list];
1.2       timbl     790:        HTPresentation * pres;
1.15      luotonen  791:        while ((pres = (HTPresentation*)HTList_nextObject(cur))) {
                    792:            if (pres->rep == rep_in &&
1.17      luotonen  793:                (pres->rep_out == rep_out || wild_match(pres->rep_out, rep_out))) {
1.2       timbl     794:                float value = initial_value * pres->quality;
                    795:                if (HTMaxSecs != 0.0)
1.15      luotonen  796:                    value = value - (length*pres->secs_per_byte + pres->secs)
1.2       timbl     797:                                         /HTMaxSecs;
                    798:                return value;
                    799:            }
                    800:        }
                    801:     }
                    802:     
                    803:     return -1e30;              /* Really bad */
1.17      luotonen  804: }
                    805: 
                    806: 
1.2       timbl     807: 
1.1       timbl     808: 
1.2       timbl     809: /*     Push data from a socket down a stream
                    810: **     -------------------------------------
1.1       timbl     811: **
1.2       timbl     812: **   This routine is responsible for creating and PRESENTING any
1.1       timbl     813: **   graphic (or other) objects described by the file.
1.2       timbl     814: **
                    815: **   The file number given is assumed to be a TELNET stream ie containing
                    816: **   CRLF at the end of lines which need to be stripped to LF for unix
                    817: **   when the format is textual.
                    818: **
1.26      luotonen  819: **   RETURNS the number of bytes transferred.
                    820: **
1.1       timbl     821: */
1.26      luotonen  822: PUBLIC int HTCopy ARGS2(
1.2       timbl     823:        int,                    file_number,
                    824:        HTStream*,              sink)
1.1       timbl     825: {
1.2       timbl     826:     HTStreamClass targetClass;    
1.13      timbl     827:     HTInputSocket * isoc;
1.26      luotonen  828:     int cnt = 0;
                    829: 
1.5       timbl     830: /*     Push the data down the stream
1.2       timbl     831: **
                    832: */
                    833:     targetClass = *(sink->isa);        /* Copy pointers to procedures */
1.13      timbl     834:     isoc = HTInputSocket_new(file_number);
1.2       timbl     835:     
                    836:     /* Push binary from socket down sink
1.10      timbl     837:     **
                    838:     **         This operation could be put into a main event loop
1.2       timbl     839:     */
                    840:     for(;;) {
                    841:        int status = NETREAD(
1.13      timbl     842:                file_number, isoc->input_buffer, INPUT_BUFFER_SIZE);
1.2       timbl     843:        if (status <= 0) {
                    844:            if (status == 0) break;
                    845:            if (TRACE) fprintf(stderr,
1.24      luotonen  846:                "HTFormat: Read error, read returns %d with errno=%d\n",
                    847:                status, errno);
1.2       timbl     848:            break;
                    849:        }
1.26      luotonen  850: 
1.8       timbl     851: #ifdef NOT_ASCII
                    852:        {
                    853:            char * p;
1.13      timbl     854:            for(p = isoc->input_buffer; p < isoc->input_buffer+status; p++) {
1.8       timbl     855:                *p = FROMASCII(*p);
                    856:            }
                    857:        }
                    858: #endif
                    859: 
1.13      timbl     860:        (*targetClass.put_block)(sink, isoc->input_buffer, status);
1.26      luotonen  861:        cnt += status;
1.2       timbl     862:     } /* next bufferload */
1.26      luotonen  863: 
1.13      timbl     864:     HTInputSocket_free(isoc);
1.26      luotonen  865: 
                    866:     return cnt;
1.2       timbl     867: }
                    868: 
1.1       timbl     869: 
1.7       secret    870: 
                    871: /*     Push data from a file pointer down a stream
                    872: **     -------------------------------------
                    873: **
                    874: **   This routine is responsible for creating and PRESENTING any
                    875: **   graphic (or other) objects described by the file.
                    876: **
                    877: **
                    878: */
                    879: PUBLIC void HTFileCopy ARGS2(
                    880:        FILE *,                 fp,
                    881:        HTStream*,              sink)
                    882: {
                    883:     HTStreamClass targetClass;    
1.13      timbl     884:     char input_buffer[INPUT_BUFFER_SIZE];
1.7       secret    885:     
                    886: /*     Push the data down the stream
                    887: **
                    888: */
                    889:     targetClass = *(sink->isa);        /* Copy pointers to procedures */
                    890:     
                    891:     /* Push binary from socket down sink
                    892:     */
                    893:     for(;;) {
                    894:        int status = fread(
                    895:               input_buffer, 1, INPUT_BUFFER_SIZE, fp);
                    896:        if (status == 0) { /* EOF or error */
                    897:            if (ferror(fp) == 0) break;
                    898:            if (TRACE) fprintf(stderr,
                    899:                "HTFormat: Read error, read returns %d\n", ferror(fp));
                    900:            break;
                    901:        }
                    902:        (*targetClass.put_block)(sink, input_buffer, status);
1.13      timbl     903:     } /* next bufferload */    
1.7       secret    904: }
                    905: 
                    906: 
                    907: 
                    908: 
1.2       timbl     909: /*     Push data from a socket down a stream STRIPPING CR
                    910: **     --------------------------------------------------
                    911: **
                    912: **   This routine is responsible for creating and PRESENTING any
1.8       timbl     913: **   graphic (or other) objects described by the socket.
1.2       timbl     914: **
                    915: **   The file number given is assumed to be a TELNET stream ie containing
                    916: **   CRLF at the end of lines which need to be stripped to LF for unix
                    917: **   when the format is textual.
                    918: **
1.1       timbl     919: */
1.2       timbl     920: PUBLIC void HTCopyNoCR ARGS2(
                    921:        int,                    file_number,
                    922:        HTStream*,              sink)
                    923: {
1.13      timbl     924:     HTStreamClass targetClass;
                    925:     HTInputSocket * isoc;   
1.1       timbl     926:     
1.2       timbl     927: /*     Push the data, ignoring CRLF, down the stream
                    928: **
                    929: */
                    930:     targetClass = *(sink->isa);        /* Copy pointers to procedures */
                    931: 
                    932: /*     Push text from telnet socket down sink
                    933: **
                    934: **     @@@@@ To push strings could be faster? (especially is we
                    935: **     cheat and don't ignore CR! :-}
                    936: */  
1.13      timbl     937:     isoc = HTInputSocket_new(file_number);
1.2       timbl     938:     for(;;) {
                    939:        char character;
1.13      timbl     940:        character = HTInputSocket_getCharacter(isoc);
1.2       timbl     941:        if (character == (char)EOF) break;
                    942:        (*targetClass.put_character)(sink, character);           
                    943:     }
1.13      timbl     944:     HTInputSocket_free(isoc);
1.2       timbl     945: }
1.1       timbl     946: 
1.2       timbl     947: 
1.7       secret    948: 
1.2       timbl     949: /*     Parse a socket given format and file number
                    950: **
                    951: **   This routine is responsible for creating and PRESENTING any
                    952: **   graphic (or other) objects described by the file.
                    953: **
                    954: **   The file number given is assumed to be a TELNET stream ie containing
                    955: **   CRLF at the end of lines which need to be stripped to LF for unix
                    956: **   when the format is textual.
                    957: **
                    958: */
1.14      timbl     959: 
1.12      timbl     960: PUBLIC int HTParseSocket ARGS3(
1.10      timbl     961:        HTFormat,               rep_in,
1.2       timbl     962:        int,                    file_number,
1.12      timbl     963:        HTRequest *,            request)
1.2       timbl     964: {
                    965:     HTStream * stream;
                    966:     HTStreamClass targetClass;    
1.1       timbl     967: 
1.12      timbl     968:     stream = HTStreamStack(rep_in, request);
1.29      frystyk   969: 
1.2       timbl     970:     if (!stream) {
1.30    ! frystyk   971:        char buffer[1024];      /* @@@@@@@@ */
1.2       timbl     972:        sprintf(buffer, "Sorry, can't convert from %s to %s.",
1.12      timbl     973:                HTAtom_name(rep_in), HTAtom_name(request->output_format));
1.30    ! frystyk   974:        if (TRACE) fprintf(stderr, "HTFormat(in HTParseSocket): %s\n", buffer);
1.16      luotonen  975:         return HTLoadError(request, 501, buffer);
1.2       timbl     976:     }
1.1       timbl     977:     
1.3       timbl     978: /*     Push the data, ignoring CRLF if necessary, down the stream
                    979: **
1.2       timbl     980: **
1.3       timbl     981: **   @@  Bug:  This decision ought to be made based on "encoding"
1.9       timbl     982: **   rather than on format.  @@@  When we handle encoding.
1.3       timbl     983: **   The current method smells anyway.
1.2       timbl     984: */
                    985:     targetClass = *(stream->isa);      /* Copy pointers to procedures */
1.10      timbl     986:     if (rep_in == WWW_BINARY || HTOutputSource
1.26      luotonen  987:        || (request->content_encoding &&
                    988:            request->content_encoding != HTAtom_for("8bit") &&
                    989:            request->content_encoding != HTAtom_for("7bit"))
1.10      timbl     990:         || strstr(HTAtom_name(rep_in), "image/")
                    991:        || strstr(HTAtom_name(rep_in), "video/")) { /* @@@@@@ */
1.29      frystyk   992:        HTCopy(file_number, stream);
1.2       timbl     993:     } else {   /* ascii text with CRLFs :-( */
                    994:         HTCopyNoCR(file_number, stream);
                    995:     }
1.7       secret    996:     (*targetClass.free)(stream);
                    997:     
                    998:     return HT_LOADED;
                    999: }
                   1000: 
                   1001: 
                   1002: 
                   1003: /*     Parse a file given format and file pointer
                   1004: **
                   1005: **   This routine is responsible for creating and PRESENTING any
                   1006: **   graphic (or other) objects described by the file.
                   1007: **
                   1008: **   The file number given is assumed to be a TELNET stream ie containing
1.10      timbl    1009: **   CRLF at the end of lines which need to be stripped to \n for unix
1.7       secret   1010: **   when the format is textual.
                   1011: **
                   1012: */
1.12      timbl    1013: PUBLIC int HTParseFile ARGS3(
1.10      timbl    1014:        HTFormat,               rep_in,
1.7       secret   1015:        FILE *,                 fp,
1.12      timbl    1016:        HTRequest *,            request)
1.7       secret   1017: {
                   1018:     HTStream * stream;
                   1019:     HTStreamClass targetClass;    
                   1020: 
1.12      timbl    1021:     stream = HTStreamStack(rep_in, request);
1.7       secret   1022:     
                   1023:     if (!stream) {
1.30    ! frystyk  1024:        char buffer[1024];      /* @@@@@@@@ */
1.7       secret   1025:        sprintf(buffer, "Sorry, can't convert from %s to %s.",
1.12      timbl    1026:                HTAtom_name(rep_in), HTAtom_name(request->output_format));
1.7       secret   1027:        if (TRACE) fprintf(stderr, "HTFormat(in HTParseFile): %s\n", buffer);
1.29      frystyk  1028:        return HTLoadError(request, 501, buffer);
1.7       secret   1029:     }
                   1030:     
1.9       timbl    1031: /*     Push the data down the stream
1.7       secret   1032: **
                   1033: **
                   1034: **   @@  Bug:  This decision ought to be made based on "encoding"
1.10      timbl    1035: **   rather than on content-type.  @@@  When we handle encoding.
1.7       secret   1036: **   The current method smells anyway.
                   1037: */
                   1038:     targetClass = *(stream->isa);      /* Copy pointers to procedures */
                   1039:     HTFileCopy(fp, stream);
1.2       timbl    1040:     (*targetClass.free)(stream);
1.1       timbl    1041:     
1.2       timbl    1042:     return HT_LOADED;
1.1       timbl    1043: }
1.2       timbl    1044: 
1.10      timbl    1045: 
                   1046: /*     Converter stream: Network Telnet to internal character text
                   1047: **     -----------------------------------------------------------
                   1048: **
                   1049: **     The input is assumed to be in ASCII, with lines delimited
                   1050: **     by (13,10) pairs, These pairs are converted into (CR,LF)
                   1051: **     pairs in the local representation.  The (CR,LF) sequence
                   1052: **     when found is changed to a '\n' character, the internal
                   1053: **     C representation of a new line.
                   1054: */
                   1055: 
                   1056: 
1.11      timbl    1057: PRIVATE void NetToText_put_character ARGS2(HTStream *, me, char, net_char)
1.10      timbl    1058: {
                   1059:     char c = FROMASCII(net_char);
                   1060:     if (me->had_cr) {
                   1061:         if (c==LF) {
                   1062:            me->sink->isa->put_character(me->sink, '\n');       /* Newline */
                   1063:            me->had_cr = NO;
                   1064:            return;
                   1065:         } else {
                   1066:            me->sink->isa->put_character(me->sink, CR); /* leftover */
                   1067:        }
                   1068:     }
                   1069:     me->had_cr = (c==CR);
                   1070:     if (!me->had_cr)
                   1071:        me->sink->isa->put_character(me->sink, c);              /* normal */
                   1072: }
                   1073: 
1.11      timbl    1074: PRIVATE void NetToText_put_string ARGS2(HTStream *, me, CONST char *, s)
1.10      timbl    1075: {
                   1076:     CONST char * p;
                   1077:     for(p=s; *p; p++) NetToText_put_character(me, *p);
                   1078: }
                   1079: 
1.11      timbl    1080: PRIVATE void NetToText_put_block ARGS3(HTStream *, me, CONST char*, s, int, l)
1.10      timbl    1081: {
                   1082:     CONST char * p;
                   1083:     for(p=s; p<(s+l); p++) NetToText_put_character(me, *p);
                   1084: }
                   1085: 
                   1086: PRIVATE void NetToText_free ARGS1(HTStream *, me)
                   1087: {
                   1088:     me->sink->isa->free(me->sink);             /* Close rest of pipe */
                   1089:     free(me);
                   1090: }
                   1091: 
                   1092: PRIVATE void NetToText_abort ARGS2(HTStream *, me, HTError, e)
                   1093: {
                   1094:     me->sink->isa->abort(me->sink,e);          /* Abort rest of pipe */
                   1095:     free(me);
                   1096: }
                   1097: 
                   1098: /*     The class structure
                   1099: */
                   1100: PRIVATE HTStreamClass NetToTextClass = {
                   1101:     "NetToText",
                   1102:     NetToText_free,
                   1103:     NetToText_abort,
                   1104:     NetToText_put_character,
                   1105:     NetToText_put_string,
                   1106:     NetToText_put_block
                   1107: };
                   1108: 
                   1109: /*     The creation method
                   1110: */
                   1111: PUBLIC HTStream * HTNetToText ARGS1(HTStream *, sink)
                   1112: {
                   1113:     HTStream* me = (HTStream*)malloc(sizeof(*me));
                   1114:     if (me == NULL) outofmem(__FILE__, "NetToText");
                   1115:     me->isa = &NetToTextClass;
                   1116:     
                   1117:     me->had_cr = NO;
                   1118:     me->sink = sink;
                   1119:     return me;
                   1120: }
1.2       timbl    1121: 
                   1122: 

Webmaster