Annotation of libwww/Library/src/HTFormat.c, revision 1.34

1.7       secret      1: 
1.1       timbl       2: /*             Manage different file formats                   HTFormat.c
                      3: **             =============================
                      4: **
                      5: ** Bugs:
                      6: **     Not reentrant.
                      7: **
                      8: **     Assumes the incoming stream is ASCII, rather than a local file
                      9: **     format, and so ALWAYS converts from ASCII on non-ASCII machines.
                     10: **     Therefore, non-ASCII machines can't read local files.
1.2       timbl      11: **
                     12: */
                     13: 
1.10      timbl      14: 
1.2       timbl      15: /* Implements:
1.1       timbl      16: */
1.2       timbl      17: #include "HTFormat.h"
                     18: 
                     19: PUBLIC float HTMaxSecs = 1e10;         /* No effective limit */
                     20: PUBLIC float HTMaxLength = 1e10;       /* No effective limit */
                     21: 
                     22: #ifdef unix
                     23: #ifdef NeXT
                     24: #define PRESENT_POSTSCRIPT "open %s; /bin/rm -f %s\n"
                     25: #else
                     26: #define PRESENT_POSTSCRIPT "(ghostview %s ; /bin/rm -f %s)&\n" 
                     27:        /* Full pathname would be better! */
                     28: #endif
                     29: #endif
                     30: 
1.1       timbl      31: 
                     32: #include "HTUtils.h"
                     33: #include "tcp.h"
                     34: 
                     35: #include "HTML.h"
1.12      timbl      36: #include "HTMLPDTD.h"
1.1       timbl      37: #include "HText.h"
1.2       timbl      38: #include "HTAlert.h"
                     39: #include "HTList.h"
                     40: #include "HTInit.h"
                     41: /*     Streams and structured streams which we use:
                     42: */
                     43: #include "HTFWriter.h"
                     44: #include "HTPlain.h"
                     45: #include "SGML.h"
                     46: #include "HTML.h"
                     47: #include "HTMLGen.h"
1.34    ! luotonen   48: #include "HTGuess.h"
        !            49: 
1.2       timbl      50: 
                     51: PUBLIC BOOL HTOutputSource = NO;       /* Flag: shortcut parser to stdout */
                     52: 
1.10      timbl      53: #ifdef ORIGINAL
1.2       timbl      54: struct _HTStream {
                     55:       CONST HTStreamClass*     isa;
                     56:       /* ... */
                     57: };
1.10      timbl      58: #endif
                     59: 
                     60: /* this version used by the NetToText stream */
                         /* NOTE(review): the field roles given below are inferred from names
                         ** and types only; nothing in this part of the file reads or writes
                         ** them -- confirm against the NetToText stream code. */
                     61: struct _HTStream {
                     62:        CONST HTStreamClass *           isa;    /* class of this stream object */
                     63:        BOOL                    had_cr; /* presumably: a CR was just seen -- confirm */
                     64:        HTStream *              sink;   /* presumably the downstream stream -- confirm */
                     65: };
1.2       timbl      66: 
                     67: 
1.17      luotonen   68: /*
                     69: ** Accept-Encoding and Accept-Language
                         **
                         ** One entry of an accepted-encodings or accepted-languages list, as
                         ** built by HTAcceptEncoding() and HTAcceptLanguage() and consulted by
                         ** encoding_value() and lang_value().
                     70: */
                     71: typedef struct _HTAcceptNode {
                     72:     HTAtom *   atom;           /* atom for the encoding or language name */
                     73:     float      quality;        /* value returned when this entry matches */
                     74: } HTAcceptNode;
                     75: 
                     76: 
                     77: 
                     78: 
1.2       timbl      79: /*     Presentation methods
                     80: **     --------------------
                     81: */
                     82: 
1.14      timbl      83: PUBLIC HTList * HTConversions = NULL;
1.2       timbl      84: 
1.31      frystyk    85: /* -------------------------------------------------------------------------
                     86:    This function replaces the code in HTRequest_delete() in order to keep
                     87:    the data structure hidden (it is NOT a joke!)
                     88:    Henrik 14/03-94
                     89:    ------------------------------------------------------------------------- */
                     90: PUBLIC void HTFormatDelete ARGS1(HTList *, me)
                     91: {
                     92:     HTList *cur = me;
                     93:     HTPresentation *pres;
                     94:     if (!me)
                     95:        return;
                     96:     while ((pres = (HTPresentation*) HTList_nextObject(cur))) {
                     97:        FREE(pres->command);                     /* Leak fixed AL 6 Feb 1994 */
                     98:        free(pres);
                     99:     }
                    100:     HTList_delete(me);                          /* Leak fixed AL 6 Feb 1994 */
                    101: }
                    102: 
1.2       timbl     103: 
                    104: /*     Define a presentation system command for a content-type
                    105: **     -------------------------------------------------------
                    106: */
1.12      timbl     107: PUBLIC void HTSetPresentation ARGS6(
                    108:        HTList *,       conversions,
                    109:        CONST char *,   representation,
                    110:        CONST char *,   command,
                    111:        float,          quality,
                    112:        float,          secs, 
                    113:        float,          secs_per_byte
1.2       timbl     114: ){
                    115: 
                    116:     HTPresentation * pres = (HTPresentation *)malloc(sizeof(HTPresentation));
                    117:     if (pres == NULL) outofmem(__FILE__, "HTSetPresentation");
                    118:     
                    119:     pres->rep = HTAtom_for(representation);
                    120:     pres->rep_out = WWW_PRESENT;               /* Fixed for now ... :-) */
                    121:     pres->converter = HTSaveAndExecute;                /* Fixed for now ...     */
                    122:     pres->quality = quality;
                    123:     pres->secs = secs;
                    124:     pres->secs_per_byte = secs_per_byte;
                    125:     pres->rep = HTAtom_for(representation);
                    126:     pres->command = 0;
                    127:     StrAllocCopy(pres->command, command);
                    128:     
1.12      timbl     129: /*    if (!HTPresentations) HTPresentations = HTList_new(); */
1.2       timbl     130:     
1.15      luotonen  131: #ifdef OLD_CODE
                    132:     if (strcmp(representation, "*")==0) {
1.2       timbl     133:         if (default_presentation) free(default_presentation);
                    134:        default_presentation = pres;
1.12      timbl     135:     } else 
                    136: #endif
                    137:     HTList_addObject(conversions, pres);
1.2       timbl     138: }
                    139: 
                    140: 
                    141: /*     Define a built-in function for a content-type
                    142: **     ---------------------------------------------
                    143: */
1.12      timbl     144: PUBLIC void HTSetConversion ARGS7(
                    145:        HTList *,       conversions,
                    146:        CONST char *,   representation_in,
                    147:        CONST char *,   representation_out,
1.6       timbl     148:        HTConverter*,   converter,
1.12      timbl     149:        float,          quality,
                    150:        float,          secs, 
                    151:        float,          secs_per_byte
1.2       timbl     152: ){
1.1       timbl     153: 
1.2       timbl     154:     HTPresentation * pres = (HTPresentation *)malloc(sizeof(HTPresentation));
                    155:     if (pres == NULL) outofmem(__FILE__, "HTSetPresentation");
                    156:     
                    157:     pres->rep = HTAtom_for(representation_in);
                    158:     pres->rep_out = HTAtom_for(representation_out);
                    159:     pres->converter = converter;
                    160:     pres->command = NULL;              /* Fixed */
                    161:     pres->quality = quality;
                    162:     pres->secs = secs;
                    163:     pres->secs_per_byte = secs_per_byte;
                    164:     pres->command = 0;
                    165:     
1.12      timbl     166: /*    if (!HTPresentations) HTPresentations = HTList_new();  */
1.2       timbl     167:     
1.12      timbl     168: #ifdef OLD_CODE
1.2       timbl     169:     if (strcmp(representation_in, "*")==0) {
                    170:         if (default_presentation) free(default_presentation);
                    171:        default_presentation = pres;
1.12      timbl     172:     } else 
                    173: #endif
                    174:     HTList_addObject(conversions, pres);
1.2       timbl     175: }
1.1       timbl     176: 
                    177: 
                    178: 
1.17      luotonen  179: PUBLIC void HTAcceptEncoding ARGS3(HTList *,   list,
                    180:                                   char *,      enc,
                    181:                                   float,       quality)
                    182: {
                    183:     HTAcceptNode * node;
                    184:     char * cur;
                    185: 
                    186:     if (!list || !enc || !*enc) return;
                    187: 
                    188:     for(cur=enc; *cur; cur++) *cur=TOLOWER(*cur);
                    189: 
                    190:     node = (HTAcceptNode*)calloc(1, sizeof(HTAcceptNode));
                    191:     if (!node) outofmem(__FILE__, "HTAcceptEncoding");
                    192:     HTList_addObject(list, (void*)node);
                    193: 
                    194:     node->atom = HTAtom_for(enc);
                    195:     node->quality = quality;
                    196: }
                    197: 
                    198: 
                    199: PUBLIC void HTAcceptLanguage ARGS3(HTList *,   list,
                    200:                                   char *,      lang,
                    201:                                   float,       quality)
                    202: {
                    203:     HTAcceptNode * node;
                    204: 
                    205:     if (!list || !lang || !*lang) return;
                    206: 
                    207:     node = (HTAcceptNode*)calloc(1, sizeof(HTAcceptNode));
                    208:     if (!node) outofmem(__FILE__, "HTAcceptLanguage");
                    209: 
                    210:     HTList_addObject(list, (void*)node);
                    211:     node->atom = HTAtom_for(lang);
                    212:     node->quality = quality;
                    213: }
                    214: 
                    215: 
                    216: PRIVATE BOOL wild_match ARGS2(HTAtom *,        template,
                    217:                              HTAtom *, actual)
                    218: {
                    219:     char *t, *a, *st, *sa;
                    220:     BOOL match = NO;
                    221: 
1.22      luotonen  222:     if (template && actual && (t = HTAtom_name(template))) {
                    223:        if (!strcmp(t, "*"))
                    224:            return YES;
1.17      luotonen  225: 
1.22      luotonen  226:        if (strchr(t, '*') &&
                    227:            (a = HTAtom_name(actual)) &&
                    228:            (st = strchr(t, '/')) && (sa = strchr(a,'/'))) {
1.17      luotonen  229: 
1.22      luotonen  230:            *sa = 0;
                    231:            *st = 0;
                    232: 
                    233:            if ((*(st-1)=='*' &&
                    234:                 (*(st+1)=='*' || !strcasecomp(st+1, sa+1))) ||
                    235:                (*(st+1)=='*' && !strcasecomp(t,a)))
                    236:                match = YES;
                    237: 
                    238:            *sa = '/';
                    239:            *st = '/';
                    240:        }    
                    241:     }
1.23      luotonen  242:     return match;
1.17      luotonen  243: }
                    244: 
                    245: 
                    246: PRIVATE float type_value ARGS2(HTAtom *,       content_type,
                    247:                               HTList *,        accepted)
                    248: {
                    249:     HTList * cur = accepted;
                    250:     HTPresentation * pres;
                    251:     HTPresentation * wild = NULL;
                    252: 
                    253:     if (!content_type || !accepted) return -1;
                    254: 
                    255:     while ((pres = (HTPresentation*)HTList_nextObject(cur))) {
                    256:        if (pres->rep == content_type)
                    257:            return pres->quality;
                    258:        else if (wild_match(pres->rep, content_type))
                    259:            wild = pres;
                    260:     }
                    261:     if (wild) return wild->quality;
                    262:     else return -1;
                    263: }
                    264: 
                    265: 
                    266: PRIVATE float lang_value ARGS2(HTAtom *,       language,
                    267:                               HTList *,        accepted)
                    268: {
                    269:     HTList * cur = accepted;
                    270:     HTAcceptNode * node;
                    271:     HTAcceptNode * wild = NULL;
                    272: 
                    273:     if (!language || !accepted || HTList_isEmpty(accepted)) {
                    274:        return 0.1;
                    275:     }
                    276: 
                    277:     while ((node = (HTAcceptNode*)HTList_nextObject(cur))) {
                    278:        if (node->atom == language) {
                    279:            return node->quality;
                    280:        }
                    281:        else if (wild_match(node->atom, language)) {
                    282:            wild = node;
                    283:        }
                    284:     }
                    285: 
                    286:     if (wild) {
                    287:        return wild->quality;
                    288:     }
                    289:     else {
                    290:        return 0.1;
                    291:     }
                    292: }
                    293: 
                    294: 
                    295: PRIVATE float encoding_value ARGS2(HTAtom *,   encoding,
                    296:                                   HTList *,    accepted)
                    297: {
                    298:     HTList * cur = accepted;
                    299:     HTAcceptNode * node;
                    300:     HTAcceptNode * wild = NULL;
                    301:     char * e;
                    302: 
                    303:     if (!encoding || !accepted || HTList_isEmpty(accepted))
                    304:        return 1;
                    305: 
                    306:     e = HTAtom_name(encoding);
                    307:     if (!strcmp(e, "7bit") || !strcmp(e, "8bit") || !strcmp(e, "binary"))
                    308:        return 1;
                    309: 
                    310:     while ((node = (HTAcceptNode*)HTList_nextObject(cur))) {
                    311:        if (node->atom == encoding)
                    312:            return node->quality;
                    313:        else if (wild_match(node->atom, encoding))
                    314:            wild = node;
                    315:     }
                    316:     if (wild) return wild->quality;
                    317:     else return 1;
                    318: }
                    319: 
                    320: 
                    321: PUBLIC BOOL HTRank ARGS4(HTList *, possibilities,
                    322:                         HTList *, accepted_content_types,
                    323:                         HTList *, accepted_languages,
                    324:                         HTList *, accepted_encodings)
                    325: {
                    326:     int accepted_cnt = 0;
                    327:     HTList * accepted;
                    328:     HTList * sorted;
                    329:     HTList * cur;
                    330:     HTContentDescription * d;
                    331: 
                    332:     if (!possibilities) return NO;
                    333: 
                    334:     accepted = HTList_new();
                    335:     cur = possibilities;
                    336:     while ((d = (HTContentDescription*)HTList_nextObject(cur))) {
                    337:        float tv = type_value(d->content_type, accepted_content_types);
                    338:        float lv = lang_value(d->content_language, accepted_languages);
                    339:        float ev = encoding_value(d->content_encoding, accepted_encodings);
                    340: 
                    341:        if (tv > 0) {
                    342:            d->quality *= tv * lv * ev;
                    343:            HTList_addObject(accepted, d);
                    344:            accepted_cnt++;
                    345:        }
1.18      luotonen  346:        else {
                    347:            if (d->filename) free(d->filename);
                    348:            free(d);
                    349:        }
1.17      luotonen  350:     }
                    351: 
1.18      luotonen  352:     CTRACE(stderr, "Ranking.....\n");
1.17      luotonen  353:     CTRACE(stderr,
1.18      luotonen  354:           "\nRANK QUALITY CONTENT-TYPE         LANGUAGE ENCODING    FILE\n");
1.17      luotonen  355: 
                    356:     sorted = HTList_new();
                    357:     while (accepted_cnt-- > 0) {
                    358:        HTContentDescription * worst = NULL;
                    359:        cur = accepted;
                    360:        while ((d = (HTContentDescription*)HTList_nextObject(cur))) {
                    361:            if (!worst || d->quality < worst->quality)
                    362:                worst = d;
                    363:        }
                    364:        if (worst) {
                    365:            CTRACE(stderr, "%d.   %.4f  %-20.20s %-8.8s %-10.10s %s\n",
                    366:                   accepted_cnt+1,
                    367:                   worst->quality,
                    368:                   (worst->content_type
                    369:                         ? HTAtom_name(worst->content_type)      : "-"),
                    370:                   (worst->content_language
                    371:                         ? HTAtom_name(worst->content_language)  :"-"),
                    372:                   (worst->content_encoding
                    373:                         ? HTAtom_name(worst->content_encoding)  :"-"),
                    374:                   (worst->filename
                    375:                         ? worst->filename                       :"-"));
                    376:            HTList_removeObject(accepted, (void*)worst);
                    377:            HTList_addObject(sorted, (void*)worst);
                    378:        }
                    379:     }
1.18      luotonen  380:     CTRACE(stderr, "\n");
1.17      luotonen  381:     HTList_delete(accepted);
                    382:     HTList_delete(possibilities->next);
                    383:     possibilities->next = sorted->next;
                    384:     sorted->next = NULL;
                    385:     HTList_delete(sorted);
                    386: 
                    387:     if (!HTList_isEmpty(possibilities)) return YES;
                    388:     else return NO;
                    389: }
                    390: 
                    391: 
                    392: 
                    393: 
                    394: 
1.13      timbl     395: /*                     Socket Input Buffering
                    396: **                     ----------------------
1.1       timbl     397: **
1.13      timbl     398: **     This code is used because one cannot in general open a
                    399: **     file descriptor for a socket.
                    400: **
1.1       timbl     401: **     The input is read using the NETREAD macro, which can read from
1.13      timbl     402: **     a socket or a file; it should not be used for plain files,
                    403: **     as fopen() etc. is more portable.
                    404: **
1.1       timbl     405: **     The input buffer size, if large will give greater efficiency and
                    406: **     release the server faster, and if small will save space on PCs etc.
                    407: */
                    408: 
                    409: 
                    410: /*     Set up the buffering
                    411: **
                    412: **     These routines are public because they are in fact needed by
                    413: **     many parsers, and on PCs and Macs we should not duplicate
                    414: **     the static buffer area.
                    415: */
1.13      timbl     416: PUBLIC HTInputSocket * HTInputSocket_new ARGS1 (int,file_number)
1.1       timbl     417: {
1.28      frystyk   418:     HTInputSocket *isoc = (HTInputSocket *)calloc(1, sizeof(*isoc));
1.13      timbl     419:     if (!isoc) outofmem(__FILE__, "HTInputSocket_new");
                    420:     isoc->input_file_number = file_number;
                    421:     isoc->input_pointer = isoc->input_limit = isoc->input_buffer;
                    422:     return isoc;
1.1       timbl     423: }
                    424: 
                    425: 
1.13      timbl     426: PUBLIC char HTInputSocket_getCharacter ARGS1(HTInputSocket*, isoc)
1.1       timbl     427: {
                    428:     char ch;
                    429:     do {
1.13      timbl     430:        if (isoc-> input_pointer >= isoc->input_limit) {
1.1       timbl     431:            int status = NETREAD(
1.13      timbl     432:                   isoc->input_file_number,
                    433:                   isoc->input_buffer, INPUT_BUFFER_SIZE);
1.1       timbl     434:            if (status <= 0) {
                    435:                if (status == 0) return (char)EOF;
                    436:                if (TRACE) fprintf(stderr,
                    437:                    "HTFormat: File read error %d\n", status);
                    438:                return (char)EOF; /* -1 is returned by UCX at end of HTTP link */
                    439:            }
1.13      timbl     440:            isoc-> input_pointer = isoc->input_buffer;
                    441:            isoc->input_limit = isoc->input_buffer + status;
1.1       timbl     442:        }
1.13      timbl     443:        ch = *isoc-> input_pointer++;
1.1       timbl     444:     } while (ch == (char) 13); /* Ignore ASCII carriage return */
                    445:     
                    446:     return FROMASCII(ch);
                    447: }
                    448: 
1.17      luotonen  449: PUBLIC void HTInputSocket_free ARGS1(HTInputSocket *, me)
1.13      timbl     450: {
                    451:     if (me) free(me);
                    452: }
                    453: 
                    454: 
1.16      luotonen  455: PUBLIC char * HTInputSocket_getBlock ARGS2(HTInputSocket*,     isoc,
                    456:                                           int *,               len)
                    457: {
                    458:     if (isoc->input_pointer >= isoc->input_limit) {
                    459:        int status = NETREAD(isoc->input_file_number,
                    460:                             isoc->input_buffer,
                    461:                             ((*len < INPUT_BUFFER_SIZE) ?
                    462:                              *len : INPUT_BUFFER_SIZE));
                    463:        if (status <= 0) {
                    464:            isoc->input_limit = isoc->input_buffer;
                    465:            if (status < 0)
                    466:                CTRACE(stderr, "HTInputSocket: File read error %d\n", status);
                    467:            *len = 0;
                    468:            return NULL;
                    469:        }
                    470:        else {
                    471:            *len = status;
                    472:            return isoc->input_buffer;
                    473:        }
                    474:     }
                    475:     else {
                    476:        char * ret = isoc->input_pointer;
                    477:        *len = isoc->input_limit - isoc->input_pointer;
                    478:        isoc->input_pointer = isoc->input_limit;
                    479:        return ret;
                    480:     }
                    481: }
                    482: 
                    483: 
1.15      luotonen  484: PRIVATE int fill_in_buffer ARGS1(HTInputSocket *, isoc)
                    485: {
                    486:     if (isoc) {
                    487:        int status;
                    488: 
                    489:        isoc->input_pointer = isoc->input_buffer;
                    490:        status = NETREAD(isoc->input_file_number,
                    491:                         isoc->input_buffer,
                    492:                         INPUT_BUFFER_SIZE);
                    493:        if (status <= 0) {
                    494:            isoc->input_limit = isoc->input_buffer;
                    495:            if (status < 0)
                    496:                if (TRACE) fprintf(stderr,
                    497:                                   "HTInputSocket: File read error %d\n",
                    498:                                   status);
                    499:        }
                    500:        else 
                    501:            isoc->input_limit = isoc->input_buffer + status;
                    502:        return status;
                    503:     }
                    504:     return -1;
                    505: }
                    506: 
                    507: 
                    508: PRIVATE void ascii_cat ARGS3(char **,  linep,
                    509:                             char *,    start,
                    510:                             char *,    end)
                    511: {
                    512:     if (linep && start && end && start <= end) {
                    513:        char *ptr;
                    514: 
                    515:        if (*linep) {
                    516:            int len = strlen(*linep);
                    517:            *linep = (char*)realloc(*linep, len + end-start + 1);
                    518:            ptr = *linep + len;
                    519:        }
                    520:        else {
                    521:            ptr = *linep = (char*)malloc(end-start + 1);
                    522:        }
                    523: 
                    524:        while (start < end) {
                    525:            *ptr = FROMASCII(*start);
                    526:            ptr++;
                    527:            start++;
                    528:        }
                    529:        *ptr = 0;
                    530:     }
                    531: }
                    532: 
                    533: 
                    534: PRIVATE char * get_some_line ARGS2(HTInputSocket *,    isoc,
                    535:                                   BOOL,                unfold)
                    536: {
                    537:     if (!isoc)
                    538:        return NULL;
                    539:     else {
                    540:        BOOL check_unfold = NO;
                    541:        int prev_cr = 0;
                    542:        char *start = isoc->input_pointer;
                    543:        char *cur = isoc->input_pointer;
                    544:        char * line = NULL;
                    545: 
                    546:        for(;;) {
                    547:            /*
                    548:            ** Get more if needed to complete line
                    549:            */
                    550:            if (cur >= isoc->input_limit) { /* Need more data */
                    551:                ascii_cat(&line, start, cur);
                    552:                if (fill_in_buffer(isoc) <= 0)
                    553:                    return line;
                    554:                start = cur = isoc->input_pointer;
                    555:            } /* if need more data */
                    556: 
                    557:            /*
                    558:            ** Find a line feed if there is one
                    559:            */
                    560:            for(; cur < isoc->input_limit; cur++) {
                    561:                char c = FROMASCII(*cur);
                    562:                if (!c) {
1.18      luotonen  563:                    if (line) free(line);       /* Leak fixed AL 6 Feb 94 */
1.15      luotonen  564:                    return NULL;        /* Panic! read a 0! */
                    565:                }
                    566:                if (check_unfold  &&  c != ' '  &&  c != '\t') {
                    567:                    return line;  /* Note: didn't update isoc->input_pointer */
                    568:                }
                    569:                else {
                    570:                    check_unfold = NO;
                    571:                }
                    572: 
                    573:                if (c=='\r') {
                    574:                    prev_cr = 1;
                    575:                }
                    576:                else {
                    577:                    if (c=='\n') {              /* Found a line feed */
                    578:                        ascii_cat(&line, start, cur-prev_cr);
                    579:                        start = isoc->input_pointer = cur+1;
                    580: 
                    581:                        if (line && strlen(line) > 0 && unfold) {
                    582:                            check_unfold = YES;
                    583:                        }
                    584:                        else {
                    585:                            return line;
                    586:                        }
                    587:                    } /* if NL */
                    588:                    /* else just a regular character */
                    589:                    prev_cr = 0;
                    590:                } /* if not CR */
                    591:            } /* while characters in buffer remain */
                    592:        } /* until line read or end-of-file */
                    593:     } /* valid parameters to function */
                    594: }
                    595: 
                    596: 
                    597: PUBLIC char * HTInputSocket_getLine ARGS1(HTInputSocket *, isoc)
                    598: {
                    599:     return get_some_line(isoc, NO);
                    600: }
                    601: 
                    602: PUBLIC char * HTInputSocket_getUnfoldedLine ARGS1(HTInputSocket *, isoc)
                    603: {   /* As HTInputSocket_getLine, but a following line that starts with
                         ** a space or tab is appended to the line being returned (unfold == YES).
                         */
                    604:     return get_some_line(isoc, YES);
                    605: }
                    606: 
                    607: 
                    608: /*
                    609: ** Read HTTP status line (if there is one).
                    610: **
                    611: ** Kludge to trap binary responses from illegal HTTP0.9 servers.
                    612: ** First look at the stub in ASCII and check if it starts "HTTP/".
                    613: **
                         ** Only the bytes actually present in the input buffer are examined
                         ** (at most STUB_LENGTH of them), so a short first read can never
                         ** cause stale buffer contents to be parsed as a status line.
                         **
                         ** Returns the line delivered by get_some_line(), or NULL if isoc is
                         ** NULL, if nothing could be read, or if the stub does not look like
                         ** "HTTP/<version> <status>".
                         **
                    614: ** Bugs: A HTTP0.9 server returning a document starting "HTTP/"
                    615: **      will be taken as a HTTP 1.0 server.  Failure.
                    616: */
                    617: #define STUB_LENGTH 20
                    618: PUBLIC char * HTInputSocket_getStatusLine ARGS1(HTInputSocket *, isoc)
                    619: {
                    620:     if (!isoc) {
                    621:        return NULL;
                    622:     }
                    623:     else {
                    624:        char buf[STUB_LENGTH + 1];
                    625:        int i;
                    626:        char server_version[STUB_LENGTH+1];
                    627:        int server_status;
                    628: 
                    629:        /*
                    630:        ** Read initial buffer
                    631:        */
                    632:        if (isoc->input_pointer >= isoc->input_limit &&
                    633:            fill_in_buffer(isoc) <= 0) {
                    634:            return NULL;
                    635:         }
                    636: 
                    637:        for (i=0; i < STUB_LENGTH && isoc->input_buffer+i < isoc->input_limit; i++)
                    638:            buf[i] = FROMASCII(isoc->input_buffer[i]);
                    639:        buf[i] = 0;             /* Terminate after the bytes really read */
                    640: 
                    641:        if (0 != strncmp(buf, "HTTP/", 5) ||
                    642:            sscanf(buf, "%20s%d", server_version, &server_status) < 2)
                    643:            return NULL;
                    644:        else
                    645:            return get_some_line(isoc, NO);
                    646:     }
                    647: }
                    648: 
                    649: 
                    650: /*
                    651: ** Do heuristic test to see if this is binary.
                    652: **
                    653: ** We check for characters above 128 in the first few bytes (at most
                    654: ** STUB_LENGTH of those in the buffer); if we find one we return YES.
                    655: ** Kludge to trap binary responses from illegal HTTP0.9 servers.
                    656: **
                    657: ** Bugs: An HTTP 0.9 server returning a binary document with
                    658: **      characters < 128 will be read as ASCII.
                    659: */
                    660: PUBLIC BOOL HTInputSocket_seemsBinary ARGS1(HTInputSocket *, isoc)
                    661: {
                    662:     if (isoc &&
                    663:        (isoc->input_pointer < isoc->input_limit ||
                    664:         fill_in_buffer(isoc) > 0)) {
                    665:        char *p = isoc->input_buffer;
                    666:        int i = STUB_LENGTH;    /* Bytes still to be examined */
                    667: 
                    668:        for( ; i && p < isoc->input_limit; p++, i--)
                    669:            if (((int)*p)&128)
                    670:                return YES;
                    671:     }
                    672:     return NO;
                    673: }
                    674: 
                    675: 
                    676: 
1.1       timbl     677: /*     Stream the data to an output file as binary
                         **
                         **     Reads from `input' through the buffer of isoc and writes every
                         **     block to `output' until end of input.
                         **
                         **     Returns 0 at end of input, 2 on a read or a write error.
                    678: */
1.13      timbl     679: PUBLIC int HTOutputBinary ARGS3( HTInputSocket *, isoc,
                    680:                                int,            input,
                    681:                                FILE *,         output)
1.1       timbl     682: {
                    683:     do {
                    684:            int status = NETREAD(
1.13      timbl     685:                    input, isoc->input_buffer, INPUT_BUFFER_SIZE);
1.1       timbl     686:            if (status <= 0) {
                    687:                if (status == 0) return 0;
                    688:                if (TRACE) fprintf(stderr,
                    689:                    "HTFormat: File read error %d\n", status);
                    690:                return 2;                       /* Error */
                    691:            }
1.13      timbl     692:            if (fwrite(isoc->input_buffer, sizeof(char), status, output)
                                        != (size_t) status) {  /* Short write: disk full etc. */
                                    if (TRACE) fprintf(stderr,
                                        "HTFormat: File write error\n");
                                    return 2;                       /* Error */
                                }
1.1       timbl     693:     } while (YES);
                    694: }
                    695: 
                    696: 
1.33      luotonen  697: PRIVATE BOOL better_match ARGS2(HTFormat, f,
                    698:                                HTFormat, g)
                    699: {
                         /* YES iff the name of f contains fewer '*' characters than the
                         ** name of g, i.e. f is the less wild of the two formats.
                         ** NO if either atom or either atom name is missing.
                         */
                    700:     CONST char *p, *q;
                    701: 
                    702:     if (f && g  &&  (p = HTAtom_name(f))  &&  (q = HTAtom_name(g))) {
                    703:        int i,j;
                    704:        for(i=0 ; *p; p++) if (*p == '*') i++;  /* i = wildcards in f */
                    705:        for(j=0 ; *q; q++) if (*q == '*') j++;  /* j = wildcards in g */
                    706:        if (i < j) return YES;
                    707:     }
                    708:     return NO;
                    709: }
                    710: 
1.17      luotonen  711: 
1.2       timbl     712: /*             Create a filter stack
                    713: **             ---------------------
                    714: **
1.7       secret    715: **     If a wildcard match is made, a temporary HTPresentation
1.2       timbl     716: **     structure is made to hold the destination format while the
                    717: **     new stack is generated. This is just to pass the out format to
                    718: **     MIME so far.  Storing the format of a stream in the stream might
                    719: **     be a lot neater.
                         **     NOTE(review): no temporary structure is created by the code
                         **     below; this paragraph looks stale -- confirm.
1.10      timbl     720: **
1.29      frystyk   721: **     The star/star format is special, in that if you can take
1.10      timbl     722: **     that you can take anything.
                         **
                         **     Both request->conversions and the global HTConversions are
                         **     searched.  A candidate replaces the best one found so far if
                         **     its input format is less wild (see better_match), or if
                         **     neither is less wild than the other and its quality is higher.
                         **
                         ** On entry,
                         **     rep_in  The format of the incoming data
                         **     request Supplies output_format, output_stream and conversions
                         **     guess   If YES and rep_in is WWW_UNKNOWN, the stream made by
                         **             HTGuess_new(request) is returned at once
                         **
                         ** On exit,
                         **     returns request->output_stream if the output format is
                         **             WWW_SOURCE or the same as rep_in; otherwise whatever
                         **             the converter of the best matching presentation
                         **             returns, or NULL if no presentation matches.
1.2       timbl     723: */
1.34    ! luotonen  724: PUBLIC HTStream * HTStreamStack ARGS3(HTFormat,                rep_in,
        !           725:                                      HTRequest *,      request,
        !           726:                                      BOOL,             guess)
1.2       timbl     727: {
1.12      timbl     728:     HTFormat rep_out = request->output_format; /* Could be a param */
1.14      timbl     729:     HTList * conversion[2];
                    730:     int which_list;
1.25      frystyk   731:     float best_quality = -1e30;                /* Pretty bad! */
1.29      frystyk   732:     HTPresentation *pres, *match, *best_match=0;
1.14      timbl     733:     
1.2       timbl     734:     if (TRACE) fprintf(stderr,
                    735:        "HTFormat: Constructing stream stack for %s to %s\n",
1.10      timbl     736:        HTAtom_name(rep_in),    
1.2       timbl     737:        HTAtom_name(rep_out));
1.34    ! luotonen  738: 
        !           739:     if (guess  &&  rep_in == WWW_UNKNOWN) {
        !           740:        CTRACE(stderr, "Returning... guessing stream\n");
        !           741:        return HTGuess_new(request);
        !           742:     }
        !           743: 
1.21      luotonen  744:     if (rep_out == WWW_SOURCE || rep_out == rep_in)
                    745:        return request->output_stream;  /* No conversion needed */
1.2       timbl     746: 
1.14      timbl     747:     conversion[0] = request->conversions;      /* Per-request list first */
                    748:     conversion[1] = HTConversions;             /* Then the global list */
1.17      luotonen  749: 
1.15      luotonen  750:     for(which_list = 0; which_list<2; which_list++) {
                    751:        HTList * cur = conversion[which_list];
                    752:        
                    753:        while ((pres = (HTPresentation*)HTList_nextObject(cur))) {
1.25      frystyk   754:            if  ((pres->rep == rep_in || wild_match(pres->rep, rep_in)) &&
1.33      luotonen  755:                 (pres->rep_out == rep_out || wild_match(pres->rep_out, rep_out))) {
                    756:                if (!best_match ||
                    757:                    better_match(pres->rep, best_match->rep) ||
                    758:                    (!better_match(best_match->rep, pres->rep) &&
                    759:                     pres->quality > best_quality)) {
1.25      frystyk   760:                    best_match = pres;
                    761:                    best_quality = pres->quality;
1.10      timbl     762:                }
                    763:            }
1.33      luotonen  764: 
1.29      frystyk   765: #ifdef OLD_CODE
                    766:            /* This case is now included in the best_match loop */
1.25      frystyk   767:            /* Special case when input format is 'www/source' */ 
1.10      timbl     768:            if (pres->rep == source) {
1.29      frystyk   769:                if (pres->rep_out == rep_out ||
                    770:                    wild_match(pres->rep_out, rep_out))
1.10      timbl     771:                    source_match = pres;
1.2       timbl     772:            }
1.29      frystyk   773: #endif
1.2       timbl     774:        }
                    775:     }
1.33      luotonen  776: 
1.29      frystyk   777:     match = best_match ? best_match : NULL;
                    778:     if (match) {
                    779:        if (match->rep == WWW_SOURCE) {         /* Only traced, not special-cased */
                    780:            if (TRACE) fprintf(stderr,
                    781:            "HTFormat: Don't know how to handle this, so put out %s to %s\n",
                    782:                               HTAtom_name(match->rep), 
                    783:                               HTAtom_name(rep_out));
                    784:        }
                    785:        return (*match->converter)(
1.25      frystyk   786:        request, match->command, rep_in, rep_out,
                    787:        request->output_stream);
1.29      frystyk   788:     }
1.2       timbl     789:     return NULL;
                    790: }
                    791:        
                    792: 
                    793: /*             Find the cost of a filter stack
                    794: **             -------------------------------
                    795: **
                    796: **     Must return the cost of the same stack which StreamStack would set up.
                         **     NOTE(review): this routine takes the FIRST presentation whose
                         **     input format is exactly rep_in, whereas HTStreamStack also
                         **     accepts wildcard input formats and picks the best match, so
                         **     the two can disagree -- confirm whether that is intended.
                    797: **
                    798: ** On entry,
                         **     theseConversions  List searched before the global HTConversions
                         **     rep_in, rep_out   Input and output formats of the conversion
                         **     initial_value     Value which is scaled by the quality found
                    799: **     length  The size of the data to be converted
                         **
                         ** On exit,
                         **     returns 0.0 if rep_out is WWW_SOURCE or the same as rep_in,
                         **             initial_value * quality, less the time penalty
                         **             (length*secs_per_byte + secs)/HTMaxSecs when HTMaxSecs
                         **             is non-zero, for the first match, or
                         **             -1e30 if nothing matches.
                    800: */
1.12      timbl     801: PUBLIC float HTStackValue ARGS5(
1.14      timbl     802:        HTList *,               theseConversions,
1.10      timbl     803:        HTFormat,               rep_in,
1.2       timbl     804:        HTFormat,               rep_out,
                    805:        float,                  initial_value,
                    806:        long int,               length)
                    807: {
1.14      timbl     808:     int which_list;
                    809:     HTList* conversion[2];
                    810:     
1.2       timbl     811:     if (TRACE) fprintf(stderr,
                    812:        "HTFormat: Evaluating stream stack for %s worth %.3f to %s\n",
1.10      timbl     813:        HTAtom_name(rep_in),    initial_value,
1.2       timbl     814:        HTAtom_name(rep_out));
                    815:                
                    816:     if (rep_out == WWW_SOURCE ||
1.10      timbl     817:        rep_out == rep_in) return 0.0;
1.2       timbl     818: 
1.12      timbl     819:  /*   if (!HTPresentations) HTFormatInit();     set up the list */
1.2       timbl     820:     
1.14      timbl     821:     conversion[0] = theseConversions;
                    822:     conversion[1] = HTConversions;
                    823:     
                    824:     for(which_list = 0; which_list<2; which_list++)
                    825:      if (conversion[which_list]) {
1.15      luotonen  826:         HTList * cur = conversion[which_list];
1.2       timbl     827:        HTPresentation * pres;
1.15      luotonen  828:        while ((pres = (HTPresentation*)HTList_nextObject(cur))) {
                    829:            if (pres->rep == rep_in &&
1.17      luotonen  830:                (pres->rep_out == rep_out || wild_match(pres->rep_out, rep_out))) {
1.2       timbl     831:                float value = initial_value * pres->quality;
                    832:                if (HTMaxSecs != 0.0)           /* Avoid dividing by zero */
1.15      luotonen  833:                    value = value - (length*pres->secs_per_byte + pres->secs)
1.2       timbl     834:                                         /HTMaxSecs;
                    835:                return value;
                    836:            }
                    837:        }
                    838:     }
                    839:     
                    840:     return -1e30;              /* Really bad */
1.17      luotonen  841: }
                    842: 
                    843: 
1.2       timbl     844: 
1.1       timbl     845: 
1.2       timbl     846: /*     Push data from a socket down a stream
                    847: **     -------------------------------------
1.1       timbl     848: **
1.2       timbl     849: **   This routine is responsible for creating and PRESENTING any
1.1       timbl     850: **   graphic (or other) objects described by the file.
1.2       timbl     851: **
                    852: **   The file number given is assumed to be a TELNET stream ie containing
                    853: **   CRLF at the end of lines which need to be stripped to LF for unix
                    854: **   when the format is textual.
                         **   Note that this routine itself passes every block on unchanged
                         **   (apart from FROMASCII when NOT_ASCII is defined); it strips nothing.
                    855: **
1.26      luotonen  856: **   RETURNS the number of bytes transferred.
                         **   A read error only ends the loop (and is traced); the count of the
                         **   bytes passed on before the error is still returned.
                    857: **
1.1       timbl     858: */
1.26      luotonen  859: PUBLIC int HTCopy ARGS2(
1.2       timbl     860:        int,                    file_number,
                    861:        HTStream*,              sink)
1.1       timbl     862: {
1.2       timbl     863:     HTStreamClass targetClass;    
1.13      timbl     864:     HTInputSocket * isoc;
1.26      luotonen  865:     int cnt = 0;
                    866: 
1.5       timbl     867: /*     Push the data down the stream
1.2       timbl     868: **
                    869: */
                    870:     targetClass = *(sink->isa);        /* Copy pointers to procedures */
1.13      timbl     871:     isoc = HTInputSocket_new(file_number);     /* Used for its buffer only */
1.2       timbl     872:     
                    873:     /* Push binary from socket down sink
1.10      timbl     874:     **
                    875:     **         This operation could be put into a main event loop
1.2       timbl     876:     */
                    877:     for(;;) {
                    878:        int status = NETREAD(
1.13      timbl     879:                file_number, isoc->input_buffer, INPUT_BUFFER_SIZE);
1.2       timbl     880:        if (status <= 0) {
                    881:            if (status == 0) break;     /* End of input */
                    882:            if (TRACE) fprintf(stderr,
1.24      luotonen  883:                "HTFormat: Read error, read returns %d with errno=%d\n",
                    884:                status, errno);
1.2       timbl     885:            break;
                    886:        }
1.26      luotonen  887: 
1.8       timbl     888: #ifdef NOT_ASCII
                    889:        {
                    890:            char * p;
1.13      timbl     891:            for(p = isoc->input_buffer; p < isoc->input_buffer+status; p++) {
1.8       timbl     892:                *p = FROMASCII(*p);
                    893:            }
                    894:        }
                    895: #endif
                    896: 
1.13      timbl     897:        (*targetClass.put_block)(sink, isoc->input_buffer, status);
1.26      luotonen  898:        cnt += status;
1.2       timbl     899:     } /* next bufferload */
1.26      luotonen  900: 
1.13      timbl     901:     HTInputSocket_free(isoc);
1.26      luotonen  902: 
                    903:     return cnt;
1.2       timbl     904: }
                    905: 
1.1       timbl     906: 
1.7       secret    907: 
                    908: /*     Push data from a file pointer down a stream
                    909: **     -------------------------------------------
                    910: **
                    911: **   This routine is responsible for creating and PRESENTING any
                    912: **   graphic (or other) objects described by the file.
                    913: **
                    914: **   Blocks of up to INPUT_BUFFER_SIZE bytes are read from fp and passed
                         **   to the put_block method of sink until fread() returns 0.  A read
                         **   error is only traced; nothing is reported to the caller.
                    915: */
                    916: PUBLIC void HTFileCopy ARGS2(
                    917:        FILE *,                 fp,
                    918:        HTStream*,              sink)
                    919: {
                    920:     HTStreamClass targetClass;    
1.13      timbl     921:     char input_buffer[INPUT_BUFFER_SIZE];
1.7       secret    922:     
                    923: /*     Push the data down the stream
                    924: **
                    925: */
                    926:     targetClass = *(sink->isa);        /* Copy pointers to procedures */
                    927:     
                    928:     /* Push binary from file down sink
                    929:     */
                    930:     for(;;) {
                    931:        int status = fread(
                    932:               input_buffer, 1, INPUT_BUFFER_SIZE, fp);
                    933:        if (status == 0) { /* EOF or error */
                    934:            if (ferror(fp) == 0) break;         /* Plain end of file */
                    935:            if (TRACE) fprintf(stderr,
                    936:                "HTFormat: Read error, read returns %d\n", ferror(fp));
                    937:            break;
                    938:        }
                    939:        (*targetClass.put_block)(sink, input_buffer, status);
1.13      timbl     940:     } /* next bufferload */    
1.7       secret    941: }
                    942: 
                    943: 
                    944: 
                    945: 
1.2       timbl     946: /*     Push data from a socket down a stream STRIPPING CR
                    947: **     --------------------------------------------------
                    948: **
                    949: **   This routine is responsible for creating and PRESENTING any
1.8       timbl     950: **   graphic (or other) objects described by the socket.
1.2       timbl     951: **
                    952: **   The file number given is assumed to be a TELNET stream ie containing
                    953: **   CRLF at the end of lines which need to be stripped to LF for unix
                    954: **   when the format is textual.
                    955: **
                         **   Every character delivered by HTInputSocket_getCharacter() is passed
                         **   to the put_character method of sink; no test for CR is made here.
                         **   NOTE(review): any stripping of CR must therefore happen in
                         **   HTInputSocket_getCharacter() or in the sink -- confirm.
                         **   NOTE(review): a data byte equal to (char)EOF also ends the copy;
                         **   check the return convention of HTInputSocket_getCharacter().
1.1       timbl     956: */
1.2       timbl     957: PUBLIC void HTCopyNoCR ARGS2(
                    958:        int,                    file_number,
                    959:        HTStream*,              sink)
                    960: {
1.13      timbl     961:     HTStreamClass targetClass;
                    962:     HTInputSocket * isoc;   
1.1       timbl     963:     
1.2       timbl     964: /*     Push the data, ignoring CRLF, down the stream
                    965: **
                    966: */
                    967:     targetClass = *(sink->isa);        /* Copy pointers to procedures */
                    968: 
                    969: /*     Push text from telnet socket down sink
                    970: **
                    971: **     @@@@@ To push strings could be faster? (especially if we
                    972: **     cheat and don't ignore CR! :-}
                    973: */  
1.13      timbl     974:     isoc = HTInputSocket_new(file_number);
1.2       timbl     975:     for(;;) {
                    976:        char character;
1.13      timbl     977:        character = HTInputSocket_getCharacter(isoc);
1.2       timbl     978:        if (character == (char)EOF) break;
                    979:        (*targetClass.put_character)(sink, character);           
                    980:     }
1.13      timbl     981:     HTInputSocket_free(isoc);
1.2       timbl     982: }
1.1       timbl     983: 
1.2       timbl     984: 
1.7       secret    985: 
1.2       timbl     986: /*     Parse a socket given format and file number
                    987: **
                    988: **   This routine is responsible for creating and PRESENTING any
                    989: **   graphic (or other) objects described by the file.
                    990: **
                    991: **   The file number given is assumed to be a TELNET stream ie containing
                    992: **   CRLF at the end of lines which need to be stripped to LF for unix
                    993: **   when the format is textual.
                    994: **
                         **   A stream stack is built with HTStreamStack() (guessing allowed),
                         **   the socket is copied into it with HTCopy() or HTCopyNoCR(), and
                         **   the stream is then freed.
                         **
                         **   Returns HT_LOADED, or the result of HTLoadError(request, 501, ...)
                         **   if no stream stack could be built.
                    995: */
1.14      timbl     996: 
1.12      timbl     997: PUBLIC int HTParseSocket ARGS3(
1.10      timbl     998:        HTFormat,               rep_in,
1.2       timbl     999:        int,                    file_number,
1.12      timbl    1000:        HTRequest *,            request)
1.2       timbl    1001: {
                   1002:     HTStream * stream;
                   1003:     HTStreamClass targetClass;    
1.1       timbl    1004: 
1.34    ! luotonen 1005:     stream = HTStreamStack(rep_in, request, YES);
1.29      frystyk  1006: 
1.2       timbl    1007:     if (!stream) {
1.30      frystyk  1008:        char buffer[1024];      /* Names are cut to 400 chars each below */
1.2       timbl    1009:        sprintf(buffer, "Sorry, can't convert from %.400s to %.400s.",
1.12      timbl    1010:                HTAtom_name(rep_in), HTAtom_name(request->output_format));
1.30      frystyk  1011:        if (TRACE) fprintf(stderr, "HTFormat(in HTParseSocket): %s\n", buffer);
1.16      luotonen 1012:         return HTLoadError(request, 501, buffer);
1.2       timbl    1013:     }
1.1       timbl    1014:     
1.3       timbl    1015: /*     Push the data, ignoring CRLF if necessary, down the stream
                   1016: **
1.2       timbl    1017: **
1.3       timbl    1018: **   @@  Bug:  This decision ought to be made based on "encoding"
1.9       timbl    1019: **   rather than on format.  @@@  When we handle encoding.
1.3       timbl    1020: **   The current method smells anyway.
1.2       timbl    1021: */
                   1022:     targetClass = *(stream->isa);      /* Copy pointers to procedures */
1.32      luotonen 1023:     if (rep_in == WWW_BINARY || rep_in == WWW_UNKNOWN || HTOutputSource
1.26      luotonen 1024:        || (request->content_encoding &&
                   1025:            request->content_encoding != HTAtom_for("8bit") &&
                   1026:            request->content_encoding != HTAtom_for("7bit"))
1.10      timbl    1027:         || strstr(HTAtom_name(rep_in), "image/")
                   1028:        || strstr(HTAtom_name(rep_in), "video/")) { /* @@@@@@ */
1.29      frystyk  1029:        HTCopy(file_number, stream);
1.2       timbl    1030:     } else {   /* ascii text with CRLFs :-( */
                   1031:         HTCopyNoCR(file_number, stream);
                   1032:     }
1.7       secret   1033:     (*targetClass.free)(stream);
                   1034:     
                   1035:     return HT_LOADED;
                   1036: }
                   1037: 
                   1038: 
                   1039: 
                   1040: /*     Parse a file given format and file pointer
                   1041: **
                   1042: **   This routine is responsible for creating and PRESENTING any
                   1043: **   graphic (or other) objects described by the file.
                   1044: **
                   1045: **   The file number given is assumed to be a TELNET stream ie containing
1.10      timbl    1046: **   CRLF at the end of lines which need to be stripped to \n for unix
1.7       secret   1047: **   when the format is textual.
                   1048: **
                         **   A stream stack is built with HTStreamStack() (guessing allowed),
                         **   the file is copied into it with HTFileCopy(), and the stream is
                         **   then freed.
                         **
                         **   Returns HT_LOADED, or the result of HTLoadError(request, 501, ...)
                         **   if no stream stack could be built.
                   1049: */
1.12      timbl    1050: PUBLIC int HTParseFile ARGS3(
1.10      timbl    1051:        HTFormat,               rep_in,
1.7       secret   1052:        FILE *,                 fp,
1.12      timbl    1053:        HTRequest *,            request)
1.7       secret   1054: {
                   1055:     HTStream * stream;
                   1056:     HTStreamClass targetClass;    
                   1057: 
1.34    ! luotonen 1058:     stream = HTStreamStack(rep_in, request, YES);
1.7       secret   1059:     
                   1060:     if (!stream) {
1.30      frystyk  1061:        char buffer[1024];      /* Names are cut to 400 chars each below */
1.7       secret   1062:        sprintf(buffer, "Sorry, can't convert from %.400s to %.400s.",
1.12      timbl    1063:                HTAtom_name(rep_in), HTAtom_name(request->output_format));
1.7       secret   1064:        if (TRACE) fprintf(stderr, "HTFormat(in HTParseFile): %s\n", buffer);
1.29      frystyk  1065:        return HTLoadError(request, 501, buffer);
1.7       secret   1066:     }
                   1067:     
1.9       timbl    1068: /*     Push the data down the stream
1.7       secret   1069: **
                   1070: **
                   1071: **   @@  Bug:  This decision ought to be made based on "encoding"
1.10      timbl    1072: **   rather than on content-type.  @@@  When we handle encoding.
1.7       secret   1073: **   The current method smells anyway.
                   1074: */
                   1075:     targetClass = *(stream->isa);      /* Copy pointers to procedures */
                   1076:     HTFileCopy(fp, stream);
1.2       timbl    1077:     (*targetClass.free)(stream);
1.1       timbl    1078:     
1.2       timbl    1079:     return HT_LOADED;
1.1       timbl    1080: }
1.2       timbl    1081: 
1.10      timbl    1082: 
                   1083: /*     Converter stream: Network Telnet to internal character text
                   1084: **     -----------------------------------------------------------
                   1085: **
                   1086: **     The input is assumed to be in ASCII, with lines delimited
                   1087: **     by (13,10) pairs, These pairs are converted into (CR,LF)
                   1088: **     pairs in the local representation.  The (CR,LF) sequence
                   1089: **     when found is changed to a '\n' character, the internal
                   1090: **     C representation of a new line.
                         **
                         **     A CR is held back (me->had_cr) until the next character is
                         **     seen: CR LF gives '\n', CR followed by anything else gives
                         **     the CR and then that character handled as usual.
                   1091: */
                   1092: 
                   1093: 
1.11      timbl    1094: PRIVATE void NetToText_put_character ARGS2(HTStream *, me, char, net_char)
1.10      timbl    1095: {
                   1096:     char c = FROMASCII(net_char);
                   1097:     if (me->had_cr) {
                   1098:         if (c==LF) {
                   1099:            me->sink->isa->put_character(me->sink, '\n');       /* Newline */
                   1100:            me->had_cr = NO;
                   1101:            return;
                   1102:         } else {
                   1103:            me->sink->isa->put_character(me->sink, CR); /* leftover */
                   1104:        }
                   1105:     }
                   1106:     me->had_cr = (c==CR);      /* Hold a CR back until the next character */
                   1107:     if (!me->had_cr)
                   1108:        me->sink->isa->put_character(me->sink, c);              /* normal */
                   1109: }
                   1110: 
1.11      timbl    1111: PRIVATE void NetToText_put_string ARGS2(HTStream *, me, CONST char *, s)
1.10      timbl    1112: {   /* Feed the NUL-terminated string s through NetToText_put_character */
                   1113:     CONST char * p;
                   1114:     for(p=s; *p; p++) NetToText_put_character(me, *p);
                   1115: }
                   1116: 
1.11      timbl    1117: PRIVATE void NetToText_put_block ARGS3(HTStream *, me, CONST char*, s, int, l)
1.10      timbl    1118: {   /* Feed the l bytes starting at s through NetToText_put_character */
                   1119:     CONST char * p;
                   1120:     for(p=s; p<(s+l); p++) NetToText_put_character(me, *p);
                   1121: }
                   1122: 
                   1123: PRIVATE void NetToText_free ARGS1(HTStream *, me)
                   1124: {   /* NOTE(review): a CR still held in me->had_cr is not passed on */
                   1125:     me->sink->isa->free(me->sink);             /* Close rest of pipe */
                   1126:     free(me);
                   1127: }
                   1128: 
                   1129: PRIVATE void NetToText_abort ARGS2(HTStream *, me, HTError, e)
                   1130: {   /* Pass the abort with error e on to the sink, then release this object */
                   1131:     me->sink->isa->abort(me->sink,e);          /* Abort rest of pipe */
                   1132:     free(me);
                   1133: }
                   1134: 
                   1135: /*     The class structure
                         **
                         **     Name and methods of the NetToText stream; HTNetToText() stores
                         **     the address of this structure in the isa field of each object.
                   1136: */
                   1137: PRIVATE HTStreamClass NetToTextClass = {
                   1138:     "NetToText",
                   1139:     NetToText_free,
                   1140:     NetToText_abort,
                   1141:     NetToText_put_character,
                   1142:     NetToText_put_string,
                   1143:     NetToText_put_block
                   1144: };
                   1145: 
                   1146: /*     The creation method
                         **
                         **     Allocates a NetToText stream which passes its converted output
                         **     to sink.  outofmem() is called if the allocation fails.
                         **     The new stream starts with no CR pending.
                   1147: */
                   1148: PUBLIC HTStream * HTNetToText ARGS1(HTStream *, sink)
                   1149: {
                   1150:     HTStream* me = (HTStream*)malloc(sizeof(*me));
                   1151:     if (me == NULL) outofmem(__FILE__, "NetToText");
                   1152:     me->isa = &NetToTextClass;
                   1153:     
                   1154:     me->had_cr = NO;
                   1155:     me->sink = sink;
                   1156:     return me;
                   1157: }
1.2       timbl    1158: 
                   1159: 

Webmaster