Annotation of libwww/Library/src/HTFormat.c, revision 1.17

1.7       secret      1: 
1.1       timbl       2: /*             Manage different file formats                   HTFormat.c
                      3: **             =============================
                      4: **
                      5: ** Bugs:
                      6: **     Not reentrant.
                      7: **
                      8: **     Assumes the incoming stream is ASCII, rather than a local file
                      9: **     format, and so ALWAYS converts from ASCII on non-ASCII machines.
                     10: **     Therefore, non-ASCII machines can't read local files.
1.2       timbl      11: **
                     12: */
                     13: 
1.10      timbl      14: 
1.2       timbl      15: /* Implements:
1.1       timbl      16: */
1.2       timbl      17: #include "HTFormat.h"
                     18: 
                     19: PUBLIC float HTMaxSecs = 1e10;         /* No effective limit */
                     20: PUBLIC float HTMaxLength = 1e10;       /* No effective limit */
                     21: 
                     22: #ifdef unix
                     23: #ifdef NeXT
                     24: #define PRESENT_POSTSCRIPT "open %s; /bin/rm -f %s\n"
                     25: #else
                     26: #define PRESENT_POSTSCRIPT "(ghostview %s ; /bin/rm -f %s)&\n" 
                     27:        /* Full pathname would be better! */
                     28: #endif
                     29: #endif
                     30: 
1.1       timbl      31: 
                     32: #include "HTUtils.h"
                     33: #include "tcp.h"
                     34: 
                     35: #include "HTML.h"
1.12      timbl      36: #include "HTMLPDTD.h"
1.1       timbl      37: #include "HText.h"
1.2       timbl      38: #include "HTAlert.h"
                     39: #include "HTList.h"
                     40: #include "HTInit.h"
                     41: /*     Streams and structured streams which we use:
                     42: */
                     43: #include "HTFWriter.h"
                     44: #include "HTPlain.h"
                     45: #include "SGML.h"
                     46: #include "HTML.h"
                     47: #include "HTMLGen.h"
                     48: 
                     49: PUBLIC BOOL HTOutputSource = NO;       /* Flag: shortcut parser to stdout */
                     50: extern  BOOL interactive;
                     51: 
1.10      timbl      52: #ifdef ORIGINAL
1.2       timbl      53: struct _HTStream {
                     54:       CONST HTStreamClass*     isa;
                     55:       /* ... */
                     56: };
1.10      timbl      57: #endif
                     58: 
                     59: /* Stream object layout used by the NetToText stream in this file */
                     60: struct _HTStream {
                     61:        CONST HTStreamClass *           isa;     /* class (method table) of this stream */
                     62:        BOOL                    had_cr;          /* presumably: previous character was a CR -- TODO confirm against the NetToText code */
                     63:        HTStream *              sink;            /* presumably the downstream stream receiving the output -- TODO confirm */
                     64: };
1.2       timbl      65: 
                     66: 
1.17    ! luotonen   67: /*
        !            68: ** Accept-Encoding and Accept-Language: one list entry, as built by
        !            68: ** HTAcceptEncoding() and HTAcceptLanguage() in this file.
        !            69: */
        !            70: typedef struct _HTAcceptNode {
        !            71:     HTAtom *   atom;           /* atom for the encoding or language name */
        !            72:     float      quality;        /* quality factor supplied by the caller */
        !            73: } HTAcceptNode;
        !            74: 
        !            75: 
        !            76: 
        !            77: 
1.2       timbl      78: /*     Presentation methods
                     79: **     --------------------
                     80: */
                     81: 
1.14      timbl      82: PUBLIC HTList * HTConversions = NULL;
1.2       timbl      83: 
                     84: 
                     85: /*     Define a presentation system command for a content-type
                     86: **     -------------------------------------------------------
                     87: */
1.12      timbl      88: PUBLIC void HTSetPresentation ARGS6(
                     89:        HTList *,       conversions,
                     90:        CONST char *,   representation,
                     91:        CONST char *,   command,
                     92:        float,          quality,
                     93:        float,          secs, 
                     94:        float,          secs_per_byte
1.2       timbl      95: ){
                     96: 
                     97:     HTPresentation * pres = (HTPresentation *)malloc(sizeof(HTPresentation));
                     98:     if (pres == NULL) outofmem(__FILE__, "HTSetPresentation");
                     99:     
                    100:     pres->rep = HTAtom_for(representation);
                    101:     pres->rep_out = WWW_PRESENT;               /* Fixed for now ... :-) */
                    102:     pres->converter = HTSaveAndExecute;                /* Fixed for now ...     */
                    103:     pres->quality = quality;
                    104:     pres->secs = secs;
                    105:     pres->secs_per_byte = secs_per_byte;
                    106:     pres->rep = HTAtom_for(representation);
                    107:     pres->command = 0;
                    108:     StrAllocCopy(pres->command, command);
                    109:     
1.12      timbl     110: /*    if (!HTPresentations) HTPresentations = HTList_new(); */
1.2       timbl     111:     
1.15      luotonen  112: #ifdef OLD_CODE
                    113:     if (strcmp(representation, "*")==0) {
1.2       timbl     114:         if (default_presentation) free(default_presentation);
                    115:        default_presentation = pres;
1.12      timbl     116:     } else 
                    117: #endif
                    118:     HTList_addObject(conversions, pres);
1.2       timbl     119: }
                    120: 
                    121: 
                    122: /*     Define a built-in function for a content-type
                    123: **     ---------------------------------------------
                    124: */
1.12      timbl     125: PUBLIC void HTSetConversion ARGS7(
                    126:        HTList *,       conversions,
                    127:        CONST char *,   representation_in,
                    128:        CONST char *,   representation_out,
1.6       timbl     129:        HTConverter*,   converter,
1.12      timbl     130:        float,          quality,
                    131:        float,          secs, 
                    132:        float,          secs_per_byte
1.2       timbl     133: ){
1.1       timbl     134: 
1.2       timbl     135:     HTPresentation * pres = (HTPresentation *)malloc(sizeof(HTPresentation));
                    136:     if (pres == NULL) outofmem(__FILE__, "HTSetPresentation");
                    137:     
                    138:     pres->rep = HTAtom_for(representation_in);
                    139:     pres->rep_out = HTAtom_for(representation_out);
                    140:     pres->converter = converter;
                    141:     pres->command = NULL;              /* Fixed */
                    142:     pres->quality = quality;
                    143:     pres->secs = secs;
                    144:     pres->secs_per_byte = secs_per_byte;
                    145:     pres->command = 0;
                    146:     
1.12      timbl     147: /*    if (!HTPresentations) HTPresentations = HTList_new();  */
1.2       timbl     148:     
1.12      timbl     149: #ifdef OLD_CODE
1.2       timbl     150:     if (strcmp(representation_in, "*")==0) {
                    151:         if (default_presentation) free(default_presentation);
                    152:        default_presentation = pres;
1.12      timbl     153:     } else 
                    154: #endif
                    155:     HTList_addObject(conversions, pres);
1.2       timbl     156: }
1.1       timbl     157: 
                    158: 
                    159: 
1.17    ! luotonen  160: PUBLIC void HTAcceptEncoding ARGS3(HTList *,   list,
        !           161:                                   char *,      enc,
        !           162:                                   float,       quality)
        !           163: {
        !           164:     HTAcceptNode * node;
        !           165:     char * cur;
        !           166: 
        !           167:     if (!list || !enc || !*enc) return;
        !           168: 
        !           169:     for(cur=enc; *cur; cur++) *cur=TOLOWER(*cur);
        !           170: 
        !           171:     node = (HTAcceptNode*)calloc(1, sizeof(HTAcceptNode));
        !           172:     if (!node) outofmem(__FILE__, "HTAcceptEncoding");
        !           173:     HTList_addObject(list, (void*)node);
        !           174: 
        !           175:     node->atom = HTAtom_for(enc);
        !           176:     node->quality = quality;
        !           177: }
        !           178: 
        !           179: 
        !           180: PUBLIC void HTAcceptLanguage ARGS3(HTList *,   list,
        !           181:                                   char *,      lang,
        !           182:                                   float,       quality)
        !           183: {
        !           184:     HTAcceptNode * node;
        !           185: 
        !           186:     if (!list || !lang || !*lang) return;
        !           187: 
        !           188:     node = (HTAcceptNode*)calloc(1, sizeof(HTAcceptNode));
        !           189:     if (!node) outofmem(__FILE__, "HTAcceptLanguage");
        !           190: 
        !           191:     HTList_addObject(list, (void*)node);
        !           192:     node->atom = HTAtom_for(lang);
        !           193:     node->quality = quality;
        !           194: }
        !           195: 
        !           196: 
        !           197: PRIVATE BOOL wild_match ARGS2(HTAtom *,        template,
        !           198:                              HTAtom *, actual)
        !           199: {
        !           200:     char *t, *a, *st, *sa;
        !           201:     BOOL match = NO;
        !           202: 
        !           203:     if (template && actual &&
        !           204:        (t = HTAtom_name(template)) && strchr(t, '*') &&
        !           205:        (a = HTAtom_name(actual)) &&
        !           206:        (st = strchr(t, '/')) && (sa = strchr(a,'/'))) {
        !           207: 
        !           208:        *sa = 0;
        !           209:        *st = 0;
        !           210: 
        !           211:        if ((*(st-1)=='*' &&
        !           212:             (*(st+1)=='*' || !strcasecomp(st+1, sa+1))) ||
        !           213:            (*(st+1)=='*' && !strcasecomp(t,a)))
        !           214:            match = YES;
        !           215: 
        !           216:        *sa = '/';
        !           217:        *st = '/';
        !           218:     }    
        !           219:     return match;
        !           220: }
        !           221: 
        !           222: 
        !           223: PRIVATE float type_value ARGS2(HTAtom *,       content_type,
        !           224:                               HTList *,        accepted)
        !           225: {
        !           226:     HTList * cur = accepted;
        !           227:     HTPresentation * pres;
        !           228:     HTPresentation * wild = NULL;
        !           229: 
        !           230:     if (!content_type || !accepted) return -1;
        !           231: 
        !           232:     while ((pres = (HTPresentation*)HTList_nextObject(cur))) {
        !           233:        if (pres->rep == content_type)
        !           234:            return pres->quality;
        !           235:        else if (wild_match(pres->rep, content_type))
        !           236:            wild = pres;
        !           237:     }
        !           238:     if (wild) return wild->quality;
        !           239:     else return -1;
        !           240: }
        !           241: 
        !           242: 
        !           243: PRIVATE float lang_value ARGS2(HTAtom *,       language,
        !           244:                               HTList *,        accepted)
        !           245: {
        !           246:     HTList * cur = accepted;
        !           247:     HTAcceptNode * node;
        !           248:     HTAcceptNode * wild = NULL;
        !           249: 
        !           250:     if (!language || !accepted || HTList_isEmpty(accepted)) {
        !           251:        return 0.1;
        !           252:     }
        !           253: 
        !           254:     while ((node = (HTAcceptNode*)HTList_nextObject(cur))) {
        !           255:        if (node->atom == language) {
        !           256:            return node->quality;
        !           257:        }
        !           258:        else if (wild_match(node->atom, language)) {
        !           259:            wild = node;
        !           260:        }
        !           261:     }
        !           262: 
        !           263:     if (wild) {
        !           264:        return wild->quality;
        !           265:     }
        !           266:     else {
        !           267:        return 0.1;
        !           268:     }
        !           269: }
        !           270: 
        !           271: 
        !           272: PRIVATE float encoding_value ARGS2(HTAtom *,   encoding,
        !           273:                                   HTList *,    accepted)
        !           274: {
        !           275:     HTList * cur = accepted;
        !           276:     HTAcceptNode * node;
        !           277:     HTAcceptNode * wild = NULL;
        !           278:     char * e;
        !           279: 
        !           280:     if (!encoding || !accepted || HTList_isEmpty(accepted))
        !           281:        return 1;
        !           282: 
        !           283:     e = HTAtom_name(encoding);
        !           284:     if (!strcmp(e, "7bit") || !strcmp(e, "8bit") || !strcmp(e, "binary"))
        !           285:        return 1;
        !           286: 
        !           287:     while ((node = (HTAcceptNode*)HTList_nextObject(cur))) {
        !           288:        if (node->atom == encoding)
        !           289:            return node->quality;
        !           290:        else if (wild_match(node->atom, encoding))
        !           291:            wild = node;
        !           292:     }
        !           293:     if (wild) return wild->quality;
        !           294:     else return 1;
        !           295: }
        !           296: 
        !           297: 
        !           298: PUBLIC BOOL HTRank ARGS4(HTList *, possibilities,
        !           299:                         HTList *, accepted_content_types,
        !           300:                         HTList *, accepted_languages,
        !           301:                         HTList *, accepted_encodings)
        !           302: {
        !           303:     int accepted_cnt = 0;
        !           304:     HTList * accepted;
        !           305:     HTList * sorted;
        !           306:     HTList * cur;
        !           307:     HTContentDescription * d;
        !           308: 
        !           309:     if (!possibilities) return NO;
        !           310: 
        !           311:     accepted = HTList_new();
        !           312:     cur = possibilities;
        !           313:     while ((d = (HTContentDescription*)HTList_nextObject(cur))) {
        !           314:        float tv = type_value(d->content_type, accepted_content_types);
        !           315:        float lv = lang_value(d->content_language, accepted_languages);
        !           316:        float ev = encoding_value(d->content_encoding, accepted_encodings);
        !           317: 
        !           318: #ifdef ARI_DEBUG
        !           319:        CTRACE(stderr,
        !           320:               " ## FOR FILE \"%s\" (%.3f) VALUES type %.3f enc %.3f lang %.3f\n",
        !           321:               d->filename, d->quality, tv, ev, lv);
        !           322: #endif
        !           323: 
        !           324:        if (tv > 0) {
        !           325:            d->quality *= tv * lv * ev;
        !           326:            HTList_addObject(accepted, d);
        !           327:            accepted_cnt++;
        !           328:        }
        !           329:     }
        !           330: 
        !           331:     CTRACE(stderr,
        !           332:           "RANK QUALITY CONTENT-TYPE         LANGUAGE ENCODING    FILE\n");
        !           333: 
        !           334:     sorted = HTList_new();
        !           335:     while (accepted_cnt-- > 0) {
        !           336:        HTContentDescription * worst = NULL;
        !           337:        cur = accepted;
        !           338:        while ((d = (HTContentDescription*)HTList_nextObject(cur))) {
        !           339:            if (!worst || d->quality < worst->quality)
        !           340:                worst = d;
        !           341:        }
        !           342:        if (worst) {
        !           343:            CTRACE(stderr, "%d.   %.4f  %-20.20s %-8.8s %-10.10s %s\n",
        !           344:                   accepted_cnt+1,
        !           345:                   worst->quality,
        !           346:                   (worst->content_type
        !           347:                         ? HTAtom_name(worst->content_type)      : "-"),
        !           348:                   (worst->content_language
        !           349:                         ? HTAtom_name(worst->content_language)  :"-"),
        !           350:                   (worst->content_encoding
        !           351:                         ? HTAtom_name(worst->content_encoding)  :"-"),
        !           352:                   (worst->filename
        !           353:                         ? worst->filename                       :"-"));
        !           354:            HTList_removeObject(accepted, (void*)worst);
        !           355:            HTList_addObject(sorted, (void*)worst);
        !           356:        }
        !           357:     }
        !           358:     HTList_delete(accepted);
        !           359:     HTList_delete(possibilities->next);
        !           360:     possibilities->next = sorted->next;
        !           361:     sorted->next = NULL;
        !           362:     HTList_delete(sorted);
        !           363: 
        !           364:     if (!HTList_isEmpty(possibilities)) return YES;
        !           365:     else return NO;
        !           366: }
        !           367: 
        !           368: 
        !           369: 
        !           370: 
        !           371: 
1.13      timbl     372: /*                     Socket Input Buffering
                    373: **                     ----------------------
1.1       timbl     374: **
1.13      timbl     375: **     This code is used because one cannot in general open a
                    376: **     file descriptor for a socket.
                    377: **
1.1       timbl     378: **     The input file is read using the macro which can read from
1.13      timbl     379: **     a socket or a file, but this should not be used for files
                    380: **     as fopen() etc is more portable of course.
                    381: **
1.1       timbl     382: **     The input buffer size, if large will give greater efficiency and
                    383: **     release the server faster, and if small will save space on PCs etc.
                    384: */
                    385: 
                    386: 
                    387: /*     Set up the buffering
                    388: **
                    389: **     These routines are public because they are in fact needed by
                    390: **     many parsers, and on PCs and Macs we should not duplicate
                    391: **     the static buffer area.
                    392: */
1.13      timbl     393: PUBLIC HTInputSocket * HTInputSocket_new ARGS1 (int,file_number)
1.1       timbl     394: {
1.13      timbl     395:     HTInputSocket *isoc = (HTInputSocket *)malloc(sizeof(*isoc));
                    396:     if (!isoc) outofmem(__FILE__, "HTInputSocket_new");
                    397:     isoc->input_file_number = file_number;
                    398:     isoc->input_pointer = isoc->input_limit = isoc->input_buffer;
                    399:     return isoc;
1.1       timbl     400: }
                    401: 
                    402: 
1.13      timbl     403: PUBLIC char HTInputSocket_getCharacter ARGS1(HTInputSocket*, isoc)
1.1       timbl     404: {
                    405:     char ch;
                    406:     do {
1.13      timbl     407:        if (isoc-> input_pointer >= isoc->input_limit) {
1.1       timbl     408:            int status = NETREAD(
1.13      timbl     409:                   isoc->input_file_number,
                    410:                   isoc->input_buffer, INPUT_BUFFER_SIZE);
1.1       timbl     411:            if (status <= 0) {
                    412:                if (status == 0) return (char)EOF;
                    413:                if (TRACE) fprintf(stderr,
                    414:                    "HTFormat: File read error %d\n", status);
                    415:                return (char)EOF; /* -1 is returned by UCX at end of HTTP link */
                    416:            }
1.13      timbl     417:            isoc-> input_pointer = isoc->input_buffer;
                    418:            isoc->input_limit = isoc->input_buffer + status;
1.1       timbl     419:        }
1.13      timbl     420:        ch = *isoc-> input_pointer++;
1.1       timbl     421:     } while (ch == (char) 13); /* Ignore ASCII carriage return */
                    422:     
                    423:     return FROMASCII(ch);
                    424: }
                    425: 
1.17    ! luotonen  426: PUBLIC void HTInputSocket_free ARGS1(HTInputSocket *, me)
1.13      timbl     427: {
                    428:     if (me) free(me);
                    429: }
                    430: 
                    431: 
1.16      luotonen  432: PUBLIC char * HTInputSocket_getBlock ARGS2(HTInputSocket*,     isoc,
                    433:                                           int *,               len)
                    434: {
                    435:     if (isoc->input_pointer >= isoc->input_limit) {
                    436:        int status = NETREAD(isoc->input_file_number,
                    437:                             isoc->input_buffer,
                    438:                             ((*len < INPUT_BUFFER_SIZE) ?
                    439:                              *len : INPUT_BUFFER_SIZE));
                    440:        if (status <= 0) {
                    441:            isoc->input_limit = isoc->input_buffer;
                    442:            if (status < 0)
                    443:                CTRACE(stderr, "HTInputSocket: File read error %d\n", status);
                    444:            *len = 0;
                    445:            return NULL;
                    446:        }
                    447:        else {
                    448:            *len = status;
                    449:            return isoc->input_buffer;
                    450:        }
                    451:     }
                    452:     else {
                    453:        char * ret = isoc->input_pointer;
                    454:        *len = isoc->input_limit - isoc->input_pointer;
                    455:        isoc->input_pointer = isoc->input_limit;
                    456:        return ret;
                    457:     }
                    458: }
                    459: 
                    460: 
1.15      luotonen  461: PRIVATE int fill_in_buffer ARGS1(HTInputSocket *, isoc)
                    462: {
                    463:     if (isoc) {
                    464:        int status;
                    465: 
                    466:        isoc->input_pointer = isoc->input_buffer;
                    467:        status = NETREAD(isoc->input_file_number,
                    468:                         isoc->input_buffer,
                    469:                         INPUT_BUFFER_SIZE);
                    470:        if (status <= 0) {
                    471:            isoc->input_limit = isoc->input_buffer;
                    472:            if (status < 0)
                    473:                if (TRACE) fprintf(stderr,
                    474:                                   "HTInputSocket: File read error %d\n",
                    475:                                   status);
                    476:        }
                    477:        else 
                    478:            isoc->input_limit = isoc->input_buffer + status;
                    479:        return status;
                    480:     }
                    481:     return -1;
                    482: }
                    483: 
                    484: 
                    485: PRIVATE void ascii_cat ARGS3(char **,  linep,
                    486:                             char *,    start,
                    487:                             char *,    end)
                    488: {
                    489:     if (linep && start && end && start <= end) {
                    490:        char *ptr;
                    491: 
                    492:        if (*linep) {
                    493:            int len = strlen(*linep);
                    494:            *linep = (char*)realloc(*linep, len + end-start + 1);
                    495:            ptr = *linep + len;
                    496:        }
                    497:        else {
                    498:            ptr = *linep = (char*)malloc(end-start + 1);
                    499:        }
                    500: 
                    501:        while (start < end) {
                    502:            *ptr = FROMASCII(*start);
                    503:            ptr++;
                    504:            start++;
                    505:        }
                    506:        *ptr = 0;
                    507:     }
                    508: }
                    509: 
                    510: 
                    511: PRIVATE char * get_some_line ARGS2(HTInputSocket *,    isoc,
                    512:                                   BOOL,                unfold)
                    513: {
                    514:     if (!isoc)
                    515:        return NULL;
                    516:     else {
                    517:        BOOL check_unfold = NO;
                    518:        int prev_cr = 0;
                    519:        char *start = isoc->input_pointer;
                    520:        char *cur = isoc->input_pointer;
                    521:        char * line = NULL;
                    522: 
                    523:        for(;;) {
                    524:            /*
                    525:            ** Get more if needed to complete line
                    526:            */
                    527:            if (cur >= isoc->input_limit) { /* Need more data */
                    528:                ascii_cat(&line, start, cur);
                    529:                if (fill_in_buffer(isoc) <= 0)
                    530:                    return line;
                    531:                start = cur = isoc->input_pointer;
                    532:            } /* if need more data */
                    533: 
                    534:            /*
                    535:            ** Find a line feed if there is one
                    536:            */
                    537:            for(; cur < isoc->input_limit; cur++) {
                    538:                char c = FROMASCII(*cur);
                    539:                if (!c) {
                    540:                    return NULL;        /* Panic! read a 0! */
                    541:                }
                    542:                if (check_unfold  &&  c != ' '  &&  c != '\t') {
                    543:                    return line;  /* Note: didn't update isoc->input_pointer */
                    544:                }
                    545:                else {
                    546:                    check_unfold = NO;
                    547:                }
                    548: 
                    549:                if (c=='\r') {
                    550:                    prev_cr = 1;
                    551:                }
                    552:                else {
                    553:                    if (c=='\n') {              /* Found a line feed */
                    554:                        ascii_cat(&line, start, cur-prev_cr);
                    555:                        start = isoc->input_pointer = cur+1;
                    556: 
                    557:                        if (line && strlen(line) > 0 && unfold) {
                    558:                            check_unfold = YES;
                    559:                        }
                    560:                        else {
                    561:                            return line;
                    562:                        }
                    563:                    } /* if NL */
                    564:                    /* else just a regular character */
                    565:                    prev_cr = 0;
                    566:                } /* if not CR */
                    567:            } /* while characters in buffer remain */
                    568:        } /* until line read or end-of-file */
                    569:     } /* valid parameters to function */
                    570: }
                    571: 
                    572: 
                    573: PUBLIC char * HTInputSocket_getLine ARGS1(HTInputSocket *, isoc)
                    574: {
                    575:     return get_some_line(isoc, NO);
                    576: }
                    577: 
                    578: PUBLIC char * HTInputSocket_getUnfoldedLine ARGS1(HTInputSocket *, isoc)
                    579: {
                    580:     return get_some_line(isoc, YES);
                    581: }
                    582: 
                    583: 
                    584: /*
                    585: ** Read HTTP status line (if there is one).
                    586: **
                    587: ** Kludge to trap binary responses from illegal HTTP0.9 servers.
                    588: ** First look at the stub in ASCII and check if it starts "HTTP/".
                    589: **
                    590: ** Bugs: A HTTP0.9 server returning a document starting "HTTP/"
                    591: **      will be taken as a HTTP 1.0 server.  Failure.
                    592: */
                    593: #define STUB_LENGTH 20
                    594: PUBLIC char * HTInputSocket_getStatusLine ARGS1(HTInputSocket *, isoc)
                    595: {
                    596:     if (!isoc) {
                    597:        return NULL;
                    598:     }
                    599:     else {
                    600:        char buf[STUB_LENGTH + 1];
                    601:        int i;
                    602:        char server_version[STUB_LENGTH+1];
                    603:        int server_status;
                    604: 
                    605:        /*
                    606:        ** Read initial buffer
                    607:        */
                    608:        if (isoc->input_pointer >= isoc->input_limit &&
                    609:            fill_in_buffer(isoc) <= 0) {
                    610:            return NULL;
                    611:         }
                    612: 
                    613:        for (i=0; i < STUB_LENGTH; i++)
                    614:            buf[i] = FROMASCII(isoc->input_buffer[i]);
                    615:        buf[STUB_LENGTH] = 0;
                    616: 
                    617:        if (0 != strncmp(buf, "HTTP/", 5) ||
                    618:            sscanf(buf, "%20s%d", server_version, &server_status) < 2)
                    619:            return NULL;
                    620:        else
                    621:            return get_some_line(isoc, NO);
                    622:     }
                    623: }
                    624: 
                    625: 
                    626: /*
                    627: ** Do heuristic test to see if this is binary.
                    628: **
                    629: ** We check for characters above 128 in the first few bytes, and
                    630: ** if we find them we forget the html default.
                    631: ** Kludge to trap binary responses from illegal HTTP0.9 servers.
                    632: **
                    633: ** Bugs: An HTTP 0.9 server returning a binary document with
                    634: **      characters < 128 will be read as ASCII.
                    635: */
                    636: PUBLIC BOOL HTInputSocket_seemsBinary ARGS1(HTInputSocket *, isoc)
                    637: {
                    638:     if (isoc &&
                    639:        (isoc->input_pointer < isoc->input_limit ||
                    640:         fill_in_buffer(isoc) > 0)) {
                    641:        char *p = isoc->input_buffer;
                    642:        int i = STUB_LENGTH;
                    643: 
                    644:        for( ; i && p < isoc->input_limit; p++, i++)
                    645:            if (((int)*p)&128)
                    646:                return YES;
                    647:     }
                    648:     return NO;
                    649: }
                    650: 
                    651: 
                    652: 
1.1       timbl     653: /*     Stream the data to an ouput file as binary
                    654: */
1.13      timbl     655: PUBLIC int HTOutputBinary ARGS3( HTInputSocket *, isoc,
                    656:                                int,            input,
                    657:                                FILE *,         output)
1.1       timbl     658: {
                    659:     do {
                    660:            int status = NETREAD(
1.13      timbl     661:                    input, isoc->input_buffer, INPUT_BUFFER_SIZE);
1.1       timbl     662:            if (status <= 0) {
                    663:                if (status == 0) return 0;
                    664:                if (TRACE) fprintf(stderr,
                    665:                    "HTFormat: File read error %d\n", status);
                    666:                return 2;                       /* Error */
                    667:            }
1.13      timbl     668:            fwrite(isoc->input_buffer, sizeof(char), status, output);
1.1       timbl     669:     } while (YES);
                    670: }
                    671: 
                    672: 
1.17    ! luotonen  673: 
1.2       timbl     674: /*             Create a filter stack
                    675: **             ---------------------
                    676: **
1.7       secret    677: **     If a wildcard match is made, a temporary HTPresentation
1.2       timbl     678: **     structure is made to hold the destination format while the
                    679: **     new stack is generated. This is just to pass the out format to
                    680: **     MIME so far.  Storing the format of a stream in the stream might
                    681: **     be a lot neater.
1.10      timbl     682: **
                    683: **     The www/source format is special, in that if you can take
                    684: **     that you can take anything. However, we
1.2       timbl     685: */
1.12      timbl     686: PUBLIC HTStream * HTStreamStack ARGS2(
1.10      timbl     687:        HTFormat,               rep_in,
1.12      timbl     688:        HTRequest *,            request)
1.2       timbl     689: {
1.12      timbl     690:     HTFormat rep_out = request->output_format; /* Could be a param */
1.14      timbl     691:     HTList * conversion[2];
1.10      timbl     692:     HTFormat source = WWW_SOURCE;
1.14      timbl     693:     int which_list;
                    694:     HTPresentation * pres, *match, *wildcard_match=0,
                    695:                        *source_match=0, *source_wildcard_match=0;
                    696:     
1.2       timbl     697:     if (TRACE) fprintf(stderr,
                    698:        "HTFormat: Constructing stream stack for %s to %s\n",
1.10      timbl     699:        HTAtom_name(rep_in),    
1.2       timbl     700:        HTAtom_name(rep_out));
                    701:                
1.15      luotonen  702:     if (rep_out == rep_in) return request->output_stream;
1.2       timbl     703: 
1.14      timbl     704:     conversion[0] = request->conversions;
                    705:     conversion[1] = HTConversions;
1.17    ! luotonen  706: 
1.15      luotonen  707:     for(which_list = 0; which_list<2; which_list++) {
                    708:        HTList * cur = conversion[which_list];
                    709:        
                    710:        while ((pres = (HTPresentation*)HTList_nextObject(cur))) {
1.17    ! luotonen  711:            if (pres->rep == rep_in ||
        !           712:                wild_match(pres->rep, rep_in)) {
1.2       timbl     713:                if (pres->rep_out == rep_out)
1.12      timbl     714:                    return (*pres->converter)(request, pres->command,
1.15      luotonen  715:                                              rep_in, pres->rep_out,
                    716:                                              request->output_stream);
1.17    ! luotonen  717:                if (wild_match(pres->rep_out, rep_out)) {
1.10      timbl     718:                    wildcard_match = pres;
                    719:                }
                    720:            }
                    721:            if (pres->rep == source) {
                    722:                if (pres->rep_out == rep_out)
                    723:                    source_match = pres;
1.17    ! luotonen  724:                if (wild_match(pres->rep_out, rep_out)) {
1.10      timbl     725:                    source_wildcard_match = pres;
1.2       timbl     726:                }
                    727:            }
                    728:        }
                    729:     }
1.14      timbl     730:     match = wildcard_match ? wildcard_match :
                    731:            source_match ?      source_match : 
                    732:            source_wildcard_match;
                    733:     
                    734:     if (match) return (*match->converter)(
                    735:                request, match->command, rep_in, rep_out,
                    736:                request->output_stream);
1.10      timbl     737: 
1.2       timbl     738:     return NULL;
                    739: }
                    740:        
                    741: 
                    742: /*             Find the cost of a filter stack
                    743: **             -------------------------------
                    744: **
                    745: **     Must return the cost of the same stack which StreamStack would set up.
                    746: **
                    747: ** On entry,
                    748: **     length  The size of the data to be converted
                    749: */
1.12      timbl     750: PUBLIC float HTStackValue ARGS5(
1.14      timbl     751:        HTList *,               theseConversions,
1.10      timbl     752:        HTFormat,               rep_in,
1.2       timbl     753:        HTFormat,               rep_out,
                    754:        float,                  initial_value,
                    755:        long int,               length)
                    756: {
1.14      timbl     757:     int which_list;
                    758:     HTList* conversion[2];
                    759:     
1.2       timbl     760:     if (TRACE) fprintf(stderr,
                    761:        "HTFormat: Evaluating stream stack for %s worth %.3f to %s\n",
1.10      timbl     762:        HTAtom_name(rep_in),    initial_value,
1.2       timbl     763:        HTAtom_name(rep_out));
                    764:                
                    765:     if (rep_out == WWW_SOURCE ||
1.10      timbl     766:        rep_out == rep_in) return 0.0;
1.2       timbl     767: 
1.12      timbl     768:  /*   if (!HTPresentations) HTFormatInit();     set up the list */
1.2       timbl     769:     
1.14      timbl     770:     conversion[0] = theseConversions;
                    771:     conversion[1] = HTConversions;
                    772:     
                    773:     for(which_list = 0; which_list<2; which_list++)
                    774:      if (conversion[which_list]) {
1.15      luotonen  775:         HTList * cur = conversion[which_list];
1.2       timbl     776:        HTPresentation * pres;
1.15      luotonen  777:        while ((pres = (HTPresentation*)HTList_nextObject(cur))) {
                    778:            if (pres->rep == rep_in &&
1.17    ! luotonen  779:                (pres->rep_out == rep_out || wild_match(pres->rep_out, rep_out))) {
1.2       timbl     780:                float value = initial_value * pres->quality;
                    781:                if (HTMaxSecs != 0.0)
1.15      luotonen  782:                    value = value - (length*pres->secs_per_byte + pres->secs)
1.2       timbl     783:                                         /HTMaxSecs;
                    784:                return value;
                    785:            }
                    786:        }
                    787:     }
                    788:     
                    789:     return -1e30;              /* Really bad */
1.17    ! luotonen  790: }
        !           791: 
        !           792: 
1.2       timbl     793: 
1.1       timbl     794: 
1.2       timbl     795: /*     Push data from a socket down a stream
                    796: **     -------------------------------------
1.1       timbl     797: **
1.2       timbl     798: **   This routine is responsible for creating and PRESENTING any
1.1       timbl     799: **   graphic (or other) objects described by the file.
1.2       timbl     800: **
                    801: **   The file number given is assumed to be a TELNET stream ie containing
                    802: **   CRLF at the end of lines which need to be stripped to LF for unix
                    803: **   when the format is textual.
                    804: **
1.1       timbl     805: */
1.2       timbl     806: PUBLIC void HTCopy ARGS2(
                    807:        int,                    file_number,
                    808:        HTStream*,              sink)
1.1       timbl     809: {
1.2       timbl     810:     HTStreamClass targetClass;    
1.13      timbl     811:     HTInputSocket * isoc;
1.2       timbl     812:     
1.5       timbl     813: /*     Push the data down the stream
1.2       timbl     814: **
                    815: */
                    816:     targetClass = *(sink->isa);        /* Copy pointers to procedures */
1.13      timbl     817:     isoc = HTInputSocket_new(file_number);
1.2       timbl     818:     
                    819:     /* Push binary from socket down sink
1.10      timbl     820:     **
                    821:     **         This operation could be put into a main event loop
1.2       timbl     822:     */
                    823:     for(;;) {
                    824:        int status = NETREAD(
1.13      timbl     825:                file_number, isoc->input_buffer, INPUT_BUFFER_SIZE);
1.2       timbl     826:        if (status <= 0) {
                    827:            if (status == 0) break;
                    828:            if (TRACE) fprintf(stderr,
                    829:                "HTFormat: Read error, read returns %d\n", status);
                    830:            break;
                    831:        }
1.8       timbl     832:        
                    833: #ifdef NOT_ASCII
                    834:        {
                    835:            char * p;
1.13      timbl     836:            for(p = isoc->input_buffer; p < isoc->input_buffer+status; p++) {
1.8       timbl     837:                *p = FROMASCII(*p);
                    838:            }
                    839:        }
                    840: #endif
                    841: 
1.13      timbl     842:        (*targetClass.put_block)(sink, isoc->input_buffer, status);
1.2       timbl     843:     } /* next bufferload */
1.13      timbl     844:     HTInputSocket_free(isoc);
1.2       timbl     845: }
                    846: 
1.1       timbl     847: 
1.7       secret    848: 
                    849: /*     Push data from a file pointer down a stream
                    850: **     -------------------------------------
                    851: **
                    852: **   This routine is responsible for creating and PRESENTING any
                    853: **   graphic (or other) objects described by the file.
                    854: **
                    855: **
                    856: */
                    857: PUBLIC void HTFileCopy ARGS2(
                    858:        FILE *,                 fp,
                    859:        HTStream*,              sink)
                    860: {
                    861:     HTStreamClass targetClass;    
1.13      timbl     862:     char input_buffer[INPUT_BUFFER_SIZE];
1.7       secret    863:     
                    864: /*     Push the data down the stream
                    865: **
                    866: */
                    867:     targetClass = *(sink->isa);        /* Copy pointers to procedures */
                    868:     
                    869:     /* Push binary from socket down sink
                    870:     */
                    871:     for(;;) {
                    872:        int status = fread(
                    873:               input_buffer, 1, INPUT_BUFFER_SIZE, fp);
                    874:        if (status == 0) { /* EOF or error */
                    875:            if (ferror(fp) == 0) break;
                    876:            if (TRACE) fprintf(stderr,
                    877:                "HTFormat: Read error, read returns %d\n", ferror(fp));
                    878:            break;
                    879:        }
                    880:        (*targetClass.put_block)(sink, input_buffer, status);
1.13      timbl     881:     } /* next bufferload */    
1.7       secret    882: }
                    883: 
                    884: 
                    885: 
                    886: 
1.2       timbl     887: /*     Push data from a socket down a stream STRIPPING CR
                    888: **     --------------------------------------------------
                    889: **
                    890: **   This routine is responsible for creating and PRESENTING any
1.8       timbl     891: **   graphic (or other) objects described by the socket.
1.2       timbl     892: **
                    893: **   The file number given is assumed to be a TELNET stream ie containing
                    894: **   CRLF at the end of lines which need to be stripped to LF for unix
                    895: **   when the format is textual.
                    896: **
1.1       timbl     897: */
1.2       timbl     898: PUBLIC void HTCopyNoCR ARGS2(
                    899:        int,                    file_number,
                    900:        HTStream*,              sink)
                    901: {
1.13      timbl     902:     HTStreamClass targetClass;
                    903:     HTInputSocket * isoc;   
1.1       timbl     904:     
1.2       timbl     905: /*     Push the data, ignoring CRLF, down the stream
                    906: **
                    907: */
                    908:     targetClass = *(sink->isa);        /* Copy pointers to procedures */
                    909: 
                    910: /*     Push text from telnet socket down sink
                    911: **
                    912: **     @@@@@ To push strings could be faster? (especially is we
                    913: **     cheat and don't ignore CR! :-}
                    914: */  
1.13      timbl     915:     isoc = HTInputSocket_new(file_number);
1.2       timbl     916:     for(;;) {
                    917:        char character;
1.13      timbl     918:        character = HTInputSocket_getCharacter(isoc);
1.2       timbl     919:        if (character == (char)EOF) break;
                    920:        (*targetClass.put_character)(sink, character);           
                    921:     }
1.13      timbl     922:     HTInputSocket_free(isoc);
1.2       timbl     923: }
1.1       timbl     924: 
1.2       timbl     925: 
1.7       secret    926: 
1.2       timbl     927: /*     Parse a socket given format and file number
                    928: **
                    929: **   This routine is responsible for creating and PRESENTING any
                    930: **   graphic (or other) objects described by the file.
                    931: **
                    932: **   The file number given is assumed to be a TELNET stream ie containing
                    933: **   CRLF at the end of lines which need to be stripped to LF for unix
                    934: **   when the format is textual.
                    935: **
                    936: */
1.14      timbl     937: 
1.12      timbl     938: PUBLIC int HTParseSocket ARGS3(
1.10      timbl     939:        HTFormat,               rep_in,
1.2       timbl     940:        int,                    file_number,
1.12      timbl     941:        HTRequest *,            request)
1.2       timbl     942: {
                    943:     HTStream * stream;
                    944:     HTStreamClass targetClass;    
1.1       timbl     945: 
1.12      timbl     946:     stream = HTStreamStack(rep_in, request);
1.2       timbl     947:     
                    948:     if (!stream) {
                    949:         char buffer[1024];     /* @@@@@@@@ */
                    950:        sprintf(buffer, "Sorry, can't convert from %s to %s.",
1.12      timbl     951:                HTAtom_name(rep_in), HTAtom_name(request->output_format));
1.3       timbl     952:        if (TRACE) fprintf(stderr, "HTFormat: %s\n", buffer);
1.16      luotonen  953:         return HTLoadError(request, 501, buffer);
1.2       timbl     954:     }
1.1       timbl     955:     
1.3       timbl     956: /*     Push the data, ignoring CRLF if necessary, down the stream
                    957: **
1.2       timbl     958: **
1.3       timbl     959: **   @@  Bug:  This decision ought to be made based on "encoding"
1.9       timbl     960: **   rather than on format.  @@@  When we handle encoding.
1.3       timbl     961: **   The current method smells anyway.
1.2       timbl     962: */
                    963:     targetClass = *(stream->isa);      /* Copy pointers to procedures */
1.10      timbl     964:     if (rep_in == WWW_BINARY || HTOutputSource
                    965:         || strstr(HTAtom_name(rep_in), "image/")
                    966:        || strstr(HTAtom_name(rep_in), "video/")) { /* @@@@@@ */
1.2       timbl     967:         HTCopy(file_number, stream);
                    968:     } else {   /* ascii text with CRLFs :-( */
                    969:         HTCopyNoCR(file_number, stream);
                    970:     }
1.7       secret    971:     (*targetClass.free)(stream);
                    972:     
                    973:     return HT_LOADED;
                    974: }
                    975: 
                    976: 
                    977: 
                    978: /*     Parse a file given format and file pointer
                    979: **
                    980: **   This routine is responsible for creating and PRESENTING any
                    981: **   graphic (or other) objects described by the file.
                    982: **
                    983: **   The file number given is assumed to be a TELNET stream ie containing
1.10      timbl     984: **   CRLF at the end of lines which need to be stripped to \n for unix
1.7       secret    985: **   when the format is textual.
                    986: **
                    987: */
1.12      timbl     988: PUBLIC int HTParseFile ARGS3(
1.10      timbl     989:        HTFormat,               rep_in,
1.7       secret    990:        FILE *,                 fp,
1.12      timbl     991:        HTRequest *,            request)
1.7       secret    992: {
                    993:     HTStream * stream;
                    994:     HTStreamClass targetClass;    
                    995: 
1.12      timbl     996:     stream = HTStreamStack(rep_in, request);
1.7       secret    997:     
                    998:     if (!stream) {
                    999:         char buffer[1024];     /* @@@@@@@@ */
                   1000:        sprintf(buffer, "Sorry, can't convert from %s to %s.",
1.12      timbl    1001:                HTAtom_name(rep_in), HTAtom_name(request->output_format));
1.7       secret   1002:        if (TRACE) fprintf(stderr, "HTFormat(in HTParseFile): %s\n", buffer);
1.16      luotonen 1003:         return HTLoadError(request, 501, buffer);
1.7       secret   1004:     }
                   1005:     
1.9       timbl    1006: /*     Push the data down the stream
1.7       secret   1007: **
                   1008: **
                   1009: **   @@  Bug:  This decision ought to be made based on "encoding"
1.10      timbl    1010: **   rather than on content-type.  @@@  When we handle encoding.
1.7       secret   1011: **   The current method smells anyway.
                   1012: */
                   1013:     targetClass = *(stream->isa);      /* Copy pointers to procedures */
                   1014:     HTFileCopy(fp, stream);
1.2       timbl    1015:     (*targetClass.free)(stream);
1.1       timbl    1016:     
1.2       timbl    1017:     return HT_LOADED;
1.1       timbl    1018: }
1.2       timbl    1019: 
1.10      timbl    1020: 
                   1021: /*     Converter stream: Network Telnet to internal character text
                   1022: **     -----------------------------------------------------------
                   1023: **
                   1024: **     The input is assumed to be in ASCII, with lines delimited
                   1025: **     by (13,10) pairs, These pairs are converted into (CR,LF)
                   1026: **     pairs in the local representation.  The (CR,LF) sequence
                   1027: **     when found is changed to a '\n' character, the internal
                   1028: **     C representation of a new line.
                   1029: */
                   1030: 
                   1031: 
1.11      timbl    1032: PRIVATE void NetToText_put_character ARGS2(HTStream *, me, char, net_char)
1.10      timbl    1033: {
                   1034:     char c = FROMASCII(net_char);
                   1035:     if (me->had_cr) {
                   1036:         if (c==LF) {
                   1037:            me->sink->isa->put_character(me->sink, '\n');       /* Newline */
                   1038:            me->had_cr = NO;
                   1039:            return;
                   1040:         } else {
                   1041:            me->sink->isa->put_character(me->sink, CR); /* leftover */
                   1042:        }
                   1043:     }
                   1044:     me->had_cr = (c==CR);
                   1045:     if (!me->had_cr)
                   1046:        me->sink->isa->put_character(me->sink, c);              /* normal */
                   1047: }
                   1048: 
1.11      timbl    1049: PRIVATE void NetToText_put_string ARGS2(HTStream *, me, CONST char *, s)
1.10      timbl    1050: {
                   1051:     CONST char * p;
                   1052:     for(p=s; *p; p++) NetToText_put_character(me, *p);
                   1053: }
                   1054: 
1.11      timbl    1055: PRIVATE void NetToText_put_block ARGS3(HTStream *, me, CONST char*, s, int, l)
1.10      timbl    1056: {
                   1057:     CONST char * p;
                   1058:     for(p=s; p<(s+l); p++) NetToText_put_character(me, *p);
                   1059: }
                   1060: 
                   1061: PRIVATE void NetToText_free ARGS1(HTStream *, me)
                   1062: {
                   1063:     me->sink->isa->free(me->sink);             /* Close rest of pipe */
                   1064:     free(me);
                   1065: }
                   1066: 
                   1067: PRIVATE void NetToText_abort ARGS2(HTStream *, me, HTError, e)
                   1068: {
                   1069:     me->sink->isa->abort(me->sink,e);          /* Abort rest of pipe */
                   1070:     free(me);
                   1071: }
                   1072: 
                   1073: /*     The class structure
                   1074: */
                   1075: PRIVATE HTStreamClass NetToTextClass = {
                   1076:     "NetToText",
                   1077:     NetToText_free,
                   1078:     NetToText_abort,
                   1079:     NetToText_put_character,
                   1080:     NetToText_put_string,
                   1081:     NetToText_put_block
                   1082: };
                   1083: 
                   1084: /*     The creation method
                   1085: */
                   1086: PUBLIC HTStream * HTNetToText ARGS1(HTStream *, sink)
                   1087: {
                   1088:     HTStream* me = (HTStream*)malloc(sizeof(*me));
                   1089:     if (me == NULL) outofmem(__FILE__, "NetToText");
                   1090:     me->isa = &NetToTextClass;
                   1091:     
                   1092:     me->had_cr = NO;
                   1093:     me->sink = sink;
                   1094:     return me;
                   1095: }
1.2       timbl    1096: 
                   1097: 

Webmaster