Annotation of libwww/Library/src/HTFormat.c, revision 1.8

1.7       secret      1: 
1.1       timbl       2: /*             Manage different file formats                   HTFormat.c
                      3: **             =============================
                      4: **
                      5: ** Bugs:
                      6: **     Not reentrant.
                      7: **
                      8: **     Assumes the incoming stream is ASCII, rather than a local file
                      9: **     format, and so ALWAYS converts from ASCII on non-ASCII machines.
                     10: **     Therefore, non-ASCII machines can't read local files.
1.2       timbl      11: **
                     12: */
                     13: 
                     14: /* Implements:
1.1       timbl      15: */
1.2       timbl      16: #include "HTFormat.h"
                     17: 
                     18: PUBLIC float HTMaxSecs = 1e10;         /* No effective limit */
                     19: PUBLIC float HTMaxLength = 1e10;       /* No effective limit */
                     20: 
                     21: #ifdef unix
                     22: #ifdef NeXT
                     23: #define PRESENT_POSTSCRIPT "open %s; /bin/rm -f %s\n"
                     24: #else
                     25: #define PRESENT_POSTSCRIPT "(ghostview %s ; /bin/rm -f %s)&\n" 
                     26:        /* Full pathname would be better! */
                     27: #endif
                     28: #endif
                     29: 
1.1       timbl      30: 
                     31: #include "HTUtils.h"
                     32: #include "tcp.h"
                     33: 
                     34: #include "HTML.h"
1.2       timbl      35: #include "HTMLDTD.h"
1.1       timbl      36: #include "HText.h"
1.2       timbl      37: #include "HTAlert.h"
                     38: #include "HTList.h"
                     39: #include "HTInit.h"
                     40: /*     Streams and structured streams which we use:
                     41: */
                     42: #include "HTFWriter.h"
                     43: #include "HTPlain.h"
                     44: #include "SGML.h"
                     45: #include "HTML.h"
                     46: #include "HTMLGen.h"
                     47: 
                     48: PUBLIC BOOL HTOutputSource = NO;       /* Flag: shortcut parser to stdout */
                     49: extern  BOOL interactive;
                     50: 
                     51: struct _HTStream {
                     52:       CONST HTStreamClass*     isa;
                     53:       /* ... */
                     54: };
                     55: 
                     56: 
                     57: /*     Presentation methods
                     58: **     --------------------
                     59: */
                     60: 
                     61: PUBLIC  HTList * HTPresentations = 0;
                     62: PUBLIC  HTPresentation* default_presentation = 0;
                     63: 
                     64: 
                     65: /*     Define a presentation system command for a content-type
                     66: **     -------------------------------------------------------
                     67: */
                     68: PUBLIC void HTSetPresentation ARGS5(
                     69:        CONST char *, representation,
                     70:        CONST char *, command,
                     71:        float,  quality,
                     72:        float,  secs, 
                     73:        float,  secs_per_byte
                     74: ){
                     75: 
                     76:     HTPresentation * pres = (HTPresentation *)malloc(sizeof(HTPresentation));
                     77:     if (pres == NULL) outofmem(__FILE__, "HTSetPresentation");
                     78:     
                     79:     pres->rep = HTAtom_for(representation);
                     80:     pres->rep_out = WWW_PRESENT;               /* Fixed for now ... :-) */
                     81:     pres->converter = HTSaveAndExecute;                /* Fixed for now ...     */
                     82:     pres->quality = quality;
                     83:     pres->secs = secs;
                     84:     pres->secs_per_byte = secs_per_byte;
                     85:     pres->rep = HTAtom_for(representation);
                     86:     pres->command = 0;
                     87:     StrAllocCopy(pres->command, command);
                     88:     
                     89:     if (!HTPresentations) HTPresentations = HTList_new();
                     90:     
                     91:     if (strcmp(representation, "*")==0) {
                     92:         if (default_presentation) free(default_presentation);
                     93:        default_presentation = pres;
                     94:     } else {
                     95:         HTList_addObject(HTPresentations, pres);
                     96:     }
                     97: }
                     98: 
                     99: 
                    100: /*     Define a built-in function for a content-type
                    101: **     ---------------------------------------------
                    102: */
                    103: PUBLIC void HTSetConversion ARGS6(
                    104:        CONST char *, representation_in,
                    105:        CONST char *, representation_out,
1.6       timbl     106:        HTConverter*,   converter,
1.2       timbl     107:        float,  quality,
                    108:        float,  secs, 
                    109:        float,  secs_per_byte
                    110: ){
1.1       timbl     111: 
1.2       timbl     112:     HTPresentation * pres = (HTPresentation *)malloc(sizeof(HTPresentation));
                    113:     if (pres == NULL) outofmem(__FILE__, "HTSetPresentation");
                    114:     
                    115:     pres->rep = HTAtom_for(representation_in);
                    116:     pres->rep_out = HTAtom_for(representation_out);
                    117:     pres->converter = converter;
                    118:     pres->command = NULL;              /* Fixed */
                    119:     pres->quality = quality;
                    120:     pres->secs = secs;
                    121:     pres->secs_per_byte = secs_per_byte;
                    122:     pres->command = 0;
                    123:     
                    124:     if (!HTPresentations) HTPresentations = HTList_new();
                    125:     
                    126:     if (strcmp(representation_in, "*")==0) {
                    127:         if (default_presentation) free(default_presentation);
                    128:        default_presentation = pres;
                    129:     } else {
                    130:         HTList_addObject(HTPresentations, pres);
                    131:     }
                    132: }
1.1       timbl     133: 
                    134: 
                    135: 
                    136: /*     File buffering
                    137: **     --------------
                    138: **
                    139: **     The input file is read using the macro which can read from
                    140: **     a socket or a file.
                    141: **     The input buffer size, if large will give greater efficiency and
                    142: **     release the server faster, and if small will save space on PCs etc.
                    143: */
                    144: #define INPUT_BUFFER_SIZE 4096         /* Tradeoff */
                    145: PRIVATE char input_buffer[INPUT_BUFFER_SIZE];
                    146: PRIVATE char * input_pointer;
                    147: PRIVATE char * input_limit;
                    148: PRIVATE int input_file_number;
                    149: 
                    150: 
                    151: /*     Set up the buffering
                    152: **
                    153: **     These routines are public because they are in fact needed by
                    154: **     many parsers, and on PCs and Macs we should not duplicate
                    155: **     the static buffer area.
                    156: */
                    157: PUBLIC void HTInitInput ARGS1 (int,file_number)
                    158: {
                    159:     input_file_number = file_number;
                    160:     input_pointer = input_limit = input_buffer;
                    161: }
                    162: 
                    163: 
                    164: PUBLIC char HTGetChararcter NOARGS
                    165: {
                    166:     char ch;
                    167:     do {
                    168:        if (input_pointer >= input_limit) {
                    169:            int status = NETREAD(
                    170:                    input_file_number, input_buffer, INPUT_BUFFER_SIZE);
                    171:            if (status <= 0) {
                    172:                if (status == 0) return (char)EOF;
                    173:                if (TRACE) fprintf(stderr,
                    174:                    "HTFormat: File read error %d\n", status);
                    175:                return (char)EOF; /* -1 is returned by UCX at end of HTTP link */
                    176:            }
                    177:            input_pointer = input_buffer;
                    178:            input_limit = input_buffer + status;
                    179:        }
                    180:        ch = *input_pointer++;
                    181:     } while (ch == (char) 13); /* Ignore ASCII carriage return */
                    182:     
                    183:     return FROMASCII(ch);
                    184: }
                    185: 
                    186: /*     Stream the data to an ouput file as binary
                    187: */
                    188: PUBLIC int HTOutputBinary ARGS2( int,          input,
                    189:                                  FILE *,       output)
                    190: {
                    191:     do {
                    192:            int status = NETREAD(
                    193:                    input, input_buffer, INPUT_BUFFER_SIZE);
                    194:            if (status <= 0) {
                    195:                if (status == 0) return 0;
                    196:                if (TRACE) fprintf(stderr,
                    197:                    "HTFormat: File read error %d\n", status);
                    198:                return 2;                       /* Error */
                    199:            }
                    200:            fwrite(input_buffer, sizeof(char), status, output);
                    201:     } while (YES);
                    202: }
                    203: 
                    204: 
1.2       timbl     205: /*             Create a filter stack
                    206: **             ---------------------
                    207: **
1.7       secret    208: **     If a wildcard match is made, a temporary HTPresentation
1.2       timbl     209: **     structure is made to hold the destination format while the
                    210: **     new stack is generated. This is just to pass the out format to
                    211: **     MIME so far.  Storing the format of a stream in the stream might
                    212: **     be a lot neater.
                    213: */
                    214: PUBLIC HTStream * HTStreamStack ARGS4(
                    215:        HTFormat,               format_in,
                    216:        HTFormat,               rep_out,
                    217:        HTStream*,              sink,
                    218:        HTParentAnchor*,        anchor)
                    219: {
                    220:     HTAtom * wildcard = HTAtom_for("*");
                    221:     HTPresentation temp;
                    222:     if (TRACE) fprintf(stderr,
                    223:        "HTFormat: Constructing stream stack for %s to %s\n",
                    224:        HTAtom_name(format_in), 
                    225:        HTAtom_name(rep_out));
                    226:                
                    227:     if (rep_out == WWW_SOURCE ||
                    228:        rep_out == format_in) return sink;
                    229: 
                    230:     if (!HTPresentations) HTFormatInit();      /* set up the list */
                    231:     
                    232:     {
                    233:        int n = HTList_count(HTPresentations);
                    234:        int i;
                    235:        HTPresentation * pres;
                    236:        for(i=0; i<n; i++) {
                    237:            pres = HTList_objectAt(HTPresentations, i);
                    238:            if (pres->rep == format_in) {
                    239:                if (pres->rep_out == rep_out)
                    240:                    return (*pres->converter)(pres, anchor, sink);
                    241:                if (pres->rep_out == wildcard) {
                    242:                    temp = *pres;/* make temp conversion to needed fmt */
                    243:                    temp.rep_out = rep_out;             /* yuk */
                    244:                    return (*pres->converter)(&temp, anchor, sink);
                    245:                }
                    246:            }
                    247:        }
                    248:     }
                    249:     
1.3       timbl     250: #ifdef XMOSAIC_HACK
                    251:     return sink;
                    252: #else
1.2       timbl     253:     return NULL;
1.3       timbl     254: #endif
1.2       timbl     255: }
                    256:        
                    257: 
                    258: /*             Find the cost of a filter stack
                    259: **             -------------------------------
                    260: **
                    261: **     Must return the cost of the same stack which StreamStack would set up.
                    262: **
                    263: ** On entry,
                    264: **     length  The size of the data to be converted
                    265: */
                    266: PUBLIC float HTStackValue ARGS4(
                    267:        HTFormat,               format_in,
                    268:        HTFormat,               rep_out,
                    269:        float,                  initial_value,
                    270:        long int,               length)
                    271: {
                    272:     HTAtom * wildcard = HTAtom_for("*");
                    273: 
                    274:     if (TRACE) fprintf(stderr,
                    275:        "HTFormat: Evaluating stream stack for %s worth %.3f to %s\n",
                    276:        HTAtom_name(format_in), initial_value,
                    277:        HTAtom_name(rep_out));
                    278:                
                    279:     if (rep_out == WWW_SOURCE ||
                    280:        rep_out == format_in) return 0.0;
                    281: 
                    282:     if (!HTPresentations) HTFormatInit();      /* set up the list */
                    283:     
                    284:     {
                    285:        int n = HTList_count(HTPresentations);
                    286:        int i;
                    287:        HTPresentation * pres;
                    288:        for(i=0; i<n; i++) {
                    289:            pres = HTList_objectAt(HTPresentations, i);
                    290:            if (pres->rep == format_in && (
                    291:                        pres->rep_out == rep_out ||
                    292:                        pres->rep_out == wildcard)) {
                    293:                float value = initial_value * pres->quality;
                    294:                if (HTMaxSecs != 0.0)
                    295:                value = value - (length*pres->secs_per_byte + pres->secs)
                    296:                                         /HTMaxSecs;
                    297:                return value;
                    298:            }
                    299:        }
                    300:     }
                    301:     
                    302:     return -1e30;              /* Really bad */
                    303: 
                    304: }
                    305:        
1.1       timbl     306: 
1.2       timbl     307: /*     Push data from a socket down a stream
                    308: **     -------------------------------------
1.1       timbl     309: **
1.2       timbl     310: **   This routine is responsible for creating and PRESENTING any
1.1       timbl     311: **   graphic (or other) objects described by the file.
1.2       timbl     312: **
                    313: **   The file number given is assumed to be a TELNET stream ie containing
                    314: **   CRLF at the end of lines which need to be stripped to LF for unix
                    315: **   when the format is textual.
                    316: **
1.1       timbl     317: */
1.2       timbl     318: PUBLIC void HTCopy ARGS2(
                    319:        int,                    file_number,
                    320:        HTStream*,              sink)
1.1       timbl     321: {
1.2       timbl     322:     HTStreamClass targetClass;    
                    323:     
1.5       timbl     324: /*     Push the data down the stream
1.2       timbl     325: **
                    326: */
                    327:     targetClass = *(sink->isa);        /* Copy pointers to procedures */
                    328:     
                    329:     /* Push binary from socket down sink
                    330:     */
                    331:     for(;;) {
                    332:        int status = NETREAD(
                    333:                file_number, input_buffer, INPUT_BUFFER_SIZE);
                    334:        if (status <= 0) {
                    335:            if (status == 0) break;
                    336:            if (TRACE) fprintf(stderr,
                    337:                "HTFormat: Read error, read returns %d\n", status);
                    338:            break;
                    339:        }
1.8     ! timbl     340:        
        !           341: #ifdef NOT_ASCII
        !           342:        {
        !           343:            char * p;
        !           344:            for(p = input_buffer; p < input_buffer+status; p++) {
        !           345:                *p = FROMASCII(*p);
        !           346:            }
        !           347:        }
        !           348: #endif
        !           349: 
1.4       timbl     350:        (*targetClass.put_block)(sink, input_buffer, status);
1.2       timbl     351:     } /* next bufferload */
                    352:        
                    353: }
                    354: 
1.1       timbl     355: 
1.7       secret    356: 
                    357: /*     Push data from a file pointer down a stream
                    358: **     -------------------------------------
                    359: **
                    360: **   This routine is responsible for creating and PRESENTING any
                    361: **   graphic (or other) objects described by the file.
                    362: **
                    363: **
                    364: */
                    365: PUBLIC void HTFileCopy ARGS2(
                    366:        FILE *,                 fp,
                    367:        HTStream*,              sink)
                    368: {
                    369:     HTStreamClass targetClass;    
                    370:     
                    371: /*     Push the data down the stream
                    372: **
                    373: */
                    374:     targetClass = *(sink->isa);        /* Copy pointers to procedures */
                    375:     
                    376:     /* Push binary from socket down sink
                    377:     */
                    378:     for(;;) {
                    379:        int status = fread(
                    380:               input_buffer, 1, INPUT_BUFFER_SIZE, fp);
                    381:        if (status == 0) { /* EOF or error */
                    382:            if (ferror(fp) == 0) break;
                    383:            if (TRACE) fprintf(stderr,
                    384:                "HTFormat: Read error, read returns %d\n", ferror(fp));
                    385:            break;
                    386:        }
                    387:        (*targetClass.put_block)(sink, input_buffer, status);
                    388:     } /* next bufferload */
                    389:        
                    390: }
                    391: 
                    392: 
                    393: 
                    394: 
1.2       timbl     395: /*     Push data from a socket down a stream STRIPPING CR
                    396: **     --------------------------------------------------
                    397: **
                    398: **   This routine is responsible for creating and PRESENTING any
1.8     ! timbl     399: **   graphic (or other) objects described by the socket.
1.2       timbl     400: **
                    401: **   The file number given is assumed to be a TELNET stream ie containing
                    402: **   CRLF at the end of lines which need to be stripped to LF for unix
                    403: **   when the format is textual.
                    404: **
1.1       timbl     405: */
1.2       timbl     406: PUBLIC void HTCopyNoCR ARGS2(
                    407:        int,                    file_number,
                    408:        HTStream*,              sink)
                    409: {
                    410:     HTStreamClass targetClass;    
1.1       timbl     411:     
1.2       timbl     412: /*     Push the data, ignoring CRLF, down the stream
                    413: **
                    414: */
                    415:     targetClass = *(sink->isa);        /* Copy pointers to procedures */
                    416: 
                    417: /*     Push text from telnet socket down sink
                    418: **
                    419: **     @@@@@ To push strings could be faster? (especially is we
                    420: **     cheat and don't ignore CR! :-}
                    421: */  
1.1       timbl     422:     HTInitInput(file_number);
1.2       timbl     423:     for(;;) {
                    424:        char character;
                    425:        character = HTGetChararcter();
                    426:        if (character == (char)EOF) break;
                    427:        (*targetClass.put_character)(sink, character);           
                    428:     }
                    429: }
1.1       timbl     430: 
1.2       timbl     431: 
1.7       secret    432: 
1.2       timbl     433: /*     Parse a socket given format and file number
                    434: **
                    435: **   This routine is responsible for creating and PRESENTING any
                    436: **   graphic (or other) objects described by the file.
                    437: **
                    438: **   The file number given is assumed to be a TELNET stream ie containing
                    439: **   CRLF at the end of lines which need to be stripped to LF for unix
                    440: **   when the format is textual.
                    441: **
                    442: */
                    443: PUBLIC int HTParseSocket ARGS5(
                    444:        HTFormat,               format_in,
                    445:        HTFormat,               format_out,
                    446:        HTParentAnchor *,       anchor,
                    447:        int,                    file_number,
                    448:        HTStream*,              sink)
                    449: {
                    450:     HTStream * stream;
                    451:     HTStreamClass targetClass;    
1.1       timbl     452: 
1.2       timbl     453:     stream = HTStreamStack(format_in,
                    454:                        format_out,
                    455:                        sink , anchor);
                    456:     
                    457:     if (!stream) {
                    458:         char buffer[1024];     /* @@@@@@@@ */
                    459:        sprintf(buffer, "Sorry, can't convert from %s to %s.",
                    460:                HTAtom_name(format_in), HTAtom_name(format_out));
1.3       timbl     461:        if (TRACE) fprintf(stderr, "HTFormat: %s\n", buffer);
1.2       timbl     462:         return HTLoadError(sink, 501, buffer);
                    463:     }
1.1       timbl     464:     
1.3       timbl     465: /*     Push the data, ignoring CRLF if necessary, down the stream
                    466: **
1.2       timbl     467: **
1.3       timbl     468: **   @@  Bug:  This decision ought to be made based on "encoding"
                    469: **   rather than on format.  @@@  Whne we handle encoding.
                    470: **   The current method smells anyway.
1.2       timbl     471: */
                    472:     targetClass = *(stream->isa);      /* Copy pointers to procedures */
1.3       timbl     473:     if (format_in == WWW_BINARY || HTOutputSource
                    474:         || strstr(HTAtom_name(format_in), "image/")
                    475:        || strstr(HTAtom_name(format_in), "video/")) { /* @@@@@@ */
1.2       timbl     476:         HTCopy(file_number, stream);
                    477:     } else {   /* ascii text with CRLFs :-( */
                    478:         HTCopyNoCR(file_number, stream);
                    479:     }
1.7       secret    480:     (*targetClass.end_document)(stream);
                    481:     (*targetClass.free)(stream);
                    482:     
                    483:     return HT_LOADED;
                    484: }
                    485: 
                    486: 
                    487: 
                    488: /*     Parse a file given format and file pointer
                    489: **
                    490: **   This routine is responsible for creating and PRESENTING any
                    491: **   graphic (or other) objects described by the file.
                    492: **
                    493: **   The file number given is assumed to be a TELNET stream ie containing
                    494: **   CRLF at the end of lines which need to be stripped to LF for unix
                    495: **   when the format is textual.
                    496: **
                    497: */
                    498: PUBLIC int HTParseFile ARGS5(
                    499:        HTFormat,               format_in,
                    500:        HTFormat,               format_out,
                    501:        HTParentAnchor *,       anchor,
                    502:        FILE *,                 fp,
                    503:        HTStream*,              sink)
                    504: {
                    505:     HTStream * stream;
                    506:     HTStreamClass targetClass;    
                    507: 
                    508:     stream = HTStreamStack(format_in,
                    509:                        format_out,
                    510:                        sink , anchor);
                    511:     
                    512:     if (!stream) {
                    513:         char buffer[1024];     /* @@@@@@@@ */
                    514:        sprintf(buffer, "Sorry, can't convert from %s to %s.",
                    515:                HTAtom_name(format_in), HTAtom_name(format_out));
                    516:        if (TRACE) fprintf(stderr, "HTFormat(in HTParseFile): %s\n", buffer);
                    517:         return HTLoadError(sink, 501, buffer);
                    518:     }
                    519:     
                    520: /*     Push the data, ignoring CRLF if necessary, down the stream
                    521: **
                    522: **
                    523: **   @@  Bug:  This decision ought to be made based on "encoding"
                    524: **   rather than on format.  @@@  When we handle encoding.
                    525: **   The current method smells anyway.
                    526: */
                    527:     targetClass = *(stream->isa);      /* Copy pointers to procedures */
                    528:     HTFileCopy(fp, stream);
1.2       timbl     529:     (*targetClass.end_document)(stream);
                    530:     (*targetClass.free)(stream);
1.1       timbl     531:     
1.2       timbl     532:     return HT_LOADED;
1.1       timbl     533: }
1.2       timbl     534: 
                    535: 
                    536: 

Webmaster