Annotation of libwww/Library/src/HTFormat.c, revision 1.7

1.7     ! secret      1: 
1.1       timbl       2: /*             Manage different file formats                   HTFormat.c
                      3: **             =============================
                      4: **
                      5: ** Bugs:
                      6: **     Not reentrant.
                      7: **
                      8: **     Assumes the incoming stream is ASCII, rather than a local file
                      9: **     format, and so ALWAYS converts from ASCII on non-ASCII machines.
                     10: **     Therefore, non-ASCII machines can't read local files.
1.2       timbl      11: **
                     12: */
                     13: 
                     14: /* Implements:
1.1       timbl      15: */
1.2       timbl      16: #include "HTFormat.h"
                     17: 
/*	Tuning limits
**
**	HTMaxSecs is the divisor HTStackValue applies to a conversion's
**	estimated time before subtracting it from the value.
**	HTMaxLength is not referenced anywhere in this part of the file.
*/
PUBLIC float HTMaxSecs = 1e10;         /* No effective limit */
PUBLIC float HTMaxLength = 1e10;       /* No effective limit */

/*	Command template for presenting PostScript (unix only)
**
**	Each variant takes two %s arguments: one for the viewer and one
**	for the /bin/rm that follows it -- presumably the same temporary
**	file name both times; confirm against the code that expands it.
*/
#ifdef unix
#ifdef NeXT
#define PRESENT_POSTSCRIPT "open %s; /bin/rm -f %s\n"
#else
#define PRESENT_POSTSCRIPT "(ghostview %s ; /bin/rm -f %s)&\n" 
       /* Full pathname would be better! */
#endif
#endif
                     29: 
1.1       timbl      30: 
                     31: #include "HTUtils.h"
                     32: #include "tcp.h"
                     33: 
                     34: #include "HTML.h"
1.2       timbl      35: #include "HTMLDTD.h"
1.1       timbl      36: #include "HText.h"
1.2       timbl      37: #include "HTAlert.h"
                     38: #include "HTList.h"
                     39: #include "HTInit.h"
                     40: /*     Streams and structured streams which we use:
                     41: */
                     42: #include "HTFWriter.h"
                     43: #include "HTPlain.h"
                     44: #include "SGML.h"
                     45: #include "HTML.h"
                     46: #include "HTMLGen.h"
                     47: 
/*	When set, HTParseSocket copies the socket data through unchanged
**	(via HTCopy) whatever the input format is.
*/
PUBLIC BOOL HTOutputSource = NO;       /* Flag: shortcut parser to stdout */
/*	Defined elsewhere; not referenced in this part of the file. */
extern  BOOL interactive;
                     50: 
/*	Generic view of a stream object
**
**	Only the class pointer is declared here.  The routines in this
**	file reach a stream's put_block, put_character, end_document and
**	free methods through it.
*/
struct _HTStream {
      CONST HTStreamClass*     isa;    /* Method table of the stream */
      /* ... */
};
                     55: 
                     56: 
/*     Presentation methods
**     --------------------
**
**	HTPresentations is the list searched by HTStreamStack and
**	HTStackValue; it is created on demand by HTSetPresentation and
**	HTSetConversion.  default_presentation holds the entry that was
**	registered for the representation "*".
*/

PUBLIC  HTList * HTPresentations = 0;
PUBLIC  HTPresentation* default_presentation = 0;
                     63: 
                     64: 
                     65: /*     Define a presentation system command for a content-type
                     66: **     -------------------------------------------------------
                     67: */
                     68: PUBLIC void HTSetPresentation ARGS5(
                     69:        CONST char *, representation,
                     70:        CONST char *, command,
                     71:        float,  quality,
                     72:        float,  secs, 
                     73:        float,  secs_per_byte
                     74: ){
                     75: 
                     76:     HTPresentation * pres = (HTPresentation *)malloc(sizeof(HTPresentation));
                     77:     if (pres == NULL) outofmem(__FILE__, "HTSetPresentation");
                     78:     
                     79:     pres->rep = HTAtom_for(representation);
                     80:     pres->rep_out = WWW_PRESENT;               /* Fixed for now ... :-) */
                     81:     pres->converter = HTSaveAndExecute;                /* Fixed for now ...     */
                     82:     pres->quality = quality;
                     83:     pres->secs = secs;
                     84:     pres->secs_per_byte = secs_per_byte;
                     85:     pres->rep = HTAtom_for(representation);
                     86:     pres->command = 0;
                     87:     StrAllocCopy(pres->command, command);
                     88:     
                     89:     if (!HTPresentations) HTPresentations = HTList_new();
                     90:     
                     91:     if (strcmp(representation, "*")==0) {
                     92:         if (default_presentation) free(default_presentation);
                     93:        default_presentation = pres;
                     94:     } else {
                     95:         HTList_addObject(HTPresentations, pres);
                     96:     }
                     97: }
                     98: 
                     99: 
                    100: /*     Define a built-in function for a content-type
                    101: **     ---------------------------------------------
                    102: */
                    103: PUBLIC void HTSetConversion ARGS6(
                    104:        CONST char *, representation_in,
                    105:        CONST char *, representation_out,
1.6       timbl     106:        HTConverter*,   converter,
1.2       timbl     107:        float,  quality,
                    108:        float,  secs, 
                    109:        float,  secs_per_byte
                    110: ){
1.1       timbl     111: 
1.2       timbl     112:     HTPresentation * pres = (HTPresentation *)malloc(sizeof(HTPresentation));
                    113:     if (pres == NULL) outofmem(__FILE__, "HTSetPresentation");
                    114:     
                    115:     pres->rep = HTAtom_for(representation_in);
                    116:     pres->rep_out = HTAtom_for(representation_out);
                    117:     pres->converter = converter;
                    118:     pres->command = NULL;              /* Fixed */
                    119:     pres->quality = quality;
                    120:     pres->secs = secs;
                    121:     pres->secs_per_byte = secs_per_byte;
                    122:     pres->command = 0;
                    123:     
                    124:     if (!HTPresentations) HTPresentations = HTList_new();
                    125:     
                    126:     if (strcmp(representation_in, "*")==0) {
                    127:         if (default_presentation) free(default_presentation);
                    128:        default_presentation = pres;
                    129:     } else {
                    130:         HTList_addObject(HTPresentations, pres);
                    131:     }
                    132: }
1.1       timbl     133: 
                    134: 
                    135: 
/*     File buffering
**     --------------
**
**     The input file is read using the macro which can read from
**     a socket or a file.
**     The input buffer size, if large will give greater efficiency and
**     release the server faster, and if small will save space on PCs etc.
**
**     The buffer is a single static area shared by HTGetChararcter,
**     HTOutputBinary, HTCopy and HTFileCopy, so only one of them can
**     be in progress at a time.
*/
#define INPUT_BUFFER_SIZE 4096         /* Tradeoff */
PRIVATE char input_buffer[INPUT_BUFFER_SIZE];
PRIVATE char * input_pointer;          /* Next unread character */
PRIVATE char * input_limit;            /* One past the last valid character */
PRIVATE int input_file_number;         /* Descriptor used by HTGetChararcter */
                    149: 
                    150: 
                    151: /*     Set up the buffering
                    152: **
                    153: **     These routines are public because they are in fact needed by
                    154: **     many parsers, and on PCs and Macs we should not duplicate
                    155: **     the static buffer area.
                    156: */
                    157: PUBLIC void HTInitInput ARGS1 (int,file_number)
                    158: {
                    159:     input_file_number = file_number;
                    160:     input_pointer = input_limit = input_buffer;
                    161: }
                    162: 
                    163: 
                    164: PUBLIC char HTGetChararcter NOARGS
                    165: {
                    166:     char ch;
                    167:     do {
                    168:        if (input_pointer >= input_limit) {
                    169:            int status = NETREAD(
                    170:                    input_file_number, input_buffer, INPUT_BUFFER_SIZE);
                    171:            if (status <= 0) {
                    172:                if (status == 0) return (char)EOF;
                    173:                if (TRACE) fprintf(stderr,
                    174:                    "HTFormat: File read error %d\n", status);
                    175:                return (char)EOF; /* -1 is returned by UCX at end of HTTP link */
                    176:            }
                    177:            input_pointer = input_buffer;
                    178:            input_limit = input_buffer + status;
                    179:        }
                    180:        ch = *input_pointer++;
                    181:     } while (ch == (char) 13); /* Ignore ASCII carriage return */
                    182:     
                    183:     return FROMASCII(ch);
                    184: }
                    185: 
                    186: /*     Stream the data to an ouput file as binary
                    187: */
                    188: PUBLIC int HTOutputBinary ARGS2( int,          input,
                    189:                                  FILE *,       output)
                    190: {
                    191:     do {
                    192:            int status = NETREAD(
                    193:                    input, input_buffer, INPUT_BUFFER_SIZE);
                    194:            if (status <= 0) {
                    195:                if (status == 0) return 0;
                    196:                if (TRACE) fprintf(stderr,
                    197:                    "HTFormat: File read error %d\n", status);
                    198:                return 2;                       /* Error */
                    199:            }
                    200:            fwrite(input_buffer, sizeof(char), status, output);
                    201:     } while (YES);
                    202: }
                    203: 
                    204: 
1.2       timbl     205: /*             Create a filter stack
                    206: **             ---------------------
                    207: **
1.7     ! secret    208: **     If a wildcard match is made, a temporary HTPresentation
1.2       timbl     209: **     structure is made to hold the destination format while the
                    210: **     new stack is generated. This is just to pass the out format to
                    211: **     MIME so far.  Storing the format of a stream in the stream might
                    212: **     be a lot neater.
                    213: */
                    214: PUBLIC HTStream * HTStreamStack ARGS4(
                    215:        HTFormat,               format_in,
                    216:        HTFormat,               rep_out,
                    217:        HTStream*,              sink,
                    218:        HTParentAnchor*,        anchor)
                    219: {
                    220:     HTAtom * wildcard = HTAtom_for("*");
                    221:     HTPresentation temp;
                    222:     if (TRACE) fprintf(stderr,
                    223:        "HTFormat: Constructing stream stack for %s to %s\n",
                    224:        HTAtom_name(format_in), 
                    225:        HTAtom_name(rep_out));
                    226:                
                    227:     if (rep_out == WWW_SOURCE ||
                    228:        rep_out == format_in) return sink;
                    229: 
                    230:     if (!HTPresentations) HTFormatInit();      /* set up the list */
                    231:     
                    232:     {
                    233:        int n = HTList_count(HTPresentations);
                    234:        int i;
                    235:        HTPresentation * pres;
                    236:        for(i=0; i<n; i++) {
                    237:            pres = HTList_objectAt(HTPresentations, i);
                    238:            if (pres->rep == format_in) {
                    239:                if (pres->rep_out == rep_out)
                    240:                    return (*pres->converter)(pres, anchor, sink);
                    241:                if (pres->rep_out == wildcard) {
                    242:                    temp = *pres;/* make temp conversion to needed fmt */
                    243:                    temp.rep_out = rep_out;             /* yuk */
                    244:                    return (*pres->converter)(&temp, anchor, sink);
                    245:                }
                    246:            }
                    247:        }
                    248:     }
                    249:     
1.3       timbl     250: #ifdef XMOSAIC_HACK
                    251:     return sink;
                    252: #else
1.2       timbl     253:     return NULL;
1.3       timbl     254: #endif
1.2       timbl     255: }
                    256:        
                    257: 
                    258: /*             Find the cost of a filter stack
                    259: **             -------------------------------
                    260: **
                    261: **     Must return the cost of the same stack which StreamStack would set up.
                    262: **
                    263: ** On entry,
                    264: **     length  The size of the data to be converted
                    265: */
                    266: PUBLIC float HTStackValue ARGS4(
                    267:        HTFormat,               format_in,
                    268:        HTFormat,               rep_out,
                    269:        float,                  initial_value,
                    270:        long int,               length)
                    271: {
                    272:     HTAtom * wildcard = HTAtom_for("*");
                    273: 
                    274:     if (TRACE) fprintf(stderr,
                    275:        "HTFormat: Evaluating stream stack for %s worth %.3f to %s\n",
                    276:        HTAtom_name(format_in), initial_value,
                    277:        HTAtom_name(rep_out));
                    278:                
                    279:     if (rep_out == WWW_SOURCE ||
                    280:        rep_out == format_in) return 0.0;
                    281: 
                    282:     if (!HTPresentations) HTFormatInit();      /* set up the list */
                    283:     
                    284:     {
                    285:        int n = HTList_count(HTPresentations);
                    286:        int i;
                    287:        HTPresentation * pres;
                    288:        for(i=0; i<n; i++) {
                    289:            pres = HTList_objectAt(HTPresentations, i);
                    290:            if (pres->rep == format_in && (
                    291:                        pres->rep_out == rep_out ||
                    292:                        pres->rep_out == wildcard)) {
                    293:                float value = initial_value * pres->quality;
                    294:                if (HTMaxSecs != 0.0)
                    295:                value = value - (length*pres->secs_per_byte + pres->secs)
                    296:                                         /HTMaxSecs;
                    297:                return value;
                    298:            }
                    299:        }
                    300:     }
                    301:     
                    302:     return -1e30;              /* Really bad */
                    303: 
                    304: }
                    305:        
1.1       timbl     306: 
1.2       timbl     307: /*     Push data from a socket down a stream
                    308: **     -------------------------------------
1.1       timbl     309: **
1.2       timbl     310: **   This routine is responsible for creating and PRESENTING any
1.1       timbl     311: **   graphic (or other) objects described by the file.
1.2       timbl     312: **
                    313: **   The file number given is assumed to be a TELNET stream ie containing
                    314: **   CRLF at the end of lines which need to be stripped to LF for unix
                    315: **   when the format is textual.
                    316: **
1.1       timbl     317: */
1.2       timbl     318: PUBLIC void HTCopy ARGS2(
                    319:        int,                    file_number,
                    320:        HTStream*,              sink)
1.1       timbl     321: {
1.2       timbl     322:     HTStreamClass targetClass;    
                    323:     
1.5       timbl     324: /*     Push the data down the stream
1.2       timbl     325: **
                    326: */
                    327:     targetClass = *(sink->isa);        /* Copy pointers to procedures */
                    328:     
                    329:     /* Push binary from socket down sink
                    330:     */
                    331:     for(;;) {
                    332:        int status = NETREAD(
                    333:                file_number, input_buffer, INPUT_BUFFER_SIZE);
                    334:        if (status <= 0) {
                    335:            if (status == 0) break;
                    336:            if (TRACE) fprintf(stderr,
                    337:                "HTFormat: Read error, read returns %d\n", status);
                    338:            break;
                    339:        }
1.4       timbl     340:        (*targetClass.put_block)(sink, input_buffer, status);
1.2       timbl     341:     } /* next bufferload */
                    342:        
                    343: }
                    344: 
1.1       timbl     345: 
1.7     ! secret    346: 
        !           347: /*     Push data from a file pointer down a stream
        !           348: **     -------------------------------------
        !           349: **
        !           350: **   This routine is responsible for creating and PRESENTING any
        !           351: **   graphic (or other) objects described by the file.
        !           352: **
        !           353: **
        !           354: */
        !           355: PUBLIC void HTFileCopy ARGS2(
        !           356:        FILE *,                 fp,
        !           357:        HTStream*,              sink)
        !           358: {
        !           359:     HTStreamClass targetClass;    
        !           360:     
        !           361: /*     Push the data down the stream
        !           362: **
        !           363: */
        !           364:     targetClass = *(sink->isa);        /* Copy pointers to procedures */
        !           365:     
        !           366:     /* Push binary from socket down sink
        !           367:     */
        !           368:     for(;;) {
        !           369:        int status = fread(
        !           370:               input_buffer, 1, INPUT_BUFFER_SIZE, fp);
        !           371:        if (status == 0) { /* EOF or error */
        !           372:            if (ferror(fp) == 0) break;
        !           373:            if (TRACE) fprintf(stderr,
        !           374:                "HTFormat: Read error, read returns %d\n", ferror(fp));
        !           375:            break;
        !           376:        }
        !           377:        (*targetClass.put_block)(sink, input_buffer, status);
        !           378:     } /* next bufferload */
        !           379:        
        !           380: }
        !           381: 
        !           382: 
        !           383: 
        !           384: 
1.2       timbl     385: /*     Push data from a socket down a stream STRIPPING CR
                    386: **     --------------------------------------------------
                    387: **
                    388: **   This routine is responsible for creating and PRESENTING any
                    389: **   graphic (or other) objects described by the file.
                    390: **
                    391: **   The file number given is assumed to be a TELNET stream ie containing
                    392: **   CRLF at the end of lines which need to be stripped to LF for unix
                    393: **   when the format is textual.
                    394: **
1.1       timbl     395: */
1.2       timbl     396: PUBLIC void HTCopyNoCR ARGS2(
                    397:        int,                    file_number,
                    398:        HTStream*,              sink)
                    399: {
                    400:     HTStreamClass targetClass;    
1.1       timbl     401:     
1.2       timbl     402: /*     Push the data, ignoring CRLF, down the stream
                    403: **
                    404: */
                    405:     targetClass = *(sink->isa);        /* Copy pointers to procedures */
                    406: 
                    407: /*     Push text from telnet socket down sink
                    408: **
                    409: **     @@@@@ To push strings could be faster? (especially is we
                    410: **     cheat and don't ignore CR! :-}
                    411: */  
1.1       timbl     412:     HTInitInput(file_number);
1.2       timbl     413:     for(;;) {
                    414:        char character;
                    415:        character = HTGetChararcter();
                    416:        if (character == (char)EOF) break;
                    417:        (*targetClass.put_character)(sink, character);           
                    418:     }
                    419: }
1.1       timbl     420: 
1.2       timbl     421: 
1.7     ! secret    422: 
1.2       timbl     423: /*     Parse a socket given format and file number
                    424: **
                    425: **   This routine is responsible for creating and PRESENTING any
                    426: **   graphic (or other) objects described by the file.
                    427: **
                    428: **   The file number given is assumed to be a TELNET stream ie containing
                    429: **   CRLF at the end of lines which need to be stripped to LF for unix
                    430: **   when the format is textual.
                    431: **
                    432: */
                    433: PUBLIC int HTParseSocket ARGS5(
                    434:        HTFormat,               format_in,
                    435:        HTFormat,               format_out,
                    436:        HTParentAnchor *,       anchor,
                    437:        int,                    file_number,
                    438:        HTStream*,              sink)
                    439: {
                    440:     HTStream * stream;
                    441:     HTStreamClass targetClass;    
1.1       timbl     442: 
1.2       timbl     443:     stream = HTStreamStack(format_in,
                    444:                        format_out,
                    445:                        sink , anchor);
                    446:     
                    447:     if (!stream) {
                    448:         char buffer[1024];     /* @@@@@@@@ */
                    449:        sprintf(buffer, "Sorry, can't convert from %s to %s.",
                    450:                HTAtom_name(format_in), HTAtom_name(format_out));
1.3       timbl     451:        if (TRACE) fprintf(stderr, "HTFormat: %s\n", buffer);
1.2       timbl     452:         return HTLoadError(sink, 501, buffer);
                    453:     }
1.1       timbl     454:     
1.3       timbl     455: /*     Push the data, ignoring CRLF if necessary, down the stream
                    456: **
1.2       timbl     457: **
1.3       timbl     458: **   @@  Bug:  This decision ought to be made based on "encoding"
                    459: **   rather than on format.  @@@  Whne we handle encoding.
                    460: **   The current method smells anyway.
1.2       timbl     461: */
                    462:     targetClass = *(stream->isa);      /* Copy pointers to procedures */
1.3       timbl     463:     if (format_in == WWW_BINARY || HTOutputSource
                    464:         || strstr(HTAtom_name(format_in), "image/")
                    465:        || strstr(HTAtom_name(format_in), "video/")) { /* @@@@@@ */
1.2       timbl     466:         HTCopy(file_number, stream);
                    467:     } else {   /* ascii text with CRLFs :-( */
                    468:         HTCopyNoCR(file_number, stream);
                    469:     }
1.7     ! secret    470:     (*targetClass.end_document)(stream);
        !           471:     (*targetClass.free)(stream);
        !           472:     
        !           473:     return HT_LOADED;
        !           474: }
        !           475: 
        !           476: 
        !           477: 
        !           478: /*     Parse a file given format and file pointer
        !           479: **
        !           480: **   This routine is responsible for creating and PRESENTING any
        !           481: **   graphic (or other) objects described by the file.
        !           482: **
        !           483: **   The file number given is assumed to be a TELNET stream ie containing
        !           484: **   CRLF at the end of lines which need to be stripped to LF for unix
        !           485: **   when the format is textual.
        !           486: **
        !           487: */
        !           488: PUBLIC int HTParseFile ARGS5(
        !           489:        HTFormat,               format_in,
        !           490:        HTFormat,               format_out,
        !           491:        HTParentAnchor *,       anchor,
        !           492:        FILE *,                 fp,
        !           493:        HTStream*,              sink)
        !           494: {
        !           495:     HTStream * stream;
        !           496:     HTStreamClass targetClass;    
        !           497: 
        !           498:     stream = HTStreamStack(format_in,
        !           499:                        format_out,
        !           500:                        sink , anchor);
        !           501:     
        !           502:     if (!stream) {
        !           503:         char buffer[1024];     /* @@@@@@@@ */
        !           504:        sprintf(buffer, "Sorry, can't convert from %s to %s.",
        !           505:                HTAtom_name(format_in), HTAtom_name(format_out));
        !           506:        if (TRACE) fprintf(stderr, "HTFormat(in HTParseFile): %s\n", buffer);
        !           507:         return HTLoadError(sink, 501, buffer);
        !           508:     }
        !           509:     
        !           510: /*     Push the data, ignoring CRLF if necessary, down the stream
        !           511: **
        !           512: **
        !           513: **   @@  Bug:  This decision ought to be made based on "encoding"
        !           514: **   rather than on format.  @@@  When we handle encoding.
        !           515: **   The current method smells anyway.
        !           516: */
        !           517:     targetClass = *(stream->isa);      /* Copy pointers to procedures */
        !           518:     HTFileCopy(fp, stream);
1.2       timbl     519:     (*targetClass.end_document)(stream);
                    520:     (*targetClass.free)(stream);
1.1       timbl     521:     
1.2       timbl     522:     return HT_LOADED;
1.1       timbl     523: }
1.2       timbl     524: 
                    525: 
                    526: 

Webmaster