Annotation of libwww/Library/src/HTFormat.html, revision 2.17

2.10      timbl       1: <HTML>
                      2: <HEAD>
2.1       timbl       3: <TITLE>HTFormat: The format manager in the WWW Library</TITLE>
2.15      timbl       4: <NEXTID N="z18">
2.10      timbl       5: </HEAD>
2.1       timbl       6: <BODY>
                      7: <H1>Manage different document formats</H1>Here we describe the functions of
                      8: the HTFormat module which handles
                      9: conversion between different data
                     10: representations.  (In MIME parlance,
                     11: a representation is known as a content-type.
2.2       timbl      12: In WWW  the term "format" is often
2.1       timbl      13: used as it is shorter).<P>
                     14: This module is implemented by <A
2.10      timbl      15: NAME="z0" HREF="HTFormat.c">HTFormat.c</A>
2.7       timbl      16: . This hypertext document is used
                     17: to generate the <A
2.11      timbl      18: NAME="z8" HREF="HTFormat.h">HTFormat.h</A> include
2.9       timbl      19: file.  Part of the <A
2.10      timbl      20: NAME="z10" HREF="Overview.html">WWW library</A> .
2.1       timbl      21: <H2>Preamble</H2>
                     22: <PRE>#ifndef HTFORMAT_H
                     23: #define HTFORMAT_H
                     24: 
                     25: #include "HTUtils.h"
                     26: #include <A
2.10      timbl      27: NAME="z7" HREF="HTStream.html">"HTStream.h"</A>
2.1       timbl      28: #include "HTAtom.h"
2.2       timbl      29: #include "HTList.h"
2.1       timbl      30: 
                     31: #ifdef SHORT_NAMES
                     32: #define HTOutputSource HTOuSour
                     33: #define HTOutputBinary HTOuBina
                     34: #endif
                     35: 
                     36: </PRE>
2.17    ! luotonen   37: <H2>HT<A
        !            38: NAME="z15"> Input Socket: Buffering for network
        !            39: in</A></H2>These routines provide simple character
        !            40: input from sockets. These are used
        !            41: for parsing input in arbitrary IP
        !            42: protocols (Gopher, NNTP, FTP).
        !            43: <PRE>#define INPUT_BUFFER_SIZE 4096            /* Tradeoff speed vs memory*/
        !            44: typedef struct _socket_buffer {
        !            45:         char input_buffer[INPUT_BUFFER_SIZE];
        !            46:        char * input_pointer;
        !            47:        char * input_limit;
        !            48:        int input_file_number;
        !            49: } HTInputSocket;
        !            50: 
        !            51: </PRE>
        !            52: <H3>Create input buffer and set file
        !            53: number</H3>
        !            54: <PRE>extern HTInputSocket* HTInputSocket_new PARAMS((int file_number));
        !            55: 
        !            56: </PRE>
        !            57: <H3>Get next character from buffer</H3>
        !            58: <PRE>extern char HTInputSocket_getCharacter PARAMS((HTInputSocket* isoc));
        !            59: 
        !            60: </PRE>
        !            61: <H3>Read block from input socket</H3>
        !            62: Read <CODE>*len</CODE> characters and return a buffer (don't free)
        !            63: containing <CODE>*len</CODE> characters (<CODE>*len</CODE> may have
        !            64: changed).  Buffer is not NUL-terminated.
        !            65: <PRE>extern char * HTInputSocket_getBlock PARAMS((HTInputSocket * isoc,
        !            66:                                                  int *           len));
        !            67: 
        !            68: </PRE>
        !            69: 
        !            70: <H3>Free input socket buffer</H3>
        !            71: <PRE>extern void HTInputSocket_free PARAMS((HTInputSocket * isoc));
        !            72: 
        !            73: </PRE>
        !            74: <PRE>
        !            75: PUBLIC char * HTInputSocket_getLine PARAMS((HTInputSocket * isoc));
        !            76: PUBLIC char * HTInputSocket_getUnfoldedLine PARAMS((HTInputSocket * isoc));
        !            77: PUBLIC char * HTInputSocket_getStatusLine PARAMS((HTInputSocket * isoc));
        !            78: PUBLIC BOOL   HTInputSocket_seemsBinary PARAMS((HTInputSocket * isoc));
        !            79: 
        !            80: </PRE>
2.1       timbl      81: <H2>The HTFormat type</H2>We use the HTAtom object for holding
                     82: representations. This allows faster
                     83: manipulation (comparison and copying)
2.14      timbl      84: than if we stayed with strings.<P>
                     85: The following have to be defined
                     86: in advance of the other include files
                     87: because of circular references.
2.1       timbl      88: <PRE>typedef HTAtom * HTFormat;
2.13      timbl      89: 
2.14      timbl      90: #include <A
                     91: NAME="z14" HREF="HTAccess.html">"HTAccess.h"</A>   /* Required for HTRequest definition */
                     92:                
2.1       timbl      93: </PRE>These macros (which used to be constants)
                     94: define some basic internally referenced
2.13      timbl      95: representations. 
                     96: <H3>Internal ones</H3>The www/xxx ones are of course not
                     97: MIME standard.<P>
2.1       timbl      98: www/source  is an output format which
                     99: leaves the input untouched. It is
                    100: useful for diagnostics, and for users
                    101: who want to see the original, whatever
                    102: it is.
2.13      timbl     103: <H3></H3>
                    104: <PRE>#define WWW_SOURCE HTAtom_for("www/source")       /* Whatever it was originally*/
2.1       timbl     105: 
                    106: </PRE>www/present represents the user's
                    107: perception of the document.  If you
                    108: convert to www/present, you present
                    109: the material to the user. 
                    110: <PRE>#define WWW_PRESENT HTAtom_for("www/present")     /* The user's perception */
                    111: 
                    112: </PRE>The message/rfc822 format means a
                    113: MIME message or a plain text message
                    114: with no MIME header. This is what
                    115: is returned by an HTTP server.
                    116: <PRE>#define WWW_MIME HTAtom_for("www/mime")           /* A MIME message */
2.10      timbl     117: 
2.1       timbl     118: </PRE>www/print is like www/present except
                    119: it represents a printed copy.
                    120: <PRE>#define WWW_PRINT HTAtom_for("www/print") /* A printed copy */
                    121: 
2.10      timbl     122: </PRE>www/unknown is a really unknown type.
2.11      timbl     123: Some default action is appropriate.
2.10      timbl     124: <PRE>#define WWW_UNKNOWN     HTAtom_for("www/unknown")
                    125: 
2.13      timbl     126: 
                    127: 
                    128: </PRE>
                    129: <H3>MIME ones (a few)</H3>These are regular MIME types.  HTML
2.11      timbl     130: is assumed to be added by the W3
                    131: code. application/octet-stream was
                    132: mistakenly application/binary in
                    133: earlier libwww versions (pre 2.11).
2.10      timbl     134: <PRE>#define WWW_PLAINTEXT     HTAtom_for("text/plain")
2.1       timbl     135: #define WWW_POSTSCRIPT         HTAtom_for("application/postscript")
                    136: #define WWW_RICHTEXT   HTAtom_for("application/rtf")
2.10      timbl     137: #define WWW_AUDIO       HTAtom_for("audio/basic")
2.1       timbl     138: #define WWW_HTML       HTAtom_for("text/html")
2.11      timbl     139: #define WWW_BINARY     HTAtom_for("application/octet-stream")
2.7       timbl     140: 
2.1       timbl     141: </PRE>We must include the following file
                    142: after defining HTFormat, to which
2.10      timbl     143: it makes reference.
                    144: <H2>The HTEncoding type</H2>
                    145: <PRE>typedef HTAtom* HTEncoding;
                    146: 
                    147: </PRE>The following are values for the
                    148: MIME content transfer encodings:
                    149: <PRE>#define WWW_ENC_7BIT              HTAtom_for("7bit")
                    150: #define WWW_ENC_8BIT           HTAtom_for("8bit")
                    151: #define WWW_ENC_BINARY         HTAtom_for("binary")
                    152: 
                    153: </PRE>We also add
                    154: <PRE>#define WWW_ENC_COMPRESS  HTAtom_for("compress")
                    155: 
                    156: #include "HTAnchor.h"
2.1       timbl     157: 
                    158: </PRE>
                    159: <H2>The HTPresentation and HTConverter
                    160: types</H2>This HTPresentation structure represents
                    161: a possible conversion algorithm from
                    162: one format to another.  It includes
                    163: a pointer to a conversion routine.
                    164: The conversion routine returns a
                    165: stream to which data should be fed.
                    166: See also <A
2.12      timbl     167: NAME="z5" HREF="#z3">HTStreamStack</A> which scans
2.1       timbl     168: the list of registered converters
                    169: and calls one. See the <A
2.10      timbl     170: NAME="z6" HREF="HTInit.html">initialisation
2.1       timbl     171: module</A> for a list of conversion routines.
                    172: <PRE>typedef struct _HTPresentation HTPresentation;
                    173: 
2.13      timbl     174: typedef HTStream * <A
                    175: NAME="z12">HTConverter</A> PARAMS((
                    176:        HTRequest *             request,
                    177:        void *                  param,
                    178:        HTFormat                input_format,
                    179:        HTFormat                output_format,
                    180:        HTStream *              output_stream));
2.1       timbl     181:        
                    182: struct _HTPresentation {
2.13      timbl     183:        HTAtom* rep;            /* representation name atomized */
2.1       timbl     184:        HTAtom* rep_out;        /* resulting representation */
2.2       timbl     185:        HTConverter *converter; /* The routine to gen the stream stack */
2.1       timbl     186:        char *  command;        /* MIME-format string */
                    187:        float   quality;        /* Between 0 (bad) and 1 (good) */
                    188:        float   secs;
                    189:        float   secs_per_byte;
                    190: };
                    191: 
2.15      timbl     192: </PRE>A global list of converters is kept
2.1       timbl     193: by this module.  It is also scanned
                    194: by modules which want to know the
                    195: set of formats supported, for example.
2.15      timbl     196:  Note there is also an additional
                    197: list associated with each <A
                    198: NAME="z16" HREF="HTAccess.html#z5">request</A>.
                    199: <PRE>extern HTList * <A
                    200: NAME="z17">HTConversions</A> ;
2.1       timbl     201: 
2.12      timbl     202: 
2.1       timbl     203: </PRE>
                    204: <H2>HTSetPresentation: Register a system
                    205: command to present a format</H2>
2.8       timbl     206: <H3>On entry,</H3>
2.1       timbl     207: <DL>
                    208: <DT>rep
                    209: <DD> is the MIME - style format name
                    210: <DT>command
                    211: <DD> is the MAILCAP - style command
                    212: template
                    213: <DT>quality
                    214: <DD> A degradation factor 0..1
                    215: <DT>maxbytes
                    216: <DD> A limit on the length acceptable
                    217: as input (0 infinite)
                    218: <DT>maxsecs
                    219: <DD> A limit on the time user
                    220: will wait (0 for infinity)
                    221: </DL>
                    222: 
                    223: <PRE>extern void HTSetPresentation PARAMS((
2.13      timbl     224:        HTList *        conversions,
                    225:        CONST char *    representation,
                    226:        CONST char *    command,
                    227:        float           quality,
                    228:        float           secs, 
                    229:        float           secs_per_byte
2.1       timbl     230: ));
                    231: 
                    232: 
                    233: </PRE>
                    234: <H2>HTSetConversion:   Register a conversion
                    235: routine</H2>
2.8       timbl     236: <H3>On entry,</H3>
2.1       timbl     237: <DL>
                    238: <DT>rep_in
                    239: <DD> is the content-type input
                    240: <DT>rep_out
                    241: <DD> is the resulting content-type
                    242: <DT>converter
                    243: <DD> is the routine to make
                    244: the stream to do it
                    245: </DL>
                    246: 
                    247: <PRE>
                    248: extern void HTSetConversion PARAMS((
2.13      timbl     249:        HTList *        conversions,
2.1       timbl     250:        CONST char *    rep_in,
                    251:        CONST char *    rep_out,
2.2       timbl     252:        HTConverter *   converter,
2.1       timbl     253:        float           quality,
                    254:        float           secs, 
                    255:        float           secs_per_byte
                    256: ));
                    257: 
                    258: 
                    259: </PRE>
                    260: <H2><A
2.10      timbl     261: NAME="z3">HTStreamStack:   Create a stack of
2.1       timbl     262: streams</A></H2>This is the routine which actually
                    263: sets up the conversion. It currently
                    264: checks only for direct conversions,
2.8       timbl     265: but multi-stage conversions are foreseen.
2.2       timbl     266: It takes a stream into which the
2.1       timbl     267: output should be sent in the final
                    268: format, builds the conversion stack,
                    269: and returns a stream into which the
                    270: data in the input format should be
                    271: fed.  The anchor is passed because
                    272: hypertext objects load information
                    273: into the anchor object which represents
                    274: them.
                    275: <PRE>extern HTStream * HTStreamStack PARAMS((
                    276:        HTFormat                format_in,
2.13      timbl     277:        HTRequest *             request));
2.1       timbl     278: 
                    279: </PRE>
                    280: <H2>HTStackValue: Find the cost of a
                    281: filter stack</H2>Must return the cost of the same
                    282: stack which HTStreamStack would set
                    283: up.
2.8       timbl     284: <H3>On entry,</H3>
2.1       timbl     285: <DL>
                    286: <DT>format_in
                    287: <DD> The format of the data to
                    288: be converted
                    289: <DT>format_out
                    290: <DD> The format required
                    291: <DT>initial_value
                    292: <DD> The intrinsic "value"
                    293: of the data before conversion on
                    294: a scale from 0 to 1
                    295: <DT>length
                    296: <DD> The number of bytes expected
                    297: in the input format
                    298: </DL>
                    299: 
                    300: <PRE>extern float HTStackValue PARAMS((
2.13      timbl     301:        HTList *                conversions,
2.1       timbl     302:        HTFormat                format_in,
2.13      timbl     303:        HTFormat                format_out,
2.1       timbl     304:        float                   initial_value,
                    305:        long int                length));
                    306: 
                    307: #define NO_VALUE_FOUND -1e20           /* returned if none found */
                    308: 
                    309: </PRE>
                    310: <H2><A
2.10      timbl     311: NAME="z1">HTCopy:  Copy a socket to a stream</A></H2>This is used by the protocol engines
2.6       secret    312: to send data down a stream, typically
2.1       timbl     313: one which has been generated by HTStreamStack.
                    314: <PRE>extern void HTCopy PARAMS((
                    315:        int                     file_number,
                    316:        HTStream*               sink));
                    317: 
                    318:        
2.6       secret    319: </PRE>
                    320: <H2><A
2.10      timbl     321: NAME="c6">HTFileCopy:  Copy a file to a stream</A></H2>This is used by the protocol engines
2.6       secret    322: to send data down a stream, typically
2.7       timbl     323: one which has been generated by HTStreamStack.
                    324: It is currently called by <A
2.12      timbl     325: NAME="z9" HREF="#c7">HTParseFile</A>
2.6       secret    326: <PRE>extern void HTFileCopy PARAMS((
                    327:        FILE*                   fp,
                    328:        HTStream*               sink));
                    329: 
                    330:        
2.7       timbl     331: </PRE>
                    332: <H2><A
2.10      timbl     333: NAME="c2">HTCopyNoCR: Copy a socket to a stream,
2.7       timbl     334: stripping CR characters.</A></H2>It is slower than <A
2.12      timbl     335: NAME="z2" HREF="#z1">HTCopy</A> .
2.1       timbl     336: <PRE>
                    337: extern void HTCopyNoCR PARAMS((
                    338:        int                     file_number,
                    339:        HTStream*               sink));
                    340: 
2.16      luotonen  341: 
                    342: </PRE>
2.1       timbl     343: <H2>HTParseSocket: Parse a socket given
                    344: its format</H2>This routine is called by protocol
                    345: modules to load an object.  uses<A
2.12      timbl     346: NAME="z4" HREF="#z3">
2.1       timbl     347: HTStreamStack</A> and the copy routines
                    348: above.  Returns HT_LOADED if successful,
                    349: &lt;0 if not.
                    350: <PRE>extern int HTParseSocket PARAMS((
                    351:        HTFormat        format_in,
                    352:        int             file_number,
2.13      timbl     353:        HTRequest *     request));
2.6       secret    354: 
                    355: </PRE>
                    356: <H2><A
2.10      timbl     357: NAME="c1">HTParseFile: Parse a File through
2.7       timbl     358: a file pointer</A></H2>This routine is called by protocol
                    359: modules to load an object. uses<A
2.12      timbl     360: NAME="z4" HREF="#z3"> HTStreamStack</A>
2.7       timbl     361: and <A
2.12      timbl     362: NAME="c7" HREF="#c6">HTFileCopy</A> .  Returns HT_LOADED
2.7       timbl     363: if successful, &lt;0 if not.
2.6       secret    364: <PRE>extern int HTParseFile PARAMS((
                    365:        HTFormat        format_in,
                    366:        FILE            *fp,
2.13      timbl     367:        HTRequest *     request));
2.8       timbl     368: 
                    369: </PRE>
2.11      timbl     370: <H2><A
                    371: NAME="z11">HTNetToText: Convert Net ASCII to
                    372: local representation</A></H2>This is a filter stream suitable
                    373: for taking text from a socket and
                    374: passing it into a stream which expects
                    375: text in the local C representation.
                    376: It does ASCII and newline conversion.
                    377: As usual, pass its output stream
                    378: to it when creating it.
                    379: <PRE>extern HTStream *  HTNetToText PARAMS ((HTStream * sink));
                    380: 
                    381: </PRE>
2.8       timbl     382: <H2>HTFormatInit: Set up default presentations
                    383: and conversions</H2>These are defined in HTInit.c or
                    384: HTSInit.c if these have been replaced.
                    385: If you don't call this routine, and
                    386: you don't define any presentations,
                    387: then this routine will automatically
                    388: be called the first time a conversion
                    389: is needed. However, if you explicitly
                    390: add some conversions (eg using HTLoadRules)
                    391: then you may want also to explicitly
                    392: call this to get the defaults as
                    393: well.
2.13      timbl     394: <PRE>extern void HTFormatInit PARAMS((HTList * conversions));
2.1       timbl     395: 
                    396: </PRE>
                    397: <H2>Epilogue</H2>
                    398: <PRE>extern BOOL HTOutputSource;       /* Flag: shortcut parser */
                    399: #endif
                    400: 
2.15      timbl     401: </PRE>end</BODY>
2.10      timbl     402: </HTML>

Webmaster