Annotation of libwww/Library/src/HTML.html, revision 2.17

2.7       timbl       1: <HTML>
                      2: <HEAD>
                      3: <TITLE>HTML to rich text converter for libwww</TITLE>
2.12      timbl       4: <NEXTID N="z5">
2.7       timbl       5: </HEAD>
2.6       timbl       6: <BODY>
2.8       timbl       7: <H1>The HTML to styled text object converter</H1>This interprets the <A
                      8: NAME="z0" HREF="../../MarkUp/MarkUp.html">HTML</A> semantics
                      9: and some HTMLPlus.<P>
                     10: Part of <A
                     11: NAME="z2" HREF="Overview.html">libwww</A>. Implemented by <A
                     12: NAME="z3" HREF="HTML.c">HTML.c</A>
2.6       timbl      13: <PRE>#ifndef HTML_H
2.1       timbl      14: #define HTML_H
                     15: 
                     16: #include "HTUtils.h"
2.13      luotonen   17: #include "HTFormat.h"
2.1       timbl      18: #include "HTAnchor.h"
2.8       timbl      19: #include "HTMLPDTD.h"
2.1       timbl      20: 
2.8       timbl      21: #define DTD HTMLP_dtd
                     22: 
2.6       timbl      23: #ifdef SHORT_NAMES
                     24: #define HTMLPresentation       HTMLPren
                     25: #define HTMLPresent            HTMLPres
                     26: #endif
2.1       timbl      27: 
                     28: extern CONST HTStructuredClass HTMLPresentation;
2.17    ! frystyk    29: </PRE>
        !            30: 
        !            31: <H2>HTML_new: A structured stream to parse HTML</H2>
2.1       timbl      32: 
2.17    ! frystyk    33: When this routine is called, the request structure may contain a <A
        !            34: NAME="z4" HREF="HTAccess.html#z6">childAnchor</A> value.  In that case
        !            35: it is the responsibility of this module to select the anchor.<P>
2.12      timbl      36:  
2.9       timbl      37: <PRE>extern HTStructured* HTML_new PARAMS((HTRequest * request,
2.8       timbl      38:                                        void *   param,
                     39:                                        HTFormat input_format,
                     40:                                        HTFormat output_format,
                     41:                                        HTStream * output_stream));
                     42: 
2.10      luotonen   43: </PRE>
2.1       timbl      44: 
2.17    ! frystyk    45: <H3>Reopen</H3>
        !            46: 
        !            47: Reopening an existing HTML object allows it to be retained (for
        !            48: example by the styled text object) after the structured stream has
        !            49: been closed.  To be actually deleted, the HTML object must be closed
        !            50: one more time than it has been reopened.
        !            51: 
        !            52: <PRE>
        !            53: extern void HTML_reopen PARAMS((HTStructured * me));
2.10      luotonen   54: </PRE>
2.17    ! frystyk    55: 
2.10      luotonen   56: <H2>Converters</H2>
2.8       timbl      57: 
2.17    ! frystyk    58: These are the converters implemented in this module:
2.8       timbl      59: 
2.17    ! frystyk    60: <PRE>
        !            61: #ifndef pyramid
        !            62: extern HTConverter HTMLToPlain, HTMLToC, HTMLPresent, HTMLToTeX;
        !            63: #endif
2.8       timbl      64: </PRE>
2.17    ! frystyk    65: 
2.8       timbl      66: <H2>Selecting internal character set
                     67: representations</H2>
                     68: <PRE>typedef enum _HTMLCharacterSet {
2.1       timbl      69:        HTML_ISO_LATIN1,
                     70:        HTML_NEXT_CHARS,
                     71:        HTML_PC_CP950
                     72: } HTMLCharacterSet;
                     73: 
                     74: extern void HTMLUseCharacterSet PARAMS((HTMLCharacterSet i));
                     75: 
2.6       timbl      76: </PRE>
                     77: <H2>Record error message as a hypertext
                     78: object</H2>The error message should be marked
                     79: as an error so that it can be reloaded
                     80: later. This implementation just throws
                     81: up an error message and leaves the
                     82: document unloaded.
2.10      luotonen   83: <H3>On entry,</H3>
                     84: <DL>
                     85: <DT>sink
2.11      timbl      86: <DD> is a stream to the output device
2.10      luotonen   87: if any
                     88: <DT>number
2.11      timbl      89: <DD> is the HTTP error number
2.10      luotonen   90: <DT>message
2.11      timbl      91: <DD> is the human readable message.
2.10      luotonen   92: </DL>
                     93: 
                     94: <H3>On exit,</H3>a return code such as HT_LOADED if the object
                     95: exists, otherwise a value &lt; 0.
                     96: <PRE>PUBLIC int HTLoadError PARAMS((
2.14      luotonen   97:        HTRequest *     req,
2.1       timbl      98:        int             number,
                     99:        CONST char *    message));
                    100: 
2.6       timbl     101: 
2.16      timbl     102: </PRE>
                    103: <h2>White Space Treatment</h2>
                    104: There is a small number of different ways of treating white
                    105: space in SGML, in mapping from a text object to HTML.
                    106: These have to be programmed, it seems.
                    107: <pre>
                    108: /*
                    109: In text object \n\n            \n      tab     \n\n\t
                    110: -------------- -------------   -----   -----   -------
                    111: in Address,
                    112: Blockquote,
                    113: Normal,                <P>             <BR>    -               NORMAL
                    114: H1-6:          close+open      <BR>    -               HEADING
                    115: Glossary       <DT>            <DT>    <DD>    <P>     GLOSSARY
                    116: List,                          
                    117: Menu           <LI>            <LI>    -       <P>     LIST
                    118: Dir            <LI>            <LI>    <LI>            DIR
                    119: Pre etc                \n\n            \n      \t              PRE
2.7       timbl     120: 
2.16      timbl     121: */
                    122: 
                    123: typedef enum _white_space_treatment {
                    124:        WS_NORMAL,
                    125:        WS_HEADING,
                    126:        WS_GLOSSARY,
                    127:        WS_LIST,
                    128:        WS_DIR,
                    129:        WS_PRE
                    130: } white_space_treatment;
                    131: 
                    132: </pre>
                    133: <h2>Nesting State</h2>
                    134: These elements form a tree with an item for each nesting state: that
                    135: is, each unique combination of nested elements which has a
                    136: specific style.
                    137: <pre>
                    138: typedef struct _HTNesting {
                    139:     void *                     style;  /* HTStyle *: Platform dependent */
                    140:     white_space_treatment      wst;
                    141:     struct _HTNesting *                parent;
                    142:     int                                element_number;
                    143:     int                                item_number;    /* only for ordered lists */
                    144:     int                                list_level;     /* how deep nested */
                    145:     HTList *                   children;
                    146:     BOOL                       paragraph_break;
                    147:     int                                magic;
                    148:     BOOL                       object_gens_HTML; /* we don't generate HTML */
                    149: } HTNesting;
                    150: 
                    151: 
                    152: </pre>
                    153: <H2>Nesting functions</H2>
                    154: These functions were new with HTML2.c.  They allow the tree
                    155: of SGML nesting states to be manipulated, and SGML regenerated from the
                    156: style sequence.
                    157: <PRE>
                    158: 
                    159: extern void HTRegenInit NOPARAMS;
                    160: 
                    161: extern void HTRegenCharacter PARAMS((
                    162:        char                    c,
                    163:        HTNesting *             nesting,
                    164:        HTStructured *          target));
                    165: 
                    166: extern void HTNestingChange PARAMS((
                    167:        HTStructured*   s, 
                    168:        HTNesting*              old, 
                    169:        HTNesting *             new,
                    170:        HTChildAnchor *         info,
                    171:        CONST char *            aName));
                    172: 
                    173: extern HTNesting * HTMLCommonality PARAMS((
                    174:        HTNesting *     s1,
                    175:        HTNesting *     s2));
                    176: 
                    177: extern HTNesting * HTNestElement PARAMS((HTNesting * p, int ele));
                    178: extern /* HTStyle * */ void * HTStyleForNesting PARAMS((HTNesting * n));
                    179: 
                    180: extern HTNesting* HTMLAncestor PARAMS((HTNesting * old, int depth));
                    181: extern HTNesting* CopyBranch PARAMS((HTNesting * old, HTNesting * new, int depth));
                    182: extern HTNesting * HTInsertLevel PARAMS((HTNesting * old,
                    183:                int     element_number,
                    184:                int     level));
                    185: extern HTNesting * HTDeleteLevel PARAMS((HTNesting * old,
                    186:                int     level));
                    187: extern int HTMLElementNumber PARAMS((HTNesting * s));
                    188: extern int HTMLLevel PARAMS(( HTNesting * s));
                    189: extern HTNesting* HTMLAncestor PARAMS((HTNesting * old, int depth));
                    190: 
                    191: #endif         /* end HTML_H */
                    192: 
                    193: </PRE>
                    194: 
                    195: end</BODY>
2.7       timbl     196: </HTML>

Webmaster