Annotation of libwww/Library/src/HTML.html, revision 2.20

2.7       timbl       1: <HTML>
                      2: <HEAD>
                      3: <TITLE>HTML to rich text converter for libwww</TITLE>
                      4: </HEAD>
2.6       timbl       5: <BODY>
2.20    ! frystyk     6: 
        !             7: <H1>The HTML to styled text object converter</H1>
        !             8: 
        !             9: <PRE>
        !            10: /*
        !            11: **     (c) COPYRIGHT CERN 1994.
        !            12: **     Please first read the full copyright statement in the file COPYRIGH.
        !            13: */
        !            14: </PRE>
        !            15: 
        !            16: This interprets the <A
        !            17: HREF="http://info.cern.ch/hypertext/WWW/MarkUp/MarkUp.html">HTML</A>
        !            18: semantics and some HTMLPlus.<P>
        !            19: 
        !            20: This module is implemented by <A HREF="HTML.c">HTML.c</A>, and it is
        !            21: a part of the <A
        !            22: HREF="http://info.cern.ch/hypertext/WWW/Library/User/Guide/Guide.html">
        !            23: Library of Common Code</A>.
        !            24: 
        !            25: <PRE>
        !            26: #ifndef HTML_H
2.1       timbl      27: #define HTML_H
                     28: 
                     29: #include "HTUtils.h"
2.13      luotonen   30: #include "HTFormat.h"
2.1       timbl      31: #include "HTAnchor.h"
2.8       timbl      32: #include "HTMLPDTD.h"
2.1       timbl      33: 
2.8       timbl      34: #define DTD HTMLP_dtd
                     35: 
2.6       timbl      36: #ifdef SHORT_NAMES
                     37: #define HTMLPresentation       HTMLPren
                     38: #define HTMLPresent            HTMLPres
                     39: #endif
2.1       timbl      40: 
                     41: extern CONST HTStructuredClass HTMLPresentation;
2.17      frystyk    42: </PRE>
                     43: 
                     44: <H2>HTML_new: A structured stream to parse HTML</H2>
2.1       timbl      45: 
2.17      frystyk    46: When this routine is called, the request structure may contain a <A
                     47: NAME="z4" HREF="HTAccess.html#z6">childAnchor</A> value.  In that case
                     48: it is the responsibility of this module to select the anchor.<P>
2.12      timbl      49:  
2.9       timbl      50: <PRE>extern HTStructured* HTML_new PARAMS((HTRequest * request,
2.8       timbl      51:                                        void *   param,
                     52:                                        HTFormat input_format,
                     53:                                        HTFormat output_format,
                     54:                                        HTStream * output_stream));
                     55: 
2.10      luotonen   56: </PRE>
2.1       timbl      57: 
2.17      frystyk    58: <H3>Reopen</H3>
                     59: 
                     60: Reopening an existing HTML object allows it to be retained (for
                     61: example by the styled text object) after the structured stream has
                     62: been closed.  To be actually deleted, the HTML object must be closed
                     63: one more time than it has been reopened.
                     64: 
                     65: <PRE>
                     66: extern void HTML_reopen PARAMS((HTStructured * me));
2.10      luotonen   67: </PRE>
2.17      frystyk    68: 
2.10      luotonen   69: <H2>Converters</H2>
2.8       timbl      70: 
2.17      frystyk    71: These are the converters implemented in this module:
2.8       timbl      72: 
2.17      frystyk    73: <PRE>
                     74: #ifndef pyramid
                     75: extern HTConverter HTMLToPlain, HTMLToC, HTMLPresent, HTMLToTeX;
                     76: #endif
2.8       timbl      77: </PRE>
2.17      frystyk    78: 
2.8       timbl      79: <H2>Selecting internal character set
                     80: representations</H2>
                     81: <PRE>typedef enum _HTMLCharacterSet { /* internal character set representations; the argument type of HTMLUseCharacterSet() */
2.1       timbl      82:        HTML_ISO_LATIN1, /* presumably ISO Latin-1 -- confirm against HTML.c */
                     83:        HTML_NEXT_CHARS, /* presumably the NeXT character set -- confirm */
                     84:        HTML_PC_CP950 /* presumably a PC code page -- confirm which one */
                     85: } HTMLCharacterSet;
                     86: 
                     87: extern void HTMLUseCharacterSet PARAMS((HTMLCharacterSet i));
                     88: 
2.6       timbl      89: </PRE>
                     90: <H2>Record error message as a hypertext
                     91: object</H2>The error message should be marked
                     92: as an error so that it can be reloaded
                     93: later. This implementation just throws
                     94: up an error message and leaves the
                     95: document unloaded.
2.10      luotonen   96: <H3>On entry,</H3>
                     97: <DL>
                     98: <DT>sink
2.11      timbl      99: <DD> is a stream to the output device
2.10      luotonen  100: if any
                    101: <DT>number
2.11      timbl     102: <DD> is the HTTP error number
2.10      luotonen  103: <DT>message
2.11      timbl     104: <DD> is the human readable message.
2.10      luotonen  105: </DL>
                    106: 
                    107: <H3>On exit,</H3>returns a code like HT_LOADED if the object
                    108: exists, else a value &lt; 0
2.19      frystyk   109: <PRE>extern int HTLoadError PARAMS((
2.14      luotonen  110:        HTRequest *     req,
2.1       timbl     111:        int             number,
                    112:        CONST char *    message));
                    113: 
2.6       timbl     114: 
2.16      timbl     115: </PRE>
                    116: <h2>White Space Treatment</h2>
                    117: There is a small number of different ways of treating white
                    118: space in SGML, in mapping from a text object to HTML.
                    119: These have to be programmed it seems.
                    120: <pre>
                    121: /*
                    122: In text object \n\n            \n      tab     \n\n\t
                    123: -------------- -------------   -----   -----   -------
                    124: in Address,
                    125: Blockquote,
                    126: Normal,                <P>             <BR>    -               NORMAL
                    127: H1-6:          close+open      <BR>    -               HEADING
                    128: Glossary       <DT>            <DT>    <DD>    <P>     GLOSSARY
                    129: List,                          
                    130: Menu           <LI>            <LI>    -       <P>     LIST
                    131: Dir            <LI>            <LI>    <LI>            DIR
                    132: Pre etc                \n\n            \n      \t              PRE
2.7       timbl     133: 
2.16      timbl     134: */
                    135: 
                    136: typedef enum _white_space_treatment { /* white space handling classes; one per row of the table above */
                    137:        WS_NORMAL, /* Address, Blockquote, Normal */
                    138:        WS_HEADING, /* H1-6 */
                    139:        WS_GLOSSARY, /* Glossary */
                    140:        WS_LIST, /* List, Menu */
                    141:        WS_DIR, /* Dir */
                    142:        WS_PRE /* Pre etc */
                    143: } white_space_treatment;
                    144: 
                    145: </pre>
                    146: <h2>Nesting State</h2>
                    147: These elements form a tree with an item for each nesting state: that
                    148: is, each unique combination of nested elements which has a
                    149: specific style.
                    150: <pre>
                    151: typedef struct _HTNesting { /* one item in the tree of nesting states */
                    152:     void *                     style;  /* HTStyle *: Platform dependent */
                    153:     white_space_treatment      wst; /* white space treatment class for this state */
                    154:     struct _HTNesting *                parent; /* parent item in the nesting tree */
                    155:     int                                element_number; /* presumably the DTD element number -- confirm in HTML.c */
                    156:     int                                item_number;    /* only for ordered lists */
                    157:     int                                list_level;     /* how deep nested */
                    158:     HTList *                   children; /* presumably the child nesting states -- confirm */
                    159:     BOOL                       paragraph_break; /* NOTE(review): meaning not visible in this header */
                    160:     int                                magic; /* NOTE(review): purpose not documented here -- confirm in HTML.c */
                    161:     BOOL                       object_gens_HTML; /* we don't generate HTML */
                    162: } HTNesting;
                    163: 
                    164: 
                    165: </pre>
                    166: <H2>Nesting functions</H2>
                    167: These functions were new with HTML2.c.  They allow the tree
                    168: of SGML nesting states to be manipulated, and SGML regenerated from the
                    169: style sequence.
                    170: <PRE>
                    171: 
                    172: extern void HTRegenInit NOPARAMS;
                    173: 
                    174: extern void HTRegenCharacter PARAMS((
                    175:        char                    c,
                    176:        HTNesting *             nesting,
                    177:        HTStructured *          target));
                    178: 
                    179: extern void HTNestingChange PARAMS((
                    180:        HTStructured*   s, 
                    181:        HTNesting*              old, 
2.18      frystyk   182:        HTNesting *             newnest,
2.16      timbl     183:        HTChildAnchor *         info,
                    184:        CONST char *            aName));
                    185: 
                    186: extern HTNesting * HTMLCommonality PARAMS((
                    187:        HTNesting *     s1,
                    188:        HTNesting *     s2));
                    189: 
                    190: extern HTNesting * HTNestElement PARAMS((HTNesting * p, int ele));
                    191: extern /* HTStyle * */ void * HTStyleForNesting PARAMS((HTNesting * n));
                    192: 
                    193: extern HTNesting* HTMLAncestor PARAMS((HTNesting * old, int depth));
2.18      frystyk   194: 
                    195: extern HTNesting* CopyBranch PARAMS((HTNesting * old, HTNesting * newnest,
                    196:                                     int depth));
                    197: 
2.16      timbl     198: extern HTNesting * HTInsertLevel PARAMS((HTNesting * old,
                    199:                int     element_number,
                    200:                int     level));
                    201: extern HTNesting * HTDeleteLevel PARAMS((HTNesting * old,
                    202:                int     level));
                    203: extern int HTMLElementNumber PARAMS((HTNesting * s));
                    204: extern int HTMLLevel PARAMS(( HTNesting * s));
                    205: extern HTNesting* HTMLAncestor PARAMS((HTNesting * old, int depth)); /* NOTE(review): repeats the HTMLAncestor prototype declared earlier in this header; redundant */
                    206: 
                    207: #endif         /* end HTML_H */
                    208: 
                    209: </PRE>
                    210: 
                    211: end</BODY>
2.7       timbl     212: </HTML>

Webmaster