Annotation of libwww/Library/src/HTML.html, revision 2.16

2.7       timbl       1: <HTML>
                      2: <HEAD>
                      3: <TITLE>HTML to rich text converter for libwww</TITLE>
2.12      timbl       4: <NEXTID N="z5">
2.7       timbl       5: </HEAD>
2.6       timbl       6: <BODY>
2.8       timbl       7: <H1>The HTML to styled text object converter</H1>This interprets the <A
                      8: NAME="z0" HREF="../../MarkUp/MarkUp.html">HTML</A> semantics
                      9: and some HTMLPlus.<P>
                     10: Part of <A
                     11: NAME="z2" HREF="Overview.html">libwww</A> . Implemented by <A
                     12: NAME="z3" HREF="HTML.c">HTML.c</A>
2.6       timbl      13: <PRE>#ifndef HTML_H /* include guard; the matching #endif is at the end of the header */
2.1       timbl      14: #define HTML_H
                     15: 
                     16: #include "HTUtils.h"
2.13      luotonen   17: #include "HTFormat.h"
2.1       timbl      18: #include "HTAnchor.h"
2.8       timbl      19: #include "HTMLPDTD.h"
2.1       timbl      20: 
2.8       timbl      21: #define DTD HTMLP_dtd /* NOTE(review): HTMLP_dtd is presumably declared in HTMLPDTD.h -- confirm */
                     22: 
2.6       timbl      23: #ifdef SHORT_NAMES /* when defined, the two long identifiers are mapped to abbreviated names */
                     24: #define HTMLPresentation       HTMLPren
                     25: #define HTMLPresent            HTMLPres
                     26: #endif /* SHORT_NAMES */
2.1       timbl      27: 
                     28: extern CONST HTStructuredClass HTMLPresentation; /* spelled HTMLPren when SHORT_NAMES is defined */
                     29: 
2.6       timbl      30: </PRE>
2.9       timbl      31: <H2>HTML_new: A structured stream to
2.12      timbl      32: parse HTML</H2>When this routine is called, the
                     33: request structure may contain a <A
                     34: NAME="z4" HREF="HTAccess.html#z6">childAnchor</A>
                     35: value.  In that case it is the responsibility
                     36: of this module to select  the anchor.<P>
                     37:  
2.9       timbl      38: <PRE>extern HTStructured* HTML_new PARAMS((HTRequest * request, /* may carry a childAnchor value, which this module must then select */
2.8       timbl      39:                                        void *   param,
                     40:                                        HTFormat input_format,
                     41:                                        HTFormat output_format,
                     42:                                        HTStream * output_stream));
                     43: 
2.10      luotonen   44: </PRE>
                     45: <H3>Reopen</H3>Reopening an existing HTML object
                     46: allows it to be retained (for example
                     47: by the styled text object) after
                     48: the structured stream has been closed.
2.11      timbl      49: To be actually deleted, the HTML
2.10      luotonen   50: object must be closed one more
                     51: time than it has been reopened.
2.11      timbl      52: <PRE>extern void HTML_reopen PARAMS((HTStructured * me)); /* each reopen means one additional close is needed before the object is deleted */
2.1       timbl      53: 
2.10      luotonen   54: </PRE>
                     55: <H2>Converters</H2>
2.15      frystyk    56: <PRE>extern HTConverter HTMLToPlain, HTMLToC, HTMLPresent, HTMLToTeX; /* HTMLPresent is spelled HTMLPres when SHORT_NAMES is defined */
2.8       timbl      57: 
                     58: 
                     59: </PRE>
                     60: <H2>Selecting internal character set
                     61: representations</H2>
                     62: <PRE>typedef enum _HTMLCharacterSet { /* selectable internal character set representations; NOTE(review): a tag starting with underscore + capital is a name form the C standard reserves for the implementation */
2.1       timbl      63:        HTML_ISO_LATIN1,
                     64:        HTML_NEXT_CHARS,
                     65:        HTML_PC_CP950
                     66: } HTMLCharacterSet;
                     67: 
                     68: extern void HTMLUseCharacterSet PARAMS((HTMLCharacterSet i)); /* i: presumably the representation to switch to -- confirm in HTML.c */
                     69: 
2.6       timbl      70: </PRE>
                     71: <H2>Record error message as a hypertext
                     72: object</H2>The error message should be marked
                     73: as an error so that it can be reloaded
                     74: later. This implementation just throws
                     75: up an error message and leaves the
                     76: document unloaded.
2.10      luotonen   77: <H3>On entry,</H3>
                     78: <DL>
                     79: <DT>sink
2.11      timbl      80: <DD> is a stream to the output device
2.10      luotonen   81: if any
                     82: <DT>number
2.11      timbl      83: <DD> is the HTTP error number
2.10      luotonen   84: <DT>message
2.11      timbl      85: <DD> is the human readable message.
2.10      luotonen   86: </DL>
                     87: 
                     88: <H3>On exit,</H3>a return code like HT_LOADED if object
                     89: exists else &lt; 0
                     90: <PRE>PUBLIC int HTLoadError PARAMS(( /* returns a code like HT_LOADED if the object exists, otherwise a negative value */
2.14      luotonen   91:        HTRequest *     req, /* NOTE(review): the "On entry" list above still documents a "sink" stream, not this request parameter */
2.1       timbl      92:        int             number, /* the HTTP error number */
                     93:        CONST char *    message)); /* the human readable message */
                     94: 
2.6       timbl      95: 
2.16    ! timbl      96: </PRE>
        !            97: <h2>White Space Treatment</h2>
        !            98: There is a small number of different ways of treating white
        !            99: space in SGML, in mapping from a text object to HTML.
        !           100: These have to be programmed it seems.
        !           101: <pre>
        !           102: /*
        !           103: In text object  \n\n            \n      tab     \n\n\t
        !           104: --------------  -------------   -----   -----   -------
        !           105: in Address,
        !           106: Blockquote,
        !           107: Normal,         <P>             <BR>    -               NORMAL
        !           108: H1-6:           close+open      <BR>    -               HEADING
        !           109: Glossary        <DT>            <DT>    <DD>    <P>     GLOSSARY
        !           110: List,
        !           111: Menu            <LI>            <LI>    -       <P>     LIST
        !           112: Dir             <LI>            <LI>    <LI>            DIR
        !           113: Pre etc         \n\n            \n      \t              PRE
2.7       timbl     114: 
2.16    ! timbl     115: */
        !           116: 
        !           117: typedef enum _white_space_treatment { /* one value per row of the table above; names follow its last column */
        !           118:        WS_NORMAL, /* Address, Blockquote, Normal */
        !           119:        WS_HEADING, /* H1-6 */
        !           120:        WS_GLOSSARY, /* Glossary */
        !           121:        WS_LIST, /* List, Menu */
        !           122:        WS_DIR, /* Dir */
        !           123:        WS_PRE /* Pre etc */
        !           124: } white_space_treatment;
        !           125: 
        !           126: </pre>
        !           127: <h2>Nesting State</h2>
        !           128: These elements form a tree with an item for each nesting state: that
        !           129: is, each unique combination of nested elements which has a
        !           130: specific style.
        !           131: <pre>
        !           132: typedef struct _HTNesting { /* one item of the nesting-state tree */
        !           133:     void *                     style;  /* HTStyle *: Platform dependent */
        !           134:     white_space_treatment      wst; /* white space treatment for this nesting state */
        !           135:     struct _HTNesting *                parent; /* NOTE(review): presumably the enclosing state, NULL at the root -- confirm */
        !           136:     int                                element_number; /* NOTE(review): presumably an index into the DTD element table -- confirm */
        !           137:     int                                item_number;    /* only for ordered lists */
        !           138:     int                                list_level;     /* how deep nested */
        !           139:     HTList *                   children; /* NOTE(review): presumably the child HTNesting items -- confirm */
        !           140:     BOOL                       paragraph_break;
        !           141:     int                                magic; /* NOTE(review): purpose is not documented in this header */
        !           142:     BOOL                       object_gens_HTML; /* we don't generate HTML */
        !           143: } HTNesting;
        !           144: 
        !           145: 
        !           146: </pre>
        !           147: <H2>Nesting functions</H2>
        !           148: These functions were new with HTML2.c.  They allow the tree
        !           149: of SGML nesting states to be manipulated, and SGML regenerated from the
        !           150: style sequence.
        !           151: <PRE>
        !           152: 
        !           153: extern void HTRegenInit NOPARAMS;
        !           154: 
        !           155: extern void HTRegenCharacter PARAMS((
        !           156:        char                    c,
        !           157:        HTNesting *             nesting,
        !           158:        HTStructured *          target));
        !           159: 
        !           160: extern void HTNestingChange PARAMS((
        !           161:        HTStructured*   s, 
        !           162:        HTNesting*              old, 
        !           163:        HTNesting *             new, /* NOTE(review): "new" is a C++ keyword, so a C++ compiler would reject this parameter name */
        !           164:        HTChildAnchor *         info,
        !           165:        CONST char *            aName));
        !           166: 
        !           167: extern HTNesting * HTMLCommonality PARAMS((
        !           168:        HTNesting *     s1,
        !           169:        HTNesting *     s2));
        !           170: 
        !           171: extern HTNesting * HTNestElement PARAMS((HTNesting * p, int ele));
        !           172: extern /* HTStyle * */ void * HTStyleForNesting PARAMS((HTNesting * n));
        !           173: 
        !           174: extern HTNesting* HTMLAncestor PARAMS((HTNesting * old, int depth));
        !           175: extern HTNesting* CopyBranch PARAMS((HTNesting * old, HTNesting * new, int depth)); /* NOTE(review): "new" is a C++ keyword; the name also lacks the HT prefix its neighbours use */
        !           176: extern HTNesting * HTInsertLevel PARAMS((HTNesting * old,
        !           177:                int     element_number,
        !           178:                int     level));
        !           179: extern HTNesting * HTDeleteLevel PARAMS((HTNesting * old,
        !           180:                int     level));
        !           181: extern int HTMLElementNumber PARAMS((HTNesting * s));
        !           182: extern int HTMLLevel PARAMS(( HTNesting * s));
        !           183: extern HTNesting* HTMLAncestor PARAMS((HTNesting * old, int depth)); /* NOTE(review): repeats the HTMLAncestor declaration given earlier in this list; redundant but harmless */
        !           184: 
        !           185: #endif         /* end HTML_H */
        !           186: 
        !           187: </PRE>
        !           188: 
        !           189: end</BODY>
2.7       timbl     190: </HTML>

Webmaster