Annotation of libwww/Library/src/HTML.html, revision 2.16
2.7 timbl 1: <HTML>
2: <HEAD>
3: <TITLE>HTML to rich text converter for libwww</TITLE>
2.12 timbl 4: <NEXTID N="z5">
2.7 timbl 5: </HEAD>
2.6 timbl 6: <BODY>
2.8 timbl 7: <H1>The HTML to styled text object converter</H1>This interprets the <A
8: NAME="z0" HREF="../../MarkUp/MarkUp.html">HTML</A> semantics
9: and some HTMLPlus.<P>
10: Part of <A
11: NAME="z2" HREF="Overview.html">libwww</A>. Implemented by <A
12: NAME="z3" HREF="HTML.c">HTML.c</A>
2.6 timbl 13: <PRE>#ifndef HTML_H
2.1 timbl 14: #define HTML_H
15:
16: #include "HTUtils.h"
2.13 luotonen 17: #include "HTFormat.h"
2.1 timbl 18: #include "HTAnchor.h"
2.8 timbl 19: #include "HTMLPDTD.h"
2.1 timbl 20:
2.8 timbl 21: #define DTD HTMLP_dtd
22:
2.6 timbl 23: #ifdef SHORT_NAMES
24: #define HTMLPresentation HTMLPren
25: #define HTMLPresent HTMLPres
26: #endif
2.1 timbl 27:
28: extern CONST HTStructuredClass HTMLPresentation;
29:
2.6 timbl 30: </PRE>
2.9 timbl 31: <H2>HTML_new: A structured stream to
2.12 timbl 32: parse HTML</H2>When this routine is called, the
33: request structure may contain a <A
34: NAME="z4" HREF="HTAccess.html#z6">childAnchor</A>
35: value. In that case it is the responsibility
36: of this module to select the anchor.<P>
37:
2.9 timbl 38: <PRE>extern HTStructured* HTML_new PARAMS((HTRequest * request,
2.8 timbl 39: void * param,
40: HTFormat input_format,
41: HTFormat output_format,
42: HTStream * output_stream));
43:
2.10 luotonen 44: </PRE>
45: <H3>Reopen</H3>Reopening an existing HTML object
46: allows it to be retained (for example
47: by the styled text object) after
48: the structured stream has been closed.
2.11 timbl 49: To be actually deleted, the HTML
2.10 luotonen 50: object must be closed one more
51: time than it has been reopened.
2.11 timbl 52: <PRE>extern void HTML_reopen PARAMS((HTStructured * me));
2.1 timbl 53:
2.10 luotonen 54: </PRE>
55: <H2>Converters</H2>
2.15 frystyk 56: <PRE>extern HTConverter HTMLToPlain, HTMLToC, HTMLPresent, HTMLToTeX;
2.8 timbl 57:
58:
59: </PRE>
60: <H2>Selecting internal character set
61: representations</H2>
62: <PRE>typedef enum _HTMLCharacterSet {
2.1 timbl 63: HTML_ISO_LATIN1,
64: HTML_NEXT_CHARS,
65: HTML_PC_CP950
66: } HTMLCharacterSet;
67:
68: extern void HTMLUseCharacterSet PARAMS((HTMLCharacterSet i));
69:
2.6 timbl 70: </PRE>
71: <H2>Record error message as a hypertext
72: object</H2>The error message should be marked
73: as an error so that it can be reloaded
74: later. This implementation just throws
75: up an error message and leaves the
76: document unloaded.
2.10 luotonen 77: <H3>On entry,</H3>
78: <DL>
79: <DT>sink
2.11 timbl 80: <DD> is a stream to the output device
2.10 luotonen 81: if any
82: <DT>number
2.11 timbl 83: <DD> is the HTTP error number
2.10 luotonen 84: <DT>message
2.11 timbl 85: <DD> is the human readable message.
2.10 luotonen 86: </DL>
87:
88: <H3>On exit,</H3>a return code like HT_LOADED if object
89: exists else &lt; 0
90: <PRE>PUBLIC int HTLoadError PARAMS((
2.14 luotonen 91: HTRequest * req,
2.1 timbl 92: int number,
93: CONST char * message));
94:
2.6 timbl 95:
2.16 ! timbl 96: </PRE>
! 97: <h2>White Space Treatment</h2>
! 98: There is a small number of different ways of treating white
! 99: space in SGML, in mapping from a text object to HTML.
! 100: These have to be programmed it seems.
! 101: <pre>
! 102: /*
! 103: In text object \n\n \n tab \n\n\t
! 104: -------------- ------------- ----- ----- -------
! 105: in Address,
! 106: Blockquote,
! 107: Normal, &lt;P&gt; &lt;BR&gt; - NORMAL
! 108: H1-6: close+open &lt;BR&gt; - HEADING
! 109: Glossary &lt;DT&gt; &lt;DT&gt; &lt;DD&gt; &lt;P&gt; GLOSSARY
! 110: List,
! 111: Menu &lt;LI&gt; &lt;LI&gt; - &lt;P&gt; LIST
! 112: Dir <LI> <LI> <LI> DIR
! 113: Pre etc \n\n \n \t PRE
2.7 timbl 114:
2.16 ! timbl 115: */
! 116:
! 117: typedef enum _white_space_treatment { /* selects one row of the white space table in the comment above */
! 118: WS_NORMAL, /* table row NORMAL: Address, Blockquote, Normal */
! 119: WS_HEADING, /* table row HEADING: H1-6 */
! 120: WS_GLOSSARY, /* table row GLOSSARY: Glossary */
! 121: WS_LIST, /* table row LIST: List, Menu */
! 122: WS_DIR, /* table row DIR: Dir */
! 123: WS_PRE /* table row PRE: Pre etc */
! 124: } white_space_treatment;
! 125:
! 126: </pre>
! 127: <h2>Nesting State</h2>
! 128: These elements form a tree with an item for each nesting state: that
! 129: is, each unique combination of nested elements which has a
! 130: specific style.
! 131: <pre>
! 132: typedef struct _HTNesting { /* one item in the tree of nesting states */
! 133: void * style; /* HTStyle *: Platform dependent */
! 134: white_space_treatment wst; /* white space treatment for this nesting state */
! 135: struct _HTNesting * parent; /* presumably the enclosing nesting state in the tree -- confirm in HTML.c */
! 136: int element_number; /* presumably the HTML element number, cf. HTMLElementNumber -- confirm */
! 137: int item_number; /* only for ordered lists */
! 138: int list_level; /* how deep nested */
! 139: HTList * children; /* presumably the child nesting states of this item -- confirm */
! 140: BOOL paragraph_break; /* NOTE(review): meaning not visible in this header */
! 141: int magic; /* NOTE(review): purpose not visible in this header */
! 142: BOOL object_gens_HTML; /* we don't generate HTML */
! 143: } HTNesting;
! 144:
! 145:
! 146: </pre>
! 147: <H2>Nesting functions</H2>
! 148: These functions were new with HTML2.c. They allow the tree
! 149: of SGML nesting states to be manipulated, and SGML regenerated from the
! 150: style sequence.
! 151: <PRE>
! 152:
! 153: extern void HTRegenInit NOPARAMS;
! 154:
! 155: extern void HTRegenCharacter PARAMS((
! 156: char c,
! 157: HTNesting * nesting,
! 158: HTStructured * target));
! 159:
! 160: extern void HTNestingChange PARAMS((
! 161: HTStructured* s,
! 162: HTNesting* old,
! 163: HTNesting * new,
! 164: HTChildAnchor * info,
! 165: CONST char * aName));
! 166:
! 167: extern HTNesting * HTMLCommonality PARAMS((
! 168: HTNesting * s1,
! 169: HTNesting * s2));
! 170:
! 171: extern HTNesting * HTNestElement PARAMS((HTNesting * p, int ele));
! 172: extern /* HTStyle * */ void * HTStyleForNesting PARAMS((HTNesting * n));
! 173:
! 174: extern HTNesting* HTMLAncestor PARAMS((HTNesting * old, int depth));
! 175: extern HTNesting* CopyBranch PARAMS((HTNesting * old, HTNesting * new, int depth));
! 176: extern HTNesting * HTInsertLevel PARAMS((HTNesting * old,
! 177: int element_number,
! 178: int level));
! 179: extern HTNesting * HTDeleteLevel PARAMS((HTNesting * old,
! 180: int level));
! 181: extern int HTMLElementNumber PARAMS((HTNesting * s));
! 182: extern int HTMLLevel PARAMS(( HTNesting * s));
! 183: extern HTNesting* HTMLAncestor PARAMS((HTNesting * old, int depth)); /* NOTE(review): repeats the identical HTMLAncestor declaration a few lines earlier; redundant */
! 184:
! 185: #endif /* end HTML_H */
! 186:
! 187: </PRE>
! 188:
! 189: end</BODY>
2.7 timbl 190: </HTML>
Webmaster