Annotation of libwww/Library/src/HTML.html, revision 2.18
2.7 timbl 1: <HTML>
2: <HEAD>
3: <TITLE>HTML to rich text converter for libwww</TITLE>
2.12 timbl 4: <NEXTID N="z5">
2.7 timbl 5: </HEAD>
2.6 timbl 6: <BODY>
2.8 timbl 7: <H1>The HTML to styled text object converter</H1>This interprets the <A
8: NAME="z0" HREF="../../MarkUp/MarkUp.html">HTML</A> semantics
9: and some HTMLPlus.<P>
10: Part of <A
11: NAME="z2" HREF="Overview.html">libwww</A> . Implemented by <A
12: NAME="z3" HREF="HTML.c">HTML.c</A>
2.6 timbl 13: <PRE>#ifndef HTML_H
2.1 timbl 14: #define HTML_H
15:
16: #include "HTUtils.h"
2.13 luotonen 17: #include "HTFormat.h"
2.1 timbl 18: #include "HTAnchor.h"
2.8 timbl 19: #include "HTMLPDTD.h"
2.1 timbl 20:
2.8 timbl 21: #define DTD HTMLP_dtd
22:
2.6 timbl 23: #ifdef SHORT_NAMES
24: #define HTMLPresentation HTMLPren
25: #define HTMLPresent HTMLPres
26: #endif
2.1 timbl 27:
28: extern CONST HTStructuredClass HTMLPresentation;
2.17 frystyk 29: </PRE>
30:
31: <H2>HTML_new: A structured stream to parse HTML</H2>
2.1 timbl 32:
2.17 frystyk 33: When this routine is called, the request structure may contain a <A
34: NAME="z4" HREF="HTAccess.html#z6">childAnchor</A> value. In that case
35: it is the responsibility of this module to select the anchor.<P>
2.12 timbl 36:
2.9 timbl 37: <PRE>extern HTStructured* HTML_new PARAMS((HTRequest * request,
2.8 timbl 38: void * param,
39: HTFormat input_format,
40: HTFormat output_format,
41: HTStream * output_stream));
42:
2.10 luotonen 43: </PRE>
2.1 timbl 44:
2.17 frystyk 45: <H3>Reopen</H3>
46:
47: Reopening an existing HTML object allows it to be retained (for
48: example by the styled text object) after the structured stream has
49: been closed. To be actually deleted, the HTML object must be closed
50: one more time than it has been reopened.
51:
52: <PRE>
53: extern void HTML_reopen PARAMS((HTStructured * me));
2.10 luotonen 54: </PRE>
2.17 frystyk 55:
2.10 luotonen 56: <H2>Converters</H2>
2.8 timbl 57:
2.17 frystyk 58: These are the converters implemented in this module:
2.8 timbl 59:
2.17 frystyk 60: <PRE>
61: #ifndef pyramid
62: extern HTConverter HTMLToPlain, HTMLToC, HTMLPresent, HTMLToTeX;
63: #endif
2.8 timbl 64: </PRE>
2.17 frystyk 65:
2.8 timbl 66: <H2>Selecting internal character set
67: representations</H2>
68: <PRE>typedef enum _HTMLCharacterSet {
2.1 timbl 69: HTML_ISO_LATIN1,
70: HTML_NEXT_CHARS,
71: HTML_PC_CP950
72: } HTMLCharacterSet;
73:
74: extern void HTMLUseCharacterSet PARAMS((HTMLCharacterSet i));
75:
2.6 timbl 76: </PRE>
77: <H2>Record error message as a hypertext
78: object</H2>The error message should be marked
79: as an error so that it can be reloaded
80: later. This implementation just throws
81: up an error message and leaves the
82: document unloaded.
2.10 luotonen 83: <H3>On entry,</H3>
84: <DL>
85: <DT>sink
2.11 timbl 86: <DD> is a stream to the output device
2.10 luotonen 87: if any
88: <DT>number
2.11 timbl 89: <DD> is the HTTP error number
2.10 luotonen 90: <DT>message
2.11 timbl 91: <DD> is the human readable message.
2.10 luotonen 92: </DL>
93:
94: <H3>On exit,</H3>a return code like HT_LOADED if object
95: exists else &lt; 0
96: <PRE>PUBLIC int HTLoadError PARAMS((
2.14 luotonen 97: HTRequest * req,
2.1 timbl 98: int number,
99: CONST char * message));
100:
2.6 timbl 101:
2.16 timbl 102: </PRE>
103: <h2>White Space Treatment</h2>
104: There is a small number of different ways of treating white
105: space in SGML, in mapping from a text object to HTML.
106: These have to be programmed it seems.
107: <pre>
108: /*
109: In text object \n\n \n tab \n\n\t
110: -------------- ------------- ----- ----- -------
111: in Address,
112: Blockquote,
113: Normal, <P> <BR> - NORMAL
114: H1-6: close+open <BR> - HEADING
115: Glossary <DT> <DT> <DD> <P> GLOSSARY
116: List,
117: Menu <LI> <LI> - <P> LIST
118: Dir <LI> <LI> <LI> DIR
119: Pre etc \n\n \n \t PRE
2.7 timbl 120:
2.16 timbl 121: */
122:
123: typedef enum _white_space_treatment {
124: WS_NORMAL,
125: WS_HEADING,
126: WS_GLOSSARY,
127: WS_LIST,
128: WS_DIR,
129: WS_PRE
130: } white_space_treatment;
131:
132: </pre>
133: <h2>Nesting State</h2>
134: These elements form a tree with an item for each nesting state: that
135: is, each unique combination of nested elements which has a
136: specific style.
137: <pre>
138: typedef struct _HTNesting {
139: void * style; /* HTStyle *: Platform dependent */
140: white_space_treatment wst;
141: struct _HTNesting * parent;
142: int element_number;
143: int item_number; /* only for ordered lists */
144: int list_level; /* how deep nested */
145: HTList * children;
146: BOOL paragraph_break;
147: int magic;
148: BOOL object_gens_HTML; /* we don't generate HTML */
149: } HTNesting;
150:
151:
152: </pre>
153: <H2>Nesting functions</H2>
154: These functions were new with HTML2.c. They allow the tree
155: of SGML nesting states to be manipulated, and SGML regenerated from the
156: style sequence.
157: <PRE>
158:
159: extern void HTRegenInit NOPARAMS;
160:
161: extern void HTRegenCharacter PARAMS((
162: char c,
163: HTNesting * nesting,
164: HTStructured * target));
165:
166: extern void HTNestingChange PARAMS((
167: HTStructured* s,
168: HTNesting* old,
2.18 ! frystyk 169: HTNesting * newnest,
2.16 timbl 170: HTChildAnchor * info,
171: CONST char * aName));
172:
173: extern HTNesting * HTMLCommonality PARAMS((
174: HTNesting * s1,
175: HTNesting * s2));
176:
177: extern HTNesting * HTNestElement PARAMS((HTNesting * p, int ele));
178: extern /* HTStyle * */ void * HTStyleForNesting PARAMS((HTNesting * n));
179:
180: extern HTNesting* HTMLAncestor PARAMS((HTNesting * old, int depth));
2.18 ! frystyk 181:
! 182: extern HTNesting* CopyBranch PARAMS((HTNesting * old, HTNesting * newnest,
! 183: int depth));
! 184:
2.16 timbl 185: extern HTNesting * HTInsertLevel PARAMS((HTNesting * old,
186: int element_number,
187: int level));
188: extern HTNesting * HTDeleteLevel PARAMS((HTNesting * old,
189: int level));
190: extern int HTMLElementNumber PARAMS((HTNesting * s));
191: extern int HTMLLevel PARAMS(( HTNesting * s));
192: extern HTNesting* HTMLAncestor PARAMS((HTNesting * old, int depth));
193:
194: #endif /* end HTML_H */
195:
196: </PRE>
197:
198: end</BODY>
2.7 timbl 199: </HTML>
Webmaster