Annotation of libwww/Library/src/HTML.html, revision 2.20.2.1
2.7 timbl 1: <HTML>
2: <HEAD>
3: <TITLE>HTML to rich text converter for libwww</TITLE>
4: </HEAD>
2.6 timbl 5: <BODY>
2.20 frystyk 6:
7: <H1>The HTML to styled text object converter</H1>
8:
9: <PRE>
10: /*
11: ** (c) COPYRIGHT CERN 1994.
12: ** Please first read the full copyright statement in the file COPYRIGH.
13: */
14: </PRE>
15:
16: This interprets the <A
17: HREF="http://info.cern.ch/hypertext/WWW/MarkUp/MarkUp.html">HTML</A>
18: semantics and some HTMLPlus.<P>
19:
20: This module is implemented by <A HREF="HTML.c">HTML.c</A>, and it is
21: a part of the <A
22: HREF="http://info.cern.ch/hypertext/WWW/Library/User/Guide/Guide.html">
23: Library of Common Code</A>.
24:
25: <PRE>
26: #ifndef HTML_H
2.1 timbl 27: #define HTML_H
28:
29: #include "HTUtils.h"
2.13 luotonen 30: #include "HTFormat.h"
2.1 timbl 31: #include "HTAnchor.h"
2.8 timbl 32: #include "HTMLPDTD.h"
2.1 timbl 33:
2.8 timbl 34: #define DTD HTMLP_dtd
35:
2.6 timbl 36: #ifdef SHORT_NAMES
37: #define HTMLPresentation HTMLPren
38: #define HTMLPresent HTMLPres
39: #endif
2.1 timbl 40:
41: extern CONST HTStructuredClass HTMLPresentation;
2.17 frystyk 42: </PRE>
43:
44: <H2>HTML_new: A structured stream to parse HTML</H2>
2.1 timbl 45:
2.17 frystyk 46: When this routine is called, the request structure may contain a <A
47: NAME="z4" HREF="HTAccess.html#z6">childAnchor</A> value. In that case
48: it is the responsibility of this module to select the anchor.<P>
2.12 timbl 49:
2.9 timbl 50: <PRE>extern HTStructured* HTML_new PARAMS((HTRequest * request,
2.8 timbl 51: void * param,
52: HTFormat input_format,
53: HTFormat output_format,
54: HTStream * output_stream));
55:
2.10 luotonen 56: </PRE>
2.1 timbl 57:
2.17 frystyk 58: <H3>Reopen</H3>
59:
60: Reopening an existing HTML object allows it to be retained (for
61: example by the styled text object) after the structured stream has
62: been closed. To be actually deleted, the HTML object must be closed
63: one more time than it has been reopened.
64:
65: <PRE>
66: extern void HTML_reopen PARAMS((HTStructured * me));
2.10 luotonen 67: </PRE>
2.17 frystyk 68:
2.10 luotonen 69: <H2>Converters</H2>
2.8 timbl 70:
2.17 frystyk 71: These are the converters implemented in this module:
2.8 timbl 72:
2.17 frystyk 73: <PRE>
74: #ifndef pyramid
75: extern HTConverter HTMLToPlain, HTMLToC, HTMLPresent, HTMLToTeX;
76: #endif
2.8 timbl 77: </PRE>
2.17 frystyk 78:
2.8 timbl 79: <H2>Selecting internal character set
80: representations</H2>
81: <PRE>typedef enum _HTMLCharacterSet { /* internal character set representations; the argument type of HTMLUseCharacterSet() */
2.1 timbl 82: HTML_ISO_LATIN1,
83: HTML_NEXT_CHARS,
84: HTML_PC_CP950 /* NOTE(review): code page is known from the name only; confirm which PC code page is intended */
85: } HTMLCharacterSet;
86:
87: extern void HTMLUseCharacterSet PARAMS((HTMLCharacterSet i));
88:
2.6 timbl 89: </PRE>
90: <H2>Record error message as a hypertext
91: object</H2>The error message should be marked
92: as an error so that it can be reloaded
93: later. This implementation just throws
94: up an error message and leaves the
95: document unloaded.
2.10 luotonen 96: <H3>On entry,</H3>
97: <DL>
98: <DT>sink
2.11 timbl 99: <DD> is a stream to the output device
2.10 luotonen 100: if any
101: <DT>number
2.11 timbl 102: <DD> is the HTTP error number
2.10 luotonen 103: <DT>message
2.11 timbl 104: <DD> is the human readable message.
2.10 luotonen 105: </DL>
106:
107: <H3>On exit,</H3>a return code like HT_LOADED if object
108: exists else &lt; 0
2.19 frystyk 109: <PRE>extern int HTLoadError PARAMS((
2.14 luotonen 110: HTRequest * req,
2.1 timbl 111: int number,
112: CONST char * message));
113:
2.6 timbl 114:
2.16 timbl 115: </PRE>
2.20.2.1! frystyk 116:
! 117: <H2>White Space Treatment</H2>
! 118:
! 119: There is a small number of different ways of treating white space in
! 120: SGML, in mapping from a text object to HTML. These have to be
! 121: programmed it seems.
! 122:
! 123: <PRE>
2.16 timbl 124: /*
125: In text object \n\n \n tab \n\n\t
126: -------------- ------------- ----- ----- -------
127: in Address,
128: Blockquote,
2.20.2.1! frystyk 129: Normal, <P> <BR> - NORMAL
! 130: H1-6: close+open <BR> - HEADING
! 131: Glossary <DT> <DT> <DD> <P> GLOSSARY
2.16 timbl 132: List,
2.20.2.1! frystyk 133: Menu <LI> <LI> - <P> LIST
! 134: Dir <LI> <LI> <LI> DIR
2.16 timbl 135: Pre etc \n\n \n \t PRE
2.7 timbl 136:
2.16 timbl 137: */
138:
139: typedef enum _white_space_treatment { /* which row of the white-space table in the comment above applies */
140: WS_NORMAL, /* NORMAL row: Address, Blockquote, Normal */
141: WS_HEADING, /* HEADING row: H1-6 */
142: WS_GLOSSARY, /* GLOSSARY row: Glossary */
143: WS_LIST, /* LIST row: List, Menu */
144: WS_DIR, /* DIR row: Dir */
145: WS_PRE /* PRE row: Pre etc; newlines and tabs are kept as-is per the table */
146: } white_space_treatment;
147:
148: </pre>
2.20.2.1! frystyk 149:
2.16 timbl 150: <h2>Nesting State</h2>
151: These elements form a tree with an item for each nesting state: that
152: is, each unique combination of nested elements which has a
153: specific style.
154: <pre>
155: typedef struct _HTNesting { /* one node of the nesting-state tree: a unique combination of nested elements which has a specific style */
156: void * style; /* HTStyle *: Platform dependent */
157: white_space_treatment wst; /* white space treatment for this nesting state */
158: struct _HTNesting * parent; /* parent node in the nesting tree */
159: int element_number; /* presumably the element number given to HTNestElement/HTInsertLevel -- TODO confirm */
160: int item_number; /* only for ordered lists */
161: int list_level; /* how deep nested */
162: HTList * children; /* child nodes; presumably a list of HTNesting -- TODO confirm */
163: BOOL paragraph_break; /* NOTE(review): meaning not visible in this header; see HTML.c */
164: int magic; /* NOTE(review): meaning not visible in this header; see HTML.c */
165: BOOL object_gens_HTML; /* we don't generate HTML -- NOTE(review): field name and this comment read as opposites; confirm polarity */
166: } HTNesting;
167:
168:
169: </pre>
170: <H2>Nesting functions</H2>
171: These functions were new with HTML2.c. They allow the tree
172: of SGML nesting states to be manipulated, and SGML regenerated from the
173: style sequence.
174: <PRE>
175:
176: extern void HTRegenInit NOPARAMS;
177:
178: extern void HTRegenCharacter PARAMS((
179: char c,
180: HTNesting * nesting,
181: HTStructured * target));
182:
183: extern void HTNestingChange PARAMS((
184: HTStructured* s,
185: HTNesting* old,
2.18 frystyk 186: HTNesting * newnest,
2.16 timbl 187: HTChildAnchor * info,
188: CONST char * aName));
189:
190: extern HTNesting * HTMLCommonality PARAMS((
191: HTNesting * s1,
192: HTNesting * s2));
193:
194: extern HTNesting * HTNestElement PARAMS((HTNesting * p, int ele));
195: extern /* HTStyle * */ void * HTStyleForNesting PARAMS((HTNesting * n));
196:
197: extern HTNesting* HTMLAncestor PARAMS((HTNesting * old, int depth));
2.18 frystyk 198:
199: extern HTNesting* CopyBranch PARAMS((HTNesting * old, HTNesting * newnest,
200: int depth));
201:
2.16 timbl 202: extern HTNesting * HTInsertLevel PARAMS((HTNesting * old,
203: int element_number,
204: int level));
205: extern HTNesting * HTDeleteLevel PARAMS((HTNesting * old,
206: int level));
207: extern int HTMLElementNumber PARAMS((HTNesting * s));
208: extern int HTMLLevel PARAMS(( HTNesting * s));
209: extern HTNesting* HTMLAncestor PARAMS((HTNesting * old, int depth)); /* NOTE(review): repeats the identical HTMLAncestor declaration earlier in this header; redundant but harmless */
210:
211: #endif /* end HTML_H */
212:
213: </PRE>
214:
215: end</BODY>
2.7 timbl 216: </HTML>
Webmaster