Annotation of libwww/Library/src/SGML.html, revision 2.5
2.4 timbl 1: <HTML>
2: <HEAD>
3: <TITLE>SGML parse and stream definition for libwww</TITLE></HEAD>
2.1 timbl 4: <BODY>
5: <H1>SGML and Structured streams</H1>The SGML parser is a state machine.
6: It is called for every character
7: of the input stream. The DTD data
8: structure contains pointers
9: to functions which are called to
10: implement the actual effect of the
11: text read. When these functions are
12: called, the attribute structures
13: pointed to by the DTD are valid,
14: and the function is passed a pointer
15: to the current tag structure, and
16: an "element stack" which represents
17: the state of nesting within SGML
18: elements.<P>
19: The following aspects are from Dan
20: Connolly's suggestions: Binary search,
21: Structured object scheme basically,
22: SGML content enum type.<P>
23: (c) Copyright CERN 1991 - See Copyright.html
24: <PRE>#ifndef SGML_H
25: #define SGML_H
26:
27: #include "HTUtils.h"
28: #include "HTStream.h"
29:
30: </PRE>
31: <H2>SGML content types</H2>
32: <PRE>typedef enum _SGMLContent{
33: SGML_EMPTY, /* no content */
34: SGML_LITTERAL, /* literal character data. Only the exact close tag is recognised.
35: Old www server compatibility only! Not SGML */
36: SGML_CDATA, /* character data. recognize </ only */
37: SGML_RCDATA, /* replaceable character data. recognize </ and &ref; */
38: SGML_MIXED, /* elements and parsed character data. recognize all markup */
39: SGML_ELEMENT /* any data found will be returned as an error */
40: } SGMLContent;
41:
42:
43: typedef struct { /* Describes one attribute that a tag may accept */
44: char * name; /* The (constant) name of the attribute */
45: /* Could put type info in here */
46: } attr;
47:
48:
49: /* A tag structure describes an SGML element.
50: ** -----------------------------------------
51: **
52: **
53: ** name is the string which comes after the tag opener "<".
54: **
55: ** attributes points to a zero-terminated array
56: ** of attribute names.
57: **
58: ** contents determines how the SGML engine parses the characters
59: ** within the element. If SGML_LITTERAL, tag openers are ignored
60: ** except for that which opens a matching closing tag.
61: **
62: */
63: typedef struct _tag HTTag;
64: struct _tag{
65: char * name; /* The name of the tag */
66: attr * attributes; /* The list of acceptable attributes */
67: int number_of_attributes; /* Number of possible attributes */
68: SGMLContent contents; /* How the element's content is parsed; see SGMLContent */
69: };
70:
71:
72:
73:
74: /* DTD Information
75: ** ---------------
76: **
77: ** Not the whole DTD, but all this parser uses of it.
78: */
79: typedef struct {
2.2 timbl 80: HTTag * tags; /* Must be in strcmp order by name */
81: int number_of_tags; /* presumably the number of entries in tags -- confirm */
82: CONST char ** entity_names; /* Must be in strcmp order by name */
83: int number_of_entities; /* presumably the number of entries in entity_names -- confirm */
2.1 timbl 84: } SGML_dtd;
85:
86:
87: /* SGML context passed to parsers
88: */
89: typedef struct _HTSGMLContext *HTSGMLContext; /* Hidden: struct _HTSGMLContext is not defined in this header */
90:
91:
92: /*__________________________________________________________________________
93: */
94: /* Structured Object definition
95: **
96: ** A structured object is something which can reasonably be
97: ** represented in SGML. I'll rephrase that. A structured
98: ** object is an ordered tree-structured arrangement of data
99: ** which is representable as text.
100: **
101: ** The SGML parser outputs to a Structured object.
102: ** A Structured object can output its contents
103: ** to another Structured Object.
104: ** It's a kind of typed stream. The architecture
105: ** is largely Dan Connolly's.
106: ** Elements and entities are passed to the sob by number, implying
107: ** a knowledge of the DTD.
108: ** Knowledge of the SGML syntax is not here, though.
109: **
110: ** Superclass: HTStream
111: */
112:
113:
114: /* The creation methods will vary on the type of Structured Object.
115: ** Maybe the callerData is enough info to pass along.
116: */
117:
118: typedef struct _HTStructured HTStructured;
119: /* Table of method pointers for a Structured object; every method takes the object as "me". */
120: typedef struct _HTStructuredClass{
121:
122: char* name; /* Just for diagnostics */
123: /* free: NOTE(review): presumably disposes of the object -- exact contract not visible here */
124: void (*free) PARAMS((
125: HTStructured* me));
126: /* abort: takes an HTError e; NOTE(review): HTError is declared elsewhere -- confirm semantics */
2.4 timbl 127: void (*abort) PARAMS((
2.5 ! timbl 128: HTStructured* me,
! 129: HTError e));
2.1 timbl 130: /* put_character: passes one character, ch, to the object */
131: void (*put_character) PARAMS((
132: HTStructured* me,
133: char ch));
134: /* put_string: passes a string, str; presumably NUL-terminated since no length is given -- confirm */
135: void (*put_string) PARAMS((
136: HTStructured* me,
137: CONST char * str));
138: /* write: passes str together with an explicit length, len */
139: void (*write) PARAMS((
2.2 timbl 140: HTStructured* me,
2.1 timbl 141: CONST char * str,
142: int len));
143: /* start_element: element given by number; attribute_present/attribute_value presumably parallel the tag's attributes -- confirm */
144: void (*start_element) PARAMS((
145: HTStructured* me,
146: int element_number,
2.2 timbl 147: CONST BOOL* attribute_present,
148: CONST char** attribute_value));
2.1 timbl 149: /* end_element: element given by number */
150: void (*end_element) PARAMS((
151: HTStructured* me,
152: int element_number));
153: /* put_entity: entity given by number */
154: void (*put_entity) PARAMS((
155: HTStructured* me,
156: int entity_number));
157:
158: }HTStructuredClass;
159:
160:
161:
162: /* Create an SGML parser
163: **
164: ** On entry,
165: ** dtd must point to a DTD structure as defined above
166: ** target is the Structured object the parser outputs to (presumably; confirm).
167: ** NOTE(review): "callbacks" and "callData", formerly described here, are not parameters of SGML_new.
168: ** On exit,
169: ** The default tag starter has been processed.
170: */
171:
172:
173: extern HTStream* SGML_new PARAMS((
174: CONST SGML_dtd * dtd,
175: HTStructured * target));
176:
2.2 timbl 177: extern CONST HTStreamClass SGMLParser; /* NOTE(review): presumably the stream class of the SGML parser; defined elsewhere */
2.1 timbl 178:
179:
180: #endif /* SGML_H */
181:
2.3 timbl 182:
183:
2.4 timbl 184:
185: </BODY>
186: </HTML>
Webmaster