Annotation of libwww/Library/src/SGML.html, revision 2.2
2.1 timbl 1: <HEADER>
2: <TITLE>/Net/dxcern/userd/timbl/hypertext/WWW/Library/Implementation/SGML.html</TITLE></HEADER>
3: <BODY>
4: <H1>SGML and Structured streams</H1>The SGML parser is a state machine.
5: It is called for every character<P>
6: of the input stream. The DTD data
7: structure contains pointers<P>
8: to functions which are called to
9: implement the actual effect of the<P>
10: text read. When these functions are
11: called, the attribute structures
12: pointed to by the DTD are valid,
13: and the function is passed a pointer
14: to the current tag structure, and
15: an "element stack" which represents
16: the state of nesting within SGML
17: elements.<P>
18: The following aspects are from Dan
19: Connolly's suggestions: Binary search,
20: Structured object scheme basically,
21: SGML content enum type.<P>
22: (c) Copyright CERN 1991 - See Copyright.html
23: <PRE>#ifndef SGML_H
24: #define SGML_H
25:
26: #include "HTUtils.h"
27: #include "HTStream.h"
28:
29: </PRE>
30: <H2>SGML content types</H2>
31: <PRE>typedef enum _SGMLContent{ /* Content type of an element: which markup is recognised inside it */
32: SGML_EMPTY, /* no content */
33: SGML_LITTERAL, /* literal character data. Recognises the exact close tag only.
34: Old www server compatibility only! Not SGML */
35: SGML_CDATA, /* character data. recognize </ only */
36: SGML_RCDATA, /* replaceable character data. recognize </ and &ref; */
37: SGML_MIXED, /* elements and parsed character data. recognize all markup */
38: SGML_ELEMENT /* any data found will be returned as an error */
39: } SGMLContent;
40:
41:
42: typedef struct { /* Describes one attribute; HTTag.attributes points to an array of these */
43: char * name; /* The (constant) name of the attribute */
44: /* Could put type info in here */
45: } attr;
46:
47:
48: /* A tag structure describes an SGML element.
49: ** -----------------------------------------
50: **
51: **
52: ** name is the string which comes after the tag opener "<".
53: **
54: ** attributes points to a zero-terminated array
55: ** of attribute names; number_of_attributes holds the count.
56: **
57: ** litteral determines how the SGML engine parses the characters
58: ** within the element. If set, tag openers are ignored
59: ** except for that which opens a matching closing tag.
60: ** NOTE(review): no member is named litteral; presumably this now means contents == SGML_LITTERAL -- confirm.
61: */
62: typedef struct _tag HTTag;
63: struct _tag{
64: char * name; /* The name of the tag */
65: attr * attributes; /* The list of acceptable attributes */
66: int number_of_attributes; /* Number of possible attributes */
67: SGMLContent contents; /* Content type, see SGMLContent. End only on end tag @@ */
68: };
69:
70:
71:
72:
73: /* DTD Information
74: ** ---------------
75: **
76: ** Not the whole DTD, but all this parser uses of it.
77: */
78: typedef struct {
2.2 ! timbl 79: HTTag * tags; /* Must be in strcmp order by name */
! 80: int number_of_tags; /* presumably the number of entries in tags -- confirm */
! 81: CONST char ** entity_names; /* Must be in strcmp order by name */
! 82: int number_of_entities; /* presumably the number of entries in entity_names -- confirm */
2.1 timbl 83: } SGML_dtd;
84:
85:
86: /* SGML context passed to parsers
87: */
88: typedef struct _HTSGMLContext *HTSGMLContext; /* Hidden: pointer to a struct whose body is not declared in this header */
89:
90:
91: /*__________________________________________________________________________
92: */
93: /* Structured Object definition
94: **
95: ** A structured object is something which can reasonably be
96: ** represented in SGML. I'll rephrase that. A structured
97: ** object is an ordered tree-structured arrangement of data
98: ** which is representable as text.
99: **
100: ** The SGML parser outputs to a Structured object.
101: ** A Structured object can output its contents
102: ** to another Structured Object.
103: ** It's a kind of typed stream. The architecture
104: ** is largely Dan Connolly's.
105: ** Elements and entities are passed to the sob by number, implying
106: ** a knowledge of the DTD.
107: ** Knowledge of the SGML syntax is not here, though.
108: **
109: ** Superclass: HTStream
110: */
111:
112:
113: /* The creation methods will vary on the type of Structured Object.
114: ** Maybe the callerData is enough info to pass along.
115: */
116:
117: typedef struct _HTStructured HTStructured; /* Incomplete type here; this header only uses pointers to it */
118:
119: typedef struct _HTStructuredClass{ /* Table of function pointers; each one takes the object itself as me */
120:
121: char* name; /* Just for diagnostics */
122:
123: void (*free) PARAMS((
124: HTStructured* me));
125:
126: void (*end_document) PARAMS((
127: HTStructured* me));
128:
129: void (*put_character) PARAMS((
130: HTStructured* me,
131: char ch)); /* ch: a single character */
132:
133: void (*put_string) PARAMS((
134: HTStructured* me,
135: CONST char * str)); /* str: presumably NUL-terminated, as no length is passed -- confirm */
136:
137: void (*write) PARAMS((
2.2 ! timbl 138: HTStructured* me,
2.1 timbl 139: CONST char * str,
140: int len)); /* len: presumably the number of characters at str -- confirm */
141:
142: void (*start_element) PARAMS((
143: HTStructured* me,
144: int element_number,
2.2 ! timbl 145: CONST BOOL* attribute_present,
! 146: CONST char** attribute_value)); /* both arrays presumably indexed by attribute number of the tag -- confirm */
2.1 timbl 147:
148: void (*end_element) PARAMS((
149: HTStructured* me,
150: int element_number)); /* element_number: presumably an index into SGML_dtd.tags -- confirm */
151:
152: void (*put_entity) PARAMS((
153: HTStructured* me,
154: int entity_number)); /* entity_number: presumably an index into SGML_dtd.entity_names -- confirm */
155:
156: }HTStructuredClass;
157:
158:
159:
160: /* Create an SGML parser
161: **
162: ** On entry,
163: ** dtd must point to a DTD structure as defined above
164: ** target is a Structured object; presumably it receives the parser's output -- confirm.
165: ** NOTE(review): this comment used to describe "callbacks" and "callData", which are not parameters of SGML_new.
166: ** On exit,
167: ** The default tag starter has been processed.
168: */
169:
170:
171: extern HTStream* SGML_new PARAMS((
172: CONST SGML_dtd * dtd,
173: HTStructured * target));
174:
2.2 ! timbl 175: extern CONST HTStreamClass SGMLParser; /* Declared here only; the definition is not in this header */
2.1 timbl 176:
177:
178: #endif /* SGML_H */
179:
2.2 ! timbl 180: </PRE>
! 181: <p>
2.1 timbl 182: </BODY>
Webmaster