Annotation of libwww/Library/src/SGML.html, revision 2.20
2.4 timbl 1: <HTML>
2: <HEAD>
2.17 frystyk 3: <TITLE>SGML Parse Definition</TITLE>
2.14 frystyk 4: </HEAD>
2.1 timbl 5: <BODY>
2.11 frystyk 6:
2.17 frystyk 7: <H1>SGML Parse Definition</H1>
2.11 frystyk 8:
2.14 frystyk 9: <PRE>
10: /*
2.18 frystyk 11: ** (c) COPYRIGHT MIT 1995.
2.14 frystyk 12: ** Please first read the full copyright statement in the file COPYRIGH.
13: */
14: </PRE>
15:
16: The SGML parser is a state machine. It is called for every character
17: of the input stream. The DTD data structure contains pointers to
18: functions which are called to implement the actual effect of the text
19: read. When these functions are called, the attribute structures
20: pointed to by the DTD are valid, and the function is passed a pointer
21: to the current tag structure, and an "element stack" which represents
22: the state of nesting within SGML elements. See also <A
23: HREF="HTStream.html">the generic Stream definition</A><P>
24:
25: The following aspects are from Dan Connolly's suggestions: Binary
26: search, Structured object scheme basically, SGML content enum type.<P>
27:
28: This module is implemented by <A HREF="SGML.c">SGML.c</A>, and it is a
29: part of the <A
2.20 ! frystyk 30: HREF="http://www.w3.org/pub/WWW/Library/">
2.19 frystyk 31: W3C Reference Library</A>.
2.11 frystyk 32:
33: <PRE>
34: #ifndef SGML_H
2.1 timbl 35: #define SGML_H
36:
37: #include "HTStream.h"
2.17 frystyk 38: #include "HTStruct.h"
2.16 frystyk 39: </PRE>
2.1 timbl 40:
41: <H2>SGML content types</H2>
42: <PRE>typedef enum _SGMLContent{
43: SGML_EMPTY, /* no content */
2.8 timbl 44: SGML_LITERAL, /* character data. Recognizes the exact close tag only.
2.1 timbl 45: Old www server compatibility only! Not SGML */
46: SGML_CDATA, /* character data. Recognizes </ only */
47: SGML_RCDATA, /* replaceable character data. Recognizes </ and &ref; */
48: SGML_MIXED, /* elements and parsed character data. Recognizes all markup */
49: SGML_ELEMENT /* any data found will be returned as an error */
50: } SGMLContent;
51:
52:
53: typedef struct {
54: char * name; /* The (constant) name of the attribute */
55: /* Could put type info in here */
56: } attr;
57:
58:
59: /* A tag structure describes an SGML element.
60: ** -----------------------------------------
61: **
62: **
63: ** name is the string which comes after the tag opener "<".
64: **
65: ** attributes points to a zero-terminated array
66: ** of attribute names.
67: **
68: ** literal determines how the SGML engine parses the characters
69: ** within the element. If set, tag openers are ignored
70: ** except for that which opens a matching closing tag.
71: **
72: */
73: typedef struct _tag HTTag;
74: struct _tag{
75: char * name; /* The name of the tag: the string after the tag opener "<" */
76: attr * attributes; /* Zero-terminated array of acceptable attributes */
77: int number_of_attributes; /* Number of possible attributes */
78: SGMLContent contents; /* Content type, see SGMLContent. Original note: End only on end tag @@ */
79: };
80:
81: /* DTD Information
82: ** ---------------
83: **
84: ** Not the whole DTD, but all this parser uses of it.
85: */
86: typedef struct {
2.2 timbl 87: HTTag * tags; /* Must be in strcmp order by name */
88: int number_of_tags; /* Presumably the number of entries in tags -- confirm against SGML.c */
89: CONST char ** entity_names; /* Must be in strcmp order by name */
90: int number_of_entities; /* Presumably the number of entries in entity_names -- confirm against SGML.c */
2.1 timbl 91: } SGML_dtd;
92:
2.17 frystyk 93: #define MAX_ATTRIBUTES 20 /* Max number of attributes per element */
94:
95: /* SGML context passed to parsers */
2.1 timbl 96:
97: typedef struct _HTSGMLContext *HTSGMLContext; /* Hidden */
2.17 frystyk 98: </PRE>
2.1 timbl 99:
2.17 frystyk 100: <H2>Find a Tag by Name</H2>
2.1 timbl 101:
2.17 frystyk 102: Returns a pointer to the tag within the DTD.
2.1 timbl 103:
2.17 frystyk 104: <PRE>
105: extern HTTag * SGMLFindTag PARAMS((CONST SGML_dtd* dtd, CONST char * string));
2.6 timbl 106: </PRE>
2.1 timbl 107:
2.17 frystyk 108: <H2>Find an Attribute by Name</H2>
2.1 timbl 109:
2.17 frystyk 110: Returns the number of the attribute or -1 on failure.
2.1 timbl 111:
2.17 frystyk 112: <PRE>
113: extern int SGMLFindAttribute PARAMS((HTTag* tag, CONST char * string));
2.6 timbl 114: </PRE>
2.10 timbl 115:
2.6 timbl 116: <H2>Create an SGML parser</H2>
117: <PRE>/*
2.1 timbl 118: ** On entry,
119: ** dtd must point to a DTD structure as defined above
120: ** callbacks must point to user routines.
121: ** callData is returned in callbacks transparently.
122: ** On exit,
123: ** The default tag starter has been processed.
124: */
125:
126:
127: extern HTStream* SGML_new PARAMS((
128: CONST SGML_dtd * dtd,
129: HTStructured * target));
130:
2.2 timbl 131: extern CONST HTStreamClass SGMLParser;
2.1 timbl 132:
133: #endif /* SGML_H */
2.8 timbl 134:
135: </PRE></BODY>
2.4 timbl 136: </HTML>
Webmaster