Annotation of libwww/Library/src/SGML.html, revision 2.21
2.4 timbl 1: <HTML>
2: <HEAD>
2.17 frystyk 3: <TITLE>SGML Parse Definition</TITLE>
2.21 ! frystyk 4: <!-- Changed by: Henrik Frystyk Nielsen, 19-Nov-1995 -->
2.14 frystyk 5: </HEAD>
2.1 timbl 6: <BODY>
2.11 frystyk 7:
2.17 frystyk 8: <H1>SGML Parse Definition</H1>
2.11 frystyk 9:
2.14 frystyk 10: <PRE>
11: /*
2.18 frystyk 12: ** (c) COPYRIGHT MIT 1995.
2.14 frystyk 13: ** Please first read the full copyright statement in the file COPYRIGH.
14: */
15: </PRE>
16:
17: The SGML parser is a state machine. It is called for every character
18: of the input stream. The DTD data structure contains pointers to
19: functions which are called to implement the actual effect of the text
20: read. When these functions are called, the attribute structures
21: pointed to by the DTD are valid, and the function is passed a pointer
22: to the current tag structure, and an "element stack" which represents
23: the state of nesting within SGML elements. See also the <A
24: HREF="HTStream.html">generic Stream definition</A><P>
25:
26: The following aspects are from Dan Connolly's suggestions: Binary
27: search, Structured object scheme basically, SGML content enum type.<P>
28:
29: This module is implemented by <A HREF="SGML.c">SGML.c</A>, and it is a
30: part of the <A
2.20 frystyk 31: HREF="http://www.w3.org/pub/WWW/Library/">
2.19 frystyk 32: W3C Reference Library</A>.
2.11 frystyk 33:
34: <PRE>
35: #ifndef SGML_H
2.1 timbl 36: #define SGML_H
37:
38: #include "HTStream.h"
2.17 frystyk 39: #include "HTStruct.h"
2.16 frystyk 40: </PRE>
2.1 timbl 41:
42: <H2>SGML content types</H2>
43: <PRE>typedef enum _SGMLContent{
44: SGML_EMPTY, /* no content */
2.8 timbl 45: SGML_LITERAL, /* character data. Recognizes the exact close tag only.
2.1 timbl 46: Old www server compatibility only! Not SGML */
47: SGML_CDATA, /* character data. Recognizes </ only */
48: SGML_RCDATA, /* replaceable character data. Recognizes </ and &ref; */
49: SGML_MIXED, /* elements and parsed character data. Recognizes all markup */
50: SGML_ELEMENT /* any data found will be returned as an error */
51: } SGMLContent;
52:
53:
54: typedef struct {
55: char * name; /* The (constant) name of the attribute */
56: /* Type information could be added here */
57: } attr;
58:
59:
60: /* A tag structure describes an SGML element.
61: ** -----------------------------------------
62: **
63: **
64: ** name is the string which comes after the tag opener "<".
65: **
66: ** attributes points to a zero-terminated array
67: ** of attribute names.
68: **
69: ** contents determines how the SGML engine parses the characters
70: ** within the element. For SGML_LITERAL, tag openers are ignored
71: ** except for that which opens a matching closing tag.
72: **
73: */
74: typedef struct _tag HTTag;
75: struct _tag{
76: char * name; /* The name of the tag */
77: attr * attributes; /* The list of acceptable attributes */
78: int number_of_attributes; /* Number of possible attributes */
79: SGMLContent contents; /* How the element's content is parsed @@ */
80: };
81:
82: /* DTD Information
83: ** ---------------
84: **
85: ** Not the whole DTD, but all this parser uses of it.
86: */
87: typedef struct {
2.2 timbl 88: HTTag * tags; /* Must be in strcmp order by name */
89: int number_of_tags;
90: CONST char ** entity_names; /* Must be in strcmp order by name */
91: int number_of_entities;
2.1 timbl 92: } SGML_dtd;
93:
2.17 frystyk 94: #define MAX_ATTRIBUTES 20 /* Max number of attributes per element */
95:
96: /* SGML context passed to parsers */
2.1 timbl 97:
98: typedef struct _HTSGMLContext *HTSGMLContext; /* Hidden: struct _HTSGMLContext is not defined in this header */
2.17 frystyk 99: </PRE>
2.1 timbl 100:
2.17 frystyk 101: <H2>Find a Tag by Name</H2>
2.1 timbl 102:
2.17 frystyk 103: Returns a pointer to the tag within the DTD.
2.1 timbl 104:
2.17 frystyk 105: <PRE>
2.21 ! frystyk 106: extern HTTag * SGMLFindTag (CONST SGML_dtd* dtd, CONST char * string); /* string is the tag name to look up in dtd */
2.6 timbl 107: </PRE>
2.1 timbl 108:
2.17 frystyk 109: <H2>Find an Attribute by Name</H2>
2.1 timbl 110:
2.17 frystyk 111: Returns the number of the attribute or -1 on failure.
2.1 timbl 112:
2.17 frystyk 113: <PRE>
2.21 ! frystyk 114: extern int SGMLFindAttribute (HTTag* tag, CONST char * string); /* string is the attribute name; returns its number, or -1 on failure */
2.6 timbl 115: </PRE>
2.10 timbl 116:
2.6 timbl 117: <H2>Create an SGML parser</H2>
118: <PRE>/*
2.1 timbl 119: ** On entry,
120: ** dtd must point to a DTD structure as defined above
121: ** target must point to an HTStructured object; presumably it receives
122: ** the parsed output (older text named callbacks/callData) -- confirm.
123: ** On exit,
124: ** The default tag starter has been processed.
125: */
126:
127:
2.21 ! frystyk 128: extern HTStream* SGML_new (
2.1 timbl 129: CONST SGML_dtd * dtd,
2.21 ! frystyk 130: HTStructured * target);
2.1 timbl 131:
2.2 timbl 132: extern CONST HTStreamClass SGMLParser;
2.1 timbl 133:
134: #endif /* SGML_H */
2.8 timbl 135:
136: </PRE></BODY>
2.4 timbl 137: </HTML>
Webmaster