Annotation of libwww/Library/src/SGML.html, revision 2.29
2.4 timbl 1: <HTML>
2: <HEAD>
2.25 frystyk 3: <!-- Changed by: Henrik Frystyk Nielsen, 1-Jun-1996 -->
2.26 frystyk 4: <TITLE>W3C Sample Code Library libwww SGML Parser</TITLE>
2.14 frystyk 5: </HEAD>
2.1 timbl 6: <BODY>
2.25 frystyk 7: <H1>
8: SGML Parser
9: </H1>
2.14 frystyk 10: <PRE>
11: /*
2.18 frystyk 12: ** (c) COPYRIGHT MIT 1995.
2.14 frystyk 13: ** Please first read the full copyright statement in the file COPYRIGH.
14: */
15: </PRE>
2.25 frystyk 16: <P>
17: The SGML parser is a state machine. It is called for every character of the
18: input stream. The DTD data structure contains pointers to functions which
19: are called to implement the actual effect of the text read. When these functions
20: are called, the attribute structures pointed to by the DTD are valid, and
21: the function is passed a pointer to the current tag structure, and an "element
22: stack" which represents the state of nesting within SGML elements. See also
23: the <A HREF="HTStream.html">generic Stream definition</A>.
24: <P>
25: The following aspects are from Dan Connolly's suggestions: Binary search,
26: Structured object scheme basically, SGML content enum type.
27: <P>
28: This module is implemented by <A HREF="SGML.c">SGML.c</A>, and it is a part
2.27 frystyk 29: of the <A HREF="http://www.w3.org/Library/">W3C Sample Code Library</A>.
2.11 frystyk 30: <PRE>
31: #ifndef SGML_H
2.1 timbl 32: #define SGML_H
33:
34: #include "HTStream.h"
2.17 frystyk 35: #include "HTStruct.h"
2.16 frystyk 36: </PRE>
2.25 frystyk 37: <H2>
2.28 frystyk 38: SGML Content Types
2.25 frystyk 39: </H2>
2.28 frystyk 40: <PRE>
41: typedef enum _SGMLContent{ /* How the parser treats the content of an element */
2.25 frystyk 42: SGML_EMPTY, /* No content */
43: SGML_LITERAL, /* Character data. Recognizes the exact close tag only.
44: Old www server compatibility only! Not SGML */
45: SGML_CDATA, /* Character data. Recognizes </ only */
46: SGML_RCDATA, /* Replaceable character data. Recognizes </ and &ref; */
47: SGML_MIXED, /* Elements and parsed character data. Recognizes all markup */
48: SGML_ELEMENT /* Any data found will be returned as an error */
2.28 frystyk 49: } SGMLContent;
50: </PRE>
51: <H2>
52: Attribute Types
53: </H2>
54: <P>
55: Describes the SGML tag attribute
56: <PRE>
57: typedef struct _HTAttr { /* Describes one attribute of an SGML tag */
2.1 timbl 58: char * name; /* The (constant) name of the attribute */
59: /* Type information for the attribute could be added here */
2.28 frystyk 60: } HTAttr;
2.1 timbl 61:
2.28 frystyk 62: extern char * HTAttr_name (HTAttr * attr); /* presumably returns attr->name -- confirm in SGML.c */
63: </PRE>
64: <H2>
65: Tag Structure Describing SGML Elements
66: </H2>
67: <DL>
68: <DT>
69: name
70: <DD>
71: is the string which comes after the tag opener "<".
72: <DT>
73: attributes
74: <DD>
75: points to a zero-terminated array of attribute names.
76: <DT>
77: contents
78: <DD>
79: determines how the SGML engine parses the characters within the element
80: (see SGML Content Types above). For SGML_LITERAL, tag openers are ignored
81: except for that which opens a matching closing tag.
82: </DL>
83: <PRE>
84: typedef struct _HTTag { /* Describes one SGML element */
85: char * name; /* The name of the tag */
86: HTAttr * attributes; /* The list of acceptable attributes */
2.1 timbl 87: int number_of_attributes; /* Number of possible attributes */
88: SGMLContent contents; /* Content type of the element (an SGMLContent value) */
2.28 frystyk 89: } HTTag;
90:
91: extern char * HTTag_name (HTTag * tag); /* presumably returns tag->name -- confirm in SGML.c */
92: extern SGMLContent HTTag_content (HTTag * tag); /* presumably returns tag->contents -- confirm in SGML.c */
93: extern int HTTag_attributes (HTTag * tag); /* presumably returns tag->number_of_attributes -- confirm in SGML.c */
94: extern char * HTTag_attributeName (HTTag * tag, int attribute_number); /* presumably the name of attribute number attribute_number -- confirm in SGML.c */
95: </PRE>
96: <H2>
97: DTD Information
98: </H2>
99: <P>
100: Not the whole DTD, but all this parser uses of it.
101: <PRE>
102: #define MAX_ATTRIBUTES 20 /* Max number of attributes per element */
2.1 timbl 103:
104: typedef struct { /* The parts of the DTD that this parser uses */
2.2 timbl 105: HTTag * tags; /* Must be in strcmp order by name */
106: int number_of_tags; /* presumably the number of entries in tags -- confirm */
2.23 frystyk 107: const char ** entity_names; /* Must be in strcmp order by name */
2.2 timbl 108: int number_of_entities; /* presumably the number of entries in entity_names -- confirm */
2.1 timbl 109: } SGML_dtd;
110:
2.28 frystyk 111: extern HTTag * SGML_findTag (SGML_dtd * dtd, int element_number); /* presumably the entry of dtd->tags at index element_number -- confirm in SGML.c */
112: extern char * SGML_findTagName (SGML_dtd * dtd, int element_number); /* presumably the name of that tag -- confirm in SGML.c */
113: extern SGMLContent SGML_findTagContents (SGML_dtd * dtd, int element_number); /* presumably the content type of that tag -- confirm in SGML.c */
2.29 ! frystyk 114: extern int SGML_findElementNumber(SGML_dtd *dtd, char *name_element); /* presumably maps a tag name to its element number; not-found result not visible here -- confirm in SGML.c */
2.17 frystyk 115: </PRE>
2.25 frystyk 116: <H2>
2.28 frystyk 117: Create an SGML Parser Instance
2.25 frystyk 118: </H2>
2.28 frystyk 119: <P>
120: Create an SGML parser instance which converts a stream to a structured stream.
121: <PRE>
2.25 frystyk 122: extern HTStream * SGML_new (const SGML_dtd * dtd,
123: HTStructured * target); /* creates a parser instance that converts a stream to a structured stream; target is presumably that structured stream -- confirm */
2.28 frystyk 124: </PRE>
125: <PRE>
2.1 timbl 126: #endif /* SGML_H */
2.25 frystyk 127: </PRE>
2.28 frystyk 128: <P>
129: <HR>
130: <ADDRESS>
2.29 ! frystyk 131: @(#) $Id: SGML.html,v 2.28 1999/01/06 15:38:48 frystyk Exp $
2.28 frystyk 132: </ADDRESS>
2.25 frystyk 133: </BODY></HTML>
Webmaster