Annotation of libwww/Library/src/SGML.html, revision 2.31
2.4 timbl 1: <HTML>
2: <HEAD>
2.25 frystyk 3: <!-- Changed by: Henrik Frystyk Nielsen, 1-Jun-1996 -->
2.26 frystyk 4: <TITLE>W3C Sample Code Library libwww SGML Parser</TITLE>
2.14 frystyk 5: </HEAD>
2.1 timbl 6: <BODY>
2.25 frystyk 7: <H1>
8: SGML Parser
9: </H1>
2.14 frystyk 10: <PRE>
11: /*
2.18 frystyk 12: ** (c) COPYRIGHT MIT 1995.
2.14 frystyk 13: ** Please first read the full copyright statement in the file COPYRIGH.
14: */
15: </PRE>
2.25 frystyk 16: <P>
17: The SGML parser is a state machine. It is called for every character of the
18: input stream. The DTD data structure contains pointers to functions which
19: are called to implement the actual effect of the text read. When these functions
20: are called, the attribute structures pointed to by the DTD are valid, and
21: the function is passed a pointer to the current tag structure, and an "element
22: stack" which represents the state of nesting within SGML elements. See also
23: the <A HREF="HTStream.html">generic Stream definition</A>.
24: <P>
25: The following aspects are from Dan Connolly's suggestions: Binary search,
26: Structured object scheme basically, SGML content enum type.
27: <P>
28: This module is implemented by <A HREF="SGML.c">SGML.c</A>, and it is a part
2.27 frystyk 29: of the <A HREF="http://www.w3.org/Library/">W3C Sample Code Library</A>.
2.11 frystyk 30: <PRE>
31: #ifndef SGML_H
2.1 timbl 32: #define SGML_H
33:
34: #include "HTStream.h"
2.17 frystyk 35: #include "HTStruct.h"
2.31 ! vbancrof 36:
! 37: #ifdef __cplusplus
! 38: extern "C" {
! 39: #endif
2.16 frystyk 40: </PRE>
2.25 frystyk 41: <H2>
2.28 frystyk 42: SGML Content Types
2.25 frystyk 43: </H2>
2.28 frystyk 44: <PRE>
45: typedef enum _SGMLContent{
2.25 frystyk 46: SGML_EMPTY, /* No content */
47: SGML_LITERAL, /* Character data; only the exact close tag is recognized.
48: Old www server compatibility only! Not SGML */
49: SGML_CDATA, /* Character data; only </ is recognized */
50: SGML_RCDATA, /* Replaceable character data; </ and &ref; are recognized */
51: SGML_MIXED, /* Elements and parsed character data; all markup is recognized */
52: SGML_ELEMENT /* Any data found will be returned as an error */
2.28 frystyk 53: } SGMLContent;
54: </PRE>
55: <H2>
56: Attribute Types
57: </H2>
58: <P>
59: Describes the SGML tag attribute
60: <PRE>
61: typedef struct _HTAttr {
2.1 timbl 62: char * name; /* The (constant) name of the attribute */
63: /* No type information is stored yet; it could be added here */
2.28 frystyk 64: } HTAttr;
2.1 timbl 65:
2.28 frystyk 66: extern char * HTAttr_name (HTAttr * attr); /* Accessor for the attribute name; presumably returns attr->name (confirm in SGML.c) */
67: </PRE>
68: <H2>
69: Tag Structure Describing SGML Elements
70: </H2>
71: <DL>
72: <DT>
73: name
74: <DD>
75: is the string which comes after the tag opener "&lt;".
76: <DT>
77: attributes
78: <DD>
79: points to a zero-terminated array of attribute names.
80: <DT>
81: litteral
82: <DD>
83: determines how the SGML engine parses the characters within the element. If
84: set, tag openers are ignored except for that which opens a matching closing
85: tag.
86: </DL>
87: <PRE>
88: typedef struct _HTTag {
89: char * name; /* The name of the tag */
90: HTAttr * attributes; /* The list of acceptable attributes */
2.1 timbl 91: int number_of_attributes; /* Number of possible attributes */
92: SGMLContent contents; /* SGML content type of the element (an SGMLContent value). NOTE(review): the earlier comment here read "End only on end tag @@"; confirm whether it still applies */
2.28 frystyk 93: } HTTag;
94:
95: extern char * HTTag_name (HTTag * tag); /* Accessor; presumably returns tag->name (confirm in SGML.c) */
96: extern SGMLContent HTTag_content (HTTag * tag); /* Accessor; presumably returns tag->contents (confirm in SGML.c) */
97: extern int HTTag_attributes (HTTag * tag); /* Accessor; presumably returns tag->number_of_attributes (confirm in SGML.c) */
98: extern char * HTTag_attributeName (HTTag * tag, int attribute_number); /* presumably attribute_number indexes tag->attributes; valid range to be confirmed in SGML.c */
99: </PRE>
100: <H2>
101: DTD Information
102: </H2>
103: <P>
104: Not the whole DTD, but all this parser uses of it.
105: <PRE>
2.30 frystyk 106: #define MAX_ATTRIBUTES 32 /* Max number of attributes per element */
2.1 timbl 107:
108: typedef struct {
2.2 timbl 109: HTTag * tags; /* Element descriptions. Must be in strcmp order by name (presumably so the parser can binary-search; confirm in SGML.c) */
110: int number_of_tags; /* presumably the number of entries in tags; confirm in SGML.c */
2.23 frystyk 111: const char ** entity_names; /* Entity names. Must be in strcmp order by name */
2.2 timbl 112: int number_of_entities; /* presumably the number of entries in entity_names; confirm in SGML.c */
2.1 timbl 113: } SGML_dtd;
114:
2.28 frystyk 115: extern HTTag * SGML_findTag (SGML_dtd * dtd, int element_number); /* Look up a tag by element number; presumably an index into dtd->tags (confirm in SGML.c) */
116: extern char * SGML_findTagName (SGML_dtd * dtd, int element_number); /* As SGML_findTag, but presumably yields the tag's name (confirm in SGML.c) */
117: extern SGMLContent SGML_findTagContents (SGML_dtd * dtd, int element_number); /* As SGML_findTag, but presumably yields the tag's contents (confirm in SGML.c) */
2.29 frystyk 118: extern int SGML_findElementNumber(SGML_dtd *dtd, char *name_element); /* presumably the reverse lookup, from element name to element number; not-found result to be confirmed in SGML.c */
2.17 frystyk 119: </PRE>
2.25 frystyk 120: <H2>
2.28 frystyk 121: Create an SGML Parser Instance
2.25 frystyk 122: </H2>
2.28 frystyk 123: <P>
124: Create an SGML parser instance which converts a stream to a structured stream.
125: <PRE>
2.25 frystyk 126: extern HTStream * SGML_new (const SGML_dtd * dtd, /* DTD information the parser uses */
127: HTStructured * target); /* Structured stream that the parsed input is converted to */
2.28 frystyk 128: </PRE>
129: <PRE>
2.31 ! vbancrof 130: #ifdef __cplusplus
! 131: }
! 132: #endif
! 133:
2.1 timbl 134: #endif /* SGML_H */
2.25 frystyk 135: </PRE>
2.28 frystyk 136: <P>
137: <HR>
138: <ADDRESS>
2.31 ! vbancrof 139: @(#) $Id: SGML.html,v 2.30 1999/04/02 16:10:36 frystyk Exp $
2.28 frystyk 140: </ADDRESS>
2.25 frystyk 141: </BODY></HTML>
Webmaster