Annotation of libwww/Library/src/SGML.html, revision 2.28
2.4 timbl 1: <HTML>
2: <HEAD>
2.25 frystyk 3: <!-- Changed by: Henrik Frystyk Nielsen, 1-Jun-1996 -->
2.26 frystyk 4: <TITLE>W3C Sample Code Library libwww SGML Parser</TITLE>
2.14 frystyk 5: </HEAD>
2.1 timbl 6: <BODY>
2.25 frystyk 7: <H1>
8: SGML Parser
9: </H1>
2.14 frystyk 10: <PRE>
11: /*
2.18 frystyk 12: ** (c) COPYRIGHT MIT 1995.
2.14 frystyk 13: ** Please first read the full copyright statement in the file COPYRIGH.
14: */
15: </PRE>
2.25 frystyk 16: <P>
17: The SGML parser is a state machine. It is called for every character of the
18: input stream. The DTD data structure contains pointers to functions which
19: are called to implement the actual effect of the text read. When these functions
20: are called, the attribute structures pointed to by the DTD are valid, and
21: the function is passed a pointer to the current tag structure, and an "element
22: stack" which represents the state of nesting within SGML elements. See also
23: the <A HREF="HTStream.html">generic Stream definition</A>.
24: <P>
25: The following aspects are from Dan Connolly's suggestions: Binary search,
26: Structured object scheme basically, SGML content enum type.
27: <P>
28: This module is implemented by <A HREF="SGML.c">SGML.c</A>, and it is a part
2.27 frystyk 29: of the <A HREF="http://www.w3.org/Library/">W3C Sample Code Library</A>.
2.11 frystyk 30: <PRE>
31: #ifndef SGML_H
2.1 timbl 32: #define SGML_H
33:
34: #include "HTStream.h"
2.17 frystyk 35: #include "HTStruct.h"
2.16 frystyk 36: </PRE>
2.25 frystyk 37: <H2>
2.28 ! frystyk 38: SGML Content Types
2.25 frystyk 39: </H2>
2.28 ! frystyk 40: <PRE>
! 41: typedef enum _SGMLContent{ /* How the parser treats the content of an element */
2.25 frystyk 42: SGML_EMPTY, /* No content */
43: SGML_LITERAL, /* Character data; only the exact close tag is recognized.
44: For old WWW server compatibility only - this is not SGML! */
45: SGML_CDATA, /* Character data; only "</" is recognized */
46: SGML_RCDATA, /* Replaceable character data; "</" and "&ref;" are recognized */
47: SGML_MIXED, /* Elements and parsed character data; all markup is recognized */
48: SGML_ELEMENT /* Any data found will be returned as an error */
2.28 ! frystyk 49: } SGMLContent;
! 50: </PRE>
! 51: <H2>
! 52: Attribute Types
! 53: </H2>
! 54: <P>
! 55: Describes an attribute of an SGML tag.
! 56: <PRE>
! 57: typedef struct _HTAttr { /* Describes one attribute of an SGML tag */
2.1 timbl 58: char * name; /* The (constant) name of the attribute */
59: /* TODO: type information for the attribute could be added here */
2.28 ! frystyk 60: } HTAttr;
2.1 timbl 61:
2.28 ! frystyk 62: extern char * HTAttr_name (HTAttr * attr); /* NOTE(review): presumably returns attr->name; confirm in SGML.c */
! 63: </PRE>
! 64: <H2>
! 65: Tag Structure Describing SGML Elements
! 66: </H2>
! 67: <DL>
! 68: <DT>
! 69: name
! 70: <DD>
! 71: is the string which comes after the tag opener "<".
! 72: <DT>
! 73: attributes
! 74: <DD>
! 75: points to a zero-terminated array of attribute names.
! 76: <DT>
! 77: contents
! 78: <DD>
! 79: determines how the SGML engine parses the characters within the element. If
! 80: set to <CODE>SGML_LITERAL</CODE>, tag openers are ignored except for that which opens a matching closing
! 81: tag.
! 82: </DL>
! 83: <PRE>
! 84: typedef struct _HTTag { /* Describes one SGML element (tag) */
! 85: char * name; /* The name of the tag: the string that follows the tag opener */
! 86: HTAttr * attributes; /* The list of acceptable attributes */
2.1 timbl 87: int number_of_attributes; /* Number of possible attributes */
88: SGMLContent contents; /* How the content of the element is parsed (an SGMLContent value) */
2.28 ! frystyk 89: } HTTag;
! 90:
! 91: extern char * HTTag_name (HTTag * tag); /* NOTE(review): presumably returns tag->name; confirm in SGML.c */
! 92: extern SGMLContent HTTag_content (HTTag * tag); /* NOTE(review): presumably returns tag->contents; confirm */
! 93: extern int HTTag_attributes (HTTag * tag); /* NOTE(review): presumably returns tag->number_of_attributes; confirm */
! 94: extern char * HTTag_attributeName (HTTag * tag, int attribute_number); /* NOTE(review): presumably the name of attributes[attribute_number]; confirm */
! 95: </PRE>
! 96: <H2>
! 97: DTD Information
! 98: </H2>
! 99: <P>
! 100: Not the whole DTD, but all this parser uses of it.
! 101: <PRE>
! 102: #define MAX_ATTRIBUTES 20 /* Maximum number of attributes per element */
2.1 timbl 103:
104: typedef struct { /* The part of the DTD that this parser uses */
2.2 timbl 105: HTTag * tags; /* Tag table; must be in strcmp order by name */
106: int number_of_tags; /* NOTE(review): presumably the number of entries in tags; confirm */
2.23 frystyk 107: const char ** entity_names; /* Entity name table; must be in strcmp order by name */
2.2 timbl 108: int number_of_entities; /* NOTE(review): presumably the number of entries in entity_names; confirm */
2.1 timbl 109: } SGML_dtd;
110:
2.28 ! frystyk 111: extern HTTag * SGML_findTag (SGML_dtd * dtd, int element_number); /* NOTE(review): presumably the tag at index element_number in dtd->tags; confirm in SGML.c */
! 112: extern char * SGML_findTagName (SGML_dtd * dtd, int element_number); /* NOTE(review): presumably the name of that tag; confirm */
! 113: extern SGMLContent SGML_findTagContents (SGML_dtd * dtd, int element_number); /* NOTE(review): presumably the contents field of that tag; confirm */
2.17 frystyk 114: </PRE>
2.25 frystyk 115: <H2>
2.28 ! frystyk 116: Create an SGML Parser Instance
2.25 frystyk 117: </H2>
2.28 ! frystyk 118: <P>
! 119: Create an SGML parser instance which converts a stream to a structured stream.
! 120: <PRE>
2.25 frystyk 121: extern HTStream * SGML_new (const SGML_dtd * dtd, /* DTD information used by the parser */
122: HTStructured * target); /* Structured stream the parser converts its input stream into */
2.28 ! frystyk 123: </PRE>
! 124: <PRE>
2.1 timbl 125: #endif /* SGML_H */
2.25 frystyk 126: </PRE>
2.28 ! frystyk 127: <P>
! 128: <HR>
! 129: <ADDRESS>
! 130: @(#) $Id: HTML.html,v 2.35 1998/05/14 02:10:42 frystyk Exp $
! 131: </ADDRESS>
2.25 frystyk 132: </BODY></HTML>
Webmaster