Annotation of libwww/Library/src/SGML.html, revision 2.25
2.4 timbl 1: <HTML>
2: <HEAD>
2.25 ! frystyk 3: <!-- Changed by: Henrik Frystyk Nielsen, 1-Jun-1996 -->
! 4: <TITLE>W3C Reference Library libwww SGML Parser</TITLE>
2.14 frystyk 5: </HEAD>
2.1 timbl 6: <BODY>
2.25 ! frystyk 7: <H1>
! 8: SGML Parser
! 9: </H1>
2.14 frystyk 10: <PRE>
11: /*
2.18 frystyk 12: ** (c) COPYRIGHT MIT 1995.
2.14 frystyk 13: ** Please first read the full copyright statement in the file COPYRIGH.
14: */
15: </PRE>
2.25 ! frystyk 16: <P>
! 17: The SGML parser is a state machine. It is called for every character of the
! 18: input stream. The DTD data structure contains pointers to functions which
! 19: are called to implement the actual effect of the text read. When these functions
! 20: are called, the attribute structures pointed to by the DTD are valid, and
! 21: the function is passed a pointer to the current tag structure, and an "element
! 22: stack" which represents the state of nesting within SGML elements. See also
! 23: <A HREF="HTStream.html">the generic Stream definition</A>.
! 24: <P>
! 25: The following aspects are from Dan Connolly's suggestions: Binary search,
! 26: Structured object scheme basically, SGML content enum type.
! 27: <P>
! 28: This module is implemented by <A HREF="SGML.c">SGML.c</A>, and it is a part
! 29: of the <A HREF="http://www.w3.org/pub/WWW/Library/">W3C Reference Library</A>.
2.11 frystyk 30: <PRE>
31: #ifndef SGML_H
2.1 timbl 32: #define SGML_H
33:
34: #include "HTStream.h"
2.17 frystyk 35: #include "HTStruct.h"
2.16 frystyk 36: </PRE>
2.25 ! frystyk 37: <H2>
! 38: SGML content types
! 39: </H2>
2.1 timbl 40: <PRE>typedef enum _SGMLContent{
2.25 ! frystyk 41: SGML_EMPTY, /* no content */
! 42: SGML_LITERAL, /* character data. Recognized exact close tag only.
! 43: Old www server compatibility only! Not SGML */
! 44: SGML_CDATA, /* character data. recognize </ only */
! 45: SGML_RCDATA, /* replaceable character data. recognize </ and &ref; */
! 46: SGML_MIXED, /* elements and parsed character data. recognize all markup */
! 47: SGML_ELEMENT /* any data found will be returned as an error*/
2.1 timbl 48: } SGMLContent;
49:
50:
51: typedef struct {
52: char * name; /* The (constant) name of the attribute */
53: /* Could put type info in here */
54: } attr;
55:
56:
57: /* A tag structure describes an SGML element.
58: ** -----------------------------------------
59: **
60: **
61: ** name is the string which comes after the tag opener "<".
62: **
63: ** attributes points to a zero-terminated array
64: ** of attribute names.
65: **
66: ** contents determines how the SGML engine parses the characters
67: ** within the element. If it is SGML_LITERAL, tag openers are
68: ** ignored except for that which opens a matching closing tag.
69: **
70: */
71: typedef struct _tag HTTag;
72: struct _tag{
73: char * name; /* The name of the tag */
74: attr * attributes; /* The list of acceptable attributes */
75: int number_of_attributes; /* Number of possible attributes */
76: SGMLContent contents; /* End only on end tag @@ */
77: };
78:
79: /* DTD Information
80: ** ---------------
81: **
82: ** Not the whole DTD, but all this parser uses of it.
83: */
84: typedef struct {
2.2 timbl 85: HTTag * tags; /* Must be in strcmp order by name */
86: int number_of_tags;
2.23 frystyk 87: const char ** entity_names; /* Must be in strcmp order by name */
2.2 timbl 88: int number_of_entities;
2.1 timbl 89: } SGML_dtd;
90:
2.17 frystyk 91: #define MAX_ATTRIBUTES 20 /* Max number of attributes per element */
92: </PRE>
2.25 ! frystyk 93: <H2>
! 94: Create an SGML parser
! 95: </H2>
2.6 timbl 96: <PRE>/*
2.1 timbl 97: ** On entry,
98: ** dtd must point to a DTD structure as defined above
99: ** callbacks must point to user routines.
100: ** callData is returned in callbacks transparently.
101: ** On exit,
102: ** The default tag starter has been processed.
103: */
104:
105:
2.25 ! frystyk 106: extern HTStream * SGML_new (const SGML_dtd * dtd,
! 107: HTStructured * target);
2.1 timbl 108:
109: #endif /* SGML_H */
2.8 timbl 110:
2.25 ! frystyk 111: </PRE>
! 112: </BODY></HTML>
Webmaster