Annotation of libwww/Library/src/SGML.html, revision 2.1
2.1 ! timbl 1: <HEADER>
! 2: <TITLE>/Net/dxcern/userd/timbl/hypertext/WWW/Library/Implementation/SGML.html</TITLE></HEADER>
! 3: <BODY>
! 4: <H1>SGML and Structured streams</H1>The SGML parser is a state machine.
! 5: It is called for every character
! 6: of the input stream. The DTD data
! 7: structure contains pointers
! 8: to functions which are called to
! 9: implement the actual effect of the
! 10: text read. When these functions are
! 11: called, the attribute structures
! 12: pointed to by the DTD are valid,
! 13: and the function is passed a pointer
! 14: to the current tag structure, and
! 15: an "element stack" which represents
! 16: the state of nesting within SGML
! 17: elements.<P>
! 18: The following aspects are from Dan
! 19: Connolly's suggestions: Binary search,
! 20: Structured object scheme basically,
! 21: SGML content enum type.<P>
! 22: (c) Copyright CERN 1991 - See Copyright.html
! 23: <PRE>#ifndef SGML_H
! 24: #define SGML_H
! 25:
! 26: #include "HTUtils.h"
! 27: #include "HTStream.h"
! 28:
! 29: </PRE>
! 30: <H2>SGML content types</H2>
! 31: <PRE>typedef enum _SGMLContent{ /* how the content of an element is parsed */
! 32: SGML_EMPTY, /* no content */
! 33: SGML_LITTERAL, /* literal character data. Only the exact close tag is recognised.
! 34: Old www server compatibility only! Not SGML */
! 35: SGML_CDATA, /* character data. recognize </ only */
! 36: SGML_RCDATA, /* replaceable character data. recognize </ and &ref; */
! 37: SGML_MIXED, /* elements and parsed character data. recognize all markup */
! 38: SGML_ELEMENT /* any data found will be returned as an error */
! 39: } SGMLContent;
! 40:
! 41:
! 42: typedef struct { /* describes one attribute; HTTag.attributes points to these */
! 43: char * name; /* The (constant) name of the attribute */
! 44: /* Could put type info in here */
! 45: } attr;
! 46:
! 47:
! 48: /* A tag structure describes an SGML element.
! 49: ** -----------------------------------------
! 50: **
! 51: **
! 52: ** name is the string which comes after the tag opener "<".
! 53: **
! 54: ** attributes points to a zero-terminated array of attribute names.
! 55: ** (NOTE(review): number_of_attributes is also stored -- confirm which one bounds the array.)
! 56: **
! 57: ** contents determines how the SGML engine parses the characters
! 58: ** within the element (see SGMLContent). For SGML_LITTERAL, tag openers
! 59: ** are ignored except for that which opens a matching closing tag.
! 60: **
! 61: */
! 62: typedef struct _tag HTTag;
! 63: struct _tag{
! 64: char * name; /* The name of the tag */
! 65: attr * attributes; /* The list of acceptable attributes */
! 66: int number_of_attributes; /* Number of possible attributes */
! 67: SGMLContent contents; /* Content type: which markup is recognised inside the element */
! 68: };
! 69:
! 70:
! 71:
! 72:
! 73: /* DTD Information
! 74: ** ---------------
! 75: **
! 76: ** Not the whole DTD, but all this parser uses of it.
! 77: */
! 78: typedef struct {
! 79: HTTag * tags; /* Must be in strcmp order by name */
! 80: int number_of_tags; /* Number of entries in tags */
! 81: char ** entity_names; /* Must be in strcmp order by name */
! 82: int number_of_entities; /* Number of entries in entity_names */
! 83: } SGML_dtd;
! 84:
! 85:
! 86: /* SGML context passed to parsers
! 87: */
! 88: typedef struct _HTSGMLContext *HTSGMLContext; /* Hidden: the struct itself is not defined in this header */
! 89:
! 90:
! 91: /*__________________________________________________________________________
! 92: */
! 93: /* Structured Object definition
! 94: **
! 95: ** A structured object is something which can reasonably be
! 96: ** represented in SGML. I'll rephrase that. A structured
! 97: ** object is an ordered tree-structured arrangement of data
! 98: ** which is representable as text.
! 99: **
! 100: ** The SGML parser outputs to a Structured object.
! 101: ** A Structured object can output its contents
! 102: ** to another Structured Object.
! 103: ** It's a kind of typed stream. The architecture
! 104: ** is largely Dan Connolly's.
! 105: ** Elements and entities are passed to the sob by number, implying
! 106: ** a knowledge of the DTD.
! 107: ** Knowledge of the SGML syntax is not here, though.
! 108: **
! 109: ** Superclass: HTStream
! 110: */
! 111:
! 112:
! 113: /* The creation methods will vary on the type of Structured Object.
! 114: ** Maybe the callerData is enough info to pass along.
! 115: */
! 116:
! 117: typedef struct _HTStructured HTStructured; /* incomplete type here; only pointers to it are used below */
! 118:
! 119: typedef struct _HTStructuredClass{ /* table of function pointers operating on an HTStructured */
! 120:
! 121: char* name; /* Just for diagnostics */
! 122:
! 123: void (*free) PARAMS((
! 124: HTStructured* me));
! 125:
! 126: void (*end_document) PARAMS((
! 127: HTStructured* me));
! 128:
! 129: void (*put_character) PARAMS((
! 130: HTStructured* me,
! 131: char ch));
! 132:
! 133: void (*put_string) PARAMS((
! 134: HTStructured* me,
! 135: CONST char * str));
! 136:
! 137: void (*write) PARAMS((
! 138: HTStream* me, /* NOTE(review): HTStream* here, whereas every other method takes HTStructured* -- confirm intended */
! 139: CONST char * str,
! 140: int len));
! 141:
! 142: void (*start_element) PARAMS((
! 143: HTStructured* me,
! 144: int element_number, /* elements are passed by number, implying knowledge of the DTD */
! 145: BOOL* attribute_present, /* presumably one flag per attribute of the tag -- TODO confirm */
! 146: char** attribute_value)); /* presumably parallel to attribute_present -- TODO confirm */
! 147:
! 148: void (*end_element) PARAMS((
! 149: HTStructured* me,
! 150: int element_number));
! 151:
! 152: void (*put_entity) PARAMS((
! 153: HTStructured* me,
! 154: int entity_number)); /* entities are likewise passed by number */
! 155:
! 156: }HTStructuredClass;
! 157:
! 158:
! 159:
! 160: /* Create an SGML parser
! 161: **
! 162: ** On entry,
! 163: ** dtd must point to a DTD structure as defined above
! 164: ** target is the Structured object the parser outputs to.
! 165: ** (NOTE(review): this comment used to describe callbacks/callData, which are not parameters here.)
! 166: ** On exit,
! 167: ** The default tag starter has been processed.
! 168: */
! 169:
! 170:
! 171: extern HTStream* SGML_new PARAMS((
! 172: CONST SGML_dtd * dtd,
! 173: HTStructured * target));
! 174:
! 175: extern HTStreamClass SGMLParser; /* declared here, defined elsewhere; presumably the parser's stream class -- TODO confirm */
! 176:
! 177:
! 178: #endif /* SGML_H */
! 179:
! 180: </PRE></BODY>
Webmaster