Annotation of libwww/Library/src/SGML.html, revision 2.5

2.4       timbl       1: <HTML>
                      2: <HEAD>
                      3: <TITLE>SGML parse and stream definition for libwww</TITLE></HEAD>
2.1       timbl       4: <BODY>
                      5: <H1>SGML and Structured streams</H1>The SGML parser is a state machine.
                      6: It is called for every character
                      7: of the input stream. The DTD data
                      8: structure contains pointers
                      9: to functions which are called to
                     10: implement the actual effect of the
                     11: text read. When these functions are
                     12: called, the attribute structures
                     13: pointed to by the DTD are valid,
                     14: and the function is passed a pointer
                     15: to the current tag structure, and
                     16: an "element stack" which represents
                     17: the state of nesting within SGML
                     18: elements.<P>
                     19: The following aspects are from Dan
                     20: Connolly's suggestions:  Binary search,
                     21: Structured object scheme basically,
                     22: SGML content enum type.<P>
                     23: (c) Copyright CERN 1991 - See Copyright.html
                     24: <PRE>#ifndef SGML_H
                     25: #define SGML_H
                     26: 
                     27: #include "HTUtils.h"
                     28: #include "HTStream.h"
                     29: 
                     30: </PRE>
                     31: <H2>SGML content types</H2>
                     32: <PRE>typedef enum _SGMLContent{
                     33:   SGML_EMPTY,    /* no content */
                     34:   SGML_LITTERAL, /* character data. Recognised exact close tag only. litteral
                     35:                    Old www server compatibility only! Not SGML */
                     36:   SGML_CDATA,    /* character data. recognize &lt;/ only */
                     37:   SGML_RCDATA,   /* replaceable character data. recognize &lt;/ and &amp;ref; */
                     38:   SGML_MIXED,    /* elements and parsed character data. recognize all markup */
                     39:   SGML_ELEMENT   /* any data found will be returned as an error*/
                     40:   } SGMLContent;
                     41: 
                     42: 
                     43: typedef struct {
                     44:     char *     name;           /* The (constant) name of the attribute */
                     45:                                /* Could put type info in here */
                     46: } attr;
                     47: 
                     48: 
                     49: /*             A tag structure describes an SGML element.
                     50: **             -----------------------------------------
                     51: **
                     52: **
                     53: **     name            is the string which comes after the tag opener "&lt;".
                     54: **
                     55: **     attributes      points to a zero-terminated array
                     56: **                     of attribute names.
                     57: **
                     58: **     litteral        determines how the SGML engine parses the characters
                     59: **                     within the element. If set, tag openers are ignored
                     60: **                     except for that which opens a matching closing tag.
                     61: **
                     62: */
                     63: typedef struct _tag HTTag;
                     64: struct _tag{
                     65:     char *     name;                   /* The name of the tag */
                     66:     attr *     attributes;             /* The list of acceptable attributes */
                     67:     int                number_of_attributes;   /* Number of possible attributes */
                     68:     SGMLContent contents;              /* End only on end tag @@ */            
                     69: };
                     70: 
                     71: 
                     72: 
                     73: 
                     74: /*             DTD Information
                     75: **             ---------------
                     76: **
                     77: ** Not the whole DTD, but all this parser uses of it.
                     78: */
                     79: typedef struct {
2.2       timbl      80:     HTTag *            tags;           /* Must be in strcmp order by name */ 
                     81:     int                        number_of_tags;
                     82:     CONST char **      entity_names;   /* Must be in strcmp order by name */
                     83:     int                        number_of_entities;
2.1       timbl      84: } SGML_dtd;
                     85: 
                     86: 
                     87: /*     SGML context passed to parsers
                     88: */
                     89: typedef struct _HTSGMLContext *HTSGMLContext;  /* Hidden */
                     90: 
                     91: 
                     92: /*__________________________________________________________________________
                     93: */
                     94: /*             Structured Object definition
                     95: **
                     96: **     A structured object is something which can reasonably be
                     97: **     represented in SGML.  I'll rephrase that.  A structured
                     98: **     object is an ordered tree-structured arrangement of data
                     99: **     which is representable as text.
                    100: **
                    101: **     The SGML parser outputs to a Structured object. 
                    102: **     A Structured object can output its contents
                    103: **     to another Structured Object. 
                    104: **     It's a kind of typed stream.  The architecture
                    105: **     is largely Dan Connolly's.
                    106: **     Elements and entities are passed to the sob by number, implying
                    107: **     a knowledge of the DTD.
                    108: **     Knowledge of the SGML syntax is not here, though.
                    109: **
                    110: **     Superclass: HTStream
                    111: */
                    112: 
                    113: 
                    114: /*     The creation methods will vary on the type of Structured Object.
                    115: **     Maybe the callerData is enough info to pass along.
                    116: */
                    117: 
                    118: typedef struct _HTStructured HTStructured;
                    119: 
                    120: typedef struct _HTStructuredClass{
                    121: 
                    122:        char*  name;                            /* Just for diagnostics */
                    123: 
                    124:        void (*free) PARAMS((
                    125:                HTStructured*   me));
                    126: 
2.4       timbl     127:        void (*abort) PARAMS((
2.5     ! timbl     128:                HTStructured*   me,
        !           129:                HTError         e));
2.1       timbl     130:                
                    131:        void (*put_character) PARAMS((
                    132:                HTStructured*   me,
                    133:                char            ch));
                    134:                                
                    135:        void (*put_string) PARAMS((
                    136:                HTStructured*   me,
                    137:                CONST char *    str));
                    138:                
                    139:        void (*write) PARAMS((
2.2       timbl     140:                HTStructured*   me,
2.1       timbl     141:                CONST char *    str,
                    142:                int             len));
                    143:                
                    144:        void (*start_element) PARAMS((
                    145:                HTStructured*   me,
                    146:                int             element_number,
2.2       timbl     147:                CONST BOOL*             attribute_present,
                    148:                CONST char**            attribute_value));
2.1       timbl     149:                
                    150:        void (*end_element) PARAMS((
                    151:                HTStructured*   me,
                    152:                int             element_number));
                    153: 
                    154:        void (*put_entity) PARAMS((
                    155:                HTStructured*   me,
                    156:                int             entity_number));
                    157:                
                    158: }HTStructuredClass;
                    159: 
                    160: 
                    161: 
                    162: /*     Create an SGML parser
                    163: **
                    164: ** On entry,
                    165: **     dtd             must point to a DTD structure as defined above
                    166: **     target          must point to the Structured object
                    167: **                     which receives the parser's output.
                    168: ** On exit,
                    169: **             The default tag starter has been processed.
                    170: */
                    171: 
                    172: 
                    173: extern HTStream* SGML_new PARAMS((
                    174:        CONST SGML_dtd *                dtd,
                    175:        HTStructured *          target));
                    176: 
2.2       timbl     177: extern CONST HTStreamClass SGMLParser;
2.1       timbl     178: 
                    179: 
                    180: #endif /* SGML_H */
                    181: 
2.3       timbl     182: 
                    183: 
2.4       timbl     184: 
                    185: </BODY>
                    186: </HTML>

Webmaster