Annotation of libwww/Library/src/SGML.html, revision 2.1

2.1     ! timbl       1: <HEADER>
        !             2: <TITLE>/Net/dxcern/userd/timbl/hypertext/WWW/Library/Implementation/SGML.html</TITLE></HEADER>
        !             3: <BODY>
        !             4: <H1>SGML and Structured streams</H1>The SGML parser is a state machine.
        !             5: It is called for every character
        !             6: of the input stream. The DTD data
        !             7: structure contains pointers
        !             8: to functions which are called to
        !             9: implement the actual effect of the
        !            10: text read. When these functions are
        !            11: called, the attribute structures
        !            12: pointed to by the DTD are valid,
        !            13: and the function is passed a pointer
        !            14: to the current tag structure, and
        !            15: an "element stack" which represents
        !            16: the state of nesting within SGML
        !            17: elements.<P>
        !            18: The following aspects are from Dan
        !            19: Connolly's suggestions:  Binary search,
        !            20: Structured object scheme basically,
        !            21: SGML content enum type.<P>
        !            22: (c) Copyright CERN 1991 - See Copyright.html
        !            23: <PRE>#ifndef SGML_H
        !            24: #define SGML_H
        !            25: 
        !            26: #include "HTUtils.h"
        !            27: #include "HTStream.h"
        !            28: 
        !            29: </PRE>
        !            30: <H2>SGML content types</H2>
        !            31: <PRE>typedef enum _SGMLContent{
        !            32:   SGML_EMPTY,    /* no content */
        !            33:   SGML_LITTERAL, /* character data. Recognises exact close tag only. Literal.
        !            34:                    Old www server compatibility only! Not SGML */
        !            35:   SGML_CDATA,    /* character data. recognize &lt;/ only */
        !            36:   SGML_RCDATA,   /* replaceable character data. recognize &lt;/ and &amp;ref; */
        !            37:   SGML_MIXED,    /* elements and parsed character data. recognize all markup */
        !            38:   SGML_ELEMENT   /* any data found will be returned as an error*/
        !            39:   } SGMLContent;
        !            40: 
        !            41: /*     An attribute of an SGML element, described by its name only. */
        !            42: typedef struct {
        !            43:     char *     name;           /* The (constant) name of the attribute */
        !            44:                                /* Could put type info in here */
        !            45: } attr;
        !            46: 
        !            47: 
        !            48: /*             A tag structure describes an SGML element.
        !            49: **             -----------------------------------------
        !            50: **
        !            51: **
        !            52: **     name            is the string which comes after the tag opener "&lt;".
        !            53: **
        !            54: **     attributes      points to a zero-terminated array
        !            55: **                     of attribute names.
        !            56: **
        !            57: **     contents        determines how the SGML engine parses the characters
        !            58: **                     within the element. Presumably for SGML_LITTERAL, tag openers
        !            59: **                     are ignored except that which opens a matching closing tag.
        !            60: **
        !            61: */
        !            62: typedef struct _tag HTTag;
        !            63: struct _tag{
        !            64:     char *     name;                   /* The name of the tag */
        !            65:     attr *     attributes;             /* The list of acceptable attributes */
        !            66:     int                number_of_attributes;   /* Number of possible attributes */
        !            67:     SGMLContent contents;              /* End only on end tag @@ */            
        !            68: };
        !            69: 
        !            70: 
        !            71: 
        !            72: 
        !            73: /*             DTD Information
        !            74: **             ---------------
        !            75: **
        !            76: ** Not the whole DTD, but all this parser uses of it.
        !            77: */
        !            78: typedef struct {
        !            79:     HTTag *    tags;                   /* Must be in strcmp order by name */ 
        !            80:     int                number_of_tags;
        !            81:     char **    entity_names;           /* Must be in strcmp order by name */
        !            82:     int                number_of_entities;
        !            83: } SGML_dtd;
        !            84: 
        !            85: 
        !            86: /*     SGML context passed to parsers
        !            87: */
        !            88: typedef struct _HTSGMLContext *HTSGMLContext;  /* Hidden: struct not defined in this header */
        !            89: 
        !            90: 
        !            91: /*__________________________________________________________________________
        !            92: */
        !            93: /*             Structured Object definition
        !            94: **
        !            95: **     A structured object is something which can reasonably be
        !            96: **     represented in SGML.  I'll rephrase that.  A structured
        !            97: **     object is an ordered tree-structured arrangement of data
        !            98: **     which is representable as text.
        !            99: **
        !           100: **     The SGML parser outputs to a Structured object. 
        !           101: **     A Structured object can output its contents
        !           102: **     to another Structured Object. 
        !           103: **     It's a kind of typed stream.  The architecture
        !           104: **     is largely Dan Connolly's.
        !           105: **     Elements and entities are passed to the sob by number, implying
        !           106: **     a knowledge of the DTD.
        !           107: **     Knowledge of the SGML syntax is not here, though.
        !           108: **
        !           109: **     Superclass: HTStream
        !           110: */
        !           111: 
        !           112: 
        !           113: /*     The creation methods will vary on the type of Structured Object.
        !           114: **     Maybe the callerData is enough info to pass along.
        !           115: */
        !           116: 
        !           117: typedef struct _HTStructured HTStructured;
        !           118: 
        !           119: typedef struct _HTStructuredClass{
        !           120: 
        !           121:        char*  name;                            /* Just for diagnostics */
        !           122: 
        !           123:        void (*free) PARAMS((
        !           124:                HTStructured*   me));
        !           125: 
        !           126:        void (*end_document) PARAMS((
        !           127:                HTStructured*   me));
        !           128:                
        !           129:        void (*put_character) PARAMS((
        !           130:                HTStructured*   me,
        !           131:                char            ch));
        !           132:                                
        !           133:        void (*put_string) PARAMS((
        !           134:                HTStructured*   me,
        !           135:                CONST char *    str));
        !           136:                
        !           137:        void (*write) PARAMS((
        !           138:                HTStream*       me,     /* NOTE(review): HTStream*, unlike the other methods -- confirm intended */
        !           139:                CONST char *    str,
        !           140:                int             len));
        !           141:                
        !           142:        void (*start_element) PARAMS((
        !           143:                HTStructured*   me,
        !           144:                int             element_number,
        !           145:                BOOL*           attribute_present,
        !           146:                char**          attribute_value));
        !           147:                
        !           148:        void (*end_element) PARAMS((
        !           149:                HTStructured*   me,
        !           150:                int             element_number));
        !           151: 
        !           152:        void (*put_entity) PARAMS((
        !           153:                HTStructured*   me,
        !           154:                int             entity_number));
        !           155:                
        !           156: }HTStructuredClass;
        !           157: 
        !           158: 
        !           159: 
        !           160: /*     Create an SGML parser
        !           161: **
        !           162: ** On entry,
        !           163: **     dtd             must point to a DTD structure as defined above
        !           164: **     target          presumably the Structured object that receives the
        !           165: **                     parser's output -- TODO confirm against the implementation.
        !           166: ** On exit,
        !           167: **             The default tag starter has been processed.
        !           168: */
        !           169: 
        !           170: 
        !           171: extern HTStream* SGML_new PARAMS((
        !           172:        CONST SGML_dtd *                dtd,
        !           173:        HTStructured *          target));
        !           174: 
        !           175: extern HTStreamClass SGMLParser;
        !           176: 
        !           177: 
        !           178: #endif /* SGML_H */
        !           179: 
        !           180: </BODY>

Webmaster