Annotation of libwww/Library/src/SGML.html, revision 2.14

2.4       timbl       1: <HTML>
                      2: <HEAD>
2.14    ! frystyk     3: <TITLE>SGML parse and stream definition for libwww</TITLE>
        !             4: </HEAD>
2.1       timbl       5: <BODY>
2.11      frystyk     6: 
2.14    ! frystyk     7: <H1>SGML and Structured streams</H1>
2.11      frystyk     8: 
2.14    ! frystyk     9: <PRE>
        !            10: /*
        !            11: **     (c) COPYRIGHT CERN 1994.
        !            12: **     Please first read the full copyright statement in the file COPYRIGH.
        !            13: */
        !            14: </PRE>
        !            15: 
        !            16: The SGML parser is a state machine.  It is called for every character
        !            17: of the input stream. The DTD data structure contains pointers to
        !            18: functions which are called to implement the actual effect of the text
        !            19: read. When these functions are called, the attribute structures
        !            20: pointed to by the DTD are valid, and the function is passed a pointer
        !            21: to the current tag structure, and an "element stack" which represents
        !            22: the state of nesting within SGML elements. See also <A
        !            23: HREF="HTStream.html">the generic Stream definition</A>.<P>
        !            24: 
        !            25: The following aspects are from Dan Connolly's suggestions: Binary
        !            26: search, Structured object scheme basically, SGML content enum type.<P>
        !            27: 
        !            28: This module is implemented by <A HREF="SGML.c">SGML.c</A>, and it is a
        !            29: part of the <A
        !            30: HREF="http://info.cern.ch/hypertext/WWW/Library/User/Guide/Guide.html">
        !            31: Library of Common Code</A>.
2.11      frystyk    32: 
                     33: <PRE>
                     34: #ifndef SGML_H
2.1       timbl      35: #define SGML_H
                     36: 
                     37: #include "HTUtils.h"
                     38: #include "HTStream.h"
                     39: 
                     40: </PRE>
                     41: <H2>SGML content types</H2>
                     42: <PRE>typedef enum _SGMLContent{
                     43:   SGML_EMPTY,    /* no content */
2.8       timbl      44:   SGML_LITERAL, /* character data. Recognizes the exact close tag only.
2.1       timbl      45:                    Old www server compatibility only! Not SGML */
                     46:   SGML_CDATA,    /* character data. Recognizes &lt;/ only */
                     47:   SGML_RCDATA,   /* replaceable character data. Recognizes &lt;/ and &amp;ref; */
                     48:   SGML_MIXED,    /* elements and parsed character data. Recognizes all markup */
                     49:   SGML_ELEMENT   /* any data found will be returned as an error */
                     50:   } SGMLContent;
                     51: 
                     52: 
                     53: typedef struct {
                     54:     char *     name;           /* The (constant) name of the attribute */
                     55:                                /* Only the name is stored; type info could be added here */
                     56: } attr;
                     57: 
                     58: 
                     59: /*             A tag structure describes an SGML element.
                     60: **             -----------------------------------------
                     61: **
                     62: **
                     63: **     name            is the string which comes after the tag opener "&lt;".
                     64: **
                     65: **     attributes      points to a zero-terminated array
                     66: **                     of attribute names.
                     67: **
                     68: **     contents        determines how the SGML engine parses the characters
                     69: **                     within the element. For literal content, tag openers are
                     70: **                     ignored except for that which opens a matching closing tag.
                     71: **
                     72: */
                     73: typedef struct _tag HTTag;
                     74: struct _tag{
                     75:     char *     name;                   /* The name of the tag */
                     76:     attr *     attributes;             /* The list of acceptable attributes */
                     77:     int                number_of_attributes;   /* Number of possible attributes */
                     78:     SGMLContent contents;              /* End only on end tag @@ */            
                     79: };
                     80: 
                     81: 
                     82: 
                     83: 
                     84: /*             DTD Information
                     85: **             ---------------
                     86: **
                     87: ** Not the whole DTD, but all that this parser uses of it.
                     88: */
                     89: typedef struct {
2.2       timbl      90:     HTTag *            tags;           /* Must be in strcmp order by name */ 
                     91:     int                        number_of_tags;
                     92:     CONST char **      entity_names;   /* Must be in strcmp order by name */
                     93:     int                        number_of_entities;
2.1       timbl      94: } SGML_dtd;
                     95: 
2.9       frystyk    96: #define MAX_ATTRIBUTES 20      /* Max number of attributes per element */
2.1       timbl      97: 
                     98: /*     SGML context passed to parsers
                     99: */
                    100: typedef struct _HTSGMLContext *HTSGMLContext;  /* Hidden (opaque pointer) */
                    101: 
                    102: 
                    103: /*__________________________________________________________________________
                    104: */
                    105: 
2.6       timbl     106: </PRE>
                    107: <H2>Structured Object definition</H2>A structured object is something
                    108: which can reasonably be represented
                    109: in SGML.  I'll rephrase that.  A
                    110: structured object is an ordered tree-structured
                    111: arrangement of data which is representable
                    112: as text. The SGML parser outputs to
                    113: a Structured object. A Structured
                    114: object can output its contents to
                    115: another Structured Object. It's a
                    116: kind of typed stream. The architecture
                    117: is largely Dan Connolly's. Elements
                    118: and entities are passed to the sob
                    119: by number, implying a knowledge of
                    120: the DTD. Knowledge of the SGML syntax
                    121: is not here, though.<P>
                    122: Superclass: HTStream<P>
                    123: The creation methods will vary on
                    124: the type of Structured Object. Maybe
                    125: the callerData is enough info to
2.13      frystyk   126: pass along. <P>
                    127: 
                    128: <B>NOTE: </B>The <CODE>put_block</CODE> method was <CODE>write</CODE>,
                    129: but this upset systems which had macros for <CODE>write()</CODE>. See
                    130: <A HREF="HTStream.html">the generic stream definition</A> for valid
                    131: return codes.<P>
                    132: 
2.6       timbl     133: <PRE>typedef struct _HTStructured HTStructured;
2.1       timbl     134: 
                    135: typedef struct _HTStructuredClass{
                    136: 
                    137:        char*  name;                            /* Just for diagnostics */
                    138: 
2.13      frystyk   139:        int (*_free) PARAMS((                   /* NOTE(review): return codes presumably as in HTStream.html */
2.1       timbl     140:                HTStructured*   me));
                    141: 
2.13      frystyk   142:        int (*abort) PARAMS((                   /* NOTE(review): return codes presumably as in HTStream.html */
2.5       timbl     143:                HTStructured*   me,
                    144:                HTError         e));
2.1       timbl     145:                
                    146:        void (*put_character) PARAMS((
                    147:                HTStructured*   me,
                    148:                char            ch));
                    149:                                
                    150:        void (*put_string) PARAMS((
                    151:                HTStructured*   me,
                    152:                CONST char *    str));
                    153:                
2.13      frystyk   154:        void (*put_block) PARAMS((              /* Formerly "write"; renamed to avoid write() macros */
2.2       timbl     155:                HTStructured*   me,
2.1       timbl     156:                CONST char *    str,
                    157:                int             len));
                    158:                
                    159:        void (*start_element) PARAMS((          /* Element is passed by number (implies knowledge of the DTD) */
                    160:                HTStructured*   me,
                    161:                int             element_number,
2.2       timbl     162:                CONST BOOL*             attribute_present,
                    163:                CONST char**            attribute_value));
2.1       timbl     164:                
                    165:        void (*end_element) PARAMS((            /* Element is passed by number (implies knowledge of the DTD) */
                    166:                HTStructured*   me,
                    167:                int             element_number));
                    168: 
                    169:        void (*put_entity) PARAMS((             /* Entity is passed by number (implies knowledge of the DTD) */
                    170:                HTStructured*   me,
                    171:                int             entity_number));
                    172:                
2.13      frystyk   173: } HTStructuredClass;
2.1       timbl     174: 
2.6       timbl     175: </PRE>
                    176: <H2>Find a Tag by Name</H2>Returns a pointer to the tag within
                    177: the DTD.
2.7       timbl     178: <PRE>extern HTTag * SGMLFindTag PARAMS((CONST SGML_dtd* dtd, CONST char * string)); /* Returns a pointer to the tag within dtd */
2.1       timbl     179: 
                    180: 
2.6       timbl     181: </PRE>
2.10      timbl     182: <H2>Find an Attribute by Name</H2>Returns the number of the
                    183: attribute or -1 if failure.
                    184: <PRE>extern int SGMLFindAttribute PARAMS((HTTag* tag, CONST char * string)); /* Returns the attribute's number, or -1 on failure */
                    185: 
                    186: 
                    187: </PRE>
2.6       timbl     188: <H2>Create an SGML parser</H2>
                    189: <PRE>/*
2.1       timbl     190: ** On entry,
                    191: **     dtd             must point to a DTD structure as defined above
                    192: **     target          must point to the structured object to output to
                    193: **                     (this comment formerly described "callbacks" and "callData").
                    194: ** On exit,
                    195: **             The default tag starter has been processed.
                    196: */
                    197: 
                    198: 
                    199: extern HTStream* SGML_new PARAMS((
                    200:        CONST SGML_dtd *                dtd,
                    201:        HTStructured *          target));
                    202: 
2.2       timbl     203: extern CONST HTStreamClass SGMLParser;
2.1       timbl     204: 
                    205: 
                    206: #endif /* SGML_H */
2.7       timbl     207: 
2.1       timbl     208: 
2.3       timbl     209: 
                    210: 
2.4       timbl     211: 
2.6       timbl     212: 
                    213: 
                    214: 
                    215: 
2.8       timbl     216: 
                    217: </PRE></BODY>
2.4       timbl     218: </HTML>

Webmaster