Annotation of libwww/Library/src/SGML.html, revision 2.14.2.1

2.4       timbl       1: <HTML>
                      2: <HEAD>
2.14      frystyk     3: <TITLE>SGML parse and stream definition for libwww</TITLE>
                      4: </HEAD>
2.1       timbl       5: <BODY>
2.11      frystyk     6: 
2.14      frystyk     7: <H1>SGML and Structured streams</H1>
2.11      frystyk     8: 
2.14      frystyk     9: <PRE>
                     10: /*
                     11: **     (c) COPYRIGHT CERN 1994.
                     12: **     Please first read the full copyright statement in the file COPYRIGH.
                     13: */
                     14: </PRE>
                     15: 
                     16: The SGML parser is a state machine.  It is called for every character
                     17: of the input stream. The DTD data structure contains pointers to
                     18: functions which are called to implement the actual effect of the text
                     19: read. When these functions are called, the attribute structures
                     20: pointed to by the DTD are valid, and the function is passed a pointer
                     21: to the current tag structure, and an "element stack" which represents
                     22: the state of nesting within SGML elements. See also <A
                     23: HREF="HTStream.html">the generic Stream definition</A>.<P>
                     24: 
                     25: The following aspects are from Dan Connolly's suggestions: Binary
                     26: search, Structured object scheme basically, SGML content enum type.<P>
                     27: 
                     28: This module is implemented by <A HREF="SGML.c">SGML.c</A>, and it is a
                     29: part of the <A
                     30: HREF="http://info.cern.ch/hypertext/WWW/Library/User/Guide/Guide.html">
                     31: Library of Common Code</A>.
2.11      frystyk    32: 
                     33: <PRE>
                     34: #ifndef SGML_H
2.1       timbl      35: #define SGML_H
                     36: 
                     37: #include "HTStream.h"
                     38: </PRE>
2.14.2.1! frystyk    39: 
2.1       timbl      40: <H2>SGML content types</H2>
                     41: <PRE>typedef enum _SGMLContent{
                     42:   SGML_EMPTY,    /* no content */
2.8       timbl      43:   SGML_LITERAL, /* character data. Only the exact close tag is recognized.
2.1       timbl      44:                    For old WWW server compatibility only! Not SGML */
                     45:   SGML_CDATA,    /* character data. recognize &lt;/ only */
                     46:   SGML_RCDATA,   /* replaceable character data. recognize &lt;/ and &amp;ref; */
                     47:   SGML_MIXED,    /* elements and parsed character data. recognize all markup */
                     48:   SGML_ELEMENT   /* any data found will be returned as an error */
                     49:   } SGMLContent;
                     50: 
                     51: 
                     52: typedef struct {
                     53:     char *     name;           /* The (constant) name of the attribute */
                     54:                                /* Type information could also be put in here */
                     55: } attr;
                     56: 
                     57: 
                     58: /*             A tag structure describes an SGML element.
                     59: **             -----------------------------------------
                     60: **
                     61: **
                     62: **     name            is the string which comes after the tag opener "&lt;".
                     63: **
                     64: **     attributes      points to a zero-terminated array
                     65: **                     of attribute names.
                     66: **
                     67: **     contents        determines how the SGML engine parses the characters
                     68: **                     within the element. For SGML_LITERAL, tag openers are
                     69: **                     ignored except for that which opens a matching closing tag.
                     70: **
                     71: */
                     72: typedef struct _tag HTTag;
                     73: struct _tag{
                     74:     char *     name;                   /* The name of the tag */
                     75:     attr *     attributes;             /* The list of acceptable attributes */
                     76:     int                number_of_attributes;   /* Number of possible attributes */
                     77:     SGMLContent contents;              /* End only on end tag @@ */            
                     78: };
                     79: 
                     80: /*             DTD Information
                     81: **             ---------------
                     82: **
                     83: ** Not the whole DTD, but all that this parser uses of it.
                     84: */
                     85: typedef struct {
2.2       timbl      86:     HTTag *            tags;           /* Must be in strcmp order by name */ 
                     87:     int                        number_of_tags;
                     88:     CONST char **      entity_names;   /* Must be in strcmp order by name */
                     89:     int                        number_of_entities;
2.1       timbl      90: } SGML_dtd;
                     91: 
2.9       frystyk    92: #define MAX_ATTRIBUTES 20      /* Max number of attributes per element */
2.1       timbl      93: 
                     94: /*     SGML context passed to parsers (only a pointer type is declared here)
                     95: */
                     96: typedef struct _HTSGMLContext *HTSGMLContext;  /* Hidden: struct not defined in this header */
                     97: 
                     98: 
                     99: /*__________________________________________________________________________
                    100: */
                    101: 
2.6       timbl     102: </PRE>
                    103: <H2>Structured Object definition</H2>A structured object is something
                    104: which can reasonably be represented
                    105: in SGML.  I'll rephrase that.  A
                    106: structured object is an ordered tree-structured
                    107: arrangement of data which is representable
                    108: as text. The SGML parser outputs to
                    109: a Structured object. A Structured
                    110: object can output its contents to
                    111: another Structured Object. It's a
                    112: kind of typed stream. The architecture
                    113: is largely Dan Connolly's. Elements
                    114: and entities are passed to the structured object
                    115: by number, implying a knowledge of
                    116: the DTD. Knowledge of the SGML syntax
                    117: is not here, though.<P>
                    118: Superclass: HTStream<P>
                    119: The creation methods will vary on
                    120: the type of Structured Object. Maybe
                    121: the callerData is enough info to
2.13      frystyk   122: pass along. <P>
                    123: 
                    124: <B>NOTE: </B>The <CODE>put_block</CODE> method was <CODE>write</CODE>,
                    125: but this upset systems which had macros for <CODE>write()</CODE>. See
                    126: <A HREF="HTStream.html">the generic stream definition</A> for valid
                    127: return codes.<P>
                    128: 
2.6       timbl     129: <PRE>typedef struct _HTStructured HTStructured;
2.1       timbl     130: 
                    131: typedef struct _HTStructuredClass{     /* Methods of a structured object; each takes the object (me) first */
                    132: 
                    133:        char*  name;                            /* Just for diagnostics */
                    134: 
2.13      frystyk   135:        int (*_free) PARAMS((
2.1       timbl     136:                HTStructured*   me));           /* Return codes: see the generic stream definition */
                    137: 
2.13      frystyk   138:        int (*abort) PARAMS((
2.5       timbl     139:                HTStructured*   me,
                    140:                HTError         e));            /* Return codes: see the generic stream definition */
2.1       timbl     141:                
                    142:        void (*put_character) PARAMS((
                    143:                HTStructured*   me,
                    144:                char            ch));
                    145:                                
                    146:        void (*put_string) PARAMS((
                    147:                HTStructured*   me,
                    148:                CONST char *    str));
                    149:                
2.13      frystyk   150:        void (*put_block) PARAMS((
2.2       timbl     151:                HTStructured*   me,
2.1       timbl     152:                CONST char *    str,
                    153:                int             len));          /* Was named write; renamed as some systems have a write() macro */
                    154:                
                    155:        void (*start_element) PARAMS((
                    156:                HTStructured*   me,
                    157:                int             element_number,
2.2       timbl     158:                CONST BOOL*             attribute_present,
                    159:                CONST char**            attribute_value));
2.1       timbl     160:                
                    161:        void (*end_element) PARAMS((
                    162:                HTStructured*   me,
                    163:                int             element_number));       /* Elements are passed by number (implies knowledge of the DTD) */
                    164: 
                    165:        void (*put_entity) PARAMS((
                    166:                HTStructured*   me,
                    167:                int             entity_number));        /* Entities are passed by number (implies knowledge of the DTD) */
                    168:                
2.13      frystyk   169: } HTStructuredClass;
2.1       timbl     170: 
2.6       timbl     171: </PRE>
                    172: <H2>Find a Tag by Name</H2>Returns a pointer to the tag within
                    173: the DTD.
2.7       timbl     174: <PRE>extern HTTag * SGMLFindTag PARAMS((CONST SGML_dtd* dtd, CONST char * string));
2.1       timbl     175: 
                    176: 
2.6       timbl     177: </PRE>
2.10      timbl     178: <H2>Find an Attribute by Name</H2>Returns the number of the
                    179: attribute or -1 on failure.
                    180: <PRE>extern int SGMLFindAttribute PARAMS((HTTag* tag, CONST char * string));
                    181: 
                    182: 
                    183: </PRE>
2.6       timbl     184: <H2>Create an SGML parser</H2>
                    185: <PRE>/*
2.1       timbl     186: ** On entry,
                    187: **     dtd             must point to a DTD structure as defined above
                    188: **     target          must point to the structured object which
                    189: **                     receives the output of the parser.
                    190: ** On exit,
                    191: **             The default tag starter has been processed.
                    192: */
                    193: 
                    194: 
                    195: extern HTStream* SGML_new PARAMS((
                    196:        CONST SGML_dtd *                dtd,
                    197:        HTStructured *          target));
                    198: 
2.2       timbl     199: extern CONST HTStreamClass SGMLParser;
2.1       timbl     200: 
                    201: 
                    202: #endif /* SGML_H */
2.7       timbl     203: 
2.1       timbl     204: 
2.3       timbl     205: 
                    206: 
2.4       timbl     207: 
2.6       timbl     208: 
                    209: 
                    210: 
                    211: 
2.8       timbl     212: 
                    213: </PRE></BODY>
2.4       timbl     214: </HTML>

Webmaster