Annotation of libwww/Library/src/SGML.html, revision 2.26

2.4       timbl       1: <HTML>
                      2: <HEAD>
2.25      frystyk     3:   <!-- Changed by: Henrik Frystyk Nielsen,  1-Jun-1996 -->
2.26    ! frystyk     4:   <TITLE>W3C Sample Code Library libwww SGML Parser</TITLE>
2.14      frystyk     5: </HEAD>
2.1       timbl       6: <BODY>
2.25      frystyk     7: <H1>
                      8:   SGML Parser
                      9: </H1>
2.14      frystyk    10: <PRE>
                     11: /*
2.18      frystyk    12: **     (c) COPYRIGHT MIT 1995.
2.14      frystyk    13: **     Please first read the full copyright statement in the file COPYRIGH.
                     14: */
                     15: </PRE>
2.25      frystyk    16: <P>
                     17: The SGML parser is a state machine. It is called for every character of the
                     18: input stream. The DTD data structure contains pointers to functions which
                     19: are called to implement the actual effect of the text read. When these functions
                     20: are called, the attribute structures pointed to by the DTD are valid, and
                     21: the function is passed a pointer to the current tag structure, and an "element
                     22: stack" which represents the state of nesting within SGML elements. See also
                     23: the <A HREF="HTStream.html">generic Stream definition</A>.
                     24: <P>
                     25: The following aspects are from Dan Connolly's suggestions: Binary search,
                     26: Structured object scheme basically, SGML content enum type.
                     27: <P>
                     28: This module is implemented by <A HREF="SGML.c">SGML.c</A>, and it is a part
2.26    ! frystyk    29: of the <A HREF="http://www.w3.org/pub/WWW/Library/">W3C Sample Code Library</A>.
2.11      frystyk    30: <PRE>
                     31: #ifndef SGML_H
2.1       timbl      32: #define SGML_H
                     33: 
                     34: #include "HTStream.h"
2.17      frystyk    35: #include "HTStruct.h"
2.16      frystyk    36: </PRE>
2.25      frystyk    37: <H2>
                     38:   SGML content types
                     39: </H2>
2.1       timbl      40: <PRE>typedef enum _SGMLContent{
2.25      frystyk    41:     SGML_EMPTY,        /* No content */
                     42:     SGML_LITERAL,      /* Character data; only the exact close tag is recognized.
                     43:                           For compatibility with old WWW servers only -- not SGML! */
                     44:     SGML_CDATA,        /* Character data; only &lt;/ is recognized */
                     45:     SGML_RCDATA,       /* Replaceable character data; &lt;/ and &amp;ref; are recognized */
                     46:     SGML_MIXED,        /* Elements and parsed character data; all markup is recognized */
                     47:     SGML_ELEMENT       /* Any data found will be returned as an error */
2.1       timbl      48:   } SGMLContent;
                     49: 
                     50: 
                     51: typedef struct {
                     52:     char *     name;           /* The (constant) name of the attribute */
                     53:                                /* Type information could be added here */
                     54: } attr;
                     55: 
                     56: 
                     57: /*             A tag structure describes an SGML element.
                     58: **             -----------------------------------------
                     59: **
                     60: **
                     61: **     name            is the string which comes after the tag opener "&lt;".
                     62: **
                     63: **     attributes      points to a zero-terminated array
                     64: **                     of attribute names.
                     65: **
                     66: **     contents        determines how the SGML engine parses the characters
                     67: **                     within the element (see SGMLContent). For SGML_LITERAL,
                     68: **                     tag openers are ignored except for the one which opens
                     69: **                     a matching closing tag.
                     70: */
                     71: typedef struct _tag HTTag;
                     72: struct _tag{
                     73:     char *     name;                   /* The name of the tag */
                     74:     attr *     attributes;             /* The list of acceptable attributes */
                     75:     int                number_of_attributes;   /* Number of possible attributes */
                     76:     SGMLContent contents;              /* End only on end tag @@ */            
                     77: };
                     78: 
                     79: /*             DTD Information
                     80: **             ---------------
                     81: **
                     82: ** Not the whole DTD, only the parts of it that this parser uses.
                     83: */
                     84: typedef struct {
2.2       timbl      85:     HTTag *            tags;           /* Must be in strcmp order by name */ 
                     86:     int                        number_of_tags;
2.23      frystyk    87:     const char **      entity_names;   /* Must be in strcmp order by name */
2.2       timbl      88:     int                        number_of_entities;
2.1       timbl      89: } SGML_dtd;
                     90: 
2.17      frystyk    91: #define MAX_ATTRIBUTES 20           /* Maximum number of attributes per element */
                     92: </PRE>
2.25      frystyk    93: <H2>
                     94:   Create an SGML parser
                     95: </H2>
2.6       timbl      96: <PRE>/*
2.1       timbl      97: ** On entry,
                     98: **     dtd             must point to a DTD structure (SGML_dtd) as defined above.
                     99: **     target          must point to an HTStructured object; presumably it
                    100: **                     supplies the user callback routines -- TODO confirm.
                    101: ** On exit,
                    102: **             The default tag starter has been processed.
                    103: */
                    104: 
                    105: 
2.25      frystyk   106: extern HTStream * SGML_new (const SGML_dtd *   dtd,
                    107:                            HTStructured *      target);
2.1       timbl     108: 
                    109: #endif /* SGML_H */
2.8       timbl     110: 
2.25      frystyk   111: </PRE>
                    112: </BODY></HTML>

Webmaster