Annotation of libwww/Library/src/SGML.html, revision 2.19

2.4       timbl       1: <HTML>
                      2: <HEAD>
2.17      frystyk     3: <TITLE>SGML Parse Definition</TITLE>
2.14      frystyk     4: </HEAD>
2.1       timbl       5: <BODY>
2.11      frystyk     6: 
2.17      frystyk     7: <H1>SGML Parse Definition</H1>
2.11      frystyk     8: 
2.14      frystyk     9: <PRE>
                     10: /*
2.18      frystyk    11: **     (c) COPYRIGHT MIT 1995.
2.14      frystyk    12: **     Please first read the full copyright statement in the file COPYRIGH.
                     13: */
                     14: </PRE>
                     15: 
                     16: The SGML parser is a state machine.  It is called for every character
                     17: of the input stream. The DTD data structure contains pointers to
                     18: functions which are called to implement the actual effect of the text
                     19: read. When these functions are called, the attribute structures
                     20: pointed to by the DTD are valid, and the function is passed a pointer
                     21: to the current tag structure, and an "element stack" which represents
                     22: the state of nesting within SGML elements. See also <A
                     23: HREF="HTStream.html">the generic Stream definition</A>.<P>
                     24: 
                     25: The following aspects are from Dan Connolly's suggestions: Binary
                     26: search, Structured object scheme basically, SGML content enum type.<P>
                     27: 
                     28: This module is implemented by <A HREF="SGML.c">SGML.c</A>, and it is a
                     29: part of the <A
2.19    ! frystyk    30: HREF="http://www.w3.org/hypertext/WWW/Library/">
        !            31: W3C Reference Library</A>.
2.11      frystyk    32: 
                     33: <PRE>
                     34: #ifndef SGML_H
2.1       timbl      35: #define SGML_H
                     36: 
                     37: #include "HTStream.h"
2.17      frystyk    38: #include "HTStruct.h"
2.16      frystyk    39: </PRE>
2.1       timbl      40: 
                     41: <H2>SGML content types</H2>
                     42: <PRE>typedef enum _SGMLContent{  /* how the content of an element is parsed */
                     43:   SGML_EMPTY,    /* no content */
2.8       timbl      44:   SGML_LITERAL, /* character data. Recognize exact close tag only.
2.1       timbl      45:                    Old www server compatibility only! Not SGML */
                     46:   SGML_CDATA,    /* character data. recognize &lt;/ only */
                     47:   SGML_RCDATA,   /* replaceable character data. recognize &lt;/ and &amp;ref; */
                     48:   SGML_MIXED,    /* elements and parsed character data. recognize all markup */
                     49:   SGML_ELEMENT   /* any data found will be returned as an error */
                     50:   } SGMLContent;
                     51: 
                     52: 
                     53: typedef struct {               /* Describes one attribute of a tag */
                     54:     char *     name;           /* The (constant) name of the attribute */
                     55:                                /* Type information could be added here */
                     56: } attr;
                     57: 
                     58: 
                     59: /*             A tag structure describes an SGML element.
                     60: **             -----------------------------------------
                     61: **
                     62: **
                     63: **     name            is the string which comes after the tag opener "&lt;".
                     64: **
                     65: **     attributes      points to a zero-terminated array
                     66: **                     of attribute names.
                     67: **
                     68: **     contents        determines how the SGML engine parses the characters
                     69: **                     within the element (one of the SGMLContent values).
                     70: **                     NOTE(review): this entry was labelled "litteral",
                     71: **                     which is not a member of the structure - confirm.
                     72: */
                     73: typedef struct _tag HTTag;
                     74: struct _tag{
                     75:     char *     name;                   /* The name of the tag */
                     76:     attr *     attributes;             /* The list of acceptable attributes */
                     77:     int                number_of_attributes;   /* Number of possible attributes */
                     78:     SGMLContent contents;              /* How content is parsed (SGMLContent) */            
                     79: };
                     80: 
                     81: /*             DTD Information
                     82: **             ---------------
                     83: **
                     84: ** Not the whole DTD, but all this parser uses of it.
                     85: */
                     86: typedef struct {
2.2       timbl      87:     HTTag *            tags;           /* Must be in strcmp order by name */ 
                     88:     int                        number_of_tags;         /* presumably the length of tags - confirm */
                     89:     CONST char **      entity_names;   /* Must be in strcmp order by name */
                     90:     int                        number_of_entities;     /* presumably the length of entity_names - confirm */
2.1       timbl      91: } SGML_dtd;
                     92: 
2.17      frystyk    93: #define MAX_ATTRIBUTES 20           /* Max number of attributes per element */
                     94: 
                     95: /*     SGML context passed to parsers */
2.1       timbl      96: 
                     97: typedef struct _HTSGMLContext *HTSGMLContext;  /* Opaque handle: the struct is not defined in this header */
2.17      frystyk    98: </PRE>
2.1       timbl      99: 
2.17      frystyk   100: <H2>Find a Tag by Name</H2>
2.1       timbl     101: 
2.17      frystyk   102: Returns a pointer to the tag within the DTD.
2.1       timbl     103: 
2.17      frystyk   104: <PRE>
                    105: extern HTTag * SGMLFindTag PARAMS((CONST SGML_dtd* dtd, CONST char * string));
2.6       timbl     106: </PRE>
2.1       timbl     107: 
2.17      frystyk   108: <H2>Find an Attribute by Name</H2>
2.1       timbl     109: 
2.17      frystyk   110: Returns the number of the attribute, or -1 on failure.
2.1       timbl     111: 
2.17      frystyk   112: <PRE>
                    113: extern int SGMLFindAttribute PARAMS((HTTag* tag, CONST char * string));
2.6       timbl     114: </PRE>
2.10      timbl     115: 
2.6       timbl     116: <H2>Create an SGML parser</H2>
                    117: <PRE>/*
2.1       timbl     118: ** On entry,
                    119: **     dtd             must point to a DTD structure as defined above
                    120: **     target          is the HTStructured argument (this comment formerly
                    121: **                     listed callbacks and callData, which are not parameters).
                    122: ** On exit,
                    123: **             The default tag starter has been processed.
                    124: */
                    125: 
                    126: 
                    127: extern HTStream* SGML_new PARAMS((
                    128:        CONST SGML_dtd *                dtd,
                    129:        HTStructured *          target));
                    130: 
2.2       timbl     131: extern CONST HTStreamClass SGMLParser;
2.1       timbl     132: 
                    133: #endif /* SGML_H */
2.8       timbl     134: 
                    135: </PRE></BODY>
2.4       timbl     136: </HTML>

Webmaster