Annotation of libwww/Library/src/SGML.html, revision 2.28

2.4       timbl       1: <HTML>
                      2: <HEAD>
2.25      frystyk     3:   <!-- Changed by: Henrik Frystyk Nielsen,  1-Jun-1996 -->
2.26      frystyk     4:   <TITLE>W3C Sample Code Library libwww SGML Parser</TITLE>
2.14      frystyk     5: </HEAD>
2.1       timbl       6: <BODY>
2.25      frystyk     7: <H1>
                      8:   SGML Parser
                      9: </H1>
2.14      frystyk    10: <PRE>
                     11: /*
2.18      frystyk    12: **     (c) COPYRIGHT MIT 1995.
2.14      frystyk    13: **     Please first read the full copyright statement in the file COPYRIGH.
                     14: */
                     15: </PRE>
2.25      frystyk    16: <P>
                     17: The SGML parser is a state machine. It is called for every character of the
                     18: input stream. The DTD data structure contains pointers to functions which
                     19: are called to implement the actual effect of the text read. When these functions
                     20: are called, the attribute structures pointed to by the DTD are valid, and
                     21: the function is passed a pointer to the current tag structure, and an "element
                     22: stack" which represents the state of nesting within SGML elements. See also
                     23: the <A HREF="HTStream.html">generic Stream definition</A>.
                     24: <P>
                     25: The following aspects are from Dan Connolly's suggestions: Binary search,
                     26: Structured object scheme basically, SGML content enum type.
                     27: <P>
                     28: This module is implemented by <A HREF="SGML.c">SGML.c</A>, and it is a part
2.27      frystyk    29: of the <A HREF="http://www.w3.org/Library/">W3C Sample Code Library</A>.
2.11      frystyk    30: <PRE>
                     31: #ifndef SGML_H
2.1       timbl      32: #define SGML_H
                     33: 
                     34: #include "HTStream.h"
2.17      frystyk    35: #include "HTStruct.h"
2.16      frystyk    36: </PRE>
2.25      frystyk    37: <H2>
2.28    ! frystyk    38:   SGML Content Types
2.25      frystyk    39: </H2>
2.28    ! frystyk    40: <PRE>
        !            41: typedef enum _SGMLContent{
2.25      frystyk    42:     SGML_EMPTY,        /* No content */
                     43:     SGML_LITERAL,      /* Character data; only the exact close tag is recognized.
                     44:                           For old WWW server compatibility only - not SGML! */
                     45:     SGML_CDATA,        /* Character data; only &lt;/ is recognized */
                     46:     SGML_RCDATA,       /* Replaceable character data; &lt;/ and &amp;ref; are recognized */
                     47:     SGML_MIXED,        /* Elements and parsed character data; all markup is recognized */
                     48:     SGML_ELEMENT       /* Any data found will be returned as an error */
2.28    ! frystyk    49: } SGMLContent;
        !            50: </PRE>
        !            51: <H2>
        !            52:   Attribute Types
        !            53: </H2>
        !            54: <P>
        !            55: Describes the SGML tag attribute
        !            56: <PRE>
        !            57: typedef struct _HTAttr {
2.1       timbl      58:     char *     name;           /* The (constant) name of the attribute */
                     59:                                /* NOTE: attribute type info could be added here */
2.28    ! frystyk    60: } HTAttr;
2.1       timbl      61: 
2.28    ! frystyk    62: extern char * HTAttr_name (HTAttr * attr);  /* NOTE(review): presumably returns attr's name - confirm in SGML.c */
        !            63: </PRE>
        !            64: <H2>
        !            65:   Tag Structure Describing SGML Elements
        !            66: </H2>
        !            67: <DL>
        !            68:   <DT>
        !            69:     name
        !            70:   <DD>
        !            71:     is the string which comes after the tag opener "&lt;".
        !            72:   <DT>
        !            73:     attributes
        !            74:   <DD>
        !            75:     points to a zero-terminated array of attribute names.
        !            76:   <DT>
        !            77:     contents
        !            78:   <DD>
        !            79:     determines how the SGML engine parses the characters within the element. If
        !            80:     SGML_LITERAL, tag openers are ignored except for the one which opens a
        !            81:     matching closing tag.
        !            82: </DL>
        !            83: <PRE>
        !            84: typedef struct _HTTag {
        !            85:     char *      name;                  /* The name of the tag */
        !            86:     HTAttr *   attributes;             /* The list of acceptable attributes */
2.1       timbl      87:     int                number_of_attributes;   /* Number of possible attributes */
                     88:     SGMLContent contents;              /* How the element's content is parsed; see SGMLContent */            
2.28    ! frystyk    89: } HTTag;
        !            90: 
        !            91: extern char * HTTag_name (HTTag * tag);
        !            92: extern SGMLContent HTTag_content (HTTag * tag);
        !            93: extern int HTTag_attributes (HTTag * tag);  /* NOTE(review): presumably the attribute count - confirm in SGML.c */
        !            94: extern char * HTTag_attributeName (HTTag * tag, int attribute_number);  /* NOTE(review): attribute_number presumably indexes tag's attributes - confirm */
        !            95: </PRE>
        !            96: <H2>
        !            97:   DTD Information
        !            98: </H2>
        !            99: <P>
        !           100: Not the whole DTD, but all this parser uses of it.
        !           101: <PRE>
        !           102: #define MAX_ATTRIBUTES 20           /* Maximum number of attributes per element */
2.1       timbl     103: 
                    104: typedef struct {
2.2       timbl     105:     HTTag *            tags;           /* Must be sorted by name in strcmp() order */ 
                    106:     int                        number_of_tags;         /* Number of tags */
2.23      frystyk   107:     const char **      entity_names;   /* Must be sorted by name in strcmp() order */
2.2       timbl     108:     int                        number_of_entities;     /* Number of entities */
2.1       timbl     109: } SGML_dtd;
                    110: 
2.28    ! frystyk   111: extern HTTag * SGML_findTag (SGML_dtd * dtd, int element_number);
        !           112: extern char * SGML_findTagName (SGML_dtd * dtd, int element_number);
        !           113: extern SGMLContent SGML_findTagContents (SGML_dtd * dtd, int element_number);
2.17      frystyk   114: </PRE>
2.25      frystyk   115: <H2>
2.28    ! frystyk   116:   Create an SGML Parser Instance
2.25      frystyk   117: </H2>
2.28    ! frystyk   118: <P>
        !           119: Create an SGML parser instance which converts a stream to a structured stream.
        !           120: <PRE>
2.25      frystyk   121: extern HTStream * SGML_new (const SGML_dtd *   dtd,        /* Tags and entities the parser uses */
                    122:                            HTStructured *      target);    /* Structured stream the input is converted to */
2.28    ! frystyk   123: </PRE>
        !           124: <PRE>
2.1       timbl     125: #endif /* SGML_H */
2.25      frystyk   126: </PRE>
2.28    ! frystyk   127: <P>
        !           128:   <HR>
        !           129: <ADDRESS>
        !           130:   @(#) $Id: HTML.html,v 2.35 1998/05/14 02:10:42 frystyk Exp $
        !           131: </ADDRESS>
2.25      frystyk   132: </BODY></HTML>

Webmaster