Annotation of libwww/Library/src/SGML.html, revision 2.30

2.4       timbl       1: <HTML>
                      2: <HEAD>
2.25      frystyk     3:   <!-- Changed by: Henrik Frystyk Nielsen,  1-Jun-1996 -->
2.26      frystyk     4:   <TITLE>W3C Sample Code Library libwww SGML Parser</TITLE>
2.14      frystyk     5: </HEAD>
2.1       timbl       6: <BODY>
2.25      frystyk     7: <H1>
                      8:   SGML Parser
                      9: </H1>
2.14      frystyk    10: <PRE>
                     11: /*
2.18      frystyk    12: **     (c) COPYRIGHT MIT 1995.
2.14      frystyk    13: **     Please first read the full copyright statement in the file COPYRIGH.
                     14: */
                     15: </PRE>
2.25      frystyk    16: <P>
                     17: The SGML parser is a state machine. It is called for every character of the
                     18: input stream. The DTD data structure contains pointers to functions which
                     19: are called to implement the actual effect of the text read. When these functions
                     20: are called, the attribute structures pointed to by the DTD are valid, and
                     21: the function is passed a pointer to the current tag structure, and an "element
                     22: stack" which represents the state of nesting within SGML elements. See also
                     23: the <A HREF="HTStream.html">generic Stream definition</A>.
                     24: <P>
                     25: The following aspects are from Dan Connolly's suggestions: Binary search,
                     26: Structured object scheme basically, SGML content enum type.
                     27: <P>
                     28: This module is implemented by <A HREF="SGML.c">SGML.c</A>, and it is a part
2.27      frystyk    29: of the <A HREF="http://www.w3.org/Library/">W3C Sample Code Library</A>.
2.11      frystyk    30: <PRE>
                     31: #ifndef SGML_H
2.1       timbl      32: #define SGML_H
                     33: 
                     34: #include "HTStream.h"
2.17      frystyk    35: #include "HTStruct.h"
2.16      frystyk    36: </PRE>
2.25      frystyk    37: <H2>
2.28      frystyk    38:   SGML Content Types
2.25      frystyk    39: </H2>
2.28      frystyk    40: <PRE>
                     41: typedef enum _SGMLContent{
2.25      frystyk    42:     SGML_EMPTY,        /* No content */
                     43:     SGML_LITERAL,      /* Character data. Only the exact close tag is recognized.
                     44:                           Kept for old WWW server compatibility only; not SGML */
                     45:     SGML_CDATA,        /* Character data. Only &lt;/ is recognized */
                     46:     SGML_RCDATA,       /* Replaceable character data. Only &lt;/ and &amp;ref; are recognized */
                     47:     SGML_MIXED,        /* Elements and parsed character data. All markup is recognized */
                     48:     SGML_ELEMENT       /* Any data found will be returned as an error */
2.28      frystyk    49: } SGMLContent;
                     50: </PRE>
                     51: <H2>
                     52:   Attribute Types
                     53: </H2>
                     54: <P>
                     55: Describes an SGML tag attribute.
                     56: <PRE>
                     57: typedef struct _HTAttr {
2.1       timbl      58:     char *     name;           /* The (constant) name of the attribute */
                     59:                                /* Type information could be added here */
2.28      frystyk    60: } HTAttr;
2.1       timbl      61: 
2.28      frystyk    62: extern char * HTAttr_name (HTAttr * attr);    /* NOTE(review): presumably returns the attribute's name - confirm in SGML.c */
                     63: </PRE>
                     64: <H2>
                     65:   Tag Structure Describing SGML Elements
                     66: </H2>
                     67: <DL>
                     68:   <DT>
                     69:     name
                     70:   <DD>
                     71:     is the string which comes after the tag opener "&lt;".
                     72:   <DT>
                     73:     attributes
                     74:   <DD>
                     75:     points to a zero-terminated array of attribute names.
                     76:   <DT>
                     77:     contents
                     78:   <DD>
                     79:     determines how the SGML engine parses the characters within the element. If
                     80:     set to SGML_LITERAL, tag openers are ignored except for that which opens a
                     81:     matching closing tag.
                     82: </DL>
                     83: <PRE>
                     84: typedef struct _HTTag {
                     85:     char *      name;                  /* The name of the tag */
                     86:     HTAttr *   attributes;             /* The list of acceptable attributes */
2.1       timbl      87:     int                number_of_attributes;   /* Number of possible attributes */
                     88:     SGMLContent contents;              /* Content type of the element. NOTE(review): older note read "End only on end tag @@" - confirm intent */
2.28      frystyk    89: } HTTag;
                     90: 
                     91: extern char * HTTag_name (HTTag * tag);    /* NOTE(review): presumably the tag's name field - confirm in SGML.c */
                     92: extern SGMLContent HTTag_content (HTTag * tag);    /* NOTE(review): presumably the tag's contents field - confirm */
                     93: extern int HTTag_attributes (HTTag * tag);    /* NOTE(review): presumably the tag's number_of_attributes - confirm */
                     94: extern char * HTTag_attributeName (HTTag * tag, int attribute_number);    /* NOTE(review): attribute_number presumably indexes the tag's attributes - confirm */
                     95: </PRE>
                     96: <H2>
                     97:   DTD Information
                     98: </H2>
                     99: <P>
                    100: Not the whole DTD, but all this parser uses of it.
                    101: <PRE>
2.30    ! frystyk   102: #define MAX_ATTRIBUTES 32           /* Maximum number of attributes per element */
2.1       timbl     103: 
                    104: typedef struct {
2.2       timbl     105:     HTTag *            tags;           /* Tag table; must be in strcmp order by name */ 
                    106:     int                        number_of_tags;         /* Count paired with tags - presumably its length */
2.23      frystyk   107:     const char **      entity_names;   /* Entity name table; must be in strcmp order by name */
2.2       timbl     108:     int                        number_of_entities;     /* Count paired with entity_names - presumably its length */
2.1       timbl     109: } SGML_dtd;
                    110: 
2.28      frystyk   111: extern HTTag * SGML_findTag (SGML_dtd * dtd, int element_number);    /* NOTE(review): element_number presumably indexes dtd's tags - confirm in SGML.c */
                    112: extern char * SGML_findTagName (SGML_dtd * dtd, int element_number);    /* NOTE(review): presumably the name of that tag - confirm */
                    113: extern SGMLContent SGML_findTagContents (SGML_dtd * dtd, int element_number);    /* NOTE(review): presumably the contents of that tag - confirm */
2.29      frystyk   114: extern int SGML_findElementNumber(SGML_dtd *dtd, char *name_element);    /* NOTE(review): presumably maps a tag name to its element number; not-found result unknown - confirm */
2.17      frystyk   115: </PRE>
2.25      frystyk   116: <H2>
2.28      frystyk   117:   Create an SGML Parser Instance
2.25      frystyk   118: </H2>
2.28      frystyk   119: <P>
                    120: Create an SGML parser instance which converts a stream to a structured stream.
                    121: <PRE>
2.25      frystyk   122: extern HTStream * SGML_new (const SGML_dtd *   dtd,       /* DTD information given to the new parser */
                    123:                            HTStructured *      target);   /* NOTE(review): presumably the structured stream that receives the parser's output - confirm */
2.28      frystyk   124: </PRE>
                    125: <PRE>
2.1       timbl     126: #endif /* SGML_H */
2.25      frystyk   127: </PRE>
2.28      frystyk   128: <P>
                    129:   <HR>
                    130: <ADDRESS>
2.30    ! frystyk   131:   @(#) $Id: SGML.html,v 2.29 1999/02/22 01:04:24 frystyk Exp $
2.28      frystyk   132: </ADDRESS>
2.25      frystyk   133: </BODY></HTML>

Webmaster