Annotation of libwww/Library/src/SGML.html, revision 2.31

2.4       timbl       1: <HTML>
                      2: <HEAD>
2.25      frystyk     3:   <!-- Changed by: Henrik Frystyk Nielsen,  1-Jun-1996 -->
2.26      frystyk     4:   <TITLE>W3C Sample Code Library libwww SGML Parser</TITLE>
2.14      frystyk     5: </HEAD>
2.1       timbl       6: <BODY>
2.25      frystyk     7: <H1>
                      8:   SGML Parser
                      9: </H1>
2.14      frystyk    10: <PRE>
                     11: /*
2.18      frystyk    12: **     (c) COPYRIGHT MIT 1995.
2.14      frystyk    13: **     Please first read the full copyright statement in the file COPYRIGH.
                     14: */
                     15: </PRE>
2.25      frystyk    16: <P>
                     17: The SGML parser is a state machine. It is called for every character of the
                     18: input stream. The DTD data structure contains pointers to functions which
                     19: are called to implement the actual effect of the text read. When these functions
                     20: are called, the attribute structures pointed to by the DTD are valid, and
                     21: the function is passed a pointer to the current tag structure, and an "element
                     22: stack" which represents the state of nesting within SGML elements. See also
                     23: the <A HREF="HTStream.html">generic Stream definition</A>.
                     24: <P>
                     25: The following aspects are from Dan Connolly's suggestions: Binary search,
                     26: Structured object scheme basically, SGML content enum type.
                     27: <P>
                     28: This module is implemented by <A HREF="SGML.c">SGML.c</A>, and it is a part
2.27      frystyk    29: of the <A HREF="http://www.w3.org/Library/">W3C Sample Code Library</A>.
2.11      frystyk    30: <PRE>
                     31: #ifndef SGML_H
2.1       timbl      32: #define SGML_H
                     33: 
                     34: #include "HTStream.h"
2.17      frystyk    35: #include "HTStruct.h"
2.31    ! vbancrof   36: 
        !            37: #ifdef __cplusplus
        !            38: extern "C" { 
        !            39: #endif 
2.16      frystyk    40: </PRE>
2.25      frystyk    41: <H2>
2.28      frystyk    42:   SGML Content Types
2.25      frystyk    43: </H2>
2.28      frystyk    44: <PRE>
                     45: typedef enum _SGMLContent{     /* Content type of an SGML element */
2.25      frystyk    46:     SGML_EMPTY,        /* No content */
                     47:     SGML_LITERAL,      /* Character data. Only the exact close tag is recognized.
                     48:                           For old WWW server compatibility only! Not SGML */
                     49:     SGML_CDATA,        /* Character data. Only &lt;/ is recognized */
                     50:     SGML_RCDATA,       /* Replaceable character data. &lt;/ and &amp;ref; are recognized */
                     51:     SGML_MIXED,        /* Elements and parsed character data. All markup is recognized */
                     52:     SGML_ELEMENT       /* Any data found will be returned as an error */
2.28      frystyk    53: } SGMLContent;
                     54: </PRE>
                     55: <H2>
                     56:   Attribute Types
                     57: </H2>
                     58: <P>
                     59: Describes the SGML tag attribute
                     60: <PRE>
                     61: typedef struct _HTAttr {        /* Describes one SGML tag attribute */
2.1       timbl      62:     char *     name;           /* The (constant) name of the attribute */
                     63:                                /* Could put type info in here */
2.28      frystyk    64: } HTAttr;
2.1       timbl      65: 
2.28      frystyk    66: extern char * HTAttr_name (HTAttr * attr);     /* NOTE(review): presumably returns the name field of attr; confirm in SGML.c */
                     67: </PRE>
                     68: <H2>
                     69:   Tag Structure Describing SGML Elements
                     70: </H2>
                     71: <DL>
                     72:   <DT>
                     73:     name
                     74:   <DD>
                     75:     is the string which comes after the tag opener "&lt;".
                     76:   <DT>
                     77:     attributes
                     78:   <DD>
                     79:     points to a zero-terminated array of attribute names.
                     80:   <DT>
                     81:     litteral
                     82:   <DD>
                     83:     determines how the SGML engine parses the characters within the element. If
                     84:     set, tag openers are ignored except for that which opens a matching closing
                     85:     tag.
                     86: </DL>
                     87: <PRE>
                     88: typedef struct _HTTag {         /* Describes one SGML element (tag) */
                     89:     char *      name;                  /* The name of the tag */
                     90:     HTAttr *   attributes;             /* The list of acceptable attributes */
2.1       timbl      91:     int                number_of_attributes;   /* Number of possible attributes */
                     92:     SGMLContent contents;              /* Content type of the element (an SGMLContent value) */
2.28      frystyk    93: } HTTag;
                     94: 
                     95: extern char * HTTag_name (HTTag * tag);
                     96: extern SGMLContent HTTag_content (HTTag * tag);
                     97: extern int HTTag_attributes (HTTag * tag);     /* NOTE(review): returns int, so presumably the attribute count rather than the array; confirm in SGML.c */
                     98: extern char * HTTag_attributeName (HTTag * tag, int attribute_number); /* NOTE(review): handling of an out-of-range attribute_number is not shown here; confirm in SGML.c */
                     99: </PRE>
                    100: <H2>
                    101:   DTD Information
                    102: </H2>
                    103: <P>
                    104: Not the whole DTD, but all this parser uses of it.
                    105: <PRE>
2.30      frystyk   106: #define MAX_ATTRIBUTES 32           /* Max number of attributes per element */
2.1       timbl     107: 
                    108: typedef struct {                    /* The parts of the DTD that this parser uses */
2.2       timbl     109:     HTTag *            tags;           /* Must be in strcmp order by name */ 
                    110:     int                        number_of_tags;         /* NOTE(review): presumably the number of entries in tags; confirm */
2.23      frystyk   111:     const char **      entity_names;   /* Must be in strcmp order by name */
2.2       timbl     112:     int                        number_of_entities;     /* NOTE(review): presumably the number of entries in entity_names; confirm */
2.1       timbl     113: } SGML_dtd;
                    114: 
2.28      frystyk   115: extern HTTag * SGML_findTag (SGML_dtd * dtd, int element_number);      /* NOTE(review): element_number is presumably an index into the tags array; confirm in SGML.c */
                    116: extern char * SGML_findTagName (SGML_dtd * dtd, int element_number);
                    117: extern SGMLContent SGML_findTagContents (SGML_dtd * dtd, int element_number);
2.29      frystyk   118: extern int SGML_findElementNumber(SGML_dtd *dtd, char *name_element);  /* NOTE(review): the result for an unknown name is not shown here; check SGML.c */
2.17      frystyk   119: </PRE>
2.25      frystyk   120: <H2>
2.28      frystyk   121:   Create an SGML Parser Instance
2.25      frystyk   122: </H2>
2.28      frystyk   123: <P>
                    124: Create an SGML parser instance which converts a stream to a structured stream.
                    125: <PRE>
2.25      frystyk   126: extern HTStream * SGML_new (const SGML_dtd *   dtd,
                    127:                            HTStructured *      target);        /* Creates an SGML parser instance. NOTE(review): target is presumably the structured stream that receives the parsed output; confirm in SGML.c */
2.28      frystyk   128: </PRE>
                    129: <PRE>
2.31    ! vbancrof  130: #ifdef __cplusplus
        !           131: }
        !           132: #endif
        !           133: 
2.1       timbl     134: #endif /* SGML_H */
2.25      frystyk   135: </PRE>
2.28      frystyk   136: <P>
                    137:   <HR>
                    138: <ADDRESS>
2.31    ! vbancrof  139:   @(#) $Id: SGML.html,v 2.30 1999/04/02 16:10:36 frystyk Exp $
2.28      frystyk   140: </ADDRESS>
2.25      frystyk   141: </BODY></HTML>

Webmaster