Annotation of libwww/Library/src/SGML.html, revision 2.24

2.4       timbl       1: <HTML>
                      2: <HEAD>
2.24    ! frystyk     3: <TITLE>W3C Reference Library libwww SGML Parser</TITLE>
2.22      frystyk     4: <!-- Changed by: Henrik Frystyk Nielsen,  5-Dec-1995 -->
2.14      frystyk     5: </HEAD>
2.1       timbl       6: <BODY>
2.11      frystyk     7: 
2.24    ! frystyk     8: <H1>SGML Parser</H1>
2.11      frystyk     9: 
2.14      frystyk    10: <PRE>
                     11: /*
2.18      frystyk    12: **     (c) COPYRIGHT MIT 1995.
2.14      frystyk    13: **     Please first read the full copyright statement in the file COPYRIGH.
                     14: */
                     15: </PRE>
                     16: 
                     17: The SGML parser is a state machine.  It is called for every character
                     18: of the input stream. The DTD data structure contains pointers to
                     19: functions which are called to implement the actual effect of the text
                     20: read. When these functions are called, the attribute structures
                     21: pointed to by the DTD are valid, and the function is passed a pointer
                     22: to the current tag structure, and an "element stack" which represents
                     23: the state of nesting within SGML elements. See also <A
                     24: HREF="HTStream.html">the generic Stream definition</A><P>
                     25: 
                     26: The following aspects are from Dan Connolly's suggestions: Binary
                     27: search, Structured object scheme basically, SGML content enum type.<P>
                     28: 
                     29: This module is implemented by <A HREF="SGML.c">SGML.c</A>, and it is a
                     30: part of the <A
2.20      frystyk    31: HREF="http://www.w3.org/pub/WWW/Library/">
2.19      frystyk    32: W3C Reference Library</A>.
2.11      frystyk    33: 
                     34: <PRE>
                     35: #ifndef SGML_H
2.1       timbl      36: #define SGML_H
                     37: 
                     38: #include "HTStream.h"
2.17      frystyk    39: #include "HTStruct.h"
2.16      frystyk    40: </PRE>
2.1       timbl      41: 
                     42: <H2>SGML content types</H2>
                     43: <PRE>typedef enum _SGMLContent{
                     44:   SGML_EMPTY,    /* no content */
2.8       timbl      45:   SGML_LITERAL, /* character data. Recognizes the exact close tag only.
2.1       timbl      46:                    Old www server compatibility only! Not SGML */
                     47:   SGML_CDATA,    /* character data. Recognizes &lt;/ only */
                     48:   SGML_RCDATA,   /* replaceable character data. Recognizes &lt;/ and &amp;ref; */
                     49:   SGML_MIXED,    /* elements and parsed character data. Recognizes all markup */
                     50:   SGML_ELEMENT   /* any data found will be returned as an error */
                     51:   } SGMLContent;
                     52: 
                     53: 
                     54: typedef struct {
                     55:     char *     name;           /* The (constant) name of the attribute */
                     56:                                /* Type info could be added here; only the name is stored */
                     57: } attr;
                     58: 
                     59: 
                     60: /*             A tag structure describes an SGML element.
                     61: **             -----------------------------------------
                     62: **
                     63: **
                     64: **     name            is the string which comes after the tag opener "&lt;".
                     65: **
                     66: **     attributes      points to a zero-terminated array
                     67: **                     of attribute names.
                     68: **
                     69: **     contents        determines how the SGML engine parses the characters
                     70: **                     within the element. For SGML_LITERAL, tag openers are
                     71: **                     ignored except for that which opens a matching closing tag.
                     72: **
                     73: */
                     74: typedef struct _tag HTTag;
                     75: struct _tag{
                     76:     char *     name;                   /* The name of the tag */
                     77:     attr *     attributes;             /* The list of acceptable attributes */
                     78:     int                number_of_attributes;   /* Number of possible attributes */
                     79:     SGMLContent contents;              /* End only on end tag @@ */            
                     80: };
                     81: 
                     82: /*             DTD Information
                     83: **             ---------------
                     84: **
                     85: ** Not the whole DTD, but all that this parser uses of it.
                     86: */
                     87: typedef struct {
2.2       timbl      88:     HTTag *            tags;           /* Must be in strcmp order by name */ 
                     89:     int                        number_of_tags;
2.23      frystyk    90:     const char **      entity_names;   /* Must be in strcmp order by name */
2.2       timbl      91:     int                        number_of_entities;
2.1       timbl      92: } SGML_dtd;
                     93: 
2.17      frystyk    94: #define MAX_ATTRIBUTES 20           /* Max number of attributes per element */
                     95: 
                     96: /*     SGML context passed to parsers */
2.1       timbl      97: 
                     98: typedef struct _HTSGMLContext *HTSGMLContext;  /* Opaque: the struct is not defined in this header */
2.17      frystyk    99: </PRE>
2.1       timbl     100: 
2.17      frystyk   101: <H2>Find a Tag by Name</H2>
2.1       timbl     102: 
2.17      frystyk   103: Returns a pointer to the tag within the DTD.
2.1       timbl     104: 
2.17      frystyk   105: <PRE>
2.23      frystyk   106: extern HTTag * SGMLFindTag (const SGML_dtd* dtd, const char * string); /* returns a pointer to the tag within the DTD */
2.6       timbl     107: </PRE>
2.1       timbl     108: 
2.17      frystyk   109: <H2>Find an Attribute by Name</H2>
2.1       timbl     110: 
2.17      frystyk   111: Returns the number of the attribute or -1 if failure.
2.1       timbl     112: 
2.17      frystyk   113: <PRE>
2.23      frystyk   114: extern int SGMLFindAttribute (HTTag* tag, const char * string); /* returns the attribute number, or -1 on failure */
2.6       timbl     115: </PRE>
2.10      timbl     116: 
2.6       timbl     117: <H2>Create an SGML parser</H2>
                    118: <PRE>/*
2.1       timbl     119: ** On entry,
                    120: **     dtd             must point to a DTD structure as defined above
                    121: **     target          presumably the HTStructured object that receives the parsed
                    122: **                     output - TODO confirm (this text formerly named "callbacks"/"callData").
                    123: ** On exit,
                    124: **             The default tag starter has been processed.
                    125: */
                    126: 
                    127: 
2.21      frystyk   128: extern HTStream* SGML_new (
2.23      frystyk   129:        const SGML_dtd *                dtd,
2.21      frystyk   130:        HTStructured *          target);
2.1       timbl     131: 
2.22      frystyk   132: #if 0
2.23      frystyk   133: extern const HTStreamClass SGMLParser;
2.22      frystyk   134: #endif
2.1       timbl     135: 
                    136: #endif /* SGML_H */
2.8       timbl     137: 
                    138: </PRE></BODY>
2.4       timbl     139: </HTML>

Webmaster