Annotation of XML/parser.h, revision 1.45
1.1 veillard 1: /*
1.39 daniel 2: * parser.h : Interfaces, constants and types related to the XML parser.
1.6 veillard 3: *
4: * See Copyright for the status of this software.
5: *
1.23 daniel 6: * Daniel.Veillard@w3.org
1.1 veillard 7: */
8:
9: #ifndef __XML_PARSER_H__
10: #define __XML_PARSER_H__
11:
1.2 veillard 12: #include "tree.h"
1.41 daniel 13: #include "valid.h"
1.32 daniel 14: #include "xmlIO.h"
1.2 veillard 15:
1.7 daniel 16: #ifdef __cplusplus
17: extern "C" {
18: #endif
19:
1.1 veillard 20: /*
21: * Constants.
22: */
23: #define XML_DEFAULT_VERSION "1.0"
24:
1.39 daniel 25: /**
26: * An xmlParserInput is an input flow for the XML processor.
27: * Each entity parsed is associated with an xmlParserInput (except the
28: * few predefined ones). This is the case both for internal entities
29: * - in which case the flow is already completely in memory - and for
30: * external entities - in which case we use the buf structure for
31: * progressive reading and I18N conversions to the internal UTF-8 format.
32: */
33:
1.25 daniel 34: typedef void (* xmlParserInputDeallocate)(CHAR *);
1.14 daniel 35: typedef struct xmlParserInput {
1.32 daniel 36: /* Input buffer */
37: xmlParserInputBufferPtr buf; /* UTF-8 encoded buffer */
38:
1.14 daniel 39: const char *filename; /* The file analyzed, if any */
1.36 daniel 40: const char *directory; /* The directory/base of the file */
1.14 daniel 41: const CHAR *base; /* Base of the array to parse */
42: const CHAR *cur; /* Current char being parsed */
43: int line; /* Current line */
44: int col; /* Current column */
1.35 daniel 45: int consumed; /* How many CHARs were already consumed */
1.25 daniel 46: xmlParserInputDeallocate free; /* Function to deallocate the base */
1.24 daniel 47: } xmlParserInput;
48: typedef xmlParserInput *xmlParserInputPtr; /* Pointer alias for xmlParserInput */
1.7 daniel 49:
1.45 ! daniel 50: typedef xmlParserInputPtr (*xmlExternalEntityLoader)(const char *URL,
! 51: const char *ID,
! 52: xmlParserInputPtr context);
! 53:
1.39 daniel 54: /**
55: * The parser can be asked to collect node information, i.e. at what
56: * place in the file each node was detected.
57: * NOTE: This is off by default and not very well tested.
58: */
1.24 daniel 59: typedef struct _xmlParserNodeInfo {
1.13 daniel 60: const struct xmlNode* node; /* The node this record describes */
61: /* Position and line number at which the text that created the node begins and ends */
62: unsigned long begin_pos;
63: unsigned long begin_line;
64: unsigned long end_pos;
65: unsigned long end_line;
1.24 daniel 66: } _xmlParserNodeInfo;
67: typedef _xmlParserNodeInfo xmlParserNodeInfo;
1.13 daniel 68:
69: typedef struct xmlParserNodeInfoSeq {
70: unsigned long maximum; /* NOTE(review): presumably the allocated capacity of buffer - confirm */
71: unsigned long length; /* NOTE(review): presumably the number of entries in use - confirm */
72: xmlParserNodeInfo* buffer; /* NOTE(review): presumably the array of node info records - confirm */
1.24 daniel 73: } _xmlParserNodeInfoSeq;
74: typedef _xmlParserNodeInfoSeq xmlParserNodeInfoSeq;
75: typedef xmlParserNodeInfoSeq *xmlParserNodeInfoSeqPtr;
1.13 daniel 76:
1.39 daniel 77: /**
78: * The parser is not a state-based parser, but we need to maintain
79: * minimal state information, especially for entity processing.
80: */
1.37 daniel 81: typedef enum xmlParserInputState { /* values are sequential, starting at 0 */
82: XML_PARSER_EOF = 0,
83: XML_PARSER_PROLOG,
84: XML_PARSER_CONTENT,
85: XML_PARSER_ENTITY_DECL,
86: XML_PARSER_ENTITY_VALUE,
87: XML_PARSER_ATTRIBUTE_VALUE,
88: XML_PARSER_DTD,
89: XML_PARSER_EPILOG,
1.38 daniel 90: XML_PARSER_COMMENT,
1.44 daniel 91: XML_PARSER_CDATA_SECTION /* last enumerator: no trailing comma, which C89 and C++98 reject */
1.37 daniel 92: } xmlParserInputState;
93:
1.39 daniel 94: /**
95: * The parser context.
96: * NOTE: This doesn't completely define the parser state; the (current ?)
97: * design of the parser uses recursive function calls since this allows
98: * an easy mapping from the production rules of the specification
99: * to the actual code. The drawback is that the actual function calls
100: * also reflect the parser state. However most of the parsing routines
101: * take as their only argument the parser context pointer, so migrating
102: * to a state-based parser for progressive parsing shouldn't be too hard.
103: */
1.24 daniel 104: typedef struct _xmlParserCtxt {
1.17 daniel 105: struct xmlSAXHandler *sax; /* The SAX handler */
1.27 daniel 106: void *userData; /* opaque user data; NOTE(review): presumably handed to the SAX callbacks as ctx - confirm */
107: xmlDocPtr myDoc; /* the document being built */
1.30 daniel 108: int replaceEntities; /* shall we replace entities ? */
1.37 daniel 109: const CHAR *version; /* the XML version string */
110: const CHAR *encoding; /* encoding, if any */
111: int standalone; /* standalone document */
1.39 daniel 112: int hasExternalSubset; /* the document references an external subset */
113: int hasPErefs; /* the internal subset has PE refs */
1.37 daniel 114: int html; /* are we parsing an HTML document */
1.39 daniel 115: int external; /* are we parsing an external entity */
116:
117: int wellFormed; /* is the document well formed */
118: int valid; /* is the document valid */
1.40 daniel 119: int validate; /* shall we try to validate ? */
1.41 daniel 120: xmlValidCtxt vctxt; /* The validity context */
1.16 daniel 121:
1.37 daniel 122: xmlParserInputState instate; /* current type of input */
1.38 daniel 123: int token; /* next char look-ahead */
1.42 daniel 124:
125: char *directory; /* the data directory */
1.37 daniel 126:
1.14 daniel 127: /* Input stream stack */
1.15 daniel 128: xmlParserInputPtr input; /* Current input stream */
1.14 daniel 129: int inputNr; /* Number of current input streams */
130: int inputMax; /* Max number of input streams */
131: xmlParserInputPtr *inputTab; /* stack of inputs */
1.15 daniel 132:
1.39 daniel 133: /* Node analysis stack only used for DOM building */
1.15 daniel 134: xmlNodePtr node; /* Current parsed Node */
135: int nodeNr; /* Depth of the parsing stack */
136: int nodeMax; /* Max depth of the parsing stack */
137: xmlNodePtr *nodeTab; /* array of nodes */
1.14 daniel 138:
1.13 daniel 139: int record_info; /* Whether node info should be kept */
140: xmlParserNodeInfoSeq node_seq; /* info about each node parsed */
1.24 daniel 141: } _xmlParserCtxt;
142: typedef _xmlParserCtxt xmlParserCtxt;
143: typedef xmlParserCtxt *xmlParserCtxtPtr;
1.7 daniel 144:
1.39 daniel 145: /**
1.17 daniel 146: * a SAX Locator.
147: */
148: typedef struct xmlSAXLocator {
1.28 daniel 149: const CHAR *(*getPublicId)(void *ctx);
150: const CHAR *(*getSystemId)(void *ctx);
151: int (*getLineNumber)(void *ctx);
152: int (*getColumnNumber)(void *ctx);
1.24 daniel 153: } _xmlSAXLocator;
154: typedef _xmlSAXLocator xmlSAXLocator;
155: typedef xmlSAXLocator *xmlSAXLocatorPtr;
1.17 daniel 156:
1.39 daniel 157: /**
158: * A SAX handler is a bunch of callbacks called by the parser when processing
159: * of the input generates data or structure information.
1.17 daniel 160: */
161:
1.27 daniel 162: #include "entities.h"
163:
1.28 daniel 164: typedef xmlParserInputPtr (*resolveEntitySAXFunc) (void *ctx,
1.18 daniel 165: const CHAR *publicId, const CHAR *systemId);
1.28 daniel 166: typedef void (*internalSubsetSAXFunc) (void *ctx, const CHAR *name,
1.27 daniel 167: const CHAR *ExternalID, const CHAR *SystemID);
1.28 daniel 168: typedef xmlEntityPtr (*getEntitySAXFunc) (void *ctx,
1.27 daniel 169: const CHAR *name);
1.39 daniel 170: typedef xmlEntityPtr (*getParameterEntitySAXFunc) (void *ctx,
171: const CHAR *name);
1.28 daniel 172: typedef void (*entityDeclSAXFunc) (void *ctx,
1.27 daniel 173: const CHAR *name, int type, const CHAR *publicId,
174: const CHAR *systemId, CHAR *content);
1.28 daniel 175: typedef void (*notationDeclSAXFunc)(void *ctx, const CHAR *name,
1.18 daniel 176: const CHAR *publicId, const CHAR *systemId);
1.28 daniel 177: typedef void (*attributeDeclSAXFunc)(void *ctx, const CHAR *elem,
1.27 daniel 178: const CHAR *name, int type, int def,
179: const CHAR *defaultValue, xmlEnumerationPtr tree);
1.28 daniel 180: typedef void (*elementDeclSAXFunc)(void *ctx, const CHAR *name,
1.27 daniel 181: int type, xmlElementContentPtr content);
1.28 daniel 182: typedef void (*unparsedEntityDeclSAXFunc)(void *ctx,
1.18 daniel 183: const CHAR *name, const CHAR *publicId,
184: const CHAR *systemId, const CHAR *notationName);
1.28 daniel 185: typedef void (*setDocumentLocatorSAXFunc) (void *ctx,
1.18 daniel 186: xmlSAXLocatorPtr loc);
1.28 daniel 187: typedef void (*startDocumentSAXFunc) (void *ctx);
188: typedef void (*endDocumentSAXFunc) (void *ctx);
189: typedef void (*startElementSAXFunc) (void *ctx, const CHAR *name,
1.27 daniel 190: const CHAR **atts);
1.28 daniel 191: typedef void (*endElementSAXFunc) (void *ctx, const CHAR *name);
192: typedef void (*attributeSAXFunc) (void *ctx, const CHAR *name,
1.19 daniel 193: const CHAR *value);
1.28 daniel 194: typedef void (*referenceSAXFunc) (void *ctx, const CHAR *name);
195: typedef void (*charactersSAXFunc) (void *ctx, const CHAR *ch,
1.27 daniel 196: int len);
1.28 daniel 197: typedef void (*ignorableWhitespaceSAXFunc) (void *ctx,
1.27 daniel 198: const CHAR *ch, int len);
1.28 daniel 199: typedef void (*processingInstructionSAXFunc) (void *ctx,
1.18 daniel 200: const CHAR *target, const CHAR *data);
1.28 daniel 201: typedef void (*commentSAXFunc) (void *ctx, const CHAR *value);
1.43 daniel 202: typedef void (*cdataBlockSAXFunc) (void *ctx, const CHAR *value, int len);
1.28 daniel 203: typedef void (*warningSAXFunc) (void *ctx, const char *msg, ...);
204: typedef void (*errorSAXFunc) (void *ctx, const char *msg, ...);
205: typedef void (*fatalErrorSAXFunc) (void *ctx, const char *msg, ...);
206: typedef int (*isStandaloneSAXFunc) (void *ctx);
207: typedef int (*hasInternalSubsetSAXFunc) (void *ctx);
208: typedef int (*hasExternalSubsetSAXFunc) (void *ctx);
1.18 daniel 209:
1.17 daniel 210: typedef struct xmlSAXHandler {
1.27 daniel 211: internalSubsetSAXFunc internalSubset;
212: isStandaloneSAXFunc isStandalone;
213: hasInternalSubsetSAXFunc hasInternalSubset;
214: hasExternalSubsetSAXFunc hasExternalSubset;
1.18 daniel 215: resolveEntitySAXFunc resolveEntity;
1.27 daniel 216: getEntitySAXFunc getEntity;
217: entityDeclSAXFunc entityDecl;
1.18 daniel 218: notationDeclSAXFunc notationDecl;
1.27 daniel 219: attributeDeclSAXFunc attributeDecl;
220: elementDeclSAXFunc elementDecl;
1.18 daniel 221: unparsedEntityDeclSAXFunc unparsedEntityDecl;
222: setDocumentLocatorSAXFunc setDocumentLocator;
223: startDocumentSAXFunc startDocument;
224: endDocumentSAXFunc endDocument;
225: startElementSAXFunc startElement;
226: endElementSAXFunc endElement;
1.27 daniel 227: referenceSAXFunc reference;
1.18 daniel 228: charactersSAXFunc characters;
229: ignorableWhitespaceSAXFunc ignorableWhitespace;
230: processingInstructionSAXFunc processingInstruction;
1.27 daniel 231: commentSAXFunc comment;
1.18 daniel 232: warningSAXFunc warning;
233: errorSAXFunc error;
234: fatalErrorSAXFunc fatalError;
1.39 daniel 235: getParameterEntitySAXFunc getParameterEntity;
1.43 daniel 236: cdataBlockSAXFunc cdataBlock;
1.24 daniel 237: } xmlSAXHandler;
238: typedef xmlSAXHandler *xmlSAXHandlerPtr;
1.17 daniel 239:
1.39 daniel 240: /**
241: * Global variables: just the default SAX interface tables and XML version information.
1.17 daniel 242: */
1.33 daniel 243: extern const char *xmlParserVersion;
244:
1.19 daniel 245: extern xmlSAXLocator xmlDefaultSAXLocator;
246: extern xmlSAXHandler xmlDefaultSAXHandler;
1.33 daniel 247: extern xmlSAXHandler htmlDefaultSAXHandler;
1.19 daniel 248:
249: #include "entities.h"
1.26 daniel 250: #include "xml-error.h"
1.35 daniel 251:
1.39 daniel 252: /**
1.35 daniel 253: * Input functions
254: */
255:
1.45 ! daniel 256: int xmlParserInputRead (xmlParserInputPtr in,
! 257: int len);
! 258: int xmlParserInputGrow (xmlParserInputPtr in,
! 259: int len);
1.17 daniel 260:
1.39 daniel 261: /**
1.22 daniel 262: * CHAR handling
1.2 veillard 263: */
1.45 ! daniel 264: CHAR * xmlStrdup (const CHAR *cur);
! 265: CHAR * xmlStrndup (const CHAR *cur,
! 266: int len);
! 267: CHAR * xmlStrsub (const CHAR *str,
! 268: int start,
! 269: int len);
! 270: const CHAR * xmlStrchr (const CHAR *str,
! 271: CHAR val);
! 272: const CHAR * xmlStrstr (const CHAR *str,
! 273: CHAR *val); /* NOTE(review): val is not const-qualified, unlike the other string inputs in this group - confirm intent */
! 274: int xmlStrcmp (const CHAR *str1,
! 275: const CHAR *str2);
! 276: int xmlStrncmp (const CHAR *str1,
! 277: const CHAR *str2,
! 278: int len);
! 279: int xmlStrlen (const CHAR *str);
! 280: CHAR * xmlStrcat (CHAR *cur,
! 281: const CHAR *add);
! 282: CHAR * xmlStrncat (CHAR *cur,
! 283: const CHAR *add,
! 284: int len);
1.9 daniel 285:
1.39 daniel 286: /**
287: * Basic parsing Interfaces
1.22 daniel 288: */
1.45 ! daniel 289: xmlDocPtr xmlParseDoc (CHAR *cur);
! 290: xmlDocPtr xmlParseMemory (char *buffer,
! 291: int size);
! 292: xmlDocPtr xmlParseFile (const char *filename);
! 293: int xmlSubstituteEntitiesDefault(int val);
1.22 daniel 294:
1.39 daniel 295: /**
1.22 daniel 296: * Recovery mode
297: */
1.45 ! daniel 298: xmlDocPtr xmlRecoverDoc (CHAR *cur);
! 299: xmlDocPtr xmlRecoverMemory (char *buffer,
! 300: int size);
! 301: xmlDocPtr xmlRecoverFile (const char *filename);
1.22 daniel 302:
1.39 daniel 303: /**
304: * Less common routines and SAX interfaces
1.22 daniel 305: */
1.45 ! daniel 306: int xmlParseDocument (xmlParserCtxtPtr ctxt);
! 307: xmlDocPtr xmlSAXParseDoc (xmlSAXHandlerPtr sax,
! 308: CHAR *cur,
! 309: int recovery);
! 310: xmlDocPtr xmlSAXParseMemory (xmlSAXHandlerPtr sax,
! 311: char *buffer,
! 312: int size,
! 313: int recovery);
! 314: xmlDocPtr xmlSAXParseFile (xmlSAXHandlerPtr sax,
! 315: const char *filename,
! 316: int recovery);
! 317: xmlDtdPtr xmlParseDTD (const CHAR *ExternalID,
! 318: const CHAR *SystemID);
! 319: xmlDtdPtr xmlSAXParseDTD (xmlSAXHandlerPtr sax,
! 320: const CHAR *ExternalID,
! 321: const CHAR *SystemID);
! 322: void xmlInitParserCtxt (xmlParserCtxtPtr ctxt);
! 323: void xmlClearParserCtxt (xmlParserCtxtPtr ctxt);
! 324: void xmlSetupParserForBuffer (xmlParserCtxtPtr ctxt,
! 325: const CHAR* buffer,
! 326: const char* filename);
! 327: void xmlDefaultSAXHandlerInit(void);
! 328: void htmlDefaultSAXHandlerInit(void);
1.9 daniel 329:
1.45 ! daniel 330: /**
! 331: * Node information
! 332: */
! 333: const xmlParserNodeInfo*
! 334: xmlParserFindNodeInfo (const xmlParserCtxt* ctxt,
1.24 daniel 335: const xmlNode* node);
1.45 ! daniel 336: void xmlInitNodeInfoSeq (xmlParserNodeInfoSeqPtr seq);
! 337: void xmlClearNodeInfoSeq (xmlParserNodeInfoSeqPtr seq);
1.13 daniel 338: unsigned long xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeq* seq,
339: const xmlNode* node);
1.45 ! daniel 340: void xmlParserAddNodeInfo (xmlParserCtxtPtr ctxt,
! 341: const xmlParserNodeInfo* info);
! 342:
! 343: /*
! 344: * External entities handling actually implemented in xmlIO
! 345: */
1.7 daniel 346:
1.45 ! daniel 347: void xmlSetExternalEntityLoader(xmlExternalEntityLoader f);
! 348: xmlExternalEntityLoader
! 349: xmlGetExternalEntityLoader(void);
! 350: xmlParserInputPtr
! 351: xmlLoadExternalEntity (const char *URL,
! 352: const char *ID,
! 353: xmlParserInputPtr context);
1.7 daniel 354: #ifdef __cplusplus
355: }
356: #endif
1.1 veillard 357:
358: #endif /* __XML_PARSER_H__ */
359:
Webmaster