Annotation of XML/parser.h, revision 1.69
1.1 veillard 1: /*
1.39 daniel 2: * parser.h : Interfaces, constants and types related to the XML parser.
1.6 veillard 3: *
4: * See Copyright for the status of this software.
5: *
1.23 daniel 6: * Daniel.Veillard@w3.org
1.1 veillard 7: */
8:
9: #ifndef __XML_PARSER_H__
10: #define __XML_PARSER_H__
11:
1.2 veillard 12: #include "tree.h"
1.41 daniel 13: #include "valid.h"
1.32 daniel 14: #include "xmlIO.h"
1.55 daniel 15: #include "entities.h"
16:
1.2 veillard 17:
1.7 daniel 18: #ifdef __cplusplus
19: extern "C" {
20: #endif
21:
1.1 veillard 22: /*
23: * Constants.
24: */
25: #define XML_DEFAULT_VERSION "1.0"
26:
1.39 daniel 27: /**
28: * an xmlParserInput is an input flow for the XML processor.
29: * Each entity parsed is associated with an xmlParserInput (except the
30: * few predefined ones). This is the case both for internal entities
31: * - in which case the flow is already completely in memory - or
32: * external entities - in which case we use the buf structure for
33: * progressive reading and I18N conversions to the internal UTF-8 format.
34: */
35:
1.49 daniel 36: typedef void (* xmlParserInputDeallocate)(xmlChar *); /* callback type stored in xmlParserInput.free to deallocate the base */
1.58 daniel 37: typedef struct _xmlParserInput xmlParserInput;
38: typedef xmlParserInput *xmlParserInputPtr;
39: struct _xmlParserInput {
1.32 daniel 40: /* Input buffer */
41: xmlParserInputBufferPtr buf; /* UTF-8 encoded buffer */
42:
1.14 daniel 43: const char *filename; /* The file analyzed, if any */
1.36 daniel 44: const char *directory; /* the directory/base of the file */
1.56 daniel 45: const xmlChar *base; /* Base of the array to parse */
46: const xmlChar *cur; /* Current char being parsed */
47: int length; /* length if known */
1.14 daniel 48: int line; /* Current line */
49: int col; /* Current column */
1.56 daniel 50: int consumed; /* How many xmlChars already consumed */
1.25 daniel 51: xmlParserInputDeallocate free; /* function to deallocate the base */
1.60 daniel 52: const xmlChar *encoding; /* the encoding string for entity */
1.61 daniel 53: const xmlChar *version; /* the version string for entity */
1.63 daniel 54: int standalone; /* Was that entity marked standalone */
1.58 daniel 55: };
1.7 daniel 56:
1.39 daniel 57: /**
58: * The parser can be asked to collect node information, i.e. at what
59: * place in the file each node was detected.
60: * NOTE: This is off by default and not very well tested.
61: */
1.58 daniel 62: typedef struct _xmlParserNodeInfo xmlParserNodeInfo;
63: typedef xmlParserNodeInfo *xmlParserNodeInfoPtr;
64:
65: struct _xmlParserNodeInfo {
66: const struct _xmlNode* node; /* the node this record describes */
1.13 daniel 67: /* Position & line # at which the text that created the node begins & ends */
68: unsigned long begin_pos;
69: unsigned long begin_line;
70: unsigned long end_pos;
71: unsigned long end_line;
1.58 daniel 72: };
1.13 daniel 73:
1.58 daniel 74: typedef struct _xmlParserNodeInfoSeq xmlParserNodeInfoSeq;
75: typedef xmlParserNodeInfoSeq *xmlParserNodeInfoSeqPtr;
76: struct _xmlParserNodeInfoSeq { /* a sequence of xmlParserNodeInfo records */
1.13 daniel 77: unsigned long maximum; /* presumably the allocated capacity of buffer, in records -- TODO confirm */
78: unsigned long length; /* presumably the number of records currently in use -- TODO confirm */
79: xmlParserNodeInfo* buffer; /* the array holding the records */
1.58 daniel 80: };
1.13 daniel 81:
1.39 daniel 82: /**
1.58 daniel 83: * The parser is now working also as a state based parser
84: * The recursive one uses the state info for entity processing
1.39 daniel 85: */
1.50 daniel 86: typedef enum {
1.56 daniel 87: XML_PARSER_EOF = -1, /* nothing is to be parsed */
88: XML_PARSER_START = 0, /* nothing has been parsed */
89: XML_PARSER_MISC, /* Misc* before internal subset */
90: XML_PARSER_PI, /* Within a processing instruction */
91: XML_PARSER_DTD, /* within some DTD content */
92: XML_PARSER_PROLOG, /* Misc* after internal subset */
93: XML_PARSER_COMMENT, /* within a comment */
94: XML_PARSER_START_TAG, /* within a start tag */
95: XML_PARSER_CONTENT, /* within the content */
96: XML_PARSER_CDATA_SECTION, /* within a CDATA section */
97: XML_PARSER_END_TAG, /* within a closing tag */
98: XML_PARSER_ENTITY_DECL, /* within an entity declaration */
99: XML_PARSER_ENTITY_VALUE, /* within an entity value in a decl */
100: XML_PARSER_ATTRIBUTE_VALUE, /* within an attribute value */
1.64 daniel 101: XML_PARSER_SYSTEM_LITERAL, /* within a SYSTEM value */
1.56 daniel 102: XML_PARSER_EPILOG /* the Misc* after the last end tag */
1.37 daniel 103: } xmlParserInputState;
104:
1.39 daniel 105: /**
106: * The parser context.
107: * NOTE This doesn't completely define the parser state, the (current ?)
108: * design of the parser uses recursive function calls since this allows
109: * an easy mapping from the production rules of the specification
110: * to the actual code. The drawback is that the actual function calls
111: * also reflect the parser state. However most of the parsing routines
112: * take as the only argument the parser context pointer, so migrating
113: * to a state based parser for progressive parsing shouldn't be too hard.
114: */
1.58 daniel 115: typedef struct _xmlParserCtxt xmlParserCtxt;
116: typedef xmlParserCtxt *xmlParserCtxtPtr;
117: struct _xmlParserCtxt {
118: struct _xmlSAXHandler *sax; /* The SAX handler */
1.27 daniel 119: void *userData; /* NOTE(review): presumably the opaque user data handed to the SAX callbacks, not the document itself (that is myDoc) -- confirm */
120: xmlDocPtr myDoc; /* the document being built */
1.49 daniel 121: int wellFormed; /* is the document well formed */
1.30 daniel 122: int replaceEntities; /* shall we replace entities ? */
1.49 daniel 123: const xmlChar *version; /* the XML version string */
124: const xmlChar *encoding; /* encoding, if any */
1.37 daniel 125: int standalone; /* standalone document */
126: int html; /* are we parsing an HTML document */
1.16 daniel 127:
1.14 daniel 128: /* Input stream stack */
1.15 daniel 129: xmlParserInputPtr input; /* Current input stream */
1.14 daniel 130: int inputNr; /* Number of current input streams */
131: int inputMax; /* Max number of input streams */
132: xmlParserInputPtr *inputTab; /* stack of inputs */
1.15 daniel 133:
1.39 daniel 134: /* Node analysis stack only used for DOM building */
1.15 daniel 135: xmlNodePtr node; /* Current parsed Node */
136: int nodeNr; /* Depth of the parsing stack */
137: int nodeMax; /* Max depth of the parsing stack */
138: xmlNodePtr *nodeTab; /* array of nodes */
1.14 daniel 139:
1.13 daniel 140: int record_info; /* Whether node info should be kept */
141: xmlParserNodeInfoSeq node_seq; /* info about each node parsed */
1.48 daniel 142:
1.49 daniel 143: int errNo; /* error code */
144:
145: int hasExternalSubset; /* the document references an external subset */
146: int hasPErefs; /* the internal subset has PE refs */
147: int external; /* are we parsing an external entity */
148:
149: int valid; /* is the document valid */
150: int validate; /* shall we try to validate ? */
151: xmlValidCtxt vctxt; /* The validity context */
152:
153: xmlParserInputState instate; /* current type of input */
154: int token; /* next char look-ahead */
155:
156: char *directory; /* the data directory */
1.51 daniel 157:
1.66 daniel 158: /* Node name stack */
1.51 daniel 159: xmlChar *name; /* Name of the current parsed node */
160: int nameNr; /* Depth of the name stack */
161: int nameMax; /* Max depth of the name stack */
162: xmlChar * *nameTab; /* array of node names */
163:
1.53 daniel 164: long nbChars; /* number of xmlChars processed */
1.56 daniel 165: long checkIndex; /* used by progressive parsing lookup */
1.67 daniel 166: int keepBlanks; /* ugly but ... (presumably the per-context counterpart of xmlKeepBlanksDefault -- TODO confirm) */
1.65 daniel 167: int disableSAX; /* SAX callbacks are disabled */
1.62 daniel 168: int inSubset; /* 1 = parsing is in the internal subset, 2 = in the external subset */
1.61 daniel 169: xmlChar * intSubName; /* name of subset */
170: xmlChar * extSubURI; /* URI of external subset */
171: xmlChar * extSubSystem; /* SYSTEM ID of external subset */
1.66 daniel 172:
173: /* xml:space values */
174: int * space; /* Should the parser preserve spaces */
175: int spaceNr; /* Depth of the xml:space stack */
176: int spaceMax; /* Max depth of the xml:space stack */
177: int * spaceTab; /* array of space infos */
1.58 daniel 178: };
1.7 daniel 179:
1.39 daniel 180: /**
1.17 daniel 181: * a SAX Locator.
182: */
1.58 daniel 183: typedef struct _xmlSAXLocator xmlSAXLocator;
184: typedef xmlSAXLocator *xmlSAXLocatorPtr;
185: struct _xmlSAXLocator { /* table of accessor callbacks; each one receives an opaque ctx pointer */
1.49 daniel 186: const xmlChar *(*getPublicId)(void *ctx);
187: const xmlChar *(*getSystemId)(void *ctx);
1.28 daniel 188: int (*getLineNumber)(void *ctx);
189: int (*getColumnNumber)(void *ctx);
1.58 daniel 190: };
1.17 daniel 191:
1.39 daniel 192: /**
193: * A SAX handler is a bunch of callbacks called by the parser when processing
194: * of the input generates data or structure information.
1.17 daniel 195: */
196:
1.28 daniel 197: typedef xmlParserInputPtr (*resolveEntitySAXFunc) (void *ctx, /* every callback type in this group takes an opaque ctx pointer first */
1.49 daniel 198: const xmlChar *publicId, const xmlChar *systemId);
199: typedef void (*internalSubsetSAXFunc) (void *ctx, const xmlChar *name,
200: const xmlChar *ExternalID, const xmlChar *SystemID);
1.61 daniel 201: typedef void (*externalSubsetSAXFunc) (void *ctx, const xmlChar *name, /* same parameter list as internalSubsetSAXFunc */
202: const xmlChar *ExternalID, const xmlChar *SystemID);
1.28 daniel 203: typedef xmlEntityPtr (*getEntitySAXFunc) (void *ctx,
1.49 daniel 204: const xmlChar *name);
1.39 daniel 205: typedef xmlEntityPtr (*getParameterEntitySAXFunc) (void *ctx,
1.49 daniel 206: const xmlChar *name);
1.28 daniel 207: typedef void (*entityDeclSAXFunc) (void *ctx,
1.49 daniel 208: const xmlChar *name, int type, const xmlChar *publicId,
209: const xmlChar *systemId, xmlChar *content); /* NOTE(review): content is the only non-const xmlChar pointer among these callbacks -- confirm it is meant to be writable */
210: typedef void (*notationDeclSAXFunc)(void *ctx, const xmlChar *name,
211: const xmlChar *publicId, const xmlChar *systemId);
212: typedef void (*attributeDeclSAXFunc)(void *ctx, const xmlChar *elem,
213: const xmlChar *name, int type, int def,
214: const xmlChar *defaultValue, xmlEnumerationPtr tree);
215: typedef void (*elementDeclSAXFunc)(void *ctx, const xmlChar *name,
1.27 daniel 216: int type, xmlElementContentPtr content);
1.28 daniel 217: typedef void (*unparsedEntityDeclSAXFunc)(void *ctx,
1.49 daniel 218: const xmlChar *name, const xmlChar *publicId,
219: const xmlChar *systemId, const xmlChar *notationName);
1.28 daniel 220: typedef void (*setDocumentLocatorSAXFunc) (void *ctx,
1.18 daniel 221: xmlSAXLocatorPtr loc);
1.28 daniel 222: typedef void (*startDocumentSAXFunc) (void *ctx);
223: typedef void (*endDocumentSAXFunc) (void *ctx);
1.49 daniel 224: typedef void (*startElementSAXFunc) (void *ctx, const xmlChar *name,
225: const xmlChar **atts);
226: typedef void (*endElementSAXFunc) (void *ctx, const xmlChar *name);
227: typedef void (*attributeSAXFunc) (void *ctx, const xmlChar *name,
228: const xmlChar *value);
229: typedef void (*referenceSAXFunc) (void *ctx, const xmlChar *name);
230: typedef void (*charactersSAXFunc) (void *ctx, const xmlChar *ch,
1.27 daniel 231: int len);
1.28 daniel 232: typedef void (*ignorableWhitespaceSAXFunc) (void *ctx,
1.49 daniel 233: const xmlChar *ch, int len);
1.28 daniel 234: typedef void (*processingInstructionSAXFunc) (void *ctx,
1.49 daniel 235: const xmlChar *target, const xmlChar *data);
236: typedef void (*commentSAXFunc) (void *ctx, const xmlChar *value);
237: typedef void (*cdataBlockSAXFunc) (void *ctx, const xmlChar *value, int len);
1.28 daniel 238: typedef void (*warningSAXFunc) (void *ctx, const char *msg, ...); /* variadic; msg is presumably a printf-style format -- TODO confirm */
239: typedef void (*errorSAXFunc) (void *ctx, const char *msg, ...); /* same shape as warningSAXFunc */
240: typedef void (*fatalErrorSAXFunc) (void *ctx, const char *msg, ...); /* same shape as warningSAXFunc */
241: typedef int (*isStandaloneSAXFunc) (void *ctx);
242: typedef int (*hasInternalSubsetSAXFunc) (void *ctx);
243: typedef int (*hasExternalSubsetSAXFunc) (void *ctx);
1.18 daniel 244:
1.58 daniel 245: typedef struct _xmlSAXHandler xmlSAXHandler;
246: typedef xmlSAXHandler *xmlSAXHandlerPtr;
247: struct _xmlSAXHandler { /* one slot per SAX callback type declared in this header */
1.27 daniel 248: internalSubsetSAXFunc internalSubset;
249: isStandaloneSAXFunc isStandalone;
250: hasInternalSubsetSAXFunc hasInternalSubset;
251: hasExternalSubsetSAXFunc hasExternalSubset;
1.18 daniel 252: resolveEntitySAXFunc resolveEntity;
1.27 daniel 253: getEntitySAXFunc getEntity;
254: entityDeclSAXFunc entityDecl;
1.18 daniel 255: notationDeclSAXFunc notationDecl;
1.27 daniel 256: attributeDeclSAXFunc attributeDecl;
257: elementDeclSAXFunc elementDecl;
1.18 daniel 258: unparsedEntityDeclSAXFunc unparsedEntityDecl;
259: setDocumentLocatorSAXFunc setDocumentLocator;
260: startDocumentSAXFunc startDocument;
261: endDocumentSAXFunc endDocument;
262: startElementSAXFunc startElement;
263: endElementSAXFunc endElement;
1.27 daniel 264: referenceSAXFunc reference;
1.18 daniel 265: charactersSAXFunc characters;
266: ignorableWhitespaceSAXFunc ignorableWhitespace;
267: processingInstructionSAXFunc processingInstruction;
1.27 daniel 268: commentSAXFunc comment;
1.18 daniel 269: warningSAXFunc warning;
270: errorSAXFunc error;
271: fatalErrorSAXFunc fatalError;
1.39 daniel 272: getParameterEntitySAXFunc getParameterEntity; /* NOTE(review): this and the following slots sit after fatalError; presumably positional initializers elsewhere rely on the field order -- confirm before reordering */
1.43 daniel 273: cdataBlockSAXFunc cdataBlock;
1.61 daniel 274: externalSubsetSAXFunc externalSubset;
1.58 daniel 275: };
1.17 daniel 276:
1.39 daniel 277: /**
1.57 daniel 278: * External entity loaders types
279: */
280: typedef xmlParserInputPtr (*xmlExternalEntityLoader)(const char *URL, /* same parameter list and return type as xmlLoadExternalEntity */
281: const char *ID,
282: xmlParserCtxtPtr context);
283:
284: /**
1.47 daniel 285: * Global variables: just the default SAX interface tables and XML
286: * version infos.
1.17 daniel 287: */
1.33 daniel 288: extern const char *xmlParserVersion;
289:
1.19 daniel 290: extern xmlSAXLocator xmlDefaultSAXLocator;
291: extern xmlSAXHandler xmlDefaultSAXHandler;
1.33 daniel 292: extern xmlSAXHandler htmlDefaultSAXHandler;
1.47 daniel 293:
294: /**
295: * entity substitution default behaviour.
296: */
297:
298: extern int xmlSubstituteEntitiesDefaultValue; /* presumably the value managed through xmlSubstituteEntitiesDefault() -- TODO confirm */
1.59 daniel 299: extern int xmlGetWarningsDefaultValue; /* NOTE(review): by its name this concerns warnings, not entity substitution, despite the heading above -- confirm and document */
1.19 daniel 300:
1.35 daniel 301:
1.39 daniel 302: /**
1.52 daniel 303: * Cleanup
304: */
305: void xmlCleanupParser (void); /* NOTE(review): which state gets released is not visible in this header -- document it once confirmed */
306:
307: /**
1.35 daniel 308: * Input functions
309: */
1.45 daniel 310: int xmlParserInputRead (xmlParserInputPtr in,
311: int len); /* NOTE(review): meaning of len and of the int result is not documented here -- confirm */
312: int xmlParserInputGrow (xmlParserInputPtr in,
313: int len); /* same parameter list as xmlParserInputRead */
1.17 daniel 314:
1.39 daniel 315: /**
1.49 daniel 316: * xmlChar handling
1.2 veillard 317: */
1.49 daniel 318: xmlChar * xmlStrdup (const xmlChar *cur);
319: xmlChar * xmlStrndup (const xmlChar *cur,
1.45 daniel 320: int len);
1.49 daniel 321: xmlChar * xmlStrsub (const xmlChar *str,
1.45 daniel 322: int start,
323: int len);
1.49 daniel 324: const xmlChar * xmlStrchr (const xmlChar *str,
325: xmlChar val);
326: const xmlChar * xmlStrstr (const xmlChar *str,
327: xmlChar *val); /* NOTE(review): unlike str, val is not const-qualified -- confirm whether the implementation writes through it */
328: int xmlStrcmp (const xmlChar *str1,
329: const xmlChar *str2);
330: int xmlStrncmp (const xmlChar *str1,
331: const xmlChar *str2,
1.45 daniel 332: int len);
1.49 daniel 333: int xmlStrlen (const xmlChar *str);
334: xmlChar * xmlStrcat (xmlChar *cur, /* cur is non-const and an xmlChar pointer is returned; presumably callers must use the returned pointer -- TODO confirm */
335: const xmlChar *add);
336: xmlChar * xmlStrncat (xmlChar *cur,
337: const xmlChar *add,
1.45 daniel 338: int len);
1.9 daniel 339:
1.39 daniel 340: /**
341: * Basic parsing Interfaces
1.22 daniel 342: */
1.49 daniel 343: xmlDocPtr xmlParseDoc (xmlChar *cur); /* NOTE(review): cur is not const-qualified -- confirm whether the input is modified */
1.45 daniel 344: xmlDocPtr xmlParseMemory (char *buffer, /* NOTE(review): buffer is not const-qualified -- confirm whether it is modified */
345: int size);
346: xmlDocPtr xmlParseFile (const char *filename);
347: int xmlSubstituteEntitiesDefault(int val); /* presumably updates xmlSubstituteEntitiesDefaultValue; meaning of the int result not visible here -- TODO confirm */
1.68 daniel 348: int xmlKeepBlanksDefault (int val); /* same shape as xmlSubstituteEntitiesDefault */
1.22 daniel 349:
1.39 daniel 350: /**
1.22 daniel 351: * Recovery mode
352: */
1.49 daniel 353: xmlDocPtr xmlRecoverDoc (xmlChar *cur); /* same parameter list as xmlParseDoc */
1.45 daniel 354: xmlDocPtr xmlRecoverMemory (char *buffer, /* same parameter list as xmlParseMemory */
355: int size);
356: xmlDocPtr xmlRecoverFile (const char *filename); /* same parameter list as xmlParseFile */
1.22 daniel 357:
1.39 daniel 358: /**
359: * Less common routines and SAX interfaces
1.22 daniel 360: */
1.45 daniel 361: int xmlParseDocument (xmlParserCtxtPtr ctxt);
362: xmlDocPtr xmlSAXParseDoc (xmlSAXHandlerPtr sax,
1.49 daniel 363: xmlChar *cur,
1.45 daniel 364: int recovery); /* recovery: presumably non-zero selects the behaviour of the xmlRecover* entry points -- TODO confirm */
1.49 daniel 365: int xmlSAXUserParseFile (xmlSAXHandlerPtr sax,
366: void *user_data,
367: const char *filename);
368: int xmlSAXUserParseMemory (xmlSAXHandlerPtr sax,
369: void *user_data,
370: char *buffer,
371: int size);
1.45 daniel 372: xmlDocPtr xmlSAXParseMemory (xmlSAXHandlerPtr sax,
373: char *buffer,
374: int size,
375: int recovery);
376: xmlDocPtr xmlSAXParseFile (xmlSAXHandlerPtr sax,
377: const char *filename,
378: int recovery);
1.49 daniel 379: xmlDtdPtr xmlParseDTD (const xmlChar *ExternalID,
380: const xmlChar *SystemID);
1.45 daniel 381: xmlDtdPtr xmlSAXParseDTD (xmlSAXHandlerPtr sax, /* xmlParseDTD plus an explicit SAX handler argument */
1.49 daniel 382: const xmlChar *ExternalID,
383: const xmlChar *SystemID);
1.66 daniel 384: int xmlParseBalancedChunkMemory(xmlDocPtr doc,
385: xmlSAXHandlerPtr sax,
386: void *user_data,
387: const xmlChar *string,
388: xmlNodePtr *list); /* list is an out-style pointer-to-pointer; presumably receives the parsed nodes -- TODO confirm */
1.69 ! daniel 389: int xmlParseExternalEntity (xmlDocPtr doc, /* same parameters as xmlParseBalancedChunkMemory, with URL and ID in place of string */
! 390: xmlSAXHandlerPtr sax,
! 391: void *user_data,
! 392: const char *URL,
! 393: const char *ID,
! 394: xmlNodePtr *list);
1.66 daniel 395:
1.56 daniel 396: /**
397: * SAX initialization routines
398: */
399: void xmlDefaultSAXHandlerInit(void); /* presumably fills in the global xmlDefaultSAXHandler table -- TODO confirm */
400: void htmlDefaultSAXHandlerInit(void); /* presumably fills in the global htmlDefaultSAXHandler table -- TODO confirm */
401:
402: /**
403: * Parser contexts handling.
404: */
1.45 daniel 405: void xmlInitParserCtxt (xmlParserCtxtPtr ctxt);
406: void xmlClearParserCtxt (xmlParserCtxtPtr ctxt);
1.56 daniel 407: void xmlFreeParserCtxt (xmlParserCtxtPtr ctxt);
1.45 daniel 408: void xmlSetupParserForBuffer (xmlParserCtxtPtr ctxt,
1.49 daniel 409: const xmlChar* buffer,
1.45 daniel 410: const char* filename);
1.56 daniel 411: xmlParserCtxtPtr xmlCreateDocParserCtxt (xmlChar *cur); /* NOTE(review): cur is non-const here although xmlSetupParserForBuffer takes a const buffer -- confirm whether the input is modified */
412:
413: /**
414: * Interfaces for the Push mode
415: */
416: xmlParserCtxtPtr xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax,
417: void *user_data,
418: const char *chunk,
419: int size,
420: const char *filename);
421: int xmlParseChunk (xmlParserCtxtPtr ctxt,
422: const char *chunk,
423: int size,
424: int terminate); /* terminate: presumably non-zero on the last chunk -- TODO confirm */
1.9 daniel 425:
1.45 daniel 426: /**
427: * Node infos
428: */
429: const xmlParserNodeInfo*
430: xmlParserFindNodeInfo (const xmlParserCtxt* ctxt, /* read-only lookup as far as the signature shows: const context, const node, const result */
1.24 daniel 431: const xmlNode* node);
1.45 daniel 432: void xmlInitNodeInfoSeq (xmlParserNodeInfoSeqPtr seq);
433: void xmlClearNodeInfoSeq (xmlParserNodeInfoSeqPtr seq);
1.13 daniel 434: unsigned long xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeq* seq, /* NOTE(review): the not-found result is not visible in this header -- confirm */
435: const xmlNode* node);
1.45 daniel 436: void xmlParserAddNodeInfo (xmlParserCtxtPtr ctxt,
437: const xmlParserNodeInfo* info);
438:
439: /*
440: * External entities handling actually implemented in xmlIO
441: */
1.7 daniel 442:
1.45 daniel 443: void xmlSetExternalEntityLoader(xmlExternalEntityLoader f);
444: xmlExternalEntityLoader
445: xmlGetExternalEntityLoader(void);
446: xmlParserInputPtr
447: xmlLoadExternalEntity (const char *URL, /* same signature as the xmlExternalEntityLoader callback type */
448: const char *ID,
1.57 daniel 449: xmlParserCtxtPtr context);
1.55 daniel 450:
1.7 daniel 451: #ifdef __cplusplus
452: }
453: #endif
1.1 veillard 454:
455: #endif /* __XML_PARSER_H__ */
456:
Webmaster