Annotation of XML/parser.h, revision 1.66
1.1 veillard 1: /*
1.39 daniel 2: * parser.h : Interfaces, constants and types related to the XML parser.
1.6 veillard 3: *
4: * See Copyright for the status of this software.
5: *
1.23 daniel 6: * Daniel.Veillard@w3.org
1.1 veillard 7: */
8:
9: #ifndef __XML_PARSER_H__
10: #define __XML_PARSER_H__
11:
1.2 veillard 12: #include "tree.h"
1.41 daniel 13: #include "valid.h"
1.32 daniel 14: #include "xmlIO.h"
1.55 daniel 15: #include "entities.h"
16:
1.2 veillard 17:
1.7 daniel 18: #ifdef __cplusplus
19: extern "C" {
20: #endif
21:
1.1 veillard 22: /*
23: * Constants.
24: */
25: #define XML_DEFAULT_VERSION "1.0"
26:
1.39 daniel 27: /**
28: * An xmlParserInput is an input flow for the XML processor.
29: * Each entity parsed is associated with an xmlParserInput (except the
30: * few predefined ones). This is the case both for internal entities
31: * - in which case the flow is already completely in memory - and
32: * external entities - in which case we use the buf structure for
33: * progressive reading and I18N conversions to the internal UTF-8 format.
34: */
35:
1.49 daniel 36: typedef void (* xmlParserInputDeallocate)(xmlChar *); /* type of the "free" member below */
1.58 daniel 37: typedef struct _xmlParserInput xmlParserInput;
38: typedef xmlParserInput *xmlParserInputPtr;
39: struct _xmlParserInput {
1.32 daniel 40: /* Input buffer */
41: xmlParserInputBufferPtr buf; /* UTF-8 encoded buffer */
42:
1.14 daniel 43: const char *filename; /* The file analyzed, if any */
1.36 daniel 44: const char *directory; /* The directory/base of the file */
1.56 daniel 45: const xmlChar *base; /* Base of the array to parse */
46: const xmlChar *cur; /* Current char being parsed */
47: int length; /* Length, if known */
1.14 daniel 48: int line; /* Current line */
49: int col; /* Current column */
1.56 daniel 50: int consumed; /* How many xmlChars were already consumed */
1.25 daniel 51: xmlParserInputDeallocate free; /* Function to deallocate the base */
1.60 daniel 52: const xmlChar *encoding; /* The encoding string for the entity */
1.61 daniel 53: const xmlChar *version; /* The version string for the entity */
1.63 daniel 54: int standalone; /* Was that entity marked standalone */
1.58 daniel 55: };
1.7 daniel 56:
1.39 daniel 57: /**
58: * The parser can be asked to collect node information, i.e. at what
59: * place in the file the nodes were detected.
60: * NOTE: This is off by default and not very well tested.
61: */
1.58 daniel 62: typedef struct _xmlParserNodeInfo xmlParserNodeInfo;
63: typedef xmlParserNodeInfo *xmlParserNodeInfoPtr;
64:
65: struct _xmlParserNodeInfo {
66: const struct _xmlNode* node; /* the node this record describes */
1.13 daniel 67: /* Position and line number where the text that created the node begins and ends */
68: unsigned long begin_pos;
69: unsigned long begin_line;
70: unsigned long end_pos;
71: unsigned long end_line;
1.58 daniel 72: };
1.13 daniel 73:
1.58 daniel 74: typedef struct _xmlParserNodeInfoSeq xmlParserNodeInfoSeq; /* a sequence of xmlParserNodeInfo records */
75: typedef xmlParserNodeInfoSeq *xmlParserNodeInfoSeqPtr;
76: struct _xmlParserNodeInfoSeq {
1.13 daniel 77: unsigned long maximum; /* presumably the allocated capacity of buffer - TODO confirm */
78: unsigned long length; /* presumably the number of records in use - TODO confirm */
79: xmlParserNodeInfo* buffer; /* the xmlParserNodeInfo records */
1.58 daniel 80: };
1.13 daniel 81:
1.39 daniel 82: /**
1.58 daniel 83: * The parser now also works as a state-based parser.
84: * The recursive one uses the state info for entities processing.
1.39 daniel 85: */
1.50 daniel 86: typedef enum {
1.56 daniel 87: XML_PARSER_EOF = -1, /* nothing is to be parsed */
88: XML_PARSER_START = 0, /* nothing has been parsed */
89: XML_PARSER_MISC, /* Misc* before the internal subset */
90: XML_PARSER_PI, /* within a processing instruction */
91: XML_PARSER_DTD, /* within some DTD content */
92: XML_PARSER_PROLOG, /* Misc* after the internal subset */
93: XML_PARSER_COMMENT, /* within a comment */
94: XML_PARSER_START_TAG, /* within a start tag */
95: XML_PARSER_CONTENT, /* within the content */
96: XML_PARSER_CDATA_SECTION, /* within a CDATA section */
97: XML_PARSER_END_TAG, /* within a closing tag */
98: XML_PARSER_ENTITY_DECL, /* within an entity declaration */
99: XML_PARSER_ENTITY_VALUE, /* within an entity value in a declaration */
100: XML_PARSER_ATTRIBUTE_VALUE, /* within an attribute value */
1.64 daniel 101: XML_PARSER_SYSTEM_LITERAL, /* within a SYSTEM value */
1.56 daniel 102: XML_PARSER_EPILOG /* the Misc* after the last end tag */
1.37 daniel 103: } xmlParserInputState;
104:
1.39 daniel 105: /**
106: * The parser context.
107: * NOTE: This does not completely define the parser state; the (current ?)
108: * design of the parser uses recursive function calls since this allows
109: * an easy mapping from the production rules of the specification
110: * to the actual code. The drawback is that the actual function calls
111: * also reflect the parser state. However most of the parsing routines
112: * take as the only argument the parser context pointer, so migrating
113: * to a state based parser for progressive parsing shouldn't be too hard.
114: */
1.58 daniel 115: typedef struct _xmlParserCtxt xmlParserCtxt;
116: typedef xmlParserCtxt *xmlParserCtxtPtr;
117: struct _xmlParserCtxt {
118: struct _xmlSAXHandler *sax; /* The SAX handler */
1.27 daniel 119: void *userData; /* user data; presumably handed to the SAX callbacks as ctx - TODO confirm */
120: xmlDocPtr myDoc; /* the document being built */
1.49 daniel 121: int wellFormed; /* is the document well formed */
1.30 daniel 122: int replaceEntities; /* shall we replace entities ? */
1.49 daniel 123: const xmlChar *version; /* the XML version string */
124: const xmlChar *encoding; /* encoding, if any */
1.37 daniel 125: int standalone; /* standalone document */
126: int html; /* are we parsing an HTML document */
1.16 daniel 127:
1.14 daniel 128: /* Input stream stack */
1.15 daniel 129: xmlParserInputPtr input; /* Current input stream */
1.14 daniel 130: int inputNr; /* Number of current input streams */
131: int inputMax; /* Max number of input streams */
132: xmlParserInputPtr *inputTab; /* stack of inputs */
1.15 daniel 133:
1.39 daniel 134: /* Node analysis stack, only used for DOM building */
1.15 daniel 135: xmlNodePtr node; /* Current parsed Node */
136: int nodeNr; /* Depth of the parsing stack */
137: int nodeMax; /* Max depth of the parsing stack */
138: xmlNodePtr *nodeTab; /* array of nodes */
1.14 daniel 139:
1.13 daniel 140: int record_info; /* Whether node info should be kept */
141: xmlParserNodeInfoSeq node_seq; /* info about each node parsed */
1.48 daniel 142:
1.49 daniel 143: int errNo; /* error code */
144:
145: int hasExternalSubset; /* references an external subset */
146: int hasPErefs; /* the internal subset has PE refs */
147: int external; /* are we parsing an external entity */
148:
149: int valid; /* is the document valid */
150: int validate; /* shall we try to validate ? */
151: xmlValidCtxt vctxt; /* The validity context */
152:
153: xmlParserInputState instate; /* current type of input */
154: int token; /* next char look-ahead */
155:
156: char *directory; /* the data directory */
1.51 daniel 157:
1.66 ! daniel 158: /* Node name stack */
1.51 daniel 159: xmlChar *name; /* Name of the current parsed node */
160: int nameNr; /* Depth of the name stack */
161: int nameMax; /* Max depth of the name stack */
162: xmlChar * *nameTab; /* array of node names */
163:
1.53 daniel 164: long nbChars; /* number of xmlChar processed */
1.56 daniel 165: long checkIndex; /* used by progressive parsing lookup */
1.65 daniel 166: int disableSAX; /* SAX callbacks are disabled */
1.62 daniel 167: int inSubset; /* Parsing is in the internal (1) / external (2) subset */
1.61 daniel 168: xmlChar * intSubName; /* name of subset */
169: xmlChar * extSubURI; /* URI of external subset */
170: xmlChar * extSubSystem; /* SYSTEM ID of external subset */
1.66 ! daniel 171:
! 172: /* xml:space values stack */
! 173: int * space; /* Should the parser preserve spaces */
! 174: int spaceNr; /* Depth of the xml:space stack */
! 175: int spaceMax; /* Max depth of the xml:space stack */
! 176: int * spaceTab; /* array of space infos */
1.58 daniel 177: };
1.7 daniel 178:
1.39 daniel 179: /**
1.17 daniel 180: * A SAX Locator: a table of four callbacks, each taking an opaque ctx pointer.
181: */
1.58 daniel 182: typedef struct _xmlSAXLocator xmlSAXLocator;
183: typedef xmlSAXLocator *xmlSAXLocatorPtr;
184: struct _xmlSAXLocator {
1.49 daniel 185: const xmlChar *(*getPublicId)(void *ctx);
186: const xmlChar *(*getSystemId)(void *ctx);
1.28 daniel 187: int (*getLineNumber)(void *ctx);
188: int (*getColumnNumber)(void *ctx);
1.58 daniel 189: };
1.17 daniel 190:
1.39 daniel 191: /**
192: * A SAX handler is a bunch of callbacks called by the parser when processing
193: * of the input generates data or structure information.
1.17 daniel 194: */
195:
1.28 daniel 196: typedef xmlParserInputPtr (*resolveEntitySAXFunc) (void *ctx,
1.49 daniel 197: const xmlChar *publicId, const xmlChar *systemId);
198: typedef void (*internalSubsetSAXFunc) (void *ctx, const xmlChar *name,
199: const xmlChar *ExternalID, const xmlChar *SystemID);
1.61 daniel 200: typedef void (*externalSubsetSAXFunc) (void *ctx, const xmlChar *name,
201: const xmlChar *ExternalID, const xmlChar *SystemID);
1.28 daniel 202: typedef xmlEntityPtr (*getEntitySAXFunc) (void *ctx,
1.49 daniel 203: const xmlChar *name);
1.39 daniel 204: typedef xmlEntityPtr (*getParameterEntitySAXFunc) (void *ctx,
1.49 daniel 205: const xmlChar *name);
1.28 daniel 206: typedef void (*entityDeclSAXFunc) (void *ctx,
1.49 daniel 207: const xmlChar *name, int type, const xmlChar *publicId,
208: const xmlChar *systemId, xmlChar *content); /* NOTE(review): content is the only non-const string argument */
209: typedef void (*notationDeclSAXFunc)(void *ctx, const xmlChar *name,
210: const xmlChar *publicId, const xmlChar *systemId);
211: typedef void (*attributeDeclSAXFunc)(void *ctx, const xmlChar *elem,
212: const xmlChar *name, int type, int def,
213: const xmlChar *defaultValue, xmlEnumerationPtr tree);
214: typedef void (*elementDeclSAXFunc)(void *ctx, const xmlChar *name,
1.27 daniel 215: int type, xmlElementContentPtr content);
1.28 daniel 216: typedef void (*unparsedEntityDeclSAXFunc)(void *ctx,
1.49 daniel 217: const xmlChar *name, const xmlChar *publicId,
218: const xmlChar *systemId, const xmlChar *notationName);
1.28 daniel 219: typedef void (*setDocumentLocatorSAXFunc) (void *ctx,
1.18 daniel 220: xmlSAXLocatorPtr loc);
1.28 daniel 221: typedef void (*startDocumentSAXFunc) (void *ctx);
222: typedef void (*endDocumentSAXFunc) (void *ctx);
1.49 daniel 223: typedef void (*startElementSAXFunc) (void *ctx, const xmlChar *name,
224: const xmlChar **atts);
225: typedef void (*endElementSAXFunc) (void *ctx, const xmlChar *name);
226: typedef void (*attributeSAXFunc) (void *ctx, const xmlChar *name,
227: const xmlChar *value); /* NOTE(review): struct _xmlSAXHandler has no member of this type */
228: typedef void (*referenceSAXFunc) (void *ctx, const xmlChar *name);
229: typedef void (*charactersSAXFunc) (void *ctx, const xmlChar *ch,
1.27 daniel 230: int len);
1.28 daniel 231: typedef void (*ignorableWhitespaceSAXFunc) (void *ctx,
1.49 daniel 232: const xmlChar *ch, int len);
1.28 daniel 233: typedef void (*processingInstructionSAXFunc) (void *ctx,
1.49 daniel 234: const xmlChar *target, const xmlChar *data);
235: typedef void (*commentSAXFunc) (void *ctx, const xmlChar *value);
236: typedef void (*cdataBlockSAXFunc) (void *ctx, const xmlChar *value, int len);
1.28 daniel 237: typedef void (*warningSAXFunc) (void *ctx, const char *msg, ...); /* msg + varargs: presumably printf-style - TODO confirm */
238: typedef void (*errorSAXFunc) (void *ctx, const char *msg, ...); /* same shape as warningSAXFunc */
239: typedef void (*fatalErrorSAXFunc) (void *ctx, const char *msg, ...); /* same shape as warningSAXFunc */
240: typedef int (*isStandaloneSAXFunc) (void *ctx);
241: typedef int (*hasInternalSubsetSAXFunc) (void *ctx);
242: typedef int (*hasExternalSubsetSAXFunc) (void *ctx);
1.18 daniel 243:
1.58 daniel 244: typedef struct _xmlSAXHandler xmlSAXHandler; /* the table of SAX callbacks */
245: typedef xmlSAXHandler *xmlSAXHandlerPtr;
246: struct _xmlSAXHandler { /* NOTE(review): attributeSAXFunc is declared in this header but has no member here */
1.27 daniel 247: internalSubsetSAXFunc internalSubset;
248: isStandaloneSAXFunc isStandalone;
249: hasInternalSubsetSAXFunc hasInternalSubset;
250: hasExternalSubsetSAXFunc hasExternalSubset;
1.18 daniel 251: resolveEntitySAXFunc resolveEntity;
1.27 daniel 252: getEntitySAXFunc getEntity;
253: entityDeclSAXFunc entityDecl;
1.18 daniel 254: notationDeclSAXFunc notationDecl;
1.27 daniel 255: attributeDeclSAXFunc attributeDecl;
256: elementDeclSAXFunc elementDecl;
1.18 daniel 257: unparsedEntityDeclSAXFunc unparsedEntityDecl;
258: setDocumentLocatorSAXFunc setDocumentLocator;
259: startDocumentSAXFunc startDocument;
260: endDocumentSAXFunc endDocument;
261: startElementSAXFunc startElement;
262: endElementSAXFunc endElement;
1.27 daniel 263: referenceSAXFunc reference;
1.18 daniel 264: charactersSAXFunc characters;
265: ignorableWhitespaceSAXFunc ignorableWhitespace;
266: processingInstructionSAXFunc processingInstruction;
1.27 daniel 267: commentSAXFunc comment;
1.18 daniel 268: warningSAXFunc warning;
269: errorSAXFunc error;
270: fatalErrorSAXFunc fatalError;
1.39 daniel 271: getParameterEntitySAXFunc getParameterEntity; /* the last three members carry later revisions; presumably appended to keep earlier offsets stable - TODO confirm */
1.43 daniel 272: cdataBlockSAXFunc cdataBlock;
1.61 daniel 273: externalSubsetSAXFunc externalSubset;
1.58 daniel 274: };
1.17 daniel 275:
1.39 daniel 276: /**
1.57 daniel 277: * External entity loader type: given a URL, an ID and a parser context, returns a parser input.
278: */
279: typedef xmlParserInputPtr (*xmlExternalEntityLoader)(const char *URL,
280: const char *ID,
281: xmlParserCtxtPtr context);
282:
283: /**
1.47 daniel 284: * Global variables: just the default SAX interface tables and the XML
285: * version info.
1.17 daniel 286: */
1.33 daniel 287: extern const char *xmlParserVersion;
288:
1.19 daniel 289: extern xmlSAXLocator xmlDefaultSAXLocator;
290: extern xmlSAXHandler xmlDefaultSAXHandler;
1.33 daniel 291: extern xmlSAXHandler htmlDefaultSAXHandler;
1.47 daniel 292:
293: /**
294: * Entity substitution default behaviour.
295: */
296:
297: extern int xmlSubstituteEntitiesDefaultValue;
1.59 daniel 298: extern int xmlGetWarningsDefaultValue; /* NOTE(review): presumably a warnings default, unrelated to the entity-substitution heading above - confirm */
1.19 daniel 299:
1.35 daniel 300:
1.39 daniel 301: /**
1.52 daniel 302: * Cleanup; takes no argument and returns nothing.
303: */
304: void xmlCleanupParser (void);
305:
306: /**
1.35 daniel 307: * Input functions; both operate on an xmlParserInputPtr and take a length.
308: */
1.45 daniel 309: int xmlParserInputRead (xmlParserInputPtr in,
310: int len);
311: int xmlParserInputGrow (xmlParserInputPtr in,
312: int len);
1.17 daniel 313:
1.39 daniel 314: /**
1.49 daniel 315: * xmlChar string handling; lengths and offsets are passed as int.
1.2 veillard 316: */
1.49 daniel 317: xmlChar * xmlStrdup (const xmlChar *cur);
318: xmlChar * xmlStrndup (const xmlChar *cur,
1.45 daniel 319: int len);
1.49 daniel 320: xmlChar * xmlStrsub (const xmlChar *str,
1.45 daniel 321: int start,
322: int len);
1.49 daniel 323: const xmlChar * xmlStrchr (const xmlChar *str,
324: xmlChar val);
325: const xmlChar * xmlStrstr (const xmlChar *str,
326: xmlChar *val); /* NOTE(review): val is not const-qualified, unlike str */
327: int xmlStrcmp (const xmlChar *str1,
328: const xmlChar *str2);
329: int xmlStrncmp (const xmlChar *str1,
330: const xmlChar *str2,
1.45 daniel 331: int len);
1.49 daniel 332: int xmlStrlen (const xmlChar *str);
333: xmlChar * xmlStrcat (xmlChar *cur,
334: const xmlChar *add);
335: xmlChar * xmlStrncat (xmlChar *cur,
336: const xmlChar *add,
1.45 daniel 337: int len);
1.9 daniel 338:
1.39 daniel 339: /**
340: * Basic parsing interfaces; each parse function returns an xmlDocPtr.
1.22 daniel 341: */
1.49 daniel 342: xmlDocPtr xmlParseDoc (xmlChar *cur); /* NOTE(review): cur is not const-qualified */
1.45 daniel 343: xmlDocPtr xmlParseMemory (char *buffer,
344: int size); /* NOTE(review): buffer is not const-qualified */
345: xmlDocPtr xmlParseFile (const char *filename);
346: int xmlSubstituteEntitiesDefault(int val);
1.22 daniel 347:
1.39 daniel 348: /**
1.22 daniel 349: * Recovery mode; same signatures as xmlParseDoc, xmlParseMemory and xmlParseFile.
350: */
1.49 daniel 351: xmlDocPtr xmlRecoverDoc (xmlChar *cur);
1.45 daniel 352: xmlDocPtr xmlRecoverMemory (char *buffer,
353: int size);
354: xmlDocPtr xmlRecoverFile (const char *filename);
1.22 daniel 355:
1.39 daniel 356: /**
357: * Less common routines and SAX interfaces.
1.22 daniel 358: */
1.45 daniel 359: int xmlParseDocument (xmlParserCtxtPtr ctxt);
360: xmlDocPtr xmlSAXParseDoc (xmlSAXHandlerPtr sax,
1.49 daniel 361: xmlChar *cur,
1.45 daniel 362: int recovery);
1.49 daniel 363: int xmlSAXUserParseFile (xmlSAXHandlerPtr sax,
364: void *user_data,
365: const char *filename);
366: int xmlSAXUserParseMemory (xmlSAXHandlerPtr sax,
367: void *user_data,
368: char *buffer,
369: int size);
1.45 daniel 370: xmlDocPtr xmlSAXParseMemory (xmlSAXHandlerPtr sax,
371: char *buffer,
372: int size,
373: int recovery);
374: xmlDocPtr xmlSAXParseFile (xmlSAXHandlerPtr sax,
375: const char *filename,
376: int recovery);
1.49 daniel 377: xmlDtdPtr xmlParseDTD (const xmlChar *ExternalID,
378: const xmlChar *SystemID);
1.45 daniel 379: xmlDtdPtr xmlSAXParseDTD (xmlSAXHandlerPtr sax,
1.49 daniel 380: const xmlChar *ExternalID,
381: const xmlChar *SystemID);
1.66 ! daniel 382: int xmlParseBalancedChunkMemory(xmlDocPtr doc,
! 383: xmlSAXHandlerPtr sax,
! 384: void *user_data,
! 385: const xmlChar *string,
! 386: xmlNodePtr *list); /* presumably an out-parameter receiving the parsed nodes - TODO confirm */
! 387:
1.56 daniel 388: /**
389: * SAX initialization routines; presumably they fill in xmlDefaultSAXHandler and htmlDefaultSAXHandler - TODO confirm.
390: */
391: void xmlDefaultSAXHandlerInit(void);
392: void htmlDefaultSAXHandlerInit(void);
393:
394: /**
395: * Parser context handling.
396: */
1.45 daniel 397: void xmlInitParserCtxt (xmlParserCtxtPtr ctxt);
398: void xmlClearParserCtxt (xmlParserCtxtPtr ctxt);
1.56 daniel 399: void xmlFreeParserCtxt (xmlParserCtxtPtr ctxt);
1.45 daniel 400: void xmlSetupParserForBuffer (xmlParserCtxtPtr ctxt,
1.49 daniel 401: const xmlChar* buffer,
1.45 daniel 402: const char* filename);
1.56 daniel 403: xmlParserCtxtPtr xmlCreateDocParserCtxt (xmlChar *cur); /* NOTE(review): cur is not const-qualified */
404:
405: /**
406: * Interfaces for the push mode; input is supplied as (chunk, size) pairs.
407: */
408: xmlParserCtxtPtr xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax,
409: void *user_data,
410: const char *chunk,
411: int size,
412: const char *filename);
413: int xmlParseChunk (xmlParserCtxtPtr ctxt,
414: const char *chunk,
415: int size,
416: int terminate); /* presumably non-zero for the last chunk - TODO confirm */
1.9 daniel 417:
1.45 daniel 418: /**
419: * Node info: helpers working on xmlParserNodeInfo and xmlParserNodeInfoSeq.
420: */
421: const xmlParserNodeInfo*
422: xmlParserFindNodeInfo (const xmlParserCtxt* ctxt,
1.24 daniel 423: const xmlNode* node);
1.45 daniel 424: void xmlInitNodeInfoSeq (xmlParserNodeInfoSeqPtr seq);
425: void xmlClearNodeInfoSeq (xmlParserNodeInfoSeqPtr seq);
1.13 daniel 426: unsigned long xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeq* seq,
427: const xmlNode* node);
1.45 daniel 428: void xmlParserAddNodeInfo (xmlParserCtxtPtr ctxt,
429: const xmlParserNodeInfo* info);
430:
431: /*
432: * External entity handling, actually implemented in xmlIO.
433: */
1.7 daniel 434:
1.45 daniel 435: void xmlSetExternalEntityLoader(xmlExternalEntityLoader f);
436: xmlExternalEntityLoader
437: xmlGetExternalEntityLoader(void);
438: xmlParserInputPtr
439: xmlLoadExternalEntity (const char *URL,
440: const char *ID,
1.57 daniel 441: xmlParserCtxtPtr context); /* same parameter list and return type as xmlExternalEntityLoader */
1.55 daniel 442:
1.7 daniel 443: #ifdef __cplusplus
444: }
445: #endif
1.1 veillard 446:
447: #endif /* __XML_PARSER_H__ */
448:
Webmaster