Annotation of XML/parser.h, revision 1.48
1.1 veillard 1: /*
1.39 daniel 2: * parser.h : Interfaces, constants and types related to the XML parser.
1.6 veillard 3: *
4: * See Copyright for the status of this software.
5: *
1.23 daniel 6: * Daniel.Veillard@w3.org
1.1 veillard 7: */
8:
9: #ifndef __XML_PARSER_H__
10: #define __XML_PARSER_H__
11:
1.2 veillard 12: #include "tree.h"
1.41 daniel 13: #include "valid.h"
1.32 daniel 14: #include "xmlIO.h"
1.2 veillard 15:
1.7 daniel 16: #ifdef __cplusplus
17: extern "C" {
18: #endif
19:
1.1 veillard 20: /*
21: * Constants.
22: */
23: #define XML_DEFAULT_VERSION "1.0"
24:
1.39 daniel 25: /**
26: * an xmlParserInput is an input flow for the XML processor.
27: * Each entity parsed is associated with an xmlParserInput (except the
28: * few predefined ones). This is the case both for internal entities
29: * - in which case the flow is already completely in memory - or
30: * external entities - in which case we use the buf structure for
31: * progressive reading and I18N conversions to the internal UTF-8 format.
32: */
33:
1.25 daniel 34: typedef void (* xmlParserInputDeallocate)(CHAR *); /* type of the xmlParserInput.free callback */
1.14 daniel 35: typedef struct xmlParserInput { /* one input flow handed to the parser */
1.32 daniel 36: /* Input buffer */
37: xmlParserInputBufferPtr buf; /* UTF-8 encoded buffer */
38:
1.14 daniel 39: const char *filename; /* The file analyzed, if any */
1.36 daniel 40: const char *directory; /* the directory/base of the file */
1.14 daniel 41: const CHAR *base; /* Base of the array to parse */
42: const CHAR *cur; /* Current char being parsed */
43: int line; /* Current line */
44: int col; /* Current column */
1.35 daniel 45: int consumed; /* How many CHARs were already consumed */
1.25 daniel 46: xmlParserInputDeallocate free; /* function to deallocate the base */
1.24 daniel 47: } xmlParserInput;
48: typedef xmlParserInput *xmlParserInputPtr; /* pointer form used by the prototypes in this header */
1.7 daniel 49:
1.45 daniel 50: typedef xmlParserInputPtr (*xmlExternalEntityLoader)(const char *URL,
51: const char *ID,
52: xmlParserInputPtr context); /* callback type taken by xmlSetExternalEntityLoader(); same signature as xmlLoadExternalEntity() */
53:
1.39 daniel 54: /**
55: * The parser can be asked to collect node information, i.e. at what
56: * place in the file they were detected.
57: * NOTE: This is off by default and not very well tested.
58: */
1.24 daniel 59: typedef struct _xmlParserNodeInfo { /* where in the input one node was found */
1.13 daniel 60: const struct xmlNode* node; /* the node this record describes */
61: /* Position and line number at which the text that created the node begins and ends */
62: unsigned long begin_pos;
63: unsigned long begin_line;
64: unsigned long end_pos;
65: unsigned long end_line;
1.24 daniel 66: } _xmlParserNodeInfo;
67: typedef _xmlParserNodeInfo xmlParserNodeInfo;
1.13 daniel 68:
69: typedef struct xmlParserNodeInfoSeq {
70: unsigned long maximum;
71: unsigned long length;
72: xmlParserNodeInfo* buffer;
1.24 daniel 73: } _xmlParserNodeInfoSeq;
74: typedef _xmlParserNodeInfoSeq xmlParserNodeInfoSeq;
75: typedef xmlParserNodeInfoSeq *xmlParserNodeInfoSeqPtr;
1.13 daniel 76:
1.39 daniel 77: /**
78: * The parser is not a state-based parser, but we need to maintain
79: * minimal state information, especially for entity processing.
80: */
1.37 daniel 81: typedef enum xmlParserInputState { /* type of the xmlParserCtxt.instate field */
82: XML_PARSER_EOF = 0, /* explicitly zero; the following values count up from it */
83: XML_PARSER_PROLOG,
84: XML_PARSER_CONTENT,
85: XML_PARSER_ENTITY_DECL,
86: XML_PARSER_ENTITY_VALUE,
87: XML_PARSER_ATTRIBUTE_VALUE,
88: XML_PARSER_DTD,
89: XML_PARSER_EPILOG,
1.38 daniel 90: XML_PARSER_COMMENT,
1.46 daniel 91: XML_PARSER_CDATA_SECTION
1.37 daniel 92: } xmlParserInputState;
93:
1.39 daniel 94: /**
95: * The parser context.
96: * NOTE: This doesn't completely define the parser state; the (current ?)
97: * design of the parser uses recursive function calls since this allows
98: * an easy mapping from the production rules of the specification
99: * to the actual code. The drawback is that the actual function calls
100: * also reflect the parser state. However most of the parsing routines
101: * take as their only argument the parser context pointer, so migrating
102: * to a state based parser for progressive parsing shouldn't be too hard.
103: */
1.24 daniel 104: typedef struct _xmlParserCtxt {
1.17 daniel 105: struct xmlSAXHandler *sax; /* The SAX handler */
1.27 daniel 106: void *userData; /* opaque user data. NOTE(review): presumably what the SAX callbacks receive as ctx -- confirm */
107: xmlDocPtr myDoc; /* the document being built */
1.30 daniel 108: int replaceEntities; /* shall we replace entities ? */
1.37 daniel 109: const CHAR *version; /* the XML version string */
110: const CHAR *encoding; /* encoding, if any */
111: int standalone; /* standalone document */
1.39 daniel 112: int hasExternalSubset; /* the document references an external subset */
113: int hasPErefs; /* the internal subset has PE refs */
1.37 daniel 114: int html; /* are we parsing an HTML document */
1.39 daniel 115: int external; /* are we parsing an external entity */
116:
117: int wellFormed; /* is the document well formed */
118: int valid; /* is the document valid */
1.40 daniel 119: int validate; /* shall we try to validate ? */
1.41 daniel 120: xmlValidCtxt vctxt; /* The validity context */
1.16 daniel 121:
1.37 daniel 122: xmlParserInputState instate; /* current type of input */
1.38 daniel 123: int token; /* next char look-ahead */
1.42 daniel 124:
125: char *directory; /* the data directory */
1.37 daniel 126:
1.14 daniel 127: /* Input stream stack */
1.15 daniel 128: xmlParserInputPtr input; /* Current input stream */
1.14 daniel 129: int inputNr; /* Number of current input streams */
130: int inputMax; /* Max number of input streams */
131: xmlParserInputPtr *inputTab; /* stack of inputs */
1.15 daniel 132:
1.39 daniel 133: /* Node analysis stack only used for DOM building */
1.15 daniel 134: xmlNodePtr node; /* Current parsed Node */
135: int nodeNr; /* Depth of the parsing stack */
136: int nodeMax; /* Max depth of the parsing stack */
137: xmlNodePtr *nodeTab; /* array of nodes */
1.14 daniel 138:
1.13 daniel 139: int record_info; /* Whether node info should be kept */
140: xmlParserNodeInfoSeq node_seq; /* info about each node parsed */
1.48 ! daniel 141:
! 142: int errno; /* error code. NOTE(review): errno is a macro once <errno.h> is included, so this member name can fail to compile there; renaming it changes the public struct -- confirm with callers before doing so */
1.24 daniel 143: } _xmlParserCtxt;
144: typedef _xmlParserCtxt xmlParserCtxt;
145: typedef xmlParserCtxt *xmlParserCtxtPtr;
1.7 daniel 146:
1.39 daniel 147: /**
1.17 daniel 148: * a SAX Locator.
149: */
150: typedef struct xmlSAXLocator {
1.28 daniel 151: const CHAR *(*getPublicId)(void *ctx);
152: const CHAR *(*getSystemId)(void *ctx);
153: int (*getLineNumber)(void *ctx);
154: int (*getColumnNumber)(void *ctx);
1.24 daniel 155: } _xmlSAXLocator;
156: typedef _xmlSAXLocator xmlSAXLocator;
157: typedef xmlSAXLocator *xmlSAXLocatorPtr;
1.17 daniel 158:
1.39 daniel 159: /**
160: * A SAX handler is a bunch of callbacks called by the parser when processing
161: * of the input generates data or structural information.
1.17 daniel 162: */
163:
1.27 daniel 164: #include "entities.h"
165:
1.28 daniel 166: typedef xmlParserInputPtr (*resolveEntitySAXFunc) (void *ctx,
1.18 daniel 167: const CHAR *publicId, const CHAR *systemId); /* every callback type in this group takes an opaque ctx as its first argument */
1.28 daniel 168: typedef void (*internalSubsetSAXFunc) (void *ctx, const CHAR *name,
1.27 daniel 169: const CHAR *ExternalID, const CHAR *SystemID);
1.28 daniel 170: typedef xmlEntityPtr (*getEntitySAXFunc) (void *ctx,
1.27 daniel 171: const CHAR *name);
1.39 daniel 172: typedef xmlEntityPtr (*getParameterEntitySAXFunc) (void *ctx,
173: const CHAR *name);
1.28 daniel 174: typedef void (*entityDeclSAXFunc) (void *ctx,
1.27 daniel 175: const CHAR *name, int type, const CHAR *publicId,
176: const CHAR *systemId, CHAR *content); /* note: content is the only non-const CHAR pointer here */
1.28 daniel 177: typedef void (*notationDeclSAXFunc)(void *ctx, const CHAR *name,
1.18 daniel 178: const CHAR *publicId, const CHAR *systemId);
1.28 daniel 179: typedef void (*attributeDeclSAXFunc)(void *ctx, const CHAR *elem,
1.27 daniel 180: const CHAR *name, int type, int def,
181: const CHAR *defaultValue, xmlEnumerationPtr tree);
1.28 daniel 182: typedef void (*elementDeclSAXFunc)(void *ctx, const CHAR *name,
1.27 daniel 183: int type, xmlElementContentPtr content);
1.28 daniel 184: typedef void (*unparsedEntityDeclSAXFunc)(void *ctx,
1.18 daniel 185: const CHAR *name, const CHAR *publicId,
186: const CHAR *systemId, const CHAR *notationName);
1.28 daniel 187: typedef void (*setDocumentLocatorSAXFunc) (void *ctx,
1.18 daniel 188: xmlSAXLocatorPtr loc);
1.28 daniel 189: typedef void (*startDocumentSAXFunc) (void *ctx);
190: typedef void (*endDocumentSAXFunc) (void *ctx);
191: typedef void (*startElementSAXFunc) (void *ctx, const CHAR *name,
1.27 daniel 192: const CHAR **atts);
1.28 daniel 193: typedef void (*endElementSAXFunc) (void *ctx, const CHAR *name);
194: typedef void (*attributeSAXFunc) (void *ctx, const CHAR *name,
1.19 daniel 195: const CHAR *value); /* NOTE(review): xmlSAXHandler in this header has no member of this type */
1.28 daniel 196: typedef void (*referenceSAXFunc) (void *ctx, const CHAR *name);
197: typedef void (*charactersSAXFunc) (void *ctx, const CHAR *ch,
1.27 daniel 198: int len);
1.28 daniel 199: typedef void (*ignorableWhitespaceSAXFunc) (void *ctx,
1.27 daniel 200: const CHAR *ch, int len);
1.28 daniel 201: typedef void (*processingInstructionSAXFunc) (void *ctx,
1.18 daniel 202: const CHAR *target, const CHAR *data);
1.28 daniel 203: typedef void (*commentSAXFunc) (void *ctx, const CHAR *value);
1.43 daniel 204: typedef void (*cdataBlockSAXFunc) (void *ctx, const CHAR *value, int len);
1.28 daniel 205: typedef void (*warningSAXFunc) (void *ctx, const char *msg, ...); /* variadic; msg is presumably a printf-style format -- TODO confirm */
206: typedef void (*errorSAXFunc) (void *ctx, const char *msg, ...);
207: typedef void (*fatalErrorSAXFunc) (void *ctx, const char *msg, ...);
208: typedef int (*isStandaloneSAXFunc) (void *ctx);
209: typedef int (*hasInternalSubsetSAXFunc) (void *ctx);
210: typedef int (*hasExternalSubsetSAXFunc) (void *ctx);
1.18 daniel 211:
1.17 daniel 212: typedef struct xmlSAXHandler { /* callback table; each member has the like-named ...SAXFunc type */
1.27 daniel 213: internalSubsetSAXFunc internalSubset;
214: isStandaloneSAXFunc isStandalone;
215: hasInternalSubsetSAXFunc hasInternalSubset;
216: hasExternalSubsetSAXFunc hasExternalSubset;
1.18 daniel 217: resolveEntitySAXFunc resolveEntity;
1.27 daniel 218: getEntitySAXFunc getEntity;
219: entityDeclSAXFunc entityDecl;
1.18 daniel 220: notationDeclSAXFunc notationDecl;
1.27 daniel 221: attributeDeclSAXFunc attributeDecl;
222: elementDeclSAXFunc elementDecl;
1.18 daniel 223: unparsedEntityDeclSAXFunc unparsedEntityDecl;
224: setDocumentLocatorSAXFunc setDocumentLocator;
225: startDocumentSAXFunc startDocument;
226: endDocumentSAXFunc endDocument;
227: startElementSAXFunc startElement;
228: endElementSAXFunc endElement;
1.27 daniel 229: referenceSAXFunc reference;
1.18 daniel 230: charactersSAXFunc characters;
231: ignorableWhitespaceSAXFunc ignorableWhitespace;
232: processingInstructionSAXFunc processingInstruction;
1.27 daniel 233: commentSAXFunc comment;
1.18 daniel 234: warningSAXFunc warning;
235: errorSAXFunc error;
236: fatalErrorSAXFunc fatalError;
1.39 daniel 237: getParameterEntitySAXFunc getParameterEntity; /* NOTE(review): member order is part of the struct layout; presumably new members are appended at the end -- confirm */
1.43 daniel 238: cdataBlockSAXFunc cdataBlock;
1.24 daniel 239: } xmlSAXHandler;
240: typedef xmlSAXHandler *xmlSAXHandlerPtr;
1.17 daniel 241:
1.39 daniel 242: /**
1.47 daniel 243: * Global variables: just the default SAX interface tables and XML
244: * version infos.
1.17 daniel 245: */
1.33 daniel 246: extern const char *xmlParserVersion; /* XML version info string */
247:
1.19 daniel 248: extern xmlSAXLocator xmlDefaultSAXLocator; /* default locator callback table */
249: extern xmlSAXHandler xmlDefaultSAXHandler; /* default SAX callback table */
1.33 daniel 250: extern xmlSAXHandler htmlDefaultSAXHandler; /* SAX callback table for HTML; see htmlDefaultSAXHandlerInit() */
1.47 daniel 251:
252: /**
253: * Default behaviour for entity substitution.
254: */
255:
256: extern int xmlSubstituteEntitiesDefaultValue; /* presumably set through xmlSubstituteEntitiesDefault() -- TODO confirm */
257:
1.19 daniel 258:
259: #include "entities.h"
1.26 daniel 260: #include "xml-error.h"
1.35 daniel 261:
1.39 daniel 262: /**
1.35 daniel 263: * Input functions: both take an xmlParserInputPtr and a length, and return an int.
264: */
265:
1.45 daniel 266: int xmlParserInputRead (xmlParserInputPtr in,
267: int len);
268: int xmlParserInputGrow (xmlParserInputPtr in,
269: int len); /* NOTE(review): meaning of len and of the result is not visible in this header */
1.17 daniel 270:
1.39 daniel 271: /**
1.22 daniel 272: * CHAR handling: string helpers operating on CHAR strings; names mirror the C library str* functions.
1.2 veillard 273: */
1.45 daniel 274: CHAR * xmlStrdup (const CHAR *cur); /* non-const result; presumably newly allocated and owned by the caller -- TODO confirm */
275: CHAR * xmlStrndup (const CHAR *cur,
276: int len);
277: CHAR * xmlStrsub (const CHAR *str,
278: int start,
279: int len);
280: const CHAR * xmlStrchr (const CHAR *str,
281: CHAR val);
282: const CHAR * xmlStrstr (const CHAR *str,
283: CHAR *val); /* NOTE(review): val is the only input string here not declared const -- confirm whether that is intended */
284: int xmlStrcmp (const CHAR *str1,
285: const CHAR *str2);
286: int xmlStrncmp (const CHAR *str1,
287: const CHAR *str2,
288: int len);
289: int xmlStrlen (const CHAR *str);
290: CHAR * xmlStrcat (CHAR *cur,
291: const CHAR *add); /* cur is non-const; presumably it may be reallocated, so use the returned pointer -- TODO confirm */
292: CHAR * xmlStrncat (CHAR *cur,
293: const CHAR *add,
294: int len);
1.9 daniel 295:
1.39 daniel 296: /**
297: * Basic parsing interfaces: return an xmlDocPtr for a CHAR string, a memory buffer of a given size, or a file name.
1.22 daniel 298: */
1.45 daniel 299: xmlDocPtr xmlParseDoc (CHAR *cur);
300: xmlDocPtr xmlParseMemory (char *buffer,
301: int size);
302: xmlDocPtr xmlParseFile (const char *filename);
303: int xmlSubstituteEntitiesDefault(int val); /* presumably updates xmlSubstituteEntitiesDefaultValue; meaning of the result not visible here -- TODO confirm */
1.22 daniel 304:
1.39 daniel 305: /**
1.22 daniel 306: * Recovery mode: same argument lists as the xmlParse* entry points. Presumably a document is still returned for input that is not well formed -- TODO confirm.
307: */
1.45 daniel 308: xmlDocPtr xmlRecoverDoc (CHAR *cur);
309: xmlDocPtr xmlRecoverMemory (char *buffer,
310: int size);
311: xmlDocPtr xmlRecoverFile (const char *filename);
1.22 daniel 312:
1.39 daniel 313: /**
314: * Less common routines and SAX interfaces: the xmlSAXParse* variants take the SAX handler table to use.
1.22 daniel 315: */
1.45 daniel 316: int xmlParseDocument (xmlParserCtxtPtr ctxt);
317: xmlDocPtr xmlSAXParseDoc (xmlSAXHandlerPtr sax,
318: CHAR *cur,
319: int recovery); /* recovery: presumably non-zero selects the behaviour of the xmlRecover* functions -- TODO confirm */
320: xmlDocPtr xmlSAXParseMemory (xmlSAXHandlerPtr sax,
321: char *buffer,
322: int size,
323: int recovery);
324: xmlDocPtr xmlSAXParseFile (xmlSAXHandlerPtr sax,
325: const char *filename,
326: int recovery);
327: xmlDtdPtr xmlParseDTD (const CHAR *ExternalID,
328: const CHAR *SystemID);
329: xmlDtdPtr xmlSAXParseDTD (xmlSAXHandlerPtr sax,
330: const CHAR *ExternalID,
331: const CHAR *SystemID);
332: void xmlInitParserCtxt (xmlParserCtxtPtr ctxt);
333: void xmlClearParserCtxt (xmlParserCtxtPtr ctxt);
334: void xmlSetupParserForBuffer (xmlParserCtxtPtr ctxt,
335: const CHAR* buffer,
336: const char* filename);
337: void xmlDefaultSAXHandlerInit(void); /* presumably fills in xmlDefaultSAXHandler -- TODO confirm */
338: void htmlDefaultSAXHandlerInit(void); /* presumably fills in htmlDefaultSAXHandler -- TODO confirm */
1.9 daniel 339:
1.45 daniel 340: /**
341: * Node information: helpers for xmlParserNodeInfo records and the xmlParserNodeInfoSeq that stores them.
342: */
343: const xmlParserNodeInfo*
344: xmlParserFindNodeInfo (const xmlParserCtxt* ctxt,
1.24 daniel 345: const xmlNode* node);
1.45 daniel 346: void xmlInitNodeInfoSeq (xmlParserNodeInfoSeqPtr seq);
347: void xmlClearNodeInfoSeq (xmlParserNodeInfoSeqPtr seq);
1.13 daniel 348: unsigned long xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeq* seq,
349: const xmlNode* node); /* NOTE(review): result is unsigned, so a not-found case cannot be reported as a negative value -- check the implementation */
1.45 daniel 350: void xmlParserAddNodeInfo (xmlParserCtxtPtr ctxt,
351: const xmlParserNodeInfo* info);
352:
353: /*
354: * External entity handling; the functions below are actually implemented in xmlIO.
355: */
1.7 daniel 356:
1.45 daniel 357: void xmlSetExternalEntityLoader(xmlExternalEntityLoader f); /* f: callback of the xmlExternalEntityLoader type */
358: xmlExternalEntityLoader
359: xmlGetExternalEntityLoader(void);
360: xmlParserInputPtr
361: xmlLoadExternalEntity (const char *URL,
362: const char *ID,
363: xmlParserInputPtr context); /* same argument list as the xmlExternalEntityLoader callback type */
1.7 daniel 364: #ifdef __cplusplus
365: }
366: #endif
1.1 veillard 367:
368: #endif /* __XML_PARSER_H__ */
369:
Webmaster