Annotation of libwww/modules/expat/xmlparse/xmlparse.c, revision 1.4.2.1.2.1
1.1 frystyk 1: /*
2: The contents of this file are subject to the Mozilla Public License
1.3 kahan 3: Version 1.1 (the "License"); you may not use this file except in
1.1 frystyk 4: compliance with the License. You may obtain a copy of the License at
5: http://www.mozilla.org/MPL/
6:
7: Software distributed under the License is distributed on an "AS IS"
8: basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
9: License for the specific language governing rights and limitations
10: under the License.
11:
12: The Original Code is expat.
13:
14: The Initial Developer of the Original Code is James Clark.
1.3 kahan 15: Portions created by James Clark are Copyright (C) 1998, 1999
1.1 frystyk 16: James Clark. All Rights Reserved.
17:
18: Contributor(s):
1.3 kahan 19:
20: Alternatively, the contents of this file may be used under the terms
21: of the GNU General Public License (the "GPL"), in which case the
22: provisions of the GPL are applicable instead of those above. If you
23: wish to allow use of your version of this file only under the terms of
24: the GPL and not to allow others to use your version of this file under
25: the MPL, indicate your decision by deleting the provisions above and
26: replace them with the notice and other provisions required by the
27: GPL. If you do not delete the provisions above, a recipient may use
28: your version of this file under either the MPL or the GPL.
1.1 frystyk 29: */
30:
31: #include "xmldef.h"
1.3 kahan 32: #include "xmlparse.h"
1.1 frystyk 33:
/* Bind the generic internal-encoding names used by this file to either the
   UTF-16 or the UTF-8 variants, depending on XML_UNICODE.  ICHAR is the
   matching code-unit type. */
#if defined(XML_UNICODE)

typedef unsigned short ICHAR;
#define XML_ENCODE_MAX XML_UTF16_ENCODE_MAX
#define XmlConvert XmlUtf16Convert
#define XmlEncode XmlUtf16Encode
#define XmlGetInternalEncoding XmlGetUtf16InternalEncoding
#define XmlGetInternalEncodingNS XmlGetUtf16InternalEncodingNS
/* Conversion is needed unless the input is already UTF-16 and `s` has its
   low address bit clear. */
#define MUST_CONVERT(enc, s) (!(enc)->isUtf16 || (((unsigned long)s) & 1))

#else /* !XML_UNICODE */

typedef char ICHAR;
#define XML_ENCODE_MAX XML_UTF8_ENCODE_MAX
#define XmlConvert XmlUtf8Convert
#define XmlEncode XmlUtf8Encode
#define XmlGetInternalEncoding XmlGetUtf8InternalEncoding
#define XmlGetInternalEncodingNS XmlGetUtf8InternalEncodingNS
/* Conversion is needed unless the input is already UTF-8. */
#define MUST_CONVERT(enc, s) (!(enc)->isUtf8)

#endif /* XML_UNICODE */
51:
1.3 kahan 52:
/* Without XML_NS the namespace-aware entry points are simply aliases for the
   plain ones. */
#if !defined(XML_NS)

#define XmlInitEncodingNS XmlInitEncoding
#define XmlInitUnknownEncodingNS XmlInitUnknownEncoding
#define XmlParseXmlDeclNS XmlParseXmlDecl
#undef XmlGetInternalEncodingNS
#define XmlGetInternalEncodingNS XmlGetInternalEncoding

#endif /* !XML_NS */
62:
/* XML_T(x) spells a character literal in the parser's character type: it
   pastes an L prefix when XML_UNICODE_WCHAR_T is defined and leaves the
   literal untouched otherwise. */
#if defined(XML_UNICODE_WCHAR_T)
#define XML_T(x) L ## x
#else
#define XML_T(x) x
#endif
68:
/* ROUND_UP(n, sz): smallest multiple of sz that is >= n.
   sz must be a power of 2 (the result is obtained by masking). */
#define ROUND_UP(n, sz) (((n) + ((sz) - 1)) & ~((sz) - 1))
71:
72: #include "xmltok.h"
73: #include "xmlrole.h"
74: #include "hashtable.h"
75:
/* Initial allocation sizes. */
#define INIT_TAG_BUF_SIZE 32    /* must be a multiple of sizeof(XML_Char) */
#define INIT_DATA_BUF_SIZE 1024 /* XML_Char units in the parser's dataBuf */
#define INIT_ATTS_SIZE 16       /* ATTRIBUTE slots in the parser's atts array */
#define INIT_BLOCK_SIZE 1024
#define INIT_BUFFER_SIZE 1024

/* NOTE(review): consumed outside the part of the file visible here. */
#define EXPAND_SPARE 24
83:
84: typedef struct binding {
85: struct prefix *prefix;
86: struct binding *nextTagBinding;
87: struct binding *prevPrefixBinding;
88: const struct attribute_id *attId;
89: XML_Char *uri;
90: int uriLen;
91: int uriAlloc;
92: } BINDING;
93:
94: typedef struct prefix {
95: const XML_Char *name;
96: BINDING *binding;
97: } PREFIX;
98:
99: typedef struct {
100: const XML_Char *str;
101: const XML_Char *localPart;
102: int uriLen;
103: } TAG_NAME;
104:
1.1 frystyk 105: typedef struct tag {
106: struct tag *parent;
107: const char *rawName;
108: int rawNameLength;
1.3 kahan 109: TAG_NAME name;
1.1 frystyk 110: char *buf;
111: char *bufEnd;
1.3 kahan 112: BINDING *bindings;
1.1 frystyk 113: } TAG;
114:
115: typedef struct {
116: const XML_Char *name;
117: const XML_Char *textPtr;
118: int textLen;
119: const XML_Char *systemId;
120: const XML_Char *base;
121: const XML_Char *publicId;
122: const XML_Char *notation;
123: char open;
124: } ENTITY;
125:
126: typedef struct block {
127: struct block *next;
128: int size;
129: XML_Char s[1];
130: } BLOCK;
131:
132: typedef struct {
133: BLOCK *blocks;
134: BLOCK *freeBlocks;
135: const XML_Char *end;
136: XML_Char *ptr;
137: XML_Char *start;
138: } STRING_POOL;
139:
140: /* The XML_Char before the name is used to determine whether
141: an attribute has been specified. */
1.3 kahan 142: typedef struct attribute_id {
1.1 frystyk 143: XML_Char *name;
1.3 kahan 144: PREFIX *prefix;
1.1 frystyk 145: char maybeTokenized;
1.3 kahan 146: char xmlns;
1.1 frystyk 147: } ATTRIBUTE_ID;
148:
149: typedef struct {
150: const ATTRIBUTE_ID *id;
151: char isCdata;
152: const XML_Char *value;
153: } DEFAULT_ATTRIBUTE;
154:
155: typedef struct {
156: const XML_Char *name;
1.3 kahan 157: PREFIX *prefix;
1.1 frystyk 158: int nDefaultAtts;
159: int allocDefaultAtts;
160: DEFAULT_ATTRIBUTE *defaultAtts;
161: } ELEMENT_TYPE;
162:
163: typedef struct {
164: HASH_TABLE generalEntities;
165: HASH_TABLE elementTypes;
166: HASH_TABLE attributeIds;
1.3 kahan 167: HASH_TABLE prefixes;
1.1 frystyk 168: STRING_POOL pool;
169: int complete;
170: int standalone;
1.4 kahan 171: #ifdef XML_DTD
172: HASH_TABLE paramEntities;
173: #endif /* XML_DTD */
1.3 kahan 174: PREFIX defaultPrefix;
1.1 frystyk 175: } DTD;
176:
1.3 kahan 177: typedef struct open_internal_entity {
178: const char *internalEventPtr;
179: const char *internalEventEndPtr;
180: struct open_internal_entity *next;
181: ENTITY *entity;
182: } OPEN_INTERNAL_ENTITY;
183:
1.1 frystyk 184: typedef enum XML_Error Processor(XML_Parser parser,
185: const char *start,
186: const char *end,
187: const char **endPtr);
188:
189: static Processor prologProcessor;
190: static Processor prologInitProcessor;
191: static Processor contentProcessor;
192: static Processor cdataSectionProcessor;
1.4 kahan 193: #ifdef XML_DTD
194: static Processor ignoreSectionProcessor;
195: #endif /* XML_DTD */
1.1 frystyk 196: static Processor epilogProcessor;
197: static Processor errorProcessor;
198: static Processor externalEntityInitProcessor;
199: static Processor externalEntityInitProcessor2;
200: static Processor externalEntityInitProcessor3;
201: static Processor externalEntityContentProcessor;
202:
203: static enum XML_Error
204: handleUnknownEncoding(XML_Parser parser, const XML_Char *encodingName);
205: static enum XML_Error
206: processXmlDecl(XML_Parser parser, int isGeneralTextEntity, const char *, const char *);
207: static enum XML_Error
208: initializeEncoding(XML_Parser parser);
209: static enum XML_Error
1.4 kahan 210: doProlog(XML_Parser parser, const ENCODING *enc, const char *s,
211: const char *end, int tok, const char *next, const char **nextPtr);
212: static enum XML_Error
213: processInternalParamEntity(XML_Parser parser, ENTITY *entity);
214: static enum XML_Error
1.1 frystyk 215: doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc,
216: const char *start, const char *end, const char **endPtr);
217: static enum XML_Error
218: doCdataSection(XML_Parser parser, const ENCODING *, const char **startPtr, const char *end, const char **nextPtr);
1.4 kahan 219: #ifdef XML_DTD
220: static enum XML_Error
221: doIgnoreSection(XML_Parser parser, const ENCODING *, const char **startPtr, const char *end, const char **nextPtr);
222: #endif /* XML_DTD */
1.3 kahan 223: static enum XML_Error storeAtts(XML_Parser parser, const ENCODING *, const char *s,
224: TAG_NAME *tagNamePtr, BINDING **bindingsPtr);
225: static
226: int addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId, const XML_Char *uri, BINDING **bindingsPtr);
1.1 frystyk 227: static int
228: defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *, int isCdata, const XML_Char *dfltValue);
229: static enum XML_Error
230: storeAttributeValue(XML_Parser parser, const ENCODING *, int isCdata, const char *, const char *,
231: STRING_POOL *);
232: static enum XML_Error
233: appendAttributeValue(XML_Parser parser, const ENCODING *, int isCdata, const char *, const char *,
234: STRING_POOL *);
235: static ATTRIBUTE_ID *
236: getAttributeId(XML_Parser parser, const ENCODING *enc, const char *start, const char *end);
1.3 kahan 237: static int setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *);
1.1 frystyk 238: static enum XML_Error
1.4 kahan 239: storeEntityValue(XML_Parser parser, const ENCODING *enc, const char *start, const char *end);
1.1 frystyk 240: static int
241: reportProcessingInstruction(XML_Parser parser, const ENCODING *enc, const char *start, const char *end);
1.3 kahan 242: static int
243: reportComment(XML_Parser parser, const ENCODING *enc, const char *start, const char *end);
1.1 frystyk 244: static void
245: reportDefault(XML_Parser parser, const ENCODING *enc, const char *start, const char *end);
246:
1.3 kahan 247: static const XML_Char *getContext(XML_Parser parser);
248: static int setContext(XML_Parser parser, const XML_Char *context);
1.1 frystyk 249: static void normalizePublicId(XML_Char *s);
250: static int dtdInit(DTD *);
251: static void dtdDestroy(DTD *);
252: static int dtdCopy(DTD *newDtd, const DTD *oldDtd);
1.4 kahan 253: static int copyEntityTable(HASH_TABLE *, STRING_POOL *, const HASH_TABLE *);
254: #ifdef XML_DTD
255: static void dtdSwap(DTD *, DTD *);
256: #endif /* XML_DTD */
1.1 frystyk 257: static void poolInit(STRING_POOL *);
258: static void poolClear(STRING_POOL *);
259: static void poolDestroy(STRING_POOL *);
260: static XML_Char *poolAppend(STRING_POOL *pool, const ENCODING *enc,
261: const char *ptr, const char *end);
262: static XML_Char *poolStoreString(STRING_POOL *pool, const ENCODING *enc,
263: const char *ptr, const char *end);
264: static int poolGrow(STRING_POOL *pool);
265: static const XML_Char *poolCopyString(STRING_POOL *pool, const XML_Char *s);
266: static const XML_Char *poolCopyStringN(STRING_POOL *pool, const XML_Char *s, int n);
267:
/* Inline STRING_POOL accessors.  `pool` is a pointer to a STRING_POOL; the
   string under construction occupies [start, ptr).  Every use of a macro
   argument is parenthesised so that any pointer expression (e.g. &somePool)
   may be passed. */
#define poolStart(pool) ((pool)->start)
#define poolEnd(pool) ((pool)->ptr)
/* Number of XML_Chars accumulated so far. */
#define poolLength(pool) ((pool)->ptr - (pool)->start)
/* Drop the last accumulated character. */
#define poolChop(pool) ((void)--((pool)->ptr))
#define poolLastChar(pool) (((pool)->ptr)[-1])
/* Abandon the string under construction. */
#define poolDiscard(pool) ((pool)->ptr = (pool)->start)
/* Keep the string under construction and begin a new one after it. */
#define poolFinish(pool) ((pool)->start = (pool)->ptr)
/* Append one character, growing the pool first when it is full.
   Evaluates to 0 if poolGrow() fails and to 1 otherwise. */
#define poolAppendChar(pool, c) \
  (((pool)->ptr == (pool)->end && !poolGrow(pool)) \
   ? 0 \
   : ((*((pool)->ptr)++ = (c)), 1))
279:
280: typedef struct {
281: /* The first member must be userData so that the XML_GetUserData macro works. */
1.3 kahan 282: void *m_userData;
283: void *m_handlerArg;
284: char *m_buffer;
1.1 frystyk 285: /* first character to be parsed */
1.3 kahan 286: const char *m_bufferPtr;
1.1 frystyk 287: /* past last character to be parsed */
1.3 kahan 288: char *m_bufferEnd;
1.1 frystyk 289: /* allocated end of buffer */
1.3 kahan 290: const char *m_bufferLim;
291: long m_parseEndByteIndex;
292: const char *m_parseEndPtr;
293: XML_Char *m_dataBuf;
294: XML_Char *m_dataBufEnd;
295: XML_StartElementHandler m_startElementHandler;
296: XML_EndElementHandler m_endElementHandler;
297: XML_CharacterDataHandler m_characterDataHandler;
298: XML_ProcessingInstructionHandler m_processingInstructionHandler;
299: XML_CommentHandler m_commentHandler;
300: XML_StartCdataSectionHandler m_startCdataSectionHandler;
301: XML_EndCdataSectionHandler m_endCdataSectionHandler;
302: XML_DefaultHandler m_defaultHandler;
1.4 kahan 303: XML_StartDoctypeDeclHandler m_startDoctypeDeclHandler;
304: XML_EndDoctypeDeclHandler m_endDoctypeDeclHandler;
1.3 kahan 305: XML_UnparsedEntityDeclHandler m_unparsedEntityDeclHandler;
306: XML_NotationDeclHandler m_notationDeclHandler;
307: XML_StartNamespaceDeclHandler m_startNamespaceDeclHandler;
308: XML_EndNamespaceDeclHandler m_endNamespaceDeclHandler;
309: XML_NotStandaloneHandler m_notStandaloneHandler;
310: XML_ExternalEntityRefHandler m_externalEntityRefHandler;
311: void *m_externalEntityRefHandlerArg;
312: XML_UnknownEncodingHandler m_unknownEncodingHandler;
313: const ENCODING *m_encoding;
314: INIT_ENCODING m_initEncoding;
1.4 kahan 315: const ENCODING *m_internalEncoding;
1.3 kahan 316: const XML_Char *m_protocolEncodingName;
317: int m_ns;
318: void *m_unknownEncodingMem;
319: void *m_unknownEncodingData;
320: void *m_unknownEncodingHandlerData;
321: void (*m_unknownEncodingRelease)(void *);
322: PROLOG_STATE m_prologState;
323: Processor *m_processor;
324: enum XML_Error m_errorCode;
325: const char *m_eventPtr;
326: const char *m_eventEndPtr;
327: const char *m_positionPtr;
328: OPEN_INTERNAL_ENTITY *m_openInternalEntities;
329: int m_defaultExpandInternalEntities;
330: int m_tagLevel;
331: ENTITY *m_declEntity;
332: const XML_Char *m_declNotationName;
333: const XML_Char *m_declNotationPublicId;
334: ELEMENT_TYPE *m_declElementType;
335: ATTRIBUTE_ID *m_declAttributeId;
336: char m_declAttributeIsCdata;
337: DTD m_dtd;
1.4 kahan 338: const XML_Char *m_curBase;
1.3 kahan 339: TAG *m_tagStack;
340: TAG *m_freeTagList;
341: BINDING *m_inheritedBindings;
342: BINDING *m_freeBindingList;
343: int m_attsSize;
344: int m_nSpecifiedAtts;
345: ATTRIBUTE *m_atts;
346: POSITION m_position;
347: STRING_POOL m_tempPool;
348: STRING_POOL m_temp2Pool;
349: char *m_groupConnector;
350: unsigned m_groupSize;
351: int m_hadExternalDoctype;
352: XML_Char m_namespaceSeparator;
1.4 kahan 353: #ifdef XML_DTD
354: enum XML_ParamEntityParsing m_paramEntityParsing;
355: XML_Parser m_parentParser;
356: #endif
1.1 frystyk 357: } Parser;
358:
/* Field accessors.  Each macro expands to a member of the Parser reached
   through a variable that must be named `parser` at the point of use.  As a
   consequence none of these names can be used for a local variable in any
   function that uses them. */
#define userData \
  (((Parser *)parser)->m_userData)
#define handlerArg \
  (((Parser *)parser)->m_handlerArg)
#define startElementHandler \
  (((Parser *)parser)->m_startElementHandler)
#define endElementHandler \
  (((Parser *)parser)->m_endElementHandler)
#define characterDataHandler \
  (((Parser *)parser)->m_characterDataHandler)
#define processingInstructionHandler \
  (((Parser *)parser)->m_processingInstructionHandler)
#define commentHandler \
  (((Parser *)parser)->m_commentHandler)
#define startCdataSectionHandler \
  (((Parser *)parser)->m_startCdataSectionHandler)
#define endCdataSectionHandler \
  (((Parser *)parser)->m_endCdataSectionHandler)
#define defaultHandler \
  (((Parser *)parser)->m_defaultHandler)
#define startDoctypeDeclHandler \
  (((Parser *)parser)->m_startDoctypeDeclHandler)
#define endDoctypeDeclHandler \
  (((Parser *)parser)->m_endDoctypeDeclHandler)
#define unparsedEntityDeclHandler \
  (((Parser *)parser)->m_unparsedEntityDeclHandler)
#define notationDeclHandler \
  (((Parser *)parser)->m_notationDeclHandler)
#define startNamespaceDeclHandler \
  (((Parser *)parser)->m_startNamespaceDeclHandler)
#define endNamespaceDeclHandler \
  (((Parser *)parser)->m_endNamespaceDeclHandler)
#define notStandaloneHandler \
  (((Parser *)parser)->m_notStandaloneHandler)
#define externalEntityRefHandler \
  (((Parser *)parser)->m_externalEntityRefHandler)
#define externalEntityRefHandlerArg \
  (((Parser *)parser)->m_externalEntityRefHandlerArg)
#define unknownEncodingHandler \
  (((Parser *)parser)->m_unknownEncodingHandler)
#define encoding \
  (((Parser *)parser)->m_encoding)
#define initEncoding \
  (((Parser *)parser)->m_initEncoding)
#define internalEncoding \
  (((Parser *)parser)->m_internalEncoding)
#define unknownEncodingMem \
  (((Parser *)parser)->m_unknownEncodingMem)
#define unknownEncodingData \
  (((Parser *)parser)->m_unknownEncodingData)
#define unknownEncodingHandlerData \
  (((Parser *)parser)->m_unknownEncodingHandlerData)
#define unknownEncodingRelease \
  (((Parser *)parser)->m_unknownEncodingRelease)
#define protocolEncodingName \
  (((Parser *)parser)->m_protocolEncodingName)
#define ns \
  (((Parser *)parser)->m_ns)
#define prologState \
  (((Parser *)parser)->m_prologState)
#define processor \
  (((Parser *)parser)->m_processor)
#define errorCode \
  (((Parser *)parser)->m_errorCode)
#define eventPtr \
  (((Parser *)parser)->m_eventPtr)
#define eventEndPtr \
  (((Parser *)parser)->m_eventEndPtr)
#define positionPtr \
  (((Parser *)parser)->m_positionPtr)
#define position \
  (((Parser *)parser)->m_position)
#define openInternalEntities \
  (((Parser *)parser)->m_openInternalEntities)
#define defaultExpandInternalEntities \
  (((Parser *)parser)->m_defaultExpandInternalEntities)
#define tagLevel \
  (((Parser *)parser)->m_tagLevel)
#define buffer \
  (((Parser *)parser)->m_buffer)
#define bufferPtr \
  (((Parser *)parser)->m_bufferPtr)
#define bufferEnd \
  (((Parser *)parser)->m_bufferEnd)
#define parseEndByteIndex \
  (((Parser *)parser)->m_parseEndByteIndex)
#define parseEndPtr \
  (((Parser *)parser)->m_parseEndPtr)
#define bufferLim \
  (((Parser *)parser)->m_bufferLim)
#define dataBuf \
  (((Parser *)parser)->m_dataBuf)
#define dataBufEnd \
  (((Parser *)parser)->m_dataBufEnd)
#define dtd \
  (((Parser *)parser)->m_dtd)
#define curBase \
  (((Parser *)parser)->m_curBase)
#define declEntity \
  (((Parser *)parser)->m_declEntity)
#define declNotationName \
  (((Parser *)parser)->m_declNotationName)
#define declNotationPublicId \
  (((Parser *)parser)->m_declNotationPublicId)
#define declElementType \
  (((Parser *)parser)->m_declElementType)
#define declAttributeId \
  (((Parser *)parser)->m_declAttributeId)
#define declAttributeIsCdata \
  (((Parser *)parser)->m_declAttributeIsCdata)
#define freeTagList \
  (((Parser *)parser)->m_freeTagList)
#define freeBindingList \
  (((Parser *)parser)->m_freeBindingList)
#define inheritedBindings \
  (((Parser *)parser)->m_inheritedBindings)
#define tagStack \
  (((Parser *)parser)->m_tagStack)
#define atts \
  (((Parser *)parser)->m_atts)
#define attsSize \
  (((Parser *)parser)->m_attsSize)
#define nSpecifiedAtts \
  (((Parser *)parser)->m_nSpecifiedAtts)
#define tempPool \
  (((Parser *)parser)->m_tempPool)
#define temp2Pool \
  (((Parser *)parser)->m_temp2Pool)
#define groupConnector \
  (((Parser *)parser)->m_groupConnector)
#define groupSize \
  (((Parser *)parser)->m_groupSize)
#define hadExternalDoctype \
  (((Parser *)parser)->m_hadExternalDoctype)
#define namespaceSeparator \
  (((Parser *)parser)->m_namespaceSeparator)
#ifdef XML_DTD
#define parentParser \
  (((Parser *)parser)->m_parentParser)
#define paramEntityParsing \
  (((Parser *)parser)->m_paramEntityParsing)
#endif /* XML_DTD */
1.3 kahan 432:
#if defined(_MSC_VER) && defined(_DEBUG)
/* Returns the handle typed as Parser *.  Compiled only for MSVC debug
   builds. */
Parser *asParser(XML_Parser parser)
{
  return parser;
}
#endif /* _MSC_VER && _DEBUG */
1.1 frystyk 441:
442: XML_Parser XML_ParserCreate(const XML_Char *encodingName)
443: {
444: XML_Parser parser = malloc(sizeof(Parser));
445: if (!parser)
446: return parser;
447: processor = prologInitProcessor;
448: XmlPrologStateInit(&prologState);
449: userData = 0;
450: handlerArg = 0;
451: startElementHandler = 0;
452: endElementHandler = 0;
453: characterDataHandler = 0;
454: processingInstructionHandler = 0;
1.3 kahan 455: commentHandler = 0;
456: startCdataSectionHandler = 0;
457: endCdataSectionHandler = 0;
1.1 frystyk 458: defaultHandler = 0;
1.4 kahan 459: startDoctypeDeclHandler = 0;
460: endDoctypeDeclHandler = 0;
1.1 frystyk 461: unparsedEntityDeclHandler = 0;
462: notationDeclHandler = 0;
1.3 kahan 463: startNamespaceDeclHandler = 0;
464: endNamespaceDeclHandler = 0;
465: notStandaloneHandler = 0;
1.1 frystyk 466: externalEntityRefHandler = 0;
1.3 kahan 467: externalEntityRefHandlerArg = parser;
1.1 frystyk 468: unknownEncodingHandler = 0;
469: buffer = 0;
470: bufferPtr = 0;
471: bufferEnd = 0;
472: parseEndByteIndex = 0;
473: parseEndPtr = 0;
474: bufferLim = 0;
475: declElementType = 0;
476: declAttributeId = 0;
477: declEntity = 0;
478: declNotationName = 0;
479: declNotationPublicId = 0;
480: memset(&position, 0, sizeof(POSITION));
481: errorCode = XML_ERROR_NONE;
482: eventPtr = 0;
483: eventEndPtr = 0;
484: positionPtr = 0;
1.3 kahan 485: openInternalEntities = 0;
1.1 frystyk 486: tagLevel = 0;
487: tagStack = 0;
488: freeTagList = 0;
1.3 kahan 489: freeBindingList = 0;
490: inheritedBindings = 0;
1.1 frystyk 491: attsSize = INIT_ATTS_SIZE;
492: atts = malloc(attsSize * sizeof(ATTRIBUTE));
1.3 kahan 493: nSpecifiedAtts = 0;
1.1 frystyk 494: dataBuf = malloc(INIT_DATA_BUF_SIZE * sizeof(XML_Char));
495: groupSize = 0;
496: groupConnector = 0;
497: hadExternalDoctype = 0;
498: unknownEncodingMem = 0;
499: unknownEncodingRelease = 0;
500: unknownEncodingData = 0;
501: unknownEncodingHandlerData = 0;
1.3 kahan 502: namespaceSeparator = '!';
1.4 kahan 503: #ifdef XML_DTD
504: parentParser = 0;
505: paramEntityParsing = XML_PARAM_ENTITY_PARSING_NEVER;
506: #endif
1.3 kahan 507: ns = 0;
1.1 frystyk 508: poolInit(&tempPool);
509: poolInit(&temp2Pool);
510: protocolEncodingName = encodingName ? poolCopyString(&tempPool, encodingName) : 0;
1.4 kahan 511: curBase = 0;
1.1 frystyk 512: if (!dtdInit(&dtd) || !atts || !dataBuf
513: || (encodingName && !protocolEncodingName)) {
514: XML_ParserFree(parser);
515: return 0;
516: }
517: dataBufEnd = dataBuf + INIT_DATA_BUF_SIZE;
518: XmlInitEncoding(&initEncoding, &encoding, 0);
1.4 kahan 519: internalEncoding = XmlGetInternalEncoding();
1.1 frystyk 520: return parser;
521: }
522:
1.3 kahan 523: XML_Parser XML_ParserCreateNS(const XML_Char *encodingName, XML_Char nsSep)
524: {
525: static
526: const XML_Char implicitContext[] = {
527: XML_T('x'), XML_T('m'), XML_T('l'), XML_T('='),
528: XML_T('h'), XML_T('t'), XML_T('t'), XML_T('p'), XML_T(':'),
529: XML_T('/'), XML_T('/'), XML_T('w'), XML_T('w'), XML_T('w'),
530: XML_T('.'), XML_T('w'), XML_T('3'),
531: XML_T('.'), XML_T('o'), XML_T('r'), XML_T('g'),
532: XML_T('/'), XML_T('X'), XML_T('M'), XML_T('L'),
533: XML_T('/'), XML_T('1'), XML_T('9'), XML_T('9'), XML_T('8'),
534: XML_T('/'), XML_T('n'), XML_T('a'), XML_T('m'), XML_T('e'),
535: XML_T('s'), XML_T('p'), XML_T('a'), XML_T('c'), XML_T('e'),
536: XML_T('\0')
537: };
538:
539: XML_Parser parser = XML_ParserCreate(encodingName);
540: if (parser) {
541: XmlInitEncodingNS(&initEncoding, &encoding, 0);
542: ns = 1;
1.4 kahan 543: internalEncoding = XmlGetInternalEncodingNS();
1.3 kahan 544: namespaceSeparator = nsSep;
545: }
546: if (!setContext(parser, implicitContext)) {
547: XML_ParserFree(parser);
548: return 0;
549: }
550: return parser;
551: }
552:
553: int XML_SetEncoding(XML_Parser parser, const XML_Char *encodingName)
554: {
555: if (!encodingName)
556: protocolEncodingName = 0;
557: else {
558: protocolEncodingName = poolCopyString(&tempPool, encodingName);
559: if (!protocolEncodingName)
560: return 0;
561: }
562: return 1;
563: }
564:
1.1 frystyk 565: XML_Parser XML_ExternalEntityParserCreate(XML_Parser oldParser,
1.3 kahan 566: const XML_Char *context,
1.1 frystyk 567: const XML_Char *encodingName)
568: {
569: XML_Parser parser = oldParser;
570: DTD *oldDtd = &dtd;
571: XML_StartElementHandler oldStartElementHandler = startElementHandler;
572: XML_EndElementHandler oldEndElementHandler = endElementHandler;
573: XML_CharacterDataHandler oldCharacterDataHandler = characterDataHandler;
574: XML_ProcessingInstructionHandler oldProcessingInstructionHandler = processingInstructionHandler;
1.3 kahan 575: XML_CommentHandler oldCommentHandler = commentHandler;
576: XML_StartCdataSectionHandler oldStartCdataSectionHandler = startCdataSectionHandler;
577: XML_EndCdataSectionHandler oldEndCdataSectionHandler = endCdataSectionHandler;
1.1 frystyk 578: XML_DefaultHandler oldDefaultHandler = defaultHandler;
1.3 kahan 579: XML_StartNamespaceDeclHandler oldStartNamespaceDeclHandler = startNamespaceDeclHandler;
580: XML_EndNamespaceDeclHandler oldEndNamespaceDeclHandler = endNamespaceDeclHandler;
581: XML_NotStandaloneHandler oldNotStandaloneHandler = notStandaloneHandler;
1.1 frystyk 582: XML_ExternalEntityRefHandler oldExternalEntityRefHandler = externalEntityRefHandler;
583: XML_UnknownEncodingHandler oldUnknownEncodingHandler = unknownEncodingHandler;
584: void *oldUserData = userData;
585: void *oldHandlerArg = handlerArg;
1.3 kahan 586: int oldDefaultExpandInternalEntities = defaultExpandInternalEntities;
587: void *oldExternalEntityRefHandlerArg = externalEntityRefHandlerArg;
1.4 kahan 588: #ifdef XML_DTD
589: int oldParamEntityParsing = paramEntityParsing;
590: #endif
1.3 kahan 591: parser = (ns
592: ? XML_ParserCreateNS(encodingName, namespaceSeparator)
593: : XML_ParserCreate(encodingName));
1.1 frystyk 594: if (!parser)
595: return 0;
596: startElementHandler = oldStartElementHandler;
597: endElementHandler = oldEndElementHandler;
598: characterDataHandler = oldCharacterDataHandler;
599: processingInstructionHandler = oldProcessingInstructionHandler;
1.3 kahan 600: commentHandler = oldCommentHandler;
601: startCdataSectionHandler = oldStartCdataSectionHandler;
602: endCdataSectionHandler = oldEndCdataSectionHandler;
1.1 frystyk 603: defaultHandler = oldDefaultHandler;
1.3 kahan 604: startNamespaceDeclHandler = oldStartNamespaceDeclHandler;
605: endNamespaceDeclHandler = oldEndNamespaceDeclHandler;
606: notStandaloneHandler = oldNotStandaloneHandler;
1.1 frystyk 607: externalEntityRefHandler = oldExternalEntityRefHandler;
608: unknownEncodingHandler = oldUnknownEncodingHandler;
609: userData = oldUserData;
610: if (oldUserData == oldHandlerArg)
611: handlerArg = userData;
612: else
613: handlerArg = parser;
1.3 kahan 614: if (oldExternalEntityRefHandlerArg != oldParser)
615: externalEntityRefHandlerArg = oldExternalEntityRefHandlerArg;
616: defaultExpandInternalEntities = oldDefaultExpandInternalEntities;
1.4 kahan 617: #ifdef XML_DTD
618: paramEntityParsing = oldParamEntityParsing;
619: if (context) {
620: #endif /* XML_DTD */
621: if (!dtdCopy(&dtd, oldDtd) || !setContext(parser, context)) {
622: XML_ParserFree(parser);
623: return 0;
624: }
625: processor = externalEntityInitProcessor;
626: #ifdef XML_DTD
627: }
628: else {
629: dtdSwap(&dtd, oldDtd);
630: parentParser = oldParser;
631: XmlPrologStateInitExternalEntity(&prologState);
632: dtd.complete = 1;
633: hadExternalDoctype = 1;
1.1 frystyk 634: }
1.4 kahan 635: #endif /* XML_DTD */
1.1 frystyk 636: return parser;
637: }
638:
1.3 kahan 639: static
640: void destroyBindings(BINDING *bindings)
641: {
642: for (;;) {
643: BINDING *b = bindings;
644: if (!b)
645: break;
646: bindings = b->nextTagBinding;
647: free(b->uri);
648: free(b);
649: }
650: }
651:
1.1 frystyk 652: void XML_ParserFree(XML_Parser parser)
653: {
654: for (;;) {
655: TAG *p;
656: if (tagStack == 0) {
657: if (freeTagList == 0)
658: break;
659: tagStack = freeTagList;
660: freeTagList = 0;
661: }
662: p = tagStack;
663: tagStack = tagStack->parent;
664: free(p->buf);
1.3 kahan 665: destroyBindings(p->bindings);
1.1 frystyk 666: free(p);
667: }
1.3 kahan 668: destroyBindings(freeBindingList);
669: destroyBindings(inheritedBindings);
1.1 frystyk 670: poolDestroy(&tempPool);
671: poolDestroy(&temp2Pool);
1.4 kahan 672: #ifdef XML_DTD
673: if (parentParser) {
674: if (hadExternalDoctype)
675: dtd.complete = 0;
676: dtdSwap(&dtd, &((Parser *)parentParser)->m_dtd);
677: }
678: #endif /* XML_DTD */
1.1 frystyk 679: dtdDestroy(&dtd);
680: free((void *)atts);
681: free(groupConnector);
682: free(buffer);
683: free(dataBuf);
684: free(unknownEncodingMem);
685: if (unknownEncodingRelease)
686: unknownEncodingRelease(unknownEncodingData);
687: free(parser);
688: }
689:
690: void XML_UseParserAsHandlerArg(XML_Parser parser)
691: {
692: handlerArg = parser;
693: }
694:
695: void XML_SetUserData(XML_Parser parser, void *p)
696: {
697: if (handlerArg == userData)
698: handlerArg = userData = p;
699: else
700: userData = p;
701: }
702:
703: int XML_SetBase(XML_Parser parser, const XML_Char *p)
704: {
705: if (p) {
706: p = poolCopyString(&dtd.pool, p);
707: if (!p)
708: return 0;
1.4 kahan 709: curBase = p;
1.1 frystyk 710: }
711: else
1.4 kahan 712: curBase = 0;
1.1 frystyk 713: return 1;
714: }
715:
716: const XML_Char *XML_GetBase(XML_Parser parser)
717: {
1.4 kahan 718: return curBase;
1.1 frystyk 719: }
720:
1.3 kahan 721: int XML_GetSpecifiedAttributeCount(XML_Parser parser)
722: {
723: return nSpecifiedAtts;
724: }
725:
1.1 frystyk 726: void XML_SetElementHandler(XML_Parser parser,
727: XML_StartElementHandler start,
728: XML_EndElementHandler end)
729: {
730: startElementHandler = start;
731: endElementHandler = end;
732: }
733:
734: void XML_SetCharacterDataHandler(XML_Parser parser,
735: XML_CharacterDataHandler handler)
736: {
737: characterDataHandler = handler;
738: }
739:
740: void XML_SetProcessingInstructionHandler(XML_Parser parser,
741: XML_ProcessingInstructionHandler handler)
742: {
743: processingInstructionHandler = handler;
744: }
745:
1.3 kahan 746: void XML_SetCommentHandler(XML_Parser parser,
747: XML_CommentHandler handler)
748: {
749: commentHandler = handler;
750: }
751:
752: void XML_SetCdataSectionHandler(XML_Parser parser,
753: XML_StartCdataSectionHandler start,
754: XML_EndCdataSectionHandler end)
755: {
756: startCdataSectionHandler = start;
757: endCdataSectionHandler = end;
758: }
759:
1.1 frystyk 760: void XML_SetDefaultHandler(XML_Parser parser,
761: XML_DefaultHandler handler)
762: {
763: defaultHandler = handler;
1.3 kahan 764: defaultExpandInternalEntities = 0;
765: }
766:
767: void XML_SetDefaultHandlerExpand(XML_Parser parser,
768: XML_DefaultHandler handler)
769: {
770: defaultHandler = handler;
771: defaultExpandInternalEntities = 1;
1.1 frystyk 772: }
773:
1.4 kahan 774: void XML_SetDoctypeDeclHandler(XML_Parser parser,
775: XML_StartDoctypeDeclHandler start,
776: XML_EndDoctypeDeclHandler end)
777: {
778: startDoctypeDeclHandler = start;
779: endDoctypeDeclHandler = end;
780: }
781:
1.1 frystyk 782: void XML_SetUnparsedEntityDeclHandler(XML_Parser parser,
783: XML_UnparsedEntityDeclHandler handler)
784: {
785: unparsedEntityDeclHandler = handler;
786: }
787:
788: void XML_SetNotationDeclHandler(XML_Parser parser,
789: XML_NotationDeclHandler handler)
790: {
791: notationDeclHandler = handler;
792: }
793:
1.3 kahan 794: void XML_SetNamespaceDeclHandler(XML_Parser parser,
795: XML_StartNamespaceDeclHandler start,
796: XML_EndNamespaceDeclHandler end)
797: {
798: startNamespaceDeclHandler = start;
799: endNamespaceDeclHandler = end;
800: }
801:
802: void XML_SetNotStandaloneHandler(XML_Parser parser,
803: XML_NotStandaloneHandler handler)
804: {
805: notStandaloneHandler = handler;
806: }
807:
1.1 frystyk 808: void XML_SetExternalEntityRefHandler(XML_Parser parser,
809: XML_ExternalEntityRefHandler handler)
810: {
811: externalEntityRefHandler = handler;
812: }
813:
1.3 kahan 814: void XML_SetExternalEntityRefHandlerArg(XML_Parser parser, void *arg)
815: {
816: if (arg)
817: externalEntityRefHandlerArg = arg;
818: else
819: externalEntityRefHandlerArg = parser;
820: }
821:
1.1 frystyk 822: void XML_SetUnknownEncodingHandler(XML_Parser parser,
823: XML_UnknownEncodingHandler handler,
824: void *data)
825: {
826: unknownEncodingHandler = handler;
827: unknownEncodingHandlerData = data;
828: }
829:
1.4 kahan 830: int XML_SetParamEntityParsing(XML_Parser parser,
831: enum XML_ParamEntityParsing parsing)
832: {
833: #ifdef XML_DTD
834: paramEntityParsing = parsing;
835: return 1;
836: #else
837: return parsing == XML_PARAM_ENTITY_PARSING_NEVER;
838: #endif
839: }
840:
1.1 frystyk 841: int XML_Parse(XML_Parser parser, const char *s, int len, int isFinal)
842: {
843: if (len == 0) {
844: if (!isFinal)
845: return 1;
1.3 kahan 846: positionPtr = bufferPtr;
1.1 frystyk 847: errorCode = processor(parser, bufferPtr, parseEndPtr = bufferEnd, 0);
848: if (errorCode == XML_ERROR_NONE)
849: return 1;
850: eventEndPtr = eventPtr;
1.4 kahan 851: processor = errorProcessor;
1.1 frystyk 852: return 0;
853: }
854: else if (bufferPtr == bufferEnd) {
855: const char *end;
856: int nLeftOver;
857: parseEndByteIndex += len;
858: positionPtr = s;
859: if (isFinal) {
860: errorCode = processor(parser, s, parseEndPtr = s + len, 0);
861: if (errorCode == XML_ERROR_NONE)
862: return 1;
863: eventEndPtr = eventPtr;
1.4 kahan 864: processor = errorProcessor;
1.1 frystyk 865: return 0;
866: }
867: errorCode = processor(parser, s, parseEndPtr = s + len, &end);
868: if (errorCode != XML_ERROR_NONE) {
869: eventEndPtr = eventPtr;
1.4 kahan 870: processor = errorProcessor;
1.1 frystyk 871: return 0;
872: }
873: XmlUpdatePosition(encoding, positionPtr, end, &position);
874: nLeftOver = s + len - end;
875: if (nLeftOver) {
876: if (buffer == 0 || nLeftOver > bufferLim - buffer) {
877: /* FIXME avoid integer overflow */
878: buffer = buffer == 0 ? malloc(len * 2) : realloc(buffer, len * 2);
1.4 kahan 879: /* FIXME storage leak if realloc fails */
1.1 frystyk 880: if (!buffer) {
881: errorCode = XML_ERROR_NO_MEMORY;
882: eventPtr = eventEndPtr = 0;
1.4 kahan 883: processor = errorProcessor;
1.1 frystyk 884: return 0;
885: }
886: bufferLim = buffer + len * 2;
887: }
888: memcpy(buffer, end, nLeftOver);
889: bufferPtr = buffer;
890: bufferEnd = buffer + nLeftOver;
891: }
892: return 1;
893: }
894: else {
895: memcpy(XML_GetBuffer(parser, len), s, len);
896: return XML_ParseBuffer(parser, len, isFinal);
897: }
898: }
899:
900: int XML_ParseBuffer(XML_Parser parser, int len, int isFinal)
901: {
902: const char *start = bufferPtr;
903: positionPtr = start;
904: bufferEnd += len;
905: parseEndByteIndex += len;
906: errorCode = processor(parser, start, parseEndPtr = bufferEnd,
907: isFinal ? (const char **)0 : &bufferPtr);
908: if (errorCode == XML_ERROR_NONE) {
909: if (!isFinal)
910: XmlUpdatePosition(encoding, positionPtr, bufferPtr, &position);
911: return 1;
912: }
913: else {
914: eventEndPtr = eventPtr;
1.4 kahan 915: processor = errorProcessor;
1.1 frystyk 916: return 0;
917: }
918: }
919:
920: void *XML_GetBuffer(XML_Parser parser, int len)
921: {
922: if (len > bufferLim - bufferEnd) {
923: /* FIXME avoid integer overflow */
924: int neededSize = len + (bufferEnd - bufferPtr);
925: if (neededSize <= bufferLim - buffer) {
926: memmove(buffer, bufferPtr, bufferEnd - bufferPtr);
927: bufferEnd = buffer + (bufferEnd - bufferPtr);
928: bufferPtr = buffer;
929: }
930: else {
931: char *newBuf;
932: int bufferSize = bufferLim - bufferPtr;
933: if (bufferSize == 0)
934: bufferSize = INIT_BUFFER_SIZE;
935: do {
936: bufferSize *= 2;
937: } while (bufferSize < neededSize);
938: newBuf = malloc(bufferSize);
939: if (newBuf == 0) {
940: errorCode = XML_ERROR_NO_MEMORY;
941: return 0;
942: }
943: bufferLim = newBuf + bufferSize;
944: if (bufferPtr) {
945: memcpy(newBuf, bufferPtr, bufferEnd - bufferPtr);
946: free(buffer);
947: }
948: bufferEnd = newBuf + (bufferEnd - bufferPtr);
949: bufferPtr = buffer = newBuf;
950: }
951: }
952: return bufferEnd;
953: }
954:
955: enum XML_Error XML_GetErrorCode(XML_Parser parser)
956: {
957: return errorCode;
958: }
959:
960: long XML_GetCurrentByteIndex(XML_Parser parser)
961: {
962: if (eventPtr)
963: return parseEndByteIndex - (parseEndPtr - eventPtr);
964: return -1;
965: }
966:
1.3 kahan 967: int XML_GetCurrentByteCount(XML_Parser parser)
968: {
969: if (eventEndPtr && eventPtr)
970: return eventEndPtr - eventPtr;
971: return 0;
972: }
973:
1.1 frystyk 974: int XML_GetCurrentLineNumber(XML_Parser parser)
975: {
976: if (eventPtr) {
977: XmlUpdatePosition(encoding, positionPtr, eventPtr, &position);
978: positionPtr = eventPtr;
979: }
980: return position.lineNumber + 1;
981: }
982:
983: int XML_GetCurrentColumnNumber(XML_Parser parser)
984: {
985: if (eventPtr) {
986: XmlUpdatePosition(encoding, positionPtr, eventPtr, &position);
987: positionPtr = eventPtr;
988: }
989: return position.columnNumber;
990: }
991:
992: void XML_DefaultCurrent(XML_Parser parser)
993: {
1.3 kahan 994: if (defaultHandler) {
995: if (openInternalEntities)
996: reportDefault(parser,
1.4 kahan 997: internalEncoding,
1.3 kahan 998: openInternalEntities->internalEventPtr,
999: openInternalEntities->internalEventEndPtr);
1000: else
1001: reportDefault(parser, encoding, eventPtr, eventEndPtr);
1002: }
1.1 frystyk 1003: }
1004:
1005: const XML_LChar *XML_ErrorString(int code)
1006: {
1007: static const XML_LChar *message[] = {
1008: 0,
1009: XML_T("out of memory"),
1010: XML_T("syntax error"),
1011: XML_T("no element found"),
1012: XML_T("not well-formed"),
1013: XML_T("unclosed token"),
1014: XML_T("unclosed token"),
1015: XML_T("mismatched tag"),
1016: XML_T("duplicate attribute"),
1017: XML_T("junk after document element"),
1018: XML_T("illegal parameter entity reference"),
1019: XML_T("undefined entity"),
1020: XML_T("recursive entity reference"),
1021: XML_T("asynchronous entity"),
1022: XML_T("reference to invalid character number"),
1023: XML_T("reference to binary entity"),
1024: XML_T("reference to external entity in attribute"),
1025: XML_T("xml processing instruction not at start of external entity"),
1026: XML_T("unknown encoding"),
1027: XML_T("encoding specified in XML declaration is incorrect"),
1028: XML_T("unclosed CDATA section"),
1.3 kahan 1029: XML_T("error in processing external entity reference"),
1030: XML_T("document is not standalone")
1.1 frystyk 1031: };
1032: if (code > 0 && code < sizeof(message)/sizeof(message[0]))
1033: return message[code];
1034: return 0;
1035: }
1036:
1037: static
1038: enum XML_Error contentProcessor(XML_Parser parser,
1039: const char *start,
1040: const char *end,
1041: const char **endPtr)
1042: {
1043: return doContent(parser, 0, encoding, start, end, endPtr);
1044: }
1045:
1046: static
1047: enum XML_Error externalEntityInitProcessor(XML_Parser parser,
1048: const char *start,
1049: const char *end,
1050: const char **endPtr)
1051: {
1052: enum XML_Error result = initializeEncoding(parser);
1053: if (result != XML_ERROR_NONE)
1054: return result;
1055: processor = externalEntityInitProcessor2;
1056: return externalEntityInitProcessor2(parser, start, end, endPtr);
1057: }
1058:
1059: static
1060: enum XML_Error externalEntityInitProcessor2(XML_Parser parser,
1061: const char *start,
1062: const char *end,
1063: const char **endPtr)
1064: {
1065: const char *next;
1066: int tok = XmlContentTok(encoding, start, end, &next);
1067: switch (tok) {
1068: case XML_TOK_BOM:
1069: start = next;
1070: break;
1071: case XML_TOK_PARTIAL:
1072: if (endPtr) {
1073: *endPtr = start;
1074: return XML_ERROR_NONE;
1075: }
1076: eventPtr = start;
1077: return XML_ERROR_UNCLOSED_TOKEN;
1078: case XML_TOK_PARTIAL_CHAR:
1079: if (endPtr) {
1080: *endPtr = start;
1081: return XML_ERROR_NONE;
1082: }
1083: eventPtr = start;
1084: return XML_ERROR_PARTIAL_CHAR;
1085: }
1086: processor = externalEntityInitProcessor3;
1087: return externalEntityInitProcessor3(parser, start, end, endPtr);
1088: }
1089:
1090: static
1091: enum XML_Error externalEntityInitProcessor3(XML_Parser parser,
1092: const char *start,
1093: const char *end,
1094: const char **endPtr)
1095: {
1096: const char *next;
1097: int tok = XmlContentTok(encoding, start, end, &next);
1098: switch (tok) {
1099: case XML_TOK_XML_DECL:
1100: {
1101: enum XML_Error result = processXmlDecl(parser, 1, start, next);
1102: if (result != XML_ERROR_NONE)
1103: return result;
1104: start = next;
1105: }
1106: break;
1107: case XML_TOK_PARTIAL:
1108: if (endPtr) {
1109: *endPtr = start;
1110: return XML_ERROR_NONE;
1111: }
1112: eventPtr = start;
1113: return XML_ERROR_UNCLOSED_TOKEN;
1114: case XML_TOK_PARTIAL_CHAR:
1115: if (endPtr) {
1116: *endPtr = start;
1117: return XML_ERROR_NONE;
1118: }
1119: eventPtr = start;
1120: return XML_ERROR_PARTIAL_CHAR;
1121: }
1122: processor = externalEntityContentProcessor;
1123: tagLevel = 1;
1124: return doContent(parser, 1, encoding, start, end, endPtr);
1125: }
1126:
1127: static
1128: enum XML_Error externalEntityContentProcessor(XML_Parser parser,
1129: const char *start,
1130: const char *end,
1131: const char **endPtr)
1132: {
1133: return doContent(parser, 1, encoding, start, end, endPtr);
1134: }
1135:
1136: static enum XML_Error
1137: doContent(XML_Parser parser,
1138: int startTagLevel,
1139: const ENCODING *enc,
1140: const char *s,
1141: const char *end,
1142: const char **nextPtr)
1143: {
1144: const char **eventPP;
1145: const char **eventEndPP;
1146: if (enc == encoding) {
1147: eventPP = &eventPtr;
1148: eventEndPP = &eventEndPtr;
1149: }
1.3 kahan 1150: else {
1151: eventPP = &(openInternalEntities->internalEventPtr);
1152: eventEndPP = &(openInternalEntities->internalEventEndPtr);
1153: }
1154: *eventPP = s;
1.1 frystyk 1155: for (;;) {
1.3 kahan 1156: const char *next = s; /* XmlContentTok doesn't always set the last arg */
1.1 frystyk 1157: int tok = XmlContentTok(enc, s, end, &next);
1158: *eventEndPP = next;
1159: switch (tok) {
1160: case XML_TOK_TRAILING_CR:
1161: if (nextPtr) {
1162: *nextPtr = s;
1163: return XML_ERROR_NONE;
1164: }
1165: *eventEndPP = end;
1166: if (characterDataHandler) {
1.3 kahan 1167: XML_Char c = 0xA;
1.1 frystyk 1168: characterDataHandler(handlerArg, &c, 1);
1169: }
1170: else if (defaultHandler)
1171: reportDefault(parser, enc, s, end);
1172: if (startTagLevel == 0)
1173: return XML_ERROR_NO_ELEMENTS;
1174: if (tagLevel != startTagLevel)
1175: return XML_ERROR_ASYNC_ENTITY;
1176: return XML_ERROR_NONE;
1177: case XML_TOK_NONE:
1178: if (nextPtr) {
1179: *nextPtr = s;
1180: return XML_ERROR_NONE;
1181: }
1182: if (startTagLevel > 0) {
1183: if (tagLevel != startTagLevel)
1184: return XML_ERROR_ASYNC_ENTITY;
1185: return XML_ERROR_NONE;
1186: }
1187: return XML_ERROR_NO_ELEMENTS;
1188: case XML_TOK_INVALID:
1189: *eventPP = next;
1190: return XML_ERROR_INVALID_TOKEN;
1191: case XML_TOK_PARTIAL:
1192: if (nextPtr) {
1193: *nextPtr = s;
1194: return XML_ERROR_NONE;
1195: }
1196: return XML_ERROR_UNCLOSED_TOKEN;
1197: case XML_TOK_PARTIAL_CHAR:
1198: if (nextPtr) {
1199: *nextPtr = s;
1200: return XML_ERROR_NONE;
1201: }
1202: return XML_ERROR_PARTIAL_CHAR;
1203: case XML_TOK_ENTITY_REF:
1204: {
1205: const XML_Char *name;
1206: ENTITY *entity;
1207: XML_Char ch = XmlPredefinedEntityName(enc,
1208: s + enc->minBytesPerChar,
1209: next - enc->minBytesPerChar);
1210: if (ch) {
1211: if (characterDataHandler)
1212: characterDataHandler(handlerArg, &ch, 1);
1213: else if (defaultHandler)
1214: reportDefault(parser, enc, s, next);
1215: break;
1216: }
1217: name = poolStoreString(&dtd.pool, enc,
1218: s + enc->minBytesPerChar,
1219: next - enc->minBytesPerChar);
1220: if (!name)
1221: return XML_ERROR_NO_MEMORY;
1222: entity = (ENTITY *)lookup(&dtd.generalEntities, name, 0);
1223: poolDiscard(&dtd.pool);
1224: if (!entity) {
1225: if (dtd.complete || dtd.standalone)
1226: return XML_ERROR_UNDEFINED_ENTITY;
1227: if (defaultHandler)
1228: reportDefault(parser, enc, s, next);
1229: break;
1230: }
1231: if (entity->open)
1232: return XML_ERROR_RECURSIVE_ENTITY_REF;
1233: if (entity->notation)
1234: return XML_ERROR_BINARY_ENTITY_REF;
1235: if (entity) {
1236: if (entity->textPtr) {
1237: enum XML_Error result;
1.3 kahan 1238: OPEN_INTERNAL_ENTITY openEntity;
1239: if (defaultHandler && !defaultExpandInternalEntities) {
1.1 frystyk 1240: reportDefault(parser, enc, s, next);
1241: break;
1242: }
1243: entity->open = 1;
1.3 kahan 1244: openEntity.next = openInternalEntities;
1245: openInternalEntities = &openEntity;
1246: openEntity.entity = entity;
1247: openEntity.internalEventPtr = 0;
1248: openEntity.internalEventEndPtr = 0;
1.1 frystyk 1249: result = doContent(parser,
1250: tagLevel,
1.4 kahan 1251: internalEncoding,
1.1 frystyk 1252: (char *)entity->textPtr,
1253: (char *)(entity->textPtr + entity->textLen),
1254: 0);
1255: entity->open = 0;
1.3 kahan 1256: openInternalEntities = openEntity.next;
1.1 frystyk 1257: if (result)
1258: return result;
1259: }
1260: else if (externalEntityRefHandler) {
1.3 kahan 1261: const XML_Char *context;
1.1 frystyk 1262: entity->open = 1;
1.3 kahan 1263: context = getContext(parser);
1.1 frystyk 1264: entity->open = 0;
1.3 kahan 1265: if (!context)
1.1 frystyk 1266: return XML_ERROR_NO_MEMORY;
1.3 kahan 1267: if (!externalEntityRefHandler(externalEntityRefHandlerArg,
1268: context,
1.4 kahan 1269: entity->base,
1.3 kahan 1270: entity->systemId,
1271: entity->publicId))
1.1 frystyk 1272: return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
1273: poolDiscard(&tempPool);
1274: }
1275: else if (defaultHandler)
1276: reportDefault(parser, enc, s, next);
1277: }
1278: break;
1279: }
1280: case XML_TOK_START_TAG_WITH_ATTS:
1281: if (!startElementHandler) {
1.3 kahan 1282: enum XML_Error result = storeAtts(parser, enc, s, 0, 0);
1.1 frystyk 1283: if (result)
1284: return result;
1285: }
1286: /* fall through */
1287: case XML_TOK_START_TAG_NO_ATTS:
1288: {
1289: TAG *tag;
1290: if (freeTagList) {
1291: tag = freeTagList;
1292: freeTagList = freeTagList->parent;
1293: }
1294: else {
1295: tag = malloc(sizeof(TAG));
1296: if (!tag)
1297: return XML_ERROR_NO_MEMORY;
1298: tag->buf = malloc(INIT_TAG_BUF_SIZE);
1299: if (!tag->buf)
1300: return XML_ERROR_NO_MEMORY;
1301: tag->bufEnd = tag->buf + INIT_TAG_BUF_SIZE;
1302: }
1.3 kahan 1303: tag->bindings = 0;
1.1 frystyk 1304: tag->parent = tagStack;
1305: tagStack = tag;
1.3 kahan 1306: tag->name.localPart = 0;
1.1 frystyk 1307: tag->rawName = s + enc->minBytesPerChar;
1308: tag->rawNameLength = XmlNameLength(enc, tag->rawName);
1309: if (nextPtr) {
1.3 kahan 1310: /* Need to guarantee that:
1311: tag->buf + ROUND_UP(tag->rawNameLength, sizeof(XML_Char)) <= tag->bufEnd - sizeof(XML_Char) */
1312: if (tag->rawNameLength + (int)(sizeof(XML_Char) - 1) + (int)sizeof(XML_Char) > tag->bufEnd - tag->buf) {
1.1 frystyk 1313: int bufSize = tag->rawNameLength * 4;
1314: bufSize = ROUND_UP(bufSize, sizeof(XML_Char));
1315: tag->buf = realloc(tag->buf, bufSize);
1316: if (!tag->buf)
1317: return XML_ERROR_NO_MEMORY;
1318: tag->bufEnd = tag->buf + bufSize;
1319: }
1320: memcpy(tag->buf, tag->rawName, tag->rawNameLength);
1321: tag->rawName = tag->buf;
1322: }
1323: ++tagLevel;
1324: if (startElementHandler) {
1325: enum XML_Error result;
1326: XML_Char *toPtr;
1327: for (;;) {
1328: const char *rawNameEnd = tag->rawName + tag->rawNameLength;
1329: const char *fromPtr = tag->rawName;
1330: int bufSize;
1331: if (nextPtr)
1332: toPtr = (XML_Char *)(tag->buf + ROUND_UP(tag->rawNameLength, sizeof(XML_Char)));
1333: else
1334: toPtr = (XML_Char *)tag->buf;
1.3 kahan 1335: tag->name.str = toPtr;
1.1 frystyk 1336: XmlConvert(enc,
1337: &fromPtr, rawNameEnd,
1338: (ICHAR **)&toPtr, (ICHAR *)tag->bufEnd - 1);
1339: if (fromPtr == rawNameEnd)
1340: break;
1341: bufSize = (tag->bufEnd - tag->buf) << 1;
1342: tag->buf = realloc(tag->buf, bufSize);
1343: if (!tag->buf)
1344: return XML_ERROR_NO_MEMORY;
1345: tag->bufEnd = tag->buf + bufSize;
1346: if (nextPtr)
1347: tag->rawName = tag->buf;
1348: }
1349: *toPtr = XML_T('\0');
1.3 kahan 1350: result = storeAtts(parser, enc, s, &(tag->name), &(tag->bindings));
1.1 frystyk 1351: if (result)
1352: return result;
1.3 kahan 1353: startElementHandler(handlerArg, tag->name.str, (const XML_Char **)atts);
1.1 frystyk 1354: poolClear(&tempPool);
1355: }
1356: else {
1.3 kahan 1357: tag->name.str = 0;
1.1 frystyk 1358: if (defaultHandler)
1359: reportDefault(parser, enc, s, next);
1360: }
1361: break;
1362: }
1363: case XML_TOK_EMPTY_ELEMENT_WITH_ATTS:
1364: if (!startElementHandler) {
1.3 kahan 1365: enum XML_Error result = storeAtts(parser, enc, s, 0, 0);
1.1 frystyk 1366: if (result)
1367: return result;
1368: }
1369: /* fall through */
1370: case XML_TOK_EMPTY_ELEMENT_NO_ATTS:
1371: if (startElementHandler || endElementHandler) {
1372: const char *rawName = s + enc->minBytesPerChar;
1.3 kahan 1373: enum XML_Error result;
1374: BINDING *bindings = 0;
1375: TAG_NAME name;
1376: name.str = poolStoreString(&tempPool, enc, rawName,
1377: rawName + XmlNameLength(enc, rawName));
1378: if (!name.str)
1.1 frystyk 1379: return XML_ERROR_NO_MEMORY;
1380: poolFinish(&tempPool);
1.3 kahan 1381: result = storeAtts(parser, enc, s, &name, &bindings);
1382: if (result)
1383: return result;
1384: poolFinish(&tempPool);
1385: if (startElementHandler)
1386: startElementHandler(handlerArg, name.str, (const XML_Char **)atts);
1.1 frystyk 1387: if (endElementHandler) {
1388: if (startElementHandler)
1389: *eventPP = *eventEndPP;
1.3 kahan 1390: endElementHandler(handlerArg, name.str);
1.1 frystyk 1391: }
1392: poolClear(&tempPool);
1.3 kahan 1393: while (bindings) {
1394: BINDING *b = bindings;
1395: if (endNamespaceDeclHandler)
1396: endNamespaceDeclHandler(handlerArg, b->prefix->name);
1397: bindings = bindings->nextTagBinding;
1398: b->nextTagBinding = freeBindingList;
1399: freeBindingList = b;
1400: b->prefix->binding = b->prevPrefixBinding;
1401: }
1.1 frystyk 1402: }
1403: else if (defaultHandler)
1404: reportDefault(parser, enc, s, next);
1405: if (tagLevel == 0)
1406: return epilogProcessor(parser, next, end, nextPtr);
1407: break;
1408: case XML_TOK_END_TAG:
1409: if (tagLevel == startTagLevel)
1410: return XML_ERROR_ASYNC_ENTITY;
1411: else {
1412: int len;
1413: const char *rawName;
1414: TAG *tag = tagStack;
1415: tagStack = tag->parent;
1416: tag->parent = freeTagList;
1417: freeTagList = tag;
1418: rawName = s + enc->minBytesPerChar*2;
1419: len = XmlNameLength(enc, rawName);
1420: if (len != tag->rawNameLength
1421: || memcmp(tag->rawName, rawName, len) != 0) {
1422: *eventPP = rawName;
1423: return XML_ERROR_TAG_MISMATCH;
1424: }
1425: --tagLevel;
1.3 kahan 1426: if (endElementHandler && tag->name.str) {
1427: if (tag->name.localPart) {
1428: XML_Char *to = (XML_Char *)tag->name.str + tag->name.uriLen;
1429: const XML_Char *from = tag->name.localPart;
1430: while ((*to++ = *from++) != 0)
1431: ;
1.1 frystyk 1432: }
1.3 kahan 1433: endElementHandler(handlerArg, tag->name.str);
1.1 frystyk 1434: }
1435: else if (defaultHandler)
1436: reportDefault(parser, enc, s, next);
1.3 kahan 1437: while (tag->bindings) {
1438: BINDING *b = tag->bindings;
1439: if (endNamespaceDeclHandler)
1440: endNamespaceDeclHandler(handlerArg, b->prefix->name);
1441: tag->bindings = tag->bindings->nextTagBinding;
1442: b->nextTagBinding = freeBindingList;
1443: freeBindingList = b;
1444: b->prefix->binding = b->prevPrefixBinding;
1445: }
1.1 frystyk 1446: if (tagLevel == 0)
1447: return epilogProcessor(parser, next, end, nextPtr);
1448: }
1449: break;
1450: case XML_TOK_CHAR_REF:
1451: {
1452: int n = XmlCharRefNumber(enc, s);
1453: if (n < 0)
1454: return XML_ERROR_BAD_CHAR_REF;
1455: if (characterDataHandler) {
1456: XML_Char buf[XML_ENCODE_MAX];
1457: characterDataHandler(handlerArg, buf, XmlEncode(n, (ICHAR *)buf));
1458: }
1459: else if (defaultHandler)
1460: reportDefault(parser, enc, s, next);
1461: }
1462: break;
1463: case XML_TOK_XML_DECL:
1464: return XML_ERROR_MISPLACED_XML_PI;
1465: case XML_TOK_DATA_NEWLINE:
1466: if (characterDataHandler) {
1.3 kahan 1467: XML_Char c = 0xA;
1.1 frystyk 1468: characterDataHandler(handlerArg, &c, 1);
1469: }
1470: else if (defaultHandler)
1471: reportDefault(parser, enc, s, next);
1472: break;
1473: case XML_TOK_CDATA_SECT_OPEN:
1474: {
1475: enum XML_Error result;
1.3 kahan 1476: if (startCdataSectionHandler)
1477: startCdataSectionHandler(handlerArg);
1478: #if 0
1479: /* Suppose you doing a transformation on a document that involves
1480: changing only the character data. You set up a defaultHandler
1481: and a characterDataHandler. The defaultHandler simply copies
1482: characters through. The characterDataHandler does the transformation
1483: and writes the characters out escaping them as necessary. This case
1484: will fail to work if we leave out the following two lines (because &
1485: and < inside CDATA sections will be incorrectly escaped).
1486:
1487: However, now we have a start/endCdataSectionHandler, so it seems
1488: easier to let the user deal with this. */
1489:
1490: else if (characterDataHandler)
1.1 frystyk 1491: characterDataHandler(handlerArg, dataBuf, 0);
1.3 kahan 1492: #endif
1.1 frystyk 1493: else if (defaultHandler)
1494: reportDefault(parser, enc, s, next);
1495: result = doCdataSection(parser, enc, &next, end, nextPtr);
1496: if (!next) {
1497: processor = cdataSectionProcessor;
1498: return result;
1499: }
1500: }
1501: break;
1502: case XML_TOK_TRAILING_RSQB:
1503: if (nextPtr) {
1504: *nextPtr = s;
1505: return XML_ERROR_NONE;
1506: }
1507: if (characterDataHandler) {
1508: if (MUST_CONVERT(enc, s)) {
1509: ICHAR *dataPtr = (ICHAR *)dataBuf;
1510: XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)dataBufEnd);
1511: characterDataHandler(handlerArg, dataBuf, dataPtr - (ICHAR *)dataBuf);
1512: }
1513: else
1514: characterDataHandler(handlerArg,
1515: (XML_Char *)s,
1516: (XML_Char *)end - (XML_Char *)s);
1517: }
1518: else if (defaultHandler)
1519: reportDefault(parser, enc, s, end);
1520: if (startTagLevel == 0) {
1521: *eventPP = end;
1522: return XML_ERROR_NO_ELEMENTS;
1523: }
1524: if (tagLevel != startTagLevel) {
1525: *eventPP = end;
1526: return XML_ERROR_ASYNC_ENTITY;
1527: }
1528: return XML_ERROR_NONE;
1529: case XML_TOK_DATA_CHARS:
1530: if (characterDataHandler) {
1531: if (MUST_CONVERT(enc, s)) {
1532: for (;;) {
1533: ICHAR *dataPtr = (ICHAR *)dataBuf;
1534: XmlConvert(enc, &s, next, &dataPtr, (ICHAR *)dataBufEnd);
1535: *eventEndPP = s;
1536: characterDataHandler(handlerArg, dataBuf, dataPtr - (ICHAR *)dataBuf);
1537: if (s == next)
1538: break;
1539: *eventPP = s;
1540: }
1541: }
1542: else
1543: characterDataHandler(handlerArg,
1544: (XML_Char *)s,
1545: (XML_Char *)next - (XML_Char *)s);
1546: }
1547: else if (defaultHandler)
1548: reportDefault(parser, enc, s, next);
1549: break;
1550: case XML_TOK_PI:
1551: if (!reportProcessingInstruction(parser, enc, s, next))
1552: return XML_ERROR_NO_MEMORY;
1553: break;
1.3 kahan 1554: case XML_TOK_COMMENT:
1555: if (!reportComment(parser, enc, s, next))
1556: return XML_ERROR_NO_MEMORY;
1557: break;
1.1 frystyk 1558: default:
1559: if (defaultHandler)
1560: reportDefault(parser, enc, s, next);
1561: break;
1562: }
1563: *eventPP = s = next;
1564: }
1565: /* not reached */
1566: }
1567:
1.3 kahan 1568: /* If tagNamePtr is non-null, build a real list of attributes,
1.1 frystyk 1569: otherwise just check the attributes for well-formedness. */
1570:
1571: static enum XML_Error storeAtts(XML_Parser parser, const ENCODING *enc,
1.4 kahan 1572: const char *attStr, TAG_NAME *tagNamePtr,
1.3 kahan 1573: BINDING **bindingsPtr)
1.1 frystyk 1574: {
1575: ELEMENT_TYPE *elementType = 0;
1576: int nDefaultAtts = 0;
1.4 kahan 1577: const XML_Char **appAtts; /* the attribute list to pass to the application */
1.3 kahan 1578: int attIndex = 0;
1.1 frystyk 1579: int i;
1580: int n;
1.3 kahan 1581: int nPrefixes = 0;
1582: BINDING *binding;
1583: const XML_Char *localPart;
1584:
1.4 kahan 1585: /* lookup the element type name */
1.3 kahan 1586: if (tagNamePtr) {
1587: elementType = (ELEMENT_TYPE *)lookup(&dtd.elementTypes, tagNamePtr->str, 0);
1588: if (!elementType) {
1589: tagNamePtr->str = poolCopyString(&dtd.pool, tagNamePtr->str);
1590: if (!tagNamePtr->str)
1591: return XML_ERROR_NO_MEMORY;
1592: elementType = (ELEMENT_TYPE *)lookup(&dtd.elementTypes, tagNamePtr->str, sizeof(ELEMENT_TYPE));
1593: if (!elementType)
1594: return XML_ERROR_NO_MEMORY;
1595: if (ns && !setElementTypePrefix(parser, elementType))
1596: return XML_ERROR_NO_MEMORY;
1597: }
1598: nDefaultAtts = elementType->nDefaultAtts;
1.1 frystyk 1599: }
1.4 kahan 1600: /* get the attributes from the tokenizer */
1601: n = XmlGetAttributes(enc, attStr, attsSize, atts);
1.1 frystyk 1602: if (n + nDefaultAtts > attsSize) {
1603: int oldAttsSize = attsSize;
1604: attsSize = n + nDefaultAtts + INIT_ATTS_SIZE;
1605: atts = realloc((void *)atts, attsSize * sizeof(ATTRIBUTE));
1606: if (!atts)
1607: return XML_ERROR_NO_MEMORY;
1608: if (n > oldAttsSize)
1.4 kahan 1609: XmlGetAttributes(enc, attStr, n, atts);
1.1 frystyk 1610: }
1611: appAtts = (const XML_Char **)atts;
1612: for (i = 0; i < n; i++) {
1.4 kahan 1613: /* add the name and value to the attribute list */
1.1 frystyk 1614: ATTRIBUTE_ID *attId = getAttributeId(parser, enc, atts[i].name,
1.3 kahan 1615: atts[i].name
1616: + XmlNameLength(enc, atts[i].name));
1.1 frystyk 1617: if (!attId)
1618: return XML_ERROR_NO_MEMORY;
1.4 kahan 1619: /* detect duplicate attributes */
1.1 frystyk 1620: if ((attId->name)[-1]) {
1621: if (enc == encoding)
1622: eventPtr = atts[i].name;
1623: return XML_ERROR_DUPLICATE_ATTRIBUTE;
1624: }
1625: (attId->name)[-1] = 1;
1.3 kahan 1626: appAtts[attIndex++] = attId->name;
1.1 frystyk 1627: if (!atts[i].normalized) {
1628: enum XML_Error result;
1629: int isCdata = 1;
1630:
1.4 kahan 1631: /* figure out whether declared as other than CDATA */
1.1 frystyk 1632: if (attId->maybeTokenized) {
1633: int j;
1634: for (j = 0; j < nDefaultAtts; j++) {
1635: if (attId == elementType->defaultAtts[j].id) {
1636: isCdata = elementType->defaultAtts[j].isCdata;
1637: break;
1638: }
1639: }
1640: }
1641:
1.4 kahan 1642: /* normalize the attribute value */
1.1 frystyk 1643: result = storeAttributeValue(parser, enc, isCdata,
1644: atts[i].valuePtr, atts[i].valueEnd,
1645: &tempPool);
1646: if (result)
1647: return result;
1.3 kahan 1648: if (tagNamePtr) {
1649: appAtts[attIndex] = poolStart(&tempPool);
1.1 frystyk 1650: poolFinish(&tempPool);
1651: }
1652: else
1653: poolDiscard(&tempPool);
1654: }
1.3 kahan 1655: else if (tagNamePtr) {
1.4 kahan 1656: /* the value did not need normalizing */
1.3 kahan 1657: appAtts[attIndex] = poolStoreString(&tempPool, enc, atts[i].valuePtr, atts[i].valueEnd);
1658: if (appAtts[attIndex] == 0)
1.1 frystyk 1659: return XML_ERROR_NO_MEMORY;
1660: poolFinish(&tempPool);
1661: }
1.4 kahan 1662: /* handle prefixed attribute names */
1.3 kahan 1663: if (attId->prefix && tagNamePtr) {
1664: if (attId->xmlns) {
1.4 kahan 1665: /* deal with namespace declarations here */
1.3 kahan 1666: if (!addBinding(parser, attId->prefix, attId, appAtts[attIndex], bindingsPtr))
1667: return XML_ERROR_NO_MEMORY;
1668: --attIndex;
1669: }
1670: else {
1.4 kahan 1671: /* deal with other prefixed names later */
1.3 kahan 1672: attIndex++;
1673: nPrefixes++;
1674: (attId->name)[-1] = 2;
1675: }
1676: }
1677: else
1678: attIndex++;
1.1 frystyk 1679: }
1.3 kahan 1680: nSpecifiedAtts = attIndex;
1.4 kahan 1681: /* do attribute defaulting */
1.3 kahan 1682: if (tagNamePtr) {
1.1 frystyk 1683: int j;
1684: for (j = 0; j < nDefaultAtts; j++) {
1685: const DEFAULT_ATTRIBUTE *da = elementType->defaultAtts + j;
1686: if (!(da->id->name)[-1] && da->value) {
1.3 kahan 1687: if (da->id->prefix) {
1688: if (da->id->xmlns) {
1689: if (!addBinding(parser, da->id->prefix, da->id, da->value, bindingsPtr))
1690: return XML_ERROR_NO_MEMORY;
1691: }
1692: else {
1693: (da->id->name)[-1] = 2;
1694: nPrefixes++;
1695: appAtts[attIndex++] = da->id->name;
1696: appAtts[attIndex++] = da->value;
1697: }
1698: }
1699: else {
1700: (da->id->name)[-1] = 1;
1701: appAtts[attIndex++] = da->id->name;
1702: appAtts[attIndex++] = da->value;
1703: }
1704: }
1705: }
1706: appAtts[attIndex] = 0;
1707: }
1708: i = 0;
1709: if (nPrefixes) {
1.4 kahan 1710: /* expand prefixed attribute names */
1.3 kahan 1711: for (; i < attIndex; i += 2) {
1712: if (appAtts[i][-1] == 2) {
1713: ATTRIBUTE_ID *id;
1714: ((XML_Char *)(appAtts[i]))[-1] = 0;
1715: id = (ATTRIBUTE_ID *)lookup(&dtd.attributeIds, appAtts[i], 0);
1716: if (id->prefix->binding) {
1717: int j;
1718: const BINDING *b = id->prefix->binding;
1719: const XML_Char *s = appAtts[i];
1720: for (j = 0; j < b->uriLen; j++) {
1721: if (!poolAppendChar(&tempPool, b->uri[j]))
1722: return XML_ERROR_NO_MEMORY;
1723: }
1724: while (*s++ != ':')
1725: ;
1726: do {
1727: if (!poolAppendChar(&tempPool, *s))
1728: return XML_ERROR_NO_MEMORY;
1729: } while (*s++);
1730: appAtts[i] = poolStart(&tempPool);
1731: poolFinish(&tempPool);
1732: }
1733: if (!--nPrefixes)
1734: break;
1.1 frystyk 1735: }
1.3 kahan 1736: else
1737: ((XML_Char *)(appAtts[i]))[-1] = 0;
1.1 frystyk 1738: }
1739: }
1.4 kahan 1740: /* clear the flags that say whether attributes were specified */
1.3 kahan 1741: for (; i < attIndex; i += 2)
1742: ((XML_Char *)(appAtts[i]))[-1] = 0;
1743: if (!tagNamePtr)
1744: return XML_ERROR_NONE;
1745: for (binding = *bindingsPtr; binding; binding = binding->nextTagBinding)
1746: binding->attId->name[-1] = 0;
1.4 kahan 1747: /* expand the element type name */
1.3 kahan 1748: if (elementType->prefix) {
1749: binding = elementType->prefix->binding;
1750: if (!binding)
1751: return XML_ERROR_NONE;
1752: localPart = tagNamePtr->str;
1753: while (*localPart++ != XML_T(':'))
1754: ;
1755: }
1756: else if (dtd.defaultPrefix.binding) {
1757: binding = dtd.defaultPrefix.binding;
1758: localPart = tagNamePtr->str;
1759: }
1760: else
1761: return XML_ERROR_NONE;
1762: tagNamePtr->localPart = localPart;
1763: tagNamePtr->uriLen = binding->uriLen;
1764: i = binding->uriLen;
1765: do {
1766: if (i == binding->uriAlloc) {
1.4 kahan 1767: binding->uri = realloc(binding->uri, (binding->uriAlloc *= 2) * sizeof(XML_Char));
1.3 kahan 1768: if (!binding->uri)
1769: return XML_ERROR_NO_MEMORY;
1770: }
1771: binding->uri[i++] = *localPart;
1772: } while (*localPart++);
1773: tagNamePtr->str = binding->uri;
1.1 frystyk 1774: return XML_ERROR_NONE;
1775: }
1776:
1.3 kahan 1777: static
1778: int addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId, const XML_Char *uri, BINDING **bindingsPtr)
1779: {
1780: BINDING *b;
1781: int len;
1782: for (len = 0; uri[len]; len++)
1783: ;
1784: if (namespaceSeparator)
1785: len++;
1786: if (freeBindingList) {
1787: b = freeBindingList;
1788: if (len > b->uriAlloc) {
1.4 kahan 1789: b->uri = realloc(b->uri, sizeof(XML_Char) * (len + EXPAND_SPARE));
1.3 kahan 1790: if (!b->uri)
1791: return 0;
1792: b->uriAlloc = len + EXPAND_SPARE;
1793: }
1794: freeBindingList = b->nextTagBinding;
1795: }
1796: else {
1797: b = malloc(sizeof(BINDING));
1798: if (!b)
1799: return 0;
1.4 kahan 1800: b->uri = malloc(sizeof(XML_Char) * (len + EXPAND_SPARE));
1.3 kahan 1801: if (!b->uri) {
1802: free(b);
1803: return 0;
1804: }
1.4 kahan 1805: b->uriAlloc = len + EXPAND_SPARE;
1.3 kahan 1806: }
1807: b->uriLen = len;
1808: memcpy(b->uri, uri, len * sizeof(XML_Char));
1809: if (namespaceSeparator)
1810: b->uri[len - 1] = namespaceSeparator;
1811: b->prefix = prefix;
1812: b->attId = attId;
1813: b->prevPrefixBinding = prefix->binding;
1814: if (*uri == XML_T('\0') && prefix == &dtd.defaultPrefix)
1815: prefix->binding = 0;
1816: else
1817: prefix->binding = b;
1818: b->nextTagBinding = *bindingsPtr;
1819: *bindingsPtr = b;
1820: if (startNamespaceDeclHandler)
1821: startNamespaceDeclHandler(handlerArg, prefix->name,
1822: prefix->binding ? uri : 0);
1823: return 1;
1824: }
1825:
1.1 frystyk 1826: /* The idea here is to avoid using stack for each CDATA section when
1827: the whole file is parsed with one call. */
1828:
1829: static
1830: enum XML_Error cdataSectionProcessor(XML_Parser parser,
1831: const char *start,
1832: const char *end,
1833: const char **endPtr)
1834: {
1835: enum XML_Error result = doCdataSection(parser, encoding, &start, end, endPtr);
1836: if (start) {
1837: processor = contentProcessor;
1838: return contentProcessor(parser, start, end, endPtr);
1839: }
1840: return result;
1841: }
1842:
1843: /* startPtr gets set to non-null if the section is closed, and to null if
1844: the section is not yet closed. */
1845:
1846: static
1847: enum XML_Error doCdataSection(XML_Parser parser,
1848: const ENCODING *enc,
1849: const char **startPtr,
1850: const char *end,
1851: const char **nextPtr)
1852: {
1853: const char *s = *startPtr;
1854: const char **eventPP;
1855: const char **eventEndPP;
1856: if (enc == encoding) {
1857: eventPP = &eventPtr;
1858: *eventPP = s;
1859: eventEndPP = &eventEndPtr;
1860: }
1.3 kahan 1861: else {
1862: eventPP = &(openInternalEntities->internalEventPtr);
1863: eventEndPP = &(openInternalEntities->internalEventEndPtr);
1864: }
1865: *eventPP = s;
1.1 frystyk 1866: *startPtr = 0;
1867: for (;;) {
1868: const char *next;
1869: int tok = XmlCdataSectionTok(enc, s, end, &next);
1870: *eventEndPP = next;
1871: switch (tok) {
1872: case XML_TOK_CDATA_SECT_CLOSE:
1.3 kahan 1873: if (endCdataSectionHandler)
1874: endCdataSectionHandler(handlerArg);
1875: #if 0
1876: /* see comment under XML_TOK_CDATA_SECT_OPEN */
1877: else if (characterDataHandler)
1.1 frystyk 1878: characterDataHandler(handlerArg, dataBuf, 0);
1.3 kahan 1879: #endif
1.1 frystyk 1880: else if (defaultHandler)
1881: reportDefault(parser, enc, s, next);
1882: *startPtr = next;
1883: return XML_ERROR_NONE;
1884: case XML_TOK_DATA_NEWLINE:
1885: if (characterDataHandler) {
1.3 kahan 1886: XML_Char c = 0xA;
1.1 frystyk 1887: characterDataHandler(handlerArg, &c, 1);
1888: }
1889: else if (defaultHandler)
1890: reportDefault(parser, enc, s, next);
1891: break;
1892: case XML_TOK_DATA_CHARS:
1893: if (characterDataHandler) {
1894: if (MUST_CONVERT(enc, s)) {
1895: for (;;) {
1896: ICHAR *dataPtr = (ICHAR *)dataBuf;
1897: XmlConvert(enc, &s, next, &dataPtr, (ICHAR *)dataBufEnd);
1898: *eventEndPP = next;
1899: characterDataHandler(handlerArg, dataBuf, dataPtr - (ICHAR *)dataBuf);
1900: if (s == next)
1901: break;
1902: *eventPP = s;
1903: }
1904: }
1905: else
1906: characterDataHandler(handlerArg,
1907: (XML_Char *)s,
1908: (XML_Char *)next - (XML_Char *)s);
1909: }
1910: else if (defaultHandler)
1911: reportDefault(parser, enc, s, next);
1912: break;
1913: case XML_TOK_INVALID:
1914: *eventPP = next;
1915: return XML_ERROR_INVALID_TOKEN;
1916: case XML_TOK_PARTIAL_CHAR:
1917: if (nextPtr) {
1918: *nextPtr = s;
1919: return XML_ERROR_NONE;
1920: }
1921: return XML_ERROR_PARTIAL_CHAR;
1922: case XML_TOK_PARTIAL:
1923: case XML_TOK_NONE:
1924: if (nextPtr) {
1925: *nextPtr = s;
1926: return XML_ERROR_NONE;
1927: }
1928: return XML_ERROR_UNCLOSED_CDATA_SECTION;
1929: default:
1930: abort();
1931: }
1932: *eventPP = s = next;
1933: }
1934: /* not reached */
1935: }
1936:
1.4 kahan 1937: #ifdef XML_DTD
1938:
1939: /* The idea here is to avoid using stack for each IGNORE section when
1940: the whole file is parsed with one call. */
1941:
1942: static
1943: enum XML_Error ignoreSectionProcessor(XML_Parser parser,
1944: const char *start,
1945: const char *end,
1946: const char **endPtr)
1947: {
1948: enum XML_Error result = doIgnoreSection(parser, encoding, &start, end, endPtr);
1949: if (start) {
1950: processor = prologProcessor;
1951: return prologProcessor(parser, start, end, endPtr);
1952: }
1953: return result;
1954: }
1955:
1956: /* startPtr gets set to non-null is the section is closed, and to null if
1957: the section is not yet closed. */
1958:
1959: static
1960: enum XML_Error doIgnoreSection(XML_Parser parser,
1961: const ENCODING *enc,
1962: const char **startPtr,
1963: const char *end,
1964: const char **nextPtr)
1965: {
1966: const char *next;
1967: int tok;
1968: const char *s = *startPtr;
1969: const char **eventPP;
1970: const char **eventEndPP;
1971: if (enc == encoding) {
1972: eventPP = &eventPtr;
1973: *eventPP = s;
1974: eventEndPP = &eventEndPtr;
1975: }
1976: else {
1977: eventPP = &(openInternalEntities->internalEventPtr);
1978: eventEndPP = &(openInternalEntities->internalEventEndPtr);
1979: }
1980: *eventPP = s;
1981: *startPtr = 0;
1982: tok = XmlIgnoreSectionTok(enc, s, end, &next);
1983: *eventEndPP = next;
1984: switch (tok) {
1985: case XML_TOK_IGNORE_SECT:
1986: if (defaultHandler)
1987: reportDefault(parser, enc, s, next);
1988: *startPtr = next;
1989: return XML_ERROR_NONE;
1990: case XML_TOK_INVALID:
1991: *eventPP = next;
1992: return XML_ERROR_INVALID_TOKEN;
1993: case XML_TOK_PARTIAL_CHAR:
1994: if (nextPtr) {
1995: *nextPtr = s;
1996: return XML_ERROR_NONE;
1997: }
1998: return XML_ERROR_PARTIAL_CHAR;
1999: case XML_TOK_PARTIAL:
2000: case XML_TOK_NONE:
2001: if (nextPtr) {
2002: *nextPtr = s;
2003: return XML_ERROR_NONE;
2004: }
2005: return XML_ERROR_SYNTAX; /* XML_ERROR_UNCLOSED_IGNORE_SECTION */
2006: default:
2007: abort();
2008: }
2009: /* not reached */
2010: }
2011:
2012: #endif /* XML_DTD */
2013:
1.1 frystyk 2014: static enum XML_Error
2015: initializeEncoding(XML_Parser parser)
2016: {
2017: const char *s;
2018: #ifdef XML_UNICODE
2019: char encodingBuf[128];
2020: if (!protocolEncodingName)
2021: s = 0;
2022: else {
2023: int i;
2024: for (i = 0; protocolEncodingName[i]; i++) {
2025: if (i == sizeof(encodingBuf) - 1
2026: || protocolEncodingName[i] >= 0x80
2027: || protocolEncodingName[i] < 0) {
2028: encodingBuf[0] = '\0';
2029: break;
2030: }
2031: encodingBuf[i] = (char)protocolEncodingName[i];
2032: }
2033: encodingBuf[i] = '\0';
2034: s = encodingBuf;
2035: }
2036: #else
2037: s = protocolEncodingName;
2038: #endif
1.3 kahan 2039: if ((ns ? XmlInitEncodingNS : XmlInitEncoding)(&initEncoding, &encoding, s))
1.1 frystyk 2040: return XML_ERROR_NONE;
2041: return handleUnknownEncoding(parser, protocolEncodingName);
2042: }
2043:
2044: static enum XML_Error
2045: processXmlDecl(XML_Parser parser, int isGeneralTextEntity,
2046: const char *s, const char *next)
2047: {
2048: const char *encodingName = 0;
2049: const ENCODING *newEncoding = 0;
2050: const char *version;
2051: int standalone = -1;
1.3 kahan 2052: if (!(ns
2053: ? XmlParseXmlDeclNS
2054: : XmlParseXmlDecl)(isGeneralTextEntity,
2055: encoding,
2056: s,
2057: next,
2058: &eventPtr,
2059: &version,
2060: &encodingName,
2061: &newEncoding,
2062: &standalone))
1.1 frystyk 2063: return XML_ERROR_SYNTAX;
1.4 kahan 2064: if (!isGeneralTextEntity && standalone == 1) {
1.1 frystyk 2065: dtd.standalone = 1;
1.4 kahan 2066: #ifdef XML_DTD
2067: if (paramEntityParsing == XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE)
2068: paramEntityParsing = XML_PARAM_ENTITY_PARSING_NEVER;
2069: #endif /* XML_DTD */
2070: }
1.1 frystyk 2071: if (defaultHandler)
2072: reportDefault(parser, encoding, s, next);
2073: if (!protocolEncodingName) {
2074: if (newEncoding) {
2075: if (newEncoding->minBytesPerChar != encoding->minBytesPerChar) {
2076: eventPtr = encodingName;
2077: return XML_ERROR_INCORRECT_ENCODING;
2078: }
2079: encoding = newEncoding;
2080: }
2081: else if (encodingName) {
2082: enum XML_Error result;
2083: const XML_Char *s = poolStoreString(&tempPool,
2084: encoding,
2085: encodingName,
2086: encodingName
2087: + XmlNameLength(encoding, encodingName));
2088: if (!s)
2089: return XML_ERROR_NO_MEMORY;
2090: result = handleUnknownEncoding(parser, s);
2091: poolDiscard(&tempPool);
2092: if (result == XML_ERROR_UNKNOWN_ENCODING)
2093: eventPtr = encodingName;
2094: return result;
2095: }
2096: }
2097: return XML_ERROR_NONE;
2098: }
2099:
2100: static enum XML_Error
2101: handleUnknownEncoding(XML_Parser parser, const XML_Char *encodingName)
2102: {
2103: if (unknownEncodingHandler) {
2104: XML_Encoding info;
2105: int i;
2106: for (i = 0; i < 256; i++)
2107: info.map[i] = -1;
2108: info.convert = 0;
2109: info.data = 0;
2110: info.release = 0;
2111: if (unknownEncodingHandler(unknownEncodingHandlerData, encodingName, &info)) {
2112: ENCODING *enc;
2113: unknownEncodingMem = malloc(XmlSizeOfUnknownEncoding());
2114: if (!unknownEncodingMem) {
2115: if (info.release)
2116: info.release(info.data);
2117: return XML_ERROR_NO_MEMORY;
2118: }
1.3 kahan 2119: enc = (ns
2120: ? XmlInitUnknownEncodingNS
2121: : XmlInitUnknownEncoding)(unknownEncodingMem,
2122: info.map,
2123: info.convert,
2124: info.data);
1.1 frystyk 2125: if (enc) {
2126: unknownEncodingData = info.data;
2127: unknownEncodingRelease = info.release;
2128: encoding = enc;
2129: return XML_ERROR_NONE;
2130: }
2131: }
2132: if (info.release)
2133: info.release(info.data);
2134: }
2135: return XML_ERROR_UNKNOWN_ENCODING;
2136: }
2137:
2138: static enum XML_Error
2139: prologInitProcessor(XML_Parser parser,
2140: const char *s,
2141: const char *end,
2142: const char **nextPtr)
2143: {
2144: enum XML_Error result = initializeEncoding(parser);
2145: if (result != XML_ERROR_NONE)
2146: return result;
2147: processor = prologProcessor;
2148: return prologProcessor(parser, s, end, nextPtr);
2149: }
2150:
2151: static enum XML_Error
2152: prologProcessor(XML_Parser parser,
2153: const char *s,
2154: const char *end,
2155: const char **nextPtr)
2156: {
1.4 kahan 2157: const char *next;
2158: int tok = XmlPrologTok(encoding, s, end, &next);
2159: return doProlog(parser, encoding, s, end, tok, next, nextPtr);
2160: }
2161:
2162: static enum XML_Error
2163: doProlog(XML_Parser parser,
2164: const ENCODING *enc,
2165: const char *s,
2166: const char *end,
2167: int tok,
2168: const char *next,
2169: const char **nextPtr)
2170: {
2171: #ifdef XML_DTD
2172: static const XML_Char externalSubsetName[] = { '#' , '\0' };
2173: #endif /* XML_DTD */
2174:
2175: const char **eventPP;
2176: const char **eventEndPP;
2177: if (enc == encoding) {
2178: eventPP = &eventPtr;
2179: eventEndPP = &eventEndPtr;
2180: }
2181: else {
2182: eventPP = &(openInternalEntities->internalEventPtr);
2183: eventEndPP = &(openInternalEntities->internalEventEndPtr);
2184: }
1.1 frystyk 2185: for (;;) {
1.4 kahan 2186: int role;
2187: *eventPP = s;
2188: *eventEndPP = next;
1.1 frystyk 2189: if (tok <= 0) {
2190: if (nextPtr != 0 && tok != XML_TOK_INVALID) {
2191: *nextPtr = s;
2192: return XML_ERROR_NONE;
2193: }
2194: switch (tok) {
2195: case XML_TOK_INVALID:
1.4 kahan 2196: *eventPP = next;
1.1 frystyk 2197: return XML_ERROR_INVALID_TOKEN;
2198: case XML_TOK_PARTIAL:
2199: return XML_ERROR_UNCLOSED_TOKEN;
2200: case XML_TOK_PARTIAL_CHAR:
2201: return XML_ERROR_PARTIAL_CHAR;
1.4 kahan 2202: case XML_TOK_NONE:
2203: #ifdef XML_DTD
2204: if (enc != encoding)
2205: return XML_ERROR_NONE;
2206: if (parentParser) {
2207: if (XmlTokenRole(&prologState, XML_TOK_NONE, end, end, enc)
2208: == XML_ROLE_ERROR)
2209: return XML_ERROR_SYNTAX;
2210: hadExternalDoctype = 0;
2211: return XML_ERROR_NONE;
2212: }
2213: #endif /* XML_DTD */
1.1 frystyk 2214: return XML_ERROR_NO_ELEMENTS;
2215: default:
1.4 kahan 2216: tok = -tok;
2217: next = end;
2218: break;
1.1 frystyk 2219: }
2220: }
1.4 kahan 2221: role = XmlTokenRole(&prologState, tok, s, next, enc);
2222: switch (role) {
1.1 frystyk 2223: case XML_ROLE_XML_DECL:
2224: {
2225: enum XML_Error result = processXmlDecl(parser, 0, s, next);
2226: if (result != XML_ERROR_NONE)
2227: return result;
1.4 kahan 2228: enc = encoding;
2229: }
2230: break;
2231: case XML_ROLE_DOCTYPE_NAME:
2232: if (startDoctypeDeclHandler) {
2233: const XML_Char *name = poolStoreString(&tempPool, enc, s, next);
2234: if (!name)
2235: return XML_ERROR_NO_MEMORY;
2236: startDoctypeDeclHandler(handlerArg, name);
2237: poolClear(&tempPool);
1.1 frystyk 2238: }
2239: break;
1.4 kahan 2240: #ifdef XML_DTD
2241: case XML_ROLE_TEXT_DECL:
2242: {
2243: enum XML_Error result = processXmlDecl(parser, 1, s, next);
2244: if (result != XML_ERROR_NONE)
2245: return result;
2246: enc = encoding;
2247: }
1.1 frystyk 2248: break;
1.4 kahan 2249: #endif /* XML_DTD */
1.1 frystyk 2250: case XML_ROLE_DOCTYPE_PUBLIC_ID:
1.4 kahan 2251: #ifdef XML_DTD
2252: declEntity = (ENTITY *)lookup(&dtd.paramEntities,
2253: externalSubsetName,
2254: sizeof(ENTITY));
2255: if (!declEntity)
2256: return XML_ERROR_NO_MEMORY;
2257: #endif /* XML_DTD */
2258: /* fall through */
1.1 frystyk 2259: case XML_ROLE_ENTITY_PUBLIC_ID:
1.4 kahan 2260: if (!XmlIsPublicId(enc, s, next, eventPP))
1.1 frystyk 2261: return XML_ERROR_SYNTAX;
2262: if (declEntity) {
2263: XML_Char *tem = poolStoreString(&dtd.pool,
1.4 kahan 2264: enc,
2265: s + enc->minBytesPerChar,
2266: next - enc->minBytesPerChar);
1.1 frystyk 2267: if (!tem)
2268: return XML_ERROR_NO_MEMORY;
2269: normalizePublicId(tem);
2270: declEntity->publicId = tem;
2271: poolFinish(&dtd.pool);
2272: }
2273: break;
1.4 kahan 2274: case XML_ROLE_DOCTYPE_CLOSE:
2275: if (dtd.complete && hadExternalDoctype) {
2276: dtd.complete = 0;
2277: #ifdef XML_DTD
2278: if (paramEntityParsing && externalEntityRefHandler) {
2279: ENTITY *entity = (ENTITY *)lookup(&dtd.paramEntities,
2280: externalSubsetName,
2281: 0);
2282: if (!externalEntityRefHandler(externalEntityRefHandlerArg,
2283: 0,
2284: entity->base,
2285: entity->systemId,
2286: entity->publicId))
2287: return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
2288: }
2289: #endif /* XML_DTD */
2290: if (!dtd.complete
2291: && !dtd.standalone
2292: && notStandaloneHandler
2293: && !notStandaloneHandler(handlerArg))
2294: return XML_ERROR_NOT_STANDALONE;
2295: }
2296: if (endDoctypeDeclHandler)
2297: endDoctypeDeclHandler(handlerArg);
2298: break;
1.1 frystyk 2299: case XML_ROLE_INSTANCE_START:
2300: processor = contentProcessor;
2301: return contentProcessor(parser, s, end, nextPtr);
2302: case XML_ROLE_ATTLIST_ELEMENT_NAME:
2303: {
1.4 kahan 2304: const XML_Char *name = poolStoreString(&dtd.pool, enc, s, next);
1.1 frystyk 2305: if (!name)
2306: return XML_ERROR_NO_MEMORY;
2307: declElementType = (ELEMENT_TYPE *)lookup(&dtd.elementTypes, name, sizeof(ELEMENT_TYPE));
2308: if (!declElementType)
2309: return XML_ERROR_NO_MEMORY;
2310: if (declElementType->name != name)
2311: poolDiscard(&dtd.pool);
1.3 kahan 2312: else {
1.1 frystyk 2313: poolFinish(&dtd.pool);
1.3 kahan 2314: if (!setElementTypePrefix(parser, declElementType))
2315: return XML_ERROR_NO_MEMORY;
2316: }
1.1 frystyk 2317: break;
2318: }
2319: case XML_ROLE_ATTRIBUTE_NAME:
1.4 kahan 2320: declAttributeId = getAttributeId(parser, enc, s, next);
1.1 frystyk 2321: if (!declAttributeId)
2322: return XML_ERROR_NO_MEMORY;
2323: declAttributeIsCdata = 0;
2324: break;
2325: case XML_ROLE_ATTRIBUTE_TYPE_CDATA:
2326: declAttributeIsCdata = 1;
2327: break;
2328: case XML_ROLE_IMPLIED_ATTRIBUTE_VALUE:
2329: case XML_ROLE_REQUIRED_ATTRIBUTE_VALUE:
2330: if (dtd.complete
2331: && !defineAttribute(declElementType, declAttributeId, declAttributeIsCdata, 0))
2332: return XML_ERROR_NO_MEMORY;
2333: break;
2334: case XML_ROLE_DEFAULT_ATTRIBUTE_VALUE:
2335: case XML_ROLE_FIXED_ATTRIBUTE_VALUE:
2336: {
2337: const XML_Char *attVal;
2338: enum XML_Error result
1.4 kahan 2339: = storeAttributeValue(parser, enc, declAttributeIsCdata,
2340: s + enc->minBytesPerChar,
2341: next - enc->minBytesPerChar,
1.1 frystyk 2342: &dtd.pool);
2343: if (result)
2344: return result;
2345: attVal = poolStart(&dtd.pool);
2346: poolFinish(&dtd.pool);
2347: if (dtd.complete
2348: && !defineAttribute(declElementType, declAttributeId, declAttributeIsCdata, attVal))
2349: return XML_ERROR_NO_MEMORY;
2350: break;
2351: }
2352: case XML_ROLE_ENTITY_VALUE:
2353: {
1.4 kahan 2354: enum XML_Error result = storeEntityValue(parser, enc,
2355: s + enc->minBytesPerChar,
2356: next - enc->minBytesPerChar);
2357: if (declEntity) {
2358: declEntity->textPtr = poolStart(&dtd.pool);
2359: declEntity->textLen = poolLength(&dtd.pool);
2360: poolFinish(&dtd.pool);
2361: }
2362: else
2363: poolDiscard(&dtd.pool);
1.1 frystyk 2364: if (result != XML_ERROR_NONE)
2365: return result;
2366: }
2367: break;
1.4 kahan 2368: case XML_ROLE_DOCTYPE_SYSTEM_ID:
2369: if (!dtd.standalone
2370: #ifdef XML_DTD
2371: && !paramEntityParsing
2372: #endif /* XML_DTD */
2373: && notStandaloneHandler
2374: && !notStandaloneHandler(handlerArg))
2375: return XML_ERROR_NOT_STANDALONE;
2376: hadExternalDoctype = 1;
2377: #ifndef XML_DTD
2378: break;
2379: #else /* XML_DTD */
2380: if (!declEntity) {
2381: declEntity = (ENTITY *)lookup(&dtd.paramEntities,
2382: externalSubsetName,
2383: sizeof(ENTITY));
2384: if (!declEntity)
2385: return XML_ERROR_NO_MEMORY;
2386: }
2387: /* fall through */
2388: #endif /* XML_DTD */
1.1 frystyk 2389: case XML_ROLE_ENTITY_SYSTEM_ID:
2390: if (declEntity) {
1.4 kahan 2391: declEntity->systemId = poolStoreString(&dtd.pool, enc,
2392: s + enc->minBytesPerChar,
2393: next - enc->minBytesPerChar);
1.1 frystyk 2394: if (!declEntity->systemId)
2395: return XML_ERROR_NO_MEMORY;
1.4 kahan 2396: declEntity->base = curBase;
1.1 frystyk 2397: poolFinish(&dtd.pool);
2398: }
2399: break;
2400: case XML_ROLE_ENTITY_NOTATION_NAME:
2401: if (declEntity) {
1.4 kahan 2402: declEntity->notation = poolStoreString(&dtd.pool, enc, s, next);
1.1 frystyk 2403: if (!declEntity->notation)
2404: return XML_ERROR_NO_MEMORY;
2405: poolFinish(&dtd.pool);
2406: if (unparsedEntityDeclHandler) {
1.4 kahan 2407: *eventEndPP = s;
1.1 frystyk 2408: unparsedEntityDeclHandler(handlerArg,
2409: declEntity->name,
2410: declEntity->base,
2411: declEntity->systemId,
2412: declEntity->publicId,
2413: declEntity->notation);
2414: }
2415:
2416: }
2417: break;
2418: case XML_ROLE_GENERAL_ENTITY_NAME:
2419: {
2420: const XML_Char *name;
1.4 kahan 2421: if (XmlPredefinedEntityName(enc, s, next)) {
1.1 frystyk 2422: declEntity = 0;
2423: break;
2424: }
1.4 kahan 2425: name = poolStoreString(&dtd.pool, enc, s, next);
1.1 frystyk 2426: if (!name)
2427: return XML_ERROR_NO_MEMORY;
2428: if (dtd.complete) {
2429: declEntity = (ENTITY *)lookup(&dtd.generalEntities, name, sizeof(ENTITY));
2430: if (!declEntity)
2431: return XML_ERROR_NO_MEMORY;
2432: if (declEntity->name != name) {
2433: poolDiscard(&dtd.pool);
2434: declEntity = 0;
2435: }
2436: else
2437: poolFinish(&dtd.pool);
2438: }
2439: else {
2440: poolDiscard(&dtd.pool);
2441: declEntity = 0;
2442: }
2443: }
2444: break;
2445: case XML_ROLE_PARAM_ENTITY_NAME:
1.4 kahan 2446: #ifdef XML_DTD
2447: if (dtd.complete) {
2448: const XML_Char *name = poolStoreString(&dtd.pool, enc, s, next);
2449: if (!name)
2450: return XML_ERROR_NO_MEMORY;
2451: declEntity = (ENTITY *)lookup(&dtd.paramEntities, name, sizeof(ENTITY));
2452: if (!declEntity)
2453: return XML_ERROR_NO_MEMORY;
2454: if (declEntity->name != name) {
2455: poolDiscard(&dtd.pool);
2456: declEntity = 0;
2457: }
2458: else
2459: poolFinish(&dtd.pool);
2460: }
2461: #else /* not XML_DTD */
1.1 frystyk 2462: declEntity = 0;
1.4 kahan 2463: #endif /* not XML_DTD */
1.1 frystyk 2464: break;
2465: case XML_ROLE_NOTATION_NAME:
2466: declNotationPublicId = 0;
2467: declNotationName = 0;
2468: if (notationDeclHandler) {
1.4 kahan 2469: declNotationName = poolStoreString(&tempPool, enc, s, next);
1.1 frystyk 2470: if (!declNotationName)
2471: return XML_ERROR_NO_MEMORY;
2472: poolFinish(&tempPool);
2473: }
2474: break;
2475: case XML_ROLE_NOTATION_PUBLIC_ID:
1.4 kahan 2476: if (!XmlIsPublicId(enc, s, next, eventPP))
1.1 frystyk 2477: return XML_ERROR_SYNTAX;
2478: if (declNotationName) {
2479: XML_Char *tem = poolStoreString(&tempPool,
1.4 kahan 2480: enc,
2481: s + enc->minBytesPerChar,
2482: next - enc->minBytesPerChar);
1.1 frystyk 2483: if (!tem)
2484: return XML_ERROR_NO_MEMORY;
2485: normalizePublicId(tem);
2486: declNotationPublicId = tem;
2487: poolFinish(&tempPool);
2488: }
2489: break;
2490: case XML_ROLE_NOTATION_SYSTEM_ID:
2491: if (declNotationName && notationDeclHandler) {
2492: const XML_Char *systemId
1.4 kahan 2493: = poolStoreString(&tempPool, enc,
2494: s + enc->minBytesPerChar,
2495: next - enc->minBytesPerChar);
1.1 frystyk 2496: if (!systemId)
2497: return XML_ERROR_NO_MEMORY;
1.4 kahan 2498: *eventEndPP = s;
1.1 frystyk 2499: notationDeclHandler(handlerArg,
2500: declNotationName,
1.4 kahan 2501: curBase,
1.1 frystyk 2502: systemId,
2503: declNotationPublicId);
2504: }
2505: poolClear(&tempPool);
2506: break;
2507: case XML_ROLE_NOTATION_NO_SYSTEM_ID:
2508: if (declNotationPublicId && notationDeclHandler) {
1.4 kahan 2509: *eventEndPP = s;
1.1 frystyk 2510: notationDeclHandler(handlerArg,
2511: declNotationName,
1.4 kahan 2512: curBase,
1.1 frystyk 2513: 0,
2514: declNotationPublicId);
2515: }
2516: poolClear(&tempPool);
2517: break;
2518: case XML_ROLE_ERROR:
2519: switch (tok) {
2520: case XML_TOK_PARAM_ENTITY_REF:
2521: return XML_ERROR_PARAM_ENTITY_REF;
2522: case XML_TOK_XML_DECL:
2523: return XML_ERROR_MISPLACED_XML_PI;
2524: default:
2525: return XML_ERROR_SYNTAX;
2526: }
1.4 kahan 2527: #ifdef XML_DTD
2528: case XML_ROLE_IGNORE_SECT:
2529: {
2530: enum XML_Error result;
2531: if (defaultHandler)
2532: reportDefault(parser, enc, s, next);
2533: result = doIgnoreSection(parser, enc, &next, end, nextPtr);
2534: if (!next) {
2535: processor = ignoreSectionProcessor;
2536: return result;
2537: }
2538: }
2539: break;
2540: #endif /* XML_DTD */
1.1 frystyk 2541: case XML_ROLE_GROUP_OPEN:
2542: if (prologState.level >= groupSize) {
2543: if (groupSize)
2544: groupConnector = realloc(groupConnector, groupSize *= 2);
2545: else
2546: groupConnector = malloc(groupSize = 32);
2547: if (!groupConnector)
2548: return XML_ERROR_NO_MEMORY;
2549: }
2550: groupConnector[prologState.level] = 0;
2551: break;
2552: case XML_ROLE_GROUP_SEQUENCE:
1.4 kahan 2553: if (groupConnector[prologState.level] == '|')
1.1 frystyk 2554: return XML_ERROR_SYNTAX;
2555: groupConnector[prologState.level] = ',';
2556: break;
2557: case XML_ROLE_GROUP_CHOICE:
1.4 kahan 2558: if (groupConnector[prologState.level] == ',')
1.1 frystyk 2559: return XML_ERROR_SYNTAX;
2560: groupConnector[prologState.level] = '|';
2561: break;
2562: case XML_ROLE_PARAM_ENTITY_REF:
1.4 kahan 2563: #ifdef XML_DTD
2564: case XML_ROLE_INNER_PARAM_ENTITY_REF:
2565: if (paramEntityParsing
2566: && (dtd.complete || role == XML_ROLE_INNER_PARAM_ENTITY_REF)) {
2567: const XML_Char *name;
2568: ENTITY *entity;
2569: name = poolStoreString(&dtd.pool, enc,
2570: s + enc->minBytesPerChar,
2571: next - enc->minBytesPerChar);
2572: if (!name)
2573: return XML_ERROR_NO_MEMORY;
2574: entity = (ENTITY *)lookup(&dtd.paramEntities, name, 0);
2575: poolDiscard(&dtd.pool);
2576: if (!entity) {
2577: /* FIXME what to do if !dtd.complete? */
2578: return XML_ERROR_UNDEFINED_ENTITY;
2579: }
2580: if (entity->open)
2581: return XML_ERROR_RECURSIVE_ENTITY_REF;
2582: if (entity->textPtr) {
2583: enum XML_Error result;
2584: result = processInternalParamEntity(parser, entity);
2585: if (result != XML_ERROR_NONE)
2586: return result;
2587: break;
2588: }
2589: if (role == XML_ROLE_INNER_PARAM_ENTITY_REF)
2590: return XML_ERROR_PARAM_ENTITY_REF;
2591: if (externalEntityRefHandler) {
2592: dtd.complete = 0;
2593: entity->open = 1;
2594: if (!externalEntityRefHandler(externalEntityRefHandlerArg,
2595: 0,
2596: entity->base,
2597: entity->systemId,
2598: entity->publicId)) {
2599: entity->open = 0;
2600: return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
2601: }
2602: entity->open = 0;
2603: if (dtd.complete)
2604: break;
2605: }
2606: }
2607: #endif /* XML_DTD */
1.3 kahan 2608: if (!dtd.standalone
2609: && notStandaloneHandler
2610: && !notStandaloneHandler(handlerArg))
2611: return XML_ERROR_NOT_STANDALONE;
1.1 frystyk 2612: dtd.complete = 0;
1.4 kahan 2613: if (defaultHandler)
2614: reportDefault(parser, enc, s, next);
1.1 frystyk 2615: break;
2616: case XML_ROLE_NONE:
2617: switch (tok) {
2618: case XML_TOK_PI:
1.4 kahan 2619: if (!reportProcessingInstruction(parser, enc, s, next))
1.1 frystyk 2620: return XML_ERROR_NO_MEMORY;
2621: break;
1.3 kahan 2622: case XML_TOK_COMMENT:
1.4 kahan 2623: if (!reportComment(parser, enc, s, next))
1.3 kahan 2624: return XML_ERROR_NO_MEMORY;
2625: break;
1.1 frystyk 2626: }
2627: break;
2628: }
2629: if (defaultHandler) {
2630: switch (tok) {
2631: case XML_TOK_PI:
1.3 kahan 2632: case XML_TOK_COMMENT:
1.1 frystyk 2633: case XML_TOK_BOM:
2634: case XML_TOK_XML_DECL:
1.4 kahan 2635: #ifdef XML_DTD
2636: case XML_TOK_IGNORE_SECT:
2637: #endif /* XML_DTD */
2638: case XML_TOK_PARAM_ENTITY_REF:
1.1 frystyk 2639: break;
2640: default:
1.4 kahan 2641: #ifdef XML_DTD
2642: if (role != XML_ROLE_IGNORE_SECT)
2643: #endif /* XML_DTD */
2644: reportDefault(parser, enc, s, next);
1.1 frystyk 2645: }
2646: }
2647: s = next;
1.4 kahan 2648: tok = XmlPrologTok(enc, s, end, &next);
1.1 frystyk 2649: }
2650: /* not reached */
2651: }
2652:
2653: static
2654: enum XML_Error epilogProcessor(XML_Parser parser,
2655: const char *s,
2656: const char *end,
2657: const char **nextPtr)
2658: {
2659: processor = epilogProcessor;
2660: eventPtr = s;
2661: for (;;) {
2662: const char *next;
2663: int tok = XmlPrologTok(encoding, s, end, &next);
2664: eventEndPtr = next;
2665: switch (tok) {
1.4 kahan 2666: case -XML_TOK_PROLOG_S:
1.1 frystyk 2667: if (defaultHandler) {
2668: eventEndPtr = end;
2669: reportDefault(parser, encoding, s, end);
2670: }
2671: /* fall through */
2672: case XML_TOK_NONE:
2673: if (nextPtr)
2674: *nextPtr = end;
2675: return XML_ERROR_NONE;
2676: case XML_TOK_PROLOG_S:
2677: if (defaultHandler)
2678: reportDefault(parser, encoding, s, next);
2679: break;
2680: case XML_TOK_PI:
2681: if (!reportProcessingInstruction(parser, encoding, s, next))
2682: return XML_ERROR_NO_MEMORY;
2683: break;
1.3 kahan 2684: case XML_TOK_COMMENT:
2685: if (!reportComment(parser, encoding, s, next))
2686: return XML_ERROR_NO_MEMORY;
2687: break;
1.1 frystyk 2688: case XML_TOK_INVALID:
2689: eventPtr = next;
2690: return XML_ERROR_INVALID_TOKEN;
2691: case XML_TOK_PARTIAL:
2692: if (nextPtr) {
2693: *nextPtr = s;
2694: return XML_ERROR_NONE;
2695: }
2696: return XML_ERROR_UNCLOSED_TOKEN;
2697: case XML_TOK_PARTIAL_CHAR:
2698: if (nextPtr) {
2699: *nextPtr = s;
2700: return XML_ERROR_NONE;
2701: }
2702: return XML_ERROR_PARTIAL_CHAR;
2703: default:
2704: return XML_ERROR_JUNK_AFTER_DOC_ELEMENT;
2705: }
2706: eventPtr = s = next;
2707: }
2708: }
2709:
1.4 kahan 2710: #ifdef XML_DTD
2711:
2712: static enum XML_Error
2713: processInternalParamEntity(XML_Parser parser, ENTITY *entity)
2714: {
2715: const char *s, *end, *next;
2716: int tok;
2717: enum XML_Error result;
2718: OPEN_INTERNAL_ENTITY openEntity;
2719: entity->open = 1;
2720: openEntity.next = openInternalEntities;
2721: openInternalEntities = &openEntity;
2722: openEntity.entity = entity;
2723: openEntity.internalEventPtr = 0;
2724: openEntity.internalEventEndPtr = 0;
2725: s = (char *)entity->textPtr;
2726: end = (char *)(entity->textPtr + entity->textLen);
2727: tok = XmlPrologTok(internalEncoding, s, end, &next);
2728: result = doProlog(parser, internalEncoding, s, end, tok, next, 0);
2729: entity->open = 0;
2730: openInternalEntities = openEntity.next;
2731: return result;
2732: }
2733:
2734: #endif /* XML_DTD */
2735:
1.1 frystyk 2736: static
2737: enum XML_Error errorProcessor(XML_Parser parser,
2738: const char *s,
2739: const char *end,
2740: const char **nextPtr)
2741: {
2742: return errorCode;
2743: }
2744:
2745: static enum XML_Error
2746: storeAttributeValue(XML_Parser parser, const ENCODING *enc, int isCdata,
2747: const char *ptr, const char *end,
2748: STRING_POOL *pool)
2749: {
2750: enum XML_Error result = appendAttributeValue(parser, enc, isCdata, ptr, end, pool);
2751: if (result)
2752: return result;
1.3 kahan 2753: if (!isCdata && poolLength(pool) && poolLastChar(pool) == 0x20)
1.1 frystyk 2754: poolChop(pool);
2755: if (!poolAppendChar(pool, XML_T('\0')))
2756: return XML_ERROR_NO_MEMORY;
2757: return XML_ERROR_NONE;
2758: }
2759:
2760: static enum XML_Error
2761: appendAttributeValue(XML_Parser parser, const ENCODING *enc, int isCdata,
2762: const char *ptr, const char *end,
2763: STRING_POOL *pool)
2764: {
2765: for (;;) {
2766: const char *next;
2767: int tok = XmlAttributeValueTok(enc, ptr, end, &next);
2768: switch (tok) {
2769: case XML_TOK_NONE:
2770: return XML_ERROR_NONE;
2771: case XML_TOK_INVALID:
2772: if (enc == encoding)
2773: eventPtr = next;
2774: return XML_ERROR_INVALID_TOKEN;
2775: case XML_TOK_PARTIAL:
2776: if (enc == encoding)
2777: eventPtr = ptr;
2778: return XML_ERROR_INVALID_TOKEN;
2779: case XML_TOK_CHAR_REF:
2780: {
2781: XML_Char buf[XML_ENCODE_MAX];
2782: int i;
2783: int n = XmlCharRefNumber(enc, ptr);
2784: if (n < 0) {
2785: if (enc == encoding)
2786: eventPtr = ptr;
2787: return XML_ERROR_BAD_CHAR_REF;
2788: }
2789: if (!isCdata
2790: && n == 0x20 /* space */
1.3 kahan 2791: && (poolLength(pool) == 0 || poolLastChar(pool) == 0x20))
1.1 frystyk 2792: break;
2793: n = XmlEncode(n, (ICHAR *)buf);
2794: if (!n) {
2795: if (enc == encoding)
2796: eventPtr = ptr;
2797: return XML_ERROR_BAD_CHAR_REF;
2798: }
2799: for (i = 0; i < n; i++) {
2800: if (!poolAppendChar(pool, buf[i]))
2801: return XML_ERROR_NO_MEMORY;
2802: }
2803: }
2804: break;
2805: case XML_TOK_DATA_CHARS:
2806: if (!poolAppend(pool, enc, ptr, next))
2807: return XML_ERROR_NO_MEMORY;
2808: break;
2809: break;
2810: case XML_TOK_TRAILING_CR:
2811: next = ptr + enc->minBytesPerChar;
2812: /* fall through */
2813: case XML_TOK_ATTRIBUTE_VALUE_S:
2814: case XML_TOK_DATA_NEWLINE:
1.3 kahan 2815: if (!isCdata && (poolLength(pool) == 0 || poolLastChar(pool) == 0x20))
1.1 frystyk 2816: break;
1.3 kahan 2817: if (!poolAppendChar(pool, 0x20))
1.1 frystyk 2818: return XML_ERROR_NO_MEMORY;
2819: break;
2820: case XML_TOK_ENTITY_REF:
2821: {
2822: const XML_Char *name;
2823: ENTITY *entity;
2824: XML_Char ch = XmlPredefinedEntityName(enc,
2825: ptr + enc->minBytesPerChar,
2826: next - enc->minBytesPerChar);
2827: if (ch) {
2828: if (!poolAppendChar(pool, ch))
2829: return XML_ERROR_NO_MEMORY;
2830: break;
2831: }
2832: name = poolStoreString(&temp2Pool, enc,
2833: ptr + enc->minBytesPerChar,
2834: next - enc->minBytesPerChar);
2835: if (!name)
2836: return XML_ERROR_NO_MEMORY;
2837: entity = (ENTITY *)lookup(&dtd.generalEntities, name, 0);
2838: poolDiscard(&temp2Pool);
2839: if (!entity) {
2840: if (dtd.complete) {
2841: if (enc == encoding)
2842: eventPtr = ptr;
2843: return XML_ERROR_UNDEFINED_ENTITY;
2844: }
1.4.2.1 kahan 2845: /* Laurent Carcone (lolo@w3.org) 14/March/2001
2846: ** For attributes, Expat is skipping the entity names it
2847: ** doesn't know. This patch keeps those names untranslated.
2848: **/
2849: else {
2850: int i;
1.4.2.1.2.1! kahan 2851: if (!poolAppendChar(pool, (unsigned char) (128)))
1.4.2.1 kahan 2852: return XML_ERROR_NO_MEMORY;
2853: for (i = 0; name[i] != XML_T('\0'); i++) {
2854: if (!poolAppendChar(pool,name[i]))
2855: return XML_ERROR_NO_MEMORY;
2856: }
2857: if (!poolAppendChar(pool, ';'))
2858: return XML_ERROR_NO_MEMORY;
2859: }
2860: /* End of patch */
1.1 frystyk 2861: }
2862: else if (entity->open) {
2863: if (enc == encoding)
2864: eventPtr = ptr;
2865: return XML_ERROR_RECURSIVE_ENTITY_REF;
2866: }
2867: else if (entity->notation) {
2868: if (enc == encoding)
2869: eventPtr = ptr;
2870: return XML_ERROR_BINARY_ENTITY_REF;
2871: }
2872: else if (!entity->textPtr) {
2873: if (enc == encoding)
2874: eventPtr = ptr;
2875: return XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF;
2876: }
2877: else {
2878: enum XML_Error result;
2879: const XML_Char *textEnd = entity->textPtr + entity->textLen;
2880: entity->open = 1;
1.4 kahan 2881: result = appendAttributeValue(parser, internalEncoding, isCdata, (char *)entity->textPtr, (char *)textEnd, pool);
1.1 frystyk 2882: entity->open = 0;
2883: if (result)
2884: return result;
2885: }
2886: }
2887: break;
2888: default:
2889: abort();
2890: }
2891: ptr = next;
2892: }
2893: /* not reached */
2894: }
2895:
2896: static
2897: enum XML_Error storeEntityValue(XML_Parser parser,
1.4 kahan 2898: const ENCODING *enc,
1.1 frystyk 2899: const char *entityTextPtr,
2900: const char *entityTextEnd)
2901: {
2902: STRING_POOL *pool = &(dtd.pool);
2903: for (;;) {
2904: const char *next;
1.4 kahan 2905: int tok = XmlEntityValueTok(enc, entityTextPtr, entityTextEnd, &next);
1.1 frystyk 2906: switch (tok) {
2907: case XML_TOK_PARAM_ENTITY_REF:
1.4 kahan 2908: #ifdef XML_DTD
2909: if (parentParser || enc != encoding) {
2910: enum XML_Error result;
2911: const XML_Char *name;
2912: ENTITY *entity;
2913: name = poolStoreString(&tempPool, enc,
2914: entityTextPtr + enc->minBytesPerChar,
2915: next - enc->minBytesPerChar);
2916: if (!name)
2917: return XML_ERROR_NO_MEMORY;
2918: entity = (ENTITY *)lookup(&dtd.paramEntities, name, 0);
2919: poolDiscard(&tempPool);
2920: if (!entity) {
2921: if (enc == encoding)
2922: eventPtr = entityTextPtr;
2923: return XML_ERROR_UNDEFINED_ENTITY;
2924: }
2925: if (entity->open) {
2926: if (enc == encoding)
2927: eventPtr = entityTextPtr;
2928: return XML_ERROR_RECURSIVE_ENTITY_REF;
2929: }
2930: if (entity->systemId) {
2931: if (enc == encoding)
2932: eventPtr = entityTextPtr;
2933: return XML_ERROR_PARAM_ENTITY_REF;
2934: }
2935: entity->open = 1;
2936: result = storeEntityValue(parser,
2937: internalEncoding,
2938: (char *)entity->textPtr,
2939: (char *)(entity->textPtr + entity->textLen));
2940: entity->open = 0;
2941: if (result)
2942: return result;
2943: break;
2944: }
2945: #endif /* XML_DTD */
1.1 frystyk 2946: eventPtr = entityTextPtr;
2947: return XML_ERROR_SYNTAX;
2948: case XML_TOK_NONE:
2949: return XML_ERROR_NONE;
2950: case XML_TOK_ENTITY_REF:
2951: case XML_TOK_DATA_CHARS:
1.4 kahan 2952: if (!poolAppend(pool, enc, entityTextPtr, next))
1.1 frystyk 2953: return XML_ERROR_NO_MEMORY;
2954: break;
2955: case XML_TOK_TRAILING_CR:
1.4 kahan 2956: next = entityTextPtr + enc->minBytesPerChar;
1.1 frystyk 2957: /* fall through */
2958: case XML_TOK_DATA_NEWLINE:
2959: if (pool->end == pool->ptr && !poolGrow(pool))
2960: return XML_ERROR_NO_MEMORY;
1.3 kahan 2961: *(pool->ptr)++ = 0xA;
1.1 frystyk 2962: break;
2963: case XML_TOK_CHAR_REF:
2964: {
2965: XML_Char buf[XML_ENCODE_MAX];
2966: int i;
1.4 kahan 2967: int n = XmlCharRefNumber(enc, entityTextPtr);
1.1 frystyk 2968: if (n < 0) {
1.4 kahan 2969: if (enc == encoding)
2970: eventPtr = entityTextPtr;
1.1 frystyk 2971: return XML_ERROR_BAD_CHAR_REF;
2972: }
2973: n = XmlEncode(n, (ICHAR *)buf);
2974: if (!n) {
1.4 kahan 2975: if (enc == encoding)
2976: eventPtr = entityTextPtr;
1.1 frystyk 2977: return XML_ERROR_BAD_CHAR_REF;
2978: }
2979: for (i = 0; i < n; i++) {
2980: if (pool->end == pool->ptr && !poolGrow(pool))
2981: return XML_ERROR_NO_MEMORY;
2982: *(pool->ptr)++ = buf[i];
2983: }
2984: }
2985: break;
2986: case XML_TOK_PARTIAL:
1.4 kahan 2987: if (enc == encoding)
2988: eventPtr = entityTextPtr;
1.1 frystyk 2989: return XML_ERROR_INVALID_TOKEN;
2990: case XML_TOK_INVALID:
1.4 kahan 2991: if (enc == encoding)
2992: eventPtr = next;
1.1 frystyk 2993: return XML_ERROR_INVALID_TOKEN;
2994: default:
2995: abort();
2996: }
2997: entityTextPtr = next;
2998: }
2999: /* not reached */
3000: }
3001:
3002: static void
3003: normalizeLines(XML_Char *s)
3004: {
3005: XML_Char *p;
3006: for (;; s++) {
3007: if (*s == XML_T('\0'))
3008: return;
1.3 kahan 3009: if (*s == 0xD)
1.1 frystyk 3010: break;
3011: }
3012: p = s;
3013: do {
1.3 kahan 3014: if (*s == 0xD) {
3015: *p++ = 0xA;
3016: if (*++s == 0xA)
1.1 frystyk 3017: s++;
3018: }
3019: else
3020: *p++ = *s++;
3021: } while (*s);
3022: *p = XML_T('\0');
3023: }
3024:
3025: static int
3026: reportProcessingInstruction(XML_Parser parser, const ENCODING *enc, const char *start, const char *end)
3027: {
3028: const XML_Char *target;
3029: XML_Char *data;
3030: const char *tem;
3031: if (!processingInstructionHandler) {
3032: if (defaultHandler)
3033: reportDefault(parser, enc, start, end);
3034: return 1;
3035: }
3036: start += enc->minBytesPerChar * 2;
3037: tem = start + XmlNameLength(enc, start);
3038: target = poolStoreString(&tempPool, enc, start, tem);
3039: if (!target)
3040: return 0;
3041: poolFinish(&tempPool);
3042: data = poolStoreString(&tempPool, enc,
3043: XmlSkipS(enc, tem),
3044: end - enc->minBytesPerChar*2);
3045: if (!data)
3046: return 0;
3047: normalizeLines(data);
3048: processingInstructionHandler(handlerArg, target, data);
3049: poolClear(&tempPool);
3050: return 1;
3051: }
3052:
1.3 kahan 3053: static int
3054: reportComment(XML_Parser parser, const ENCODING *enc, const char *start, const char *end)
3055: {
3056: XML_Char *data;
3057: if (!commentHandler) {
3058: if (defaultHandler)
3059: reportDefault(parser, enc, start, end);
3060: return 1;
3061: }
3062: data = poolStoreString(&tempPool,
3063: enc,
3064: start + enc->minBytesPerChar * 4,
3065: end - enc->minBytesPerChar * 3);
3066: if (!data)
3067: return 0;
3068: normalizeLines(data);
3069: commentHandler(handlerArg, data);
3070: poolClear(&tempPool);
3071: return 1;
3072: }
3073:
1.1 frystyk 3074: static void
3075: reportDefault(XML_Parser parser, const ENCODING *enc, const char *s, const char *end)
3076: {
3077: if (MUST_CONVERT(enc, s)) {
1.3 kahan 3078: const char **eventPP;
3079: const char **eventEndPP;
3080: if (enc == encoding) {
3081: eventPP = &eventPtr;
3082: eventEndPP = &eventEndPtr;
3083: }
3084: else {
3085: eventPP = &(openInternalEntities->internalEventPtr);
3086: eventEndPP = &(openInternalEntities->internalEventEndPtr);
3087: }
3088: do {
1.1 frystyk 3089: ICHAR *dataPtr = (ICHAR *)dataBuf;
3090: XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)dataBufEnd);
1.3 kahan 3091: *eventEndPP = s;
3092: defaultHandler(handlerArg, dataBuf, dataPtr - (ICHAR *)dataBuf);
3093: *eventPP = s;
3094: } while (s != end);
1.1 frystyk 3095: }
3096: else
3097: defaultHandler(handlerArg, (XML_Char *)s, (XML_Char *)end - (XML_Char *)s);
3098: }
3099:
3100:
3101: static int
3102: defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *attId, int isCdata, const XML_Char *value)
3103: {
3104: DEFAULT_ATTRIBUTE *att;
1.4 kahan 3105: if (value) {
3106: /* The handling of default attributes gets messed up if we have
3107: a default which duplicates a non-default. */
3108: int i;
3109: for (i = 0; i < type->nDefaultAtts; i++)
3110: if (attId == type->defaultAtts[i].id)
3111: return 1;
3112: }
1.1 frystyk 3113: if (type->nDefaultAtts == type->allocDefaultAtts) {
3114: if (type->allocDefaultAtts == 0) {
3115: type->allocDefaultAtts = 8;
3116: type->defaultAtts = malloc(type->allocDefaultAtts*sizeof(DEFAULT_ATTRIBUTE));
3117: }
3118: else {
3119: type->allocDefaultAtts *= 2;
3120: type->defaultAtts = realloc(type->defaultAtts,
3121: type->allocDefaultAtts*sizeof(DEFAULT_ATTRIBUTE));
3122: }
3123: if (!type->defaultAtts)
3124: return 0;
3125: }
3126: att = type->defaultAtts + type->nDefaultAtts;
3127: att->id = attId;
3128: att->value = value;
3129: att->isCdata = isCdata;
3130: if (!isCdata)
3131: attId->maybeTokenized = 1;
3132: type->nDefaultAtts += 1;
3133: return 1;
3134: }
3135:
1.3 kahan 3136: static int setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *elementType)
3137: {
3138: const XML_Char *name;
3139: for (name = elementType->name; *name; name++) {
3140: if (*name == XML_T(':')) {
3141: PREFIX *prefix;
3142: const XML_Char *s;
3143: for (s = elementType->name; s != name; s++) {
3144: if (!poolAppendChar(&dtd.pool, *s))
3145: return 0;
3146: }
3147: if (!poolAppendChar(&dtd.pool, XML_T('\0')))
3148: return 0;
3149: prefix = (PREFIX *)lookup(&dtd.prefixes, poolStart(&dtd.pool), sizeof(PREFIX));
3150: if (!prefix)
3151: return 0;
3152: if (prefix->name == poolStart(&dtd.pool))
3153: poolFinish(&dtd.pool);
3154: else
3155: poolDiscard(&dtd.pool);
3156: elementType->prefix = prefix;
3157:
3158: }
3159: }
3160: return 1;
3161: }
3162:
1.1 frystyk 3163: static ATTRIBUTE_ID *
3164: getAttributeId(XML_Parser parser, const ENCODING *enc, const char *start, const char *end)
3165: {
3166: ATTRIBUTE_ID *id;
3167: const XML_Char *name;
3168: if (!poolAppendChar(&dtd.pool, XML_T('\0')))
3169: return 0;
3170: name = poolStoreString(&dtd.pool, enc, start, end);
3171: if (!name)
3172: return 0;
3173: ++name;
3174: id = (ATTRIBUTE_ID *)lookup(&dtd.attributeIds, name, sizeof(ATTRIBUTE_ID));
3175: if (!id)
3176: return 0;
3177: if (id->name != name)
3178: poolDiscard(&dtd.pool);
1.3 kahan 3179: else {
1.1 frystyk 3180: poolFinish(&dtd.pool);
1.3 kahan 3181: if (!ns)
3182: ;
3183: else if (name[0] == 'x'
3184: && name[1] == 'm'
3185: && name[2] == 'l'
3186: && name[3] == 'n'
3187: && name[4] == 's'
3188: && (name[5] == XML_T('\0') || name[5] == XML_T(':'))) {
3189: if (name[5] == '\0')
3190: id->prefix = &dtd.defaultPrefix;
3191: else
3192: id->prefix = (PREFIX *)lookup(&dtd.prefixes, name + 6, sizeof(PREFIX));
3193: id->xmlns = 1;
3194: }
3195: else {
3196: int i;
3197: for (i = 0; name[i]; i++) {
3198: if (name[i] == XML_T(':')) {
3199: int j;
3200: for (j = 0; j < i; j++) {
3201: if (!poolAppendChar(&dtd.pool, name[j]))
3202: return 0;
3203: }
3204: if (!poolAppendChar(&dtd.pool, XML_T('\0')))
3205: return 0;
3206: id->prefix = (PREFIX *)lookup(&dtd.prefixes, poolStart(&dtd.pool), sizeof(PREFIX));
3207: if (id->prefix->name == poolStart(&dtd.pool))
3208: poolFinish(&dtd.pool);
3209: else
3210: poolDiscard(&dtd.pool);
3211: break;
3212: }
3213: }
3214: }
3215: }
1.1 frystyk 3216: return id;
3217: }
3218:
1.3 kahan 3219: #define CONTEXT_SEP XML_T('\f')
3220:
1.1 frystyk 3221: static
1.3 kahan 3222: const XML_Char *getContext(XML_Parser parser)
1.1 frystyk 3223: {
3224: HASH_TABLE_ITER iter;
1.3 kahan 3225: int needSep = 0;
3226:
3227: if (dtd.defaultPrefix.binding) {
3228: int i;
3229: int len;
3230: if (!poolAppendChar(&tempPool, XML_T('=')))
3231: return 0;
3232: len = dtd.defaultPrefix.binding->uriLen;
3233: if (namespaceSeparator != XML_T('\0'))
3234: len--;
3235: for (i = 0; i < len; i++)
3236: if (!poolAppendChar(&tempPool, dtd.defaultPrefix.binding->uri[i]))
3237: return 0;
3238: needSep = 1;
3239: }
3240:
3241: hashTableIterInit(&iter, &(dtd.prefixes));
3242: for (;;) {
3243: int i;
3244: int len;
3245: const XML_Char *s;
3246: PREFIX *prefix = (PREFIX *)hashTableIterNext(&iter);
3247: if (!prefix)
3248: break;
3249: if (!prefix->binding)
3250: continue;
3251: if (needSep && !poolAppendChar(&tempPool, CONTEXT_SEP))
3252: return 0;
3253: for (s = prefix->name; *s; s++)
3254: if (!poolAppendChar(&tempPool, *s))
3255: return 0;
3256: if (!poolAppendChar(&tempPool, XML_T('=')))
3257: return 0;
3258: len = prefix->binding->uriLen;
3259: if (namespaceSeparator != XML_T('\0'))
3260: len--;
3261: for (i = 0; i < len; i++)
3262: if (!poolAppendChar(&tempPool, prefix->binding->uri[i]))
3263: return 0;
3264: needSep = 1;
3265: }
3266:
1.1 frystyk 3267:
3268: hashTableIterInit(&iter, &(dtd.generalEntities));
3269: for (;;) {
3270: const XML_Char *s;
3271: ENTITY *e = (ENTITY *)hashTableIterNext(&iter);
3272: if (!e)
3273: break;
3274: if (!e->open)
3275: continue;
1.3 kahan 3276: if (needSep && !poolAppendChar(&tempPool, CONTEXT_SEP))
1.1 frystyk 3277: return 0;
3278: for (s = e->name; *s; s++)
3279: if (!poolAppendChar(&tempPool, *s))
3280: return 0;
1.3 kahan 3281: needSep = 1;
1.1 frystyk 3282: }
3283:
3284: if (!poolAppendChar(&tempPool, XML_T('\0')))
3285: return 0;
3286: return tempPool.start;
3287: }
3288:
3289: static
1.3 kahan 3290: int setContext(XML_Parser parser, const XML_Char *context)
1.1 frystyk 3291: {
1.3 kahan 3292: const XML_Char *s = context;
3293:
3294: while (*context != XML_T('\0')) {
3295: if (*s == CONTEXT_SEP || *s == XML_T('\0')) {
1.1 frystyk 3296: ENTITY *e;
3297: if (!poolAppendChar(&tempPool, XML_T('\0')))
3298: return 0;
3299: e = (ENTITY *)lookup(&dtd.generalEntities, poolStart(&tempPool), 0);
3300: if (e)
3301: e->open = 1;
1.3 kahan 3302: if (*s != XML_T('\0'))
1.1 frystyk 3303: s++;
1.3 kahan 3304: context = s;
3305: poolDiscard(&tempPool);
3306: }
3307: else if (*s == '=') {
3308: PREFIX *prefix;
3309: if (poolLength(&tempPool) == 0)
3310: prefix = &dtd.defaultPrefix;
3311: else {
3312: if (!poolAppendChar(&tempPool, XML_T('\0')))
3313: return 0;
3314: prefix = (PREFIX *)lookup(&dtd.prefixes, poolStart(&tempPool), sizeof(PREFIX));
3315: if (!prefix)
3316: return 0;
3317: if (prefix->name == poolStart(&tempPool))
3318: poolFinish(&tempPool);
3319: else
3320: poolDiscard(&tempPool);
3321: }
3322: for (context = s + 1; *context != CONTEXT_SEP && *context != XML_T('\0'); context++)
3323: if (!poolAppendChar(&tempPool, *context))
3324: return 0;
3325: if (!poolAppendChar(&tempPool, XML_T('\0')))
3326: return 0;
3327: if (!addBinding(parser, prefix, 0, poolStart(&tempPool), &inheritedBindings))
3328: return 0;
1.1 frystyk 3329: poolDiscard(&tempPool);
1.3 kahan 3330: if (*context != XML_T('\0'))
3331: ++context;
3332: s = context;
1.1 frystyk 3333: }
3334: else {
3335: if (!poolAppendChar(&tempPool, *s))
3336: return 0;
3337: s++;
3338: }
3339: }
3340: return 1;
3341: }
3342:
3343:
3344: static
3345: void normalizePublicId(XML_Char *publicId)
3346: {
3347: XML_Char *p = publicId;
3348: XML_Char *s;
3349: for (s = publicId; *s; s++) {
3350: switch (*s) {
1.3 kahan 3351: case 0x20:
3352: case 0xD:
3353: case 0xA:
3354: if (p != publicId && p[-1] != 0x20)
3355: *p++ = 0x20;
1.1 frystyk 3356: break;
3357: default:
3358: *p++ = *s;
3359: }
3360: }
1.3 kahan 3361: if (p != publicId && p[-1] == 0x20)
1.1 frystyk 3362: --p;
3363: *p = XML_T('\0');
3364: }
3365:
3366: static int dtdInit(DTD *p)
3367: {
3368: poolInit(&(p->pool));
3369: hashTableInit(&(p->generalEntities));
3370: hashTableInit(&(p->elementTypes));
3371: hashTableInit(&(p->attributeIds));
1.3 kahan 3372: hashTableInit(&(p->prefixes));
1.1 frystyk 3373: p->complete = 1;
3374: p->standalone = 0;
1.4 kahan 3375: #ifdef XML_DTD
3376: hashTableInit(&(p->paramEntities));
3377: #endif /* XML_DTD */
1.3 kahan 3378: p->defaultPrefix.name = 0;
3379: p->defaultPrefix.binding = 0;
1.1 frystyk 3380: return 1;
3381: }
3382:
#ifdef XML_DTD

/* Exchange the complete contents of the two DTD structures (a shallow,
   member-wise swap: pointers inside the structures are exchanged, not the
   data they point to). */
static void dtdSwap(DTD *p1, DTD *p2)
{
  DTD saved = *p1;
  *p1 = *p2;
  *p2 = saved;
}

#endif /* XML_DTD */
3394:
1.1 frystyk 3395: static void dtdDestroy(DTD *p)
3396: {
3397: HASH_TABLE_ITER iter;
3398: hashTableIterInit(&iter, &(p->elementTypes));
3399: for (;;) {
3400: ELEMENT_TYPE *e = (ELEMENT_TYPE *)hashTableIterNext(&iter);
3401: if (!e)
3402: break;
3403: if (e->allocDefaultAtts != 0)
3404: free(e->defaultAtts);
3405: }
3406: hashTableDestroy(&(p->generalEntities));
1.4 kahan 3407: #ifdef XML_DTD
3408: hashTableDestroy(&(p->paramEntities));
3409: #endif /* XML_DTD */
1.1 frystyk 3410: hashTableDestroy(&(p->elementTypes));
3411: hashTableDestroy(&(p->attributeIds));
1.3 kahan 3412: hashTableDestroy(&(p->prefixes));
1.1 frystyk 3413: poolDestroy(&(p->pool));
3414: }
3415:
3416: /* Do a deep copy of the DTD. Return 0 for out of memory; non-zero otherwise.
3417: The new DTD has already been initialized. */
3418:
3419: static int dtdCopy(DTD *newDtd, const DTD *oldDtd)
3420: {
3421: HASH_TABLE_ITER iter;
3422:
1.3 kahan 3423: /* Copy the prefix table. */
3424:
3425: hashTableIterInit(&iter, &(oldDtd->prefixes));
3426: for (;;) {
3427: const XML_Char *name;
3428: const PREFIX *oldP = (PREFIX *)hashTableIterNext(&iter);
3429: if (!oldP)
3430: break;
3431: name = poolCopyString(&(newDtd->pool), oldP->name);
3432: if (!name)
3433: return 0;
3434: if (!lookup(&(newDtd->prefixes), name, sizeof(PREFIX)))
3435: return 0;
3436: }
3437:
1.1 frystyk 3438: hashTableIterInit(&iter, &(oldDtd->attributeIds));
3439:
3440: /* Copy the attribute id table. */
3441:
3442: for (;;) {
3443: ATTRIBUTE_ID *newA;
3444: const XML_Char *name;
3445: const ATTRIBUTE_ID *oldA = (ATTRIBUTE_ID *)hashTableIterNext(&iter);
3446:
3447: if (!oldA)
3448: break;
3449: /* Remember to allocate the scratch byte before the name. */
3450: if (!poolAppendChar(&(newDtd->pool), XML_T('\0')))
3451: return 0;
3452: name = poolCopyString(&(newDtd->pool), oldA->name);
3453: if (!name)
3454: return 0;
3455: ++name;
3456: newA = (ATTRIBUTE_ID *)lookup(&(newDtd->attributeIds), name, sizeof(ATTRIBUTE_ID));
3457: if (!newA)
3458: return 0;
3459: newA->maybeTokenized = oldA->maybeTokenized;
1.3 kahan 3460: if (oldA->prefix) {
3461: newA->xmlns = oldA->xmlns;
3462: if (oldA->prefix == &oldDtd->defaultPrefix)
3463: newA->prefix = &newDtd->defaultPrefix;
3464: else
3465: newA->prefix = (PREFIX *)lookup(&(newDtd->prefixes), oldA->prefix->name, 0);
3466: }
1.1 frystyk 3467: }
3468:
3469: /* Copy the element type table. */
3470:
3471: hashTableIterInit(&iter, &(oldDtd->elementTypes));
3472:
3473: for (;;) {
3474: int i;
3475: ELEMENT_TYPE *newE;
3476: const XML_Char *name;
3477: const ELEMENT_TYPE *oldE = (ELEMENT_TYPE *)hashTableIterNext(&iter);
3478: if (!oldE)
3479: break;
3480: name = poolCopyString(&(newDtd->pool), oldE->name);
3481: if (!name)
3482: return 0;
3483: newE = (ELEMENT_TYPE *)lookup(&(newDtd->elementTypes), name, sizeof(ELEMENT_TYPE));
3484: if (!newE)
3485: return 0;
1.3 kahan 3486: if (oldE->nDefaultAtts) {
3487: newE->defaultAtts = (DEFAULT_ATTRIBUTE *)malloc(oldE->nDefaultAtts * sizeof(DEFAULT_ATTRIBUTE));
3488: if (!newE->defaultAtts)
3489: return 0;
3490: }
1.1 frystyk 3491: newE->allocDefaultAtts = newE->nDefaultAtts = oldE->nDefaultAtts;
1.3 kahan 3492: if (oldE->prefix)
3493: newE->prefix = (PREFIX *)lookup(&(newDtd->prefixes), oldE->prefix->name, 0);
1.1 frystyk 3494: for (i = 0; i < newE->nDefaultAtts; i++) {
3495: newE->defaultAtts[i].id = (ATTRIBUTE_ID *)lookup(&(newDtd->attributeIds), oldE->defaultAtts[i].id->name, 0);
3496: newE->defaultAtts[i].isCdata = oldE->defaultAtts[i].isCdata;
3497: if (oldE->defaultAtts[i].value) {
3498: newE->defaultAtts[i].value = poolCopyString(&(newDtd->pool), oldE->defaultAtts[i].value);
3499: if (!newE->defaultAtts[i].value)
3500: return 0;
3501: }
3502: else
3503: newE->defaultAtts[i].value = 0;
3504: }
3505: }
3506:
1.4 kahan 3507: /* Copy the entity tables. */
3508: if (!copyEntityTable(&(newDtd->generalEntities),
3509: &(newDtd->pool),
3510: &(oldDtd->generalEntities)))
3511: return 0;
1.1 frystyk 3512:
1.4 kahan 3513: #ifdef XML_DTD
3514: if (!copyEntityTable(&(newDtd->paramEntities),
3515: &(newDtd->pool),
3516: &(oldDtd->paramEntities)))
3517: return 0;
3518: #endif /* XML_DTD */
3519:
3520: newDtd->complete = oldDtd->complete;
3521: newDtd->standalone = oldDtd->standalone;
3522: return 1;
3523: }
3524:
3525: static int copyEntityTable(HASH_TABLE *newTable,
3526: STRING_POOL *newPool,
3527: const HASH_TABLE *oldTable)
3528: {
3529: HASH_TABLE_ITER iter;
3530: const XML_Char *cachedOldBase = 0;
3531: const XML_Char *cachedNewBase = 0;
3532:
3533: hashTableIterInit(&iter, oldTable);
1.1 frystyk 3534:
3535: for (;;) {
3536: ENTITY *newE;
3537: const XML_Char *name;
3538: const ENTITY *oldE = (ENTITY *)hashTableIterNext(&iter);
3539: if (!oldE)
3540: break;
1.4 kahan 3541: name = poolCopyString(newPool, oldE->name);
1.1 frystyk 3542: if (!name)
3543: return 0;
1.4 kahan 3544: newE = (ENTITY *)lookup(newTable, name, sizeof(ENTITY));
1.1 frystyk 3545: if (!newE)
3546: return 0;
3547: if (oldE->systemId) {
1.4 kahan 3548: const XML_Char *tem = poolCopyString(newPool, oldE->systemId);
1.1 frystyk 3549: if (!tem)
3550: return 0;
3551: newE->systemId = tem;
3552: if (oldE->base) {
1.4 kahan 3553: if (oldE->base == cachedOldBase)
3554: newE->base = cachedNewBase;
3555: else {
3556: cachedOldBase = oldE->base;
3557: tem = poolCopyString(newPool, cachedOldBase);
3558: if (!tem)
3559: return 0;
3560: cachedNewBase = newE->base = tem;
3561: }
1.1 frystyk 3562: }
3563: }
3564: else {
1.4 kahan 3565: const XML_Char *tem = poolCopyStringN(newPool, oldE->textPtr, oldE->textLen);
1.1 frystyk 3566: if (!tem)
3567: return 0;
3568: newE->textPtr = tem;
3569: newE->textLen = oldE->textLen;
3570: }
3571: if (oldE->notation) {
1.4 kahan 3572: const XML_Char *tem = poolCopyString(newPool, oldE->notation);
1.1 frystyk 3573: if (!tem)
3574: return 0;
3575: newE->notation = tem;
3576: }
3577: }
3578: return 1;
3579: }
3580:
3581: static
3582: void poolInit(STRING_POOL *pool)
3583: {
3584: pool->blocks = 0;
3585: pool->freeBlocks = 0;
3586: pool->start = 0;
3587: pool->ptr = 0;
3588: pool->end = 0;
3589: }
3590:
3591: static
3592: void poolClear(STRING_POOL *pool)
3593: {
3594: if (!pool->freeBlocks)
3595: pool->freeBlocks = pool->blocks;
3596: else {
3597: BLOCK *p = pool->blocks;
3598: while (p) {
3599: BLOCK *tem = p->next;
3600: p->next = pool->freeBlocks;
3601: pool->freeBlocks = p;
3602: p = tem;
3603: }
3604: }
3605: pool->blocks = 0;
3606: pool->start = 0;
3607: pool->ptr = 0;
3608: pool->end = 0;
3609: }
3610:
3611: static
3612: void poolDestroy(STRING_POOL *pool)
3613: {
3614: BLOCK *p = pool->blocks;
3615: while (p) {
3616: BLOCK *tem = p->next;
3617: free(p);
3618: p = tem;
3619: }
3620: pool->blocks = 0;
3621: p = pool->freeBlocks;
3622: while (p) {
3623: BLOCK *tem = p->next;
3624: free(p);
3625: p = tem;
3626: }
3627: pool->freeBlocks = 0;
3628: pool->ptr = 0;
3629: pool->start = 0;
3630: pool->end = 0;
3631: }
3632:
3633: static
3634: XML_Char *poolAppend(STRING_POOL *pool, const ENCODING *enc,
3635: const char *ptr, const char *end)
3636: {
3637: if (!pool->ptr && !poolGrow(pool))
3638: return 0;
3639: for (;;) {
3640: XmlConvert(enc, &ptr, end, (ICHAR **)&(pool->ptr), (ICHAR *)pool->end);
3641: if (ptr == end)
3642: break;
3643: if (!poolGrow(pool))
3644: return 0;
3645: }
3646: return pool->start;
3647: }
3648:
3649: static const XML_Char *poolCopyString(STRING_POOL *pool, const XML_Char *s)
3650: {
3651: do {
3652: if (!poolAppendChar(pool, *s))
3653: return 0;
3654: } while (*s++);
3655: s = pool->start;
3656: poolFinish(pool);
3657: return s;
3658: }
3659:
3660: static const XML_Char *poolCopyStringN(STRING_POOL *pool, const XML_Char *s, int n)
3661: {
3662: if (!pool->ptr && !poolGrow(pool))
3663: return 0;
3664: for (; n > 0; --n, s++) {
3665: if (!poolAppendChar(pool, *s))
3666: return 0;
3667:
3668: }
3669: s = pool->start;
3670: poolFinish(pool);
3671: return s;
3672: }
3673:
3674: static
3675: XML_Char *poolStoreString(STRING_POOL *pool, const ENCODING *enc,
3676: const char *ptr, const char *end)
3677: {
3678: if (!poolAppend(pool, enc, ptr, end))
3679: return 0;
3680: if (pool->ptr == pool->end && !poolGrow(pool))
3681: return 0;
3682: *(pool->ptr)++ = 0;
3683: return pool->start;
3684: }
3685:
3686: static
3687: int poolGrow(STRING_POOL *pool)
3688: {
3689: if (pool->freeBlocks) {
3690: if (pool->start == 0) {
3691: pool->blocks = pool->freeBlocks;
3692: pool->freeBlocks = pool->freeBlocks->next;
3693: pool->blocks->next = 0;
3694: pool->start = pool->blocks->s;
3695: pool->end = pool->start + pool->blocks->size;
3696: pool->ptr = pool->start;
3697: return 1;
3698: }
3699: if (pool->end - pool->start < pool->freeBlocks->size) {
3700: BLOCK *tem = pool->freeBlocks->next;
3701: pool->freeBlocks->next = pool->blocks;
3702: pool->blocks = pool->freeBlocks;
3703: pool->freeBlocks = tem;
3704: memcpy(pool->blocks->s, pool->start, (pool->end - pool->start) * sizeof(XML_Char));
3705: pool->ptr = pool->blocks->s + (pool->ptr - pool->start);
3706: pool->start = pool->blocks->s;
3707: pool->end = pool->start + pool->blocks->size;
3708: return 1;
3709: }
3710: }
3711: if (pool->blocks && pool->start == pool->blocks->s) {
3712: int blockSize = (pool->end - pool->start)*2;
3713: pool->blocks = realloc(pool->blocks, offsetof(BLOCK, s) + blockSize * sizeof(XML_Char));
3714: if (!pool->blocks)
3715: return 0;
3716: pool->blocks->size = blockSize;
3717: pool->ptr = pool->blocks->s + (pool->ptr - pool->start);
3718: pool->start = pool->blocks->s;
3719: pool->end = pool->start + blockSize;
3720: }
3721: else {
3722: BLOCK *tem;
3723: int blockSize = pool->end - pool->start;
3724: if (blockSize < INIT_BLOCK_SIZE)
3725: blockSize = INIT_BLOCK_SIZE;
3726: else
3727: blockSize *= 2;
3728: tem = malloc(offsetof(BLOCK, s) + blockSize * sizeof(XML_Char));
3729: if (!tem)
3730: return 0;
3731: tem->size = blockSize;
3732: tem->next = pool->blocks;
3733: pool->blocks = tem;
3734: memcpy(tem->s, pool->start, (pool->ptr - pool->start) * sizeof(XML_Char));
3735: pool->ptr = tem->s + (pool->ptr - pool->start);
3736: pool->start = tem->s;
3737: pool->end = tem->s + blockSize;
3738: }
3739: return 1;
3740: }
Webmaster