Annotation of libwww/modules/expat/xmlparse/xmlparse.c, revision 1.4.2.1.2.2
1.1 frystyk 1: /*
2: The contents of this file are subject to the Mozilla Public License
1.3 kahan 3: Version 1.1 (the "License"); you may not use this file except in
1.1 frystyk 4: compliance with the License. You may obtain a copy of the License at
5: http://www.mozilla.org/MPL/
6:
7: Software distributed under the License is distributed on an "AS IS"
8: basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
9: License for the specific language governing rights and limitations
10: under the License.
11:
12: The Original Code is expat.
13:
14: The Initial Developer of the Original Code is James Clark.
1.3 kahan 15: Portions created by James Clark are Copyright (C) 1998, 1999
1.1 frystyk 16: James Clark. All Rights Reserved.
17:
18: Contributor(s):
1.3 kahan 19:
20: Alternatively, the contents of this file may be used under the terms
21: of the GNU General Public License (the "GPL"), in which case the
22: provisions of the GPL are applicable instead of those above. If you
23: wish to allow use of your version of this file only under the terms of
24: the GPL and not to allow others to use your version of this file under
25: the MPL, indicate your decision by deleting the provisions above and
26: replace them with the notice and other provisions required by the
27: GPL. If you do not delete the provisions above, a recipient may use
28: your version of this file under either the MPL or the GPL.
1.1 frystyk 29: */
30:
31: #include "xmldef.h"
1.3 kahan 32: #include "xmlparse.h"
1.1 frystyk 33:
/* Select the internal character representation.  With XML_UNICODE the
   UTF-16 variants of the tokenizer entry points are used and ICHAR is
   unsigned short; otherwise the UTF-8 variants are used and ICHAR is char. */
#ifdef XML_UNICODE
#define XML_ENCODE_MAX XML_UTF16_ENCODE_MAX
#define XmlConvert XmlUtf16Convert
#define XmlGetInternalEncoding XmlGetUtf16InternalEncoding
#define XmlGetInternalEncodingNS XmlGetUtf16InternalEncodingNS
#define XmlEncode XmlUtf16Encode
/* True unless enc is flagged isUtf16 and the address s has its low bit
   clear.  The argument s is parenthesized so that an expression such as
   `p + n` is cast as a whole rather than only its first operand. */
#define MUST_CONVERT(enc, s) (!(enc)->isUtf16 || (((unsigned long)(s)) & 1))
typedef unsigned short ICHAR;
#else
#define XML_ENCODE_MAX XML_UTF8_ENCODE_MAX
#define XmlConvert XmlUtf8Convert
#define XmlGetInternalEncoding XmlGetUtf8InternalEncoding
#define XmlGetInternalEncodingNS XmlGetUtf8InternalEncodingNS
#define XmlEncode XmlUtf8Encode
/* True unless enc is flagged isUtf8; s is not examined in this build. */
#define MUST_CONVERT(enc, s) (!(enc)->isUtf8)
typedef char ICHAR;
#endif
51:
1.3 kahan 52:
/* When XML_NS is not defined, every "...NS" tokenizer entry point is mapped
   onto its plain counterpart. */
#ifndef XML_NS

#define XmlInitEncodingNS XmlInitEncoding
#define XmlInitUnknownEncodingNS XmlInitUnknownEncoding
#undef XmlGetInternalEncodingNS
#define XmlGetInternalEncodingNS XmlGetInternalEncoding
#define XmlParseXmlDeclNS XmlParseXmlDecl

#endif
62:
/* XML_T wraps a character or string literal; it adds the L prefix when
   XML_UNICODE_WCHAR_T is defined and is the identity otherwise. */
#ifdef XML_UNICODE_WCHAR_T
#define XML_T(x) L ## x
#else
#define XML_T(x) x
#endif

/* Round up n to be a multiple of sz, where sz is a power of 2. */
#define ROUND_UP(n, sz) (((n) + ((sz) - 1)) & ~((sz) - 1))
71:
72: #include "xmltok.h"
73: #include "xmlrole.h"
74: #include "hashtable.h"
75:
/* Initial sizes for the parser's growable buffers. */
#define INIT_TAG_BUF_SIZE 32  /* must be a multiple of sizeof(XML_Char) */
#define INIT_DATA_BUF_SIZE 1024
#define INIT_ATTS_SIZE 16
#define INIT_BLOCK_SIZE 1024
#define INIT_BUFFER_SIZE 1024

#define EXPAND_SPARE 24
83:
84: typedef struct binding {
85: struct prefix *prefix;
86: struct binding *nextTagBinding;
87: struct binding *prevPrefixBinding;
88: const struct attribute_id *attId;
89: XML_Char *uri;
90: int uriLen;
91: int uriAlloc;
92: } BINDING;
93:
94: typedef struct prefix {
95: const XML_Char *name;
96: BINDING *binding;
97: } PREFIX;
98:
99: typedef struct {
100: const XML_Char *str;
101: const XML_Char *localPart;
102: int uriLen;
103: } TAG_NAME;
104:
1.1 frystyk 105: typedef struct tag {
106: struct tag *parent;
107: const char *rawName;
108: int rawNameLength;
1.3 kahan 109: TAG_NAME name;
1.1 frystyk 110: char *buf;
111: char *bufEnd;
1.3 kahan 112: BINDING *bindings;
1.1 frystyk 113: } TAG;
114:
115: typedef struct {
116: const XML_Char *name;
117: const XML_Char *textPtr;
118: int textLen;
119: const XML_Char *systemId;
120: const XML_Char *base;
121: const XML_Char *publicId;
122: const XML_Char *notation;
123: char open;
124: } ENTITY;
125:
126: typedef struct block {
127: struct block *next;
128: int size;
129: XML_Char s[1];
130: } BLOCK;
131:
132: typedef struct {
133: BLOCK *blocks;
134: BLOCK *freeBlocks;
135: const XML_Char *end;
136: XML_Char *ptr;
137: XML_Char *start;
138: } STRING_POOL;
139:
140: /* The XML_Char before the name is used to determine whether
141: an attribute has been specified. */
1.3 kahan 142: typedef struct attribute_id {
1.1 frystyk 143: XML_Char *name;
1.3 kahan 144: PREFIX *prefix;
1.1 frystyk 145: char maybeTokenized;
1.3 kahan 146: char xmlns;
1.1 frystyk 147: } ATTRIBUTE_ID;
148:
149: typedef struct {
150: const ATTRIBUTE_ID *id;
151: char isCdata;
152: const XML_Char *value;
153: } DEFAULT_ATTRIBUTE;
154:
155: typedef struct {
156: const XML_Char *name;
1.3 kahan 157: PREFIX *prefix;
1.1 frystyk 158: int nDefaultAtts;
159: int allocDefaultAtts;
160: DEFAULT_ATTRIBUTE *defaultAtts;
161: } ELEMENT_TYPE;
162:
163: typedef struct {
164: HASH_TABLE generalEntities;
165: HASH_TABLE elementTypes;
166: HASH_TABLE attributeIds;
1.3 kahan 167: HASH_TABLE prefixes;
1.1 frystyk 168: STRING_POOL pool;
169: int complete;
170: int standalone;
1.4 kahan 171: #ifdef XML_DTD
172: HASH_TABLE paramEntities;
173: #endif /* XML_DTD */
1.3 kahan 174: PREFIX defaultPrefix;
1.1 frystyk 175: } DTD;
176:
1.3 kahan 177: typedef struct open_internal_entity {
178: const char *internalEventPtr;
179: const char *internalEventEndPtr;
180: struct open_internal_entity *next;
181: ENTITY *entity;
182: } OPEN_INTERNAL_ENTITY;
183:
1.1 frystyk 184: typedef enum XML_Error Processor(XML_Parser parser,
185: const char *start,
186: const char *end,
187: const char **endPtr);
188:
189: static Processor prologProcessor;
190: static Processor prologInitProcessor;
191: static Processor contentProcessor;
192: static Processor cdataSectionProcessor;
1.4 kahan 193: #ifdef XML_DTD
194: static Processor ignoreSectionProcessor;
195: #endif /* XML_DTD */
1.1 frystyk 196: static Processor epilogProcessor;
197: static Processor errorProcessor;
198: static Processor externalEntityInitProcessor;
199: static Processor externalEntityInitProcessor2;
200: static Processor externalEntityInitProcessor3;
201: static Processor externalEntityContentProcessor;
202:
203: static enum XML_Error
204: handleUnknownEncoding(XML_Parser parser, const XML_Char *encodingName);
205: static enum XML_Error
206: processXmlDecl(XML_Parser parser, int isGeneralTextEntity, const char *, const char *);
207: static enum XML_Error
208: initializeEncoding(XML_Parser parser);
209: static enum XML_Error
1.4 kahan 210: doProlog(XML_Parser parser, const ENCODING *enc, const char *s,
211: const char *end, int tok, const char *next, const char **nextPtr);
212: static enum XML_Error
213: processInternalParamEntity(XML_Parser parser, ENTITY *entity);
214: static enum XML_Error
1.1 frystyk 215: doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc,
216: const char *start, const char *end, const char **endPtr);
217: static enum XML_Error
218: doCdataSection(XML_Parser parser, const ENCODING *, const char **startPtr, const char *end, const char **nextPtr);
1.4 kahan 219: #ifdef XML_DTD
220: static enum XML_Error
221: doIgnoreSection(XML_Parser parser, const ENCODING *, const char **startPtr, const char *end, const char **nextPtr);
222: #endif /* XML_DTD */
1.3 kahan 223: static enum XML_Error storeAtts(XML_Parser parser, const ENCODING *, const char *s,
224: TAG_NAME *tagNamePtr, BINDING **bindingsPtr);
225: static
226: int addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId, const XML_Char *uri, BINDING **bindingsPtr);
1.1 frystyk 227: static int
228: defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *, int isCdata, const XML_Char *dfltValue);
229: static enum XML_Error
230: storeAttributeValue(XML_Parser parser, const ENCODING *, int isCdata, const char *, const char *,
231: STRING_POOL *);
232: static enum XML_Error
233: appendAttributeValue(XML_Parser parser, const ENCODING *, int isCdata, const char *, const char *,
234: STRING_POOL *);
235: static ATTRIBUTE_ID *
236: getAttributeId(XML_Parser parser, const ENCODING *enc, const char *start, const char *end);
1.3 kahan 237: static int setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *);
1.1 frystyk 238: static enum XML_Error
1.4 kahan 239: storeEntityValue(XML_Parser parser, const ENCODING *enc, const char *start, const char *end);
1.1 frystyk 240: static int
241: reportProcessingInstruction(XML_Parser parser, const ENCODING *enc, const char *start, const char *end);
1.3 kahan 242: static int
243: reportComment(XML_Parser parser, const ENCODING *enc, const char *start, const char *end);
1.1 frystyk 244: static void
245: reportDefault(XML_Parser parser, const ENCODING *enc, const char *start, const char *end);
246:
1.3 kahan 247: static const XML_Char *getContext(XML_Parser parser);
248: static int setContext(XML_Parser parser, const XML_Char *context);
1.1 frystyk 249: static void normalizePublicId(XML_Char *s);
250: static int dtdInit(DTD *);
251: static void dtdDestroy(DTD *);
252: static int dtdCopy(DTD *newDtd, const DTD *oldDtd);
1.4 kahan 253: static int copyEntityTable(HASH_TABLE *, STRING_POOL *, const HASH_TABLE *);
254: #ifdef XML_DTD
255: static void dtdSwap(DTD *, DTD *);
256: #endif /* XML_DTD */
1.1 frystyk 257: static void poolInit(STRING_POOL *);
258: static void poolClear(STRING_POOL *);
259: static void poolDestroy(STRING_POOL *);
260: static XML_Char *poolAppend(STRING_POOL *pool, const ENCODING *enc,
261: const char *ptr, const char *end);
262: static XML_Char *poolStoreString(STRING_POOL *pool, const ENCODING *enc,
263: const char *ptr, const char *end);
264: static int poolGrow(STRING_POOL *pool);
265: static const XML_Char *poolCopyString(STRING_POOL *pool, const XML_Char *s);
266: static const XML_Char *poolCopyStringN(STRING_POOL *pool, const XML_Char *s, int n);
267:
/* Accessors for a STRING_POOL; `pool` is a pointer expression and may be
   evaluated more than once.

   poolStart / poolEnd   bounds of the string being built
   poolLength            number of XML_Chars between start and ptr
   poolChop              drop the last character (decrements ptr)
   poolLastChar          the character just before ptr
   poolDiscard           reset ptr back to start
   poolFinish            move start up to ptr
   poolAppendChar        store c at ptr and advance; when ptr has reached
                         end, poolGrow is called first, and the macro
                         yields 0 if poolGrow returns 0, 1 otherwise.

   Every use of a macro parameter is parenthesized so that arguments such
   as `&somePool` or a conditional expression expand correctly. */
#define poolStart(pool) ((pool)->start)
#define poolEnd(pool) ((pool)->ptr)
#define poolLength(pool) ((pool)->ptr - (pool)->start)
#define poolChop(pool) ((void)--((pool)->ptr))
#define poolLastChar(pool) (((pool)->ptr)[-1])
#define poolDiscard(pool) ((pool)->ptr = (pool)->start)
#define poolFinish(pool) ((pool)->start = (pool)->ptr)
#define poolAppendChar(pool, c) \
  (((pool)->ptr == (pool)->end && !poolGrow(pool)) \
   ? 0 \
   : ((*((pool)->ptr)++ = (c)), 1))
279:
280: typedef struct {
281: /* The first member must be userData so that the XML_GetUserData macro works. */
1.3 kahan 282: void *m_userData;
283: void *m_handlerArg;
284: char *m_buffer;
1.1 frystyk 285: /* first character to be parsed */
1.3 kahan 286: const char *m_bufferPtr;
1.1 frystyk 287: /* past last character to be parsed */
1.3 kahan 288: char *m_bufferEnd;
1.1 frystyk 289: /* allocated end of buffer */
1.3 kahan 290: const char *m_bufferLim;
291: long m_parseEndByteIndex;
292: const char *m_parseEndPtr;
293: XML_Char *m_dataBuf;
294: XML_Char *m_dataBufEnd;
295: XML_StartElementHandler m_startElementHandler;
296: XML_EndElementHandler m_endElementHandler;
297: XML_CharacterDataHandler m_characterDataHandler;
298: XML_ProcessingInstructionHandler m_processingInstructionHandler;
299: XML_CommentHandler m_commentHandler;
300: XML_StartCdataSectionHandler m_startCdataSectionHandler;
301: XML_EndCdataSectionHandler m_endCdataSectionHandler;
302: XML_DefaultHandler m_defaultHandler;
1.4 kahan 303: XML_StartDoctypeDeclHandler m_startDoctypeDeclHandler;
304: XML_EndDoctypeDeclHandler m_endDoctypeDeclHandler;
1.3 kahan 305: XML_UnparsedEntityDeclHandler m_unparsedEntityDeclHandler;
306: XML_NotationDeclHandler m_notationDeclHandler;
307: XML_StartNamespaceDeclHandler m_startNamespaceDeclHandler;
308: XML_EndNamespaceDeclHandler m_endNamespaceDeclHandler;
309: XML_NotStandaloneHandler m_notStandaloneHandler;
310: XML_ExternalEntityRefHandler m_externalEntityRefHandler;
311: void *m_externalEntityRefHandlerArg;
312: XML_UnknownEncodingHandler m_unknownEncodingHandler;
313: const ENCODING *m_encoding;
314: INIT_ENCODING m_initEncoding;
1.4 kahan 315: const ENCODING *m_internalEncoding;
1.3 kahan 316: const XML_Char *m_protocolEncodingName;
317: int m_ns;
318: void *m_unknownEncodingMem;
319: void *m_unknownEncodingData;
320: void *m_unknownEncodingHandlerData;
321: void (*m_unknownEncodingRelease)(void *);
322: PROLOG_STATE m_prologState;
323: Processor *m_processor;
324: enum XML_Error m_errorCode;
325: const char *m_eventPtr;
326: const char *m_eventEndPtr;
327: const char *m_positionPtr;
328: OPEN_INTERNAL_ENTITY *m_openInternalEntities;
329: int m_defaultExpandInternalEntities;
330: int m_tagLevel;
331: ENTITY *m_declEntity;
332: const XML_Char *m_declNotationName;
333: const XML_Char *m_declNotationPublicId;
334: ELEMENT_TYPE *m_declElementType;
335: ATTRIBUTE_ID *m_declAttributeId;
336: char m_declAttributeIsCdata;
337: DTD m_dtd;
1.4 kahan 338: const XML_Char *m_curBase;
1.3 kahan 339: TAG *m_tagStack;
340: TAG *m_freeTagList;
341: BINDING *m_inheritedBindings;
342: BINDING *m_freeBindingList;
343: int m_attsSize;
344: int m_nSpecifiedAtts;
345: ATTRIBUTE *m_atts;
346: POSITION m_position;
347: STRING_POOL m_tempPool;
348: STRING_POOL m_temp2Pool;
349: char *m_groupConnector;
350: unsigned m_groupSize;
351: int m_hadExternalDoctype;
352: XML_Char m_namespaceSeparator;
1.4 kahan 353: #ifdef XML_DTD
354: enum XML_ParamEntityParsing m_paramEntityParsing;
355: XML_Parser m_parentParser;
356: #endif
1.1 frystyk 357: } Parser;
358:
/* Field accessors.  Each macro expands to a member of the Parser that the
   in-scope variable named `parser` points to, so they are only usable
   inside functions that have such a variable, and no local variable may
   share a name with one of these macros. */
#define userData (((Parser *)parser)->m_userData)
#define handlerArg (((Parser *)parser)->m_handlerArg)
#define startElementHandler (((Parser *)parser)->m_startElementHandler)
#define endElementHandler (((Parser *)parser)->m_endElementHandler)
#define characterDataHandler (((Parser *)parser)->m_characterDataHandler)
#define processingInstructionHandler (((Parser *)parser)->m_processingInstructionHandler)
#define commentHandler (((Parser *)parser)->m_commentHandler)
#define startCdataSectionHandler (((Parser *)parser)->m_startCdataSectionHandler)
#define endCdataSectionHandler (((Parser *)parser)->m_endCdataSectionHandler)
#define defaultHandler (((Parser *)parser)->m_defaultHandler)
#define startDoctypeDeclHandler (((Parser *)parser)->m_startDoctypeDeclHandler)
#define endDoctypeDeclHandler (((Parser *)parser)->m_endDoctypeDeclHandler)
#define unparsedEntityDeclHandler (((Parser *)parser)->m_unparsedEntityDeclHandler)
#define notationDeclHandler (((Parser *)parser)->m_notationDeclHandler)
#define startNamespaceDeclHandler (((Parser *)parser)->m_startNamespaceDeclHandler)
#define endNamespaceDeclHandler (((Parser *)parser)->m_endNamespaceDeclHandler)
#define notStandaloneHandler (((Parser *)parser)->m_notStandaloneHandler)
#define externalEntityRefHandler (((Parser *)parser)->m_externalEntityRefHandler)
#define externalEntityRefHandlerArg (((Parser *)parser)->m_externalEntityRefHandlerArg)
#define unknownEncodingHandler (((Parser *)parser)->m_unknownEncodingHandler)
#define encoding (((Parser *)parser)->m_encoding)
#define initEncoding (((Parser *)parser)->m_initEncoding)
#define internalEncoding (((Parser *)parser)->m_internalEncoding)
#define unknownEncodingMem (((Parser *)parser)->m_unknownEncodingMem)
#define unknownEncodingData (((Parser *)parser)->m_unknownEncodingData)
#define unknownEncodingHandlerData \
  (((Parser *)parser)->m_unknownEncodingHandlerData)
#define unknownEncodingRelease (((Parser *)parser)->m_unknownEncodingRelease)
#define protocolEncodingName (((Parser *)parser)->m_protocolEncodingName)
#define ns (((Parser *)parser)->m_ns)
#define prologState (((Parser *)parser)->m_prologState)
#define processor (((Parser *)parser)->m_processor)
#define errorCode (((Parser *)parser)->m_errorCode)
#define eventPtr (((Parser *)parser)->m_eventPtr)
#define eventEndPtr (((Parser *)parser)->m_eventEndPtr)
#define positionPtr (((Parser *)parser)->m_positionPtr)
#define position (((Parser *)parser)->m_position)
#define openInternalEntities (((Parser *)parser)->m_openInternalEntities)
#define defaultExpandInternalEntities (((Parser *)parser)->m_defaultExpandInternalEntities)
#define tagLevel (((Parser *)parser)->m_tagLevel)
#define buffer (((Parser *)parser)->m_buffer)
#define bufferPtr (((Parser *)parser)->m_bufferPtr)
#define bufferEnd (((Parser *)parser)->m_bufferEnd)
#define parseEndByteIndex (((Parser *)parser)->m_parseEndByteIndex)
#define parseEndPtr (((Parser *)parser)->m_parseEndPtr)
#define bufferLim (((Parser *)parser)->m_bufferLim)
#define dataBuf (((Parser *)parser)->m_dataBuf)
#define dataBufEnd (((Parser *)parser)->m_dataBufEnd)
#define dtd (((Parser *)parser)->m_dtd)
#define curBase (((Parser *)parser)->m_curBase)
#define declEntity (((Parser *)parser)->m_declEntity)
#define declNotationName (((Parser *)parser)->m_declNotationName)
#define declNotationPublicId (((Parser *)parser)->m_declNotationPublicId)
#define declElementType (((Parser *)parser)->m_declElementType)
#define declAttributeId (((Parser *)parser)->m_declAttributeId)
#define declAttributeIsCdata (((Parser *)parser)->m_declAttributeIsCdata)
#define freeTagList (((Parser *)parser)->m_freeTagList)
#define freeBindingList (((Parser *)parser)->m_freeBindingList)
#define inheritedBindings (((Parser *)parser)->m_inheritedBindings)
#define tagStack (((Parser *)parser)->m_tagStack)
#define atts (((Parser *)parser)->m_atts)
#define attsSize (((Parser *)parser)->m_attsSize)
#define nSpecifiedAtts (((Parser *)parser)->m_nSpecifiedAtts)
#define tempPool (((Parser *)parser)->m_tempPool)
#define temp2Pool (((Parser *)parser)->m_temp2Pool)
#define groupConnector (((Parser *)parser)->m_groupConnector)
#define groupSize (((Parser *)parser)->m_groupSize)
#define hadExternalDoctype (((Parser *)parser)->m_hadExternalDoctype)
#define namespaceSeparator (((Parser *)parser)->m_namespaceSeparator)
#ifdef XML_DTD
#define parentParser (((Parser *)parser)->m_parentParser)
#define paramEntityParsing (((Parser *)parser)->m_paramEntityParsing)
#endif /* XML_DTD */
1.3 kahan 432:
/* Compiled only for MSVC debug builds: returns the opaque handle typed as
   Parser *. */
#if defined(_MSC_VER) && defined(_DEBUG)
Parser *asParser(XML_Parser parser)
{
  return parser;
}
#endif
1.1 frystyk 441:
442: XML_Parser XML_ParserCreate(const XML_Char *encodingName)
443: {
444: XML_Parser parser = malloc(sizeof(Parser));
445: if (!parser)
446: return parser;
447: processor = prologInitProcessor;
448: XmlPrologStateInit(&prologState);
449: userData = 0;
450: handlerArg = 0;
451: startElementHandler = 0;
452: endElementHandler = 0;
453: characterDataHandler = 0;
454: processingInstructionHandler = 0;
1.3 kahan 455: commentHandler = 0;
456: startCdataSectionHandler = 0;
457: endCdataSectionHandler = 0;
1.1 frystyk 458: defaultHandler = 0;
1.4 kahan 459: startDoctypeDeclHandler = 0;
460: endDoctypeDeclHandler = 0;
1.1 frystyk 461: unparsedEntityDeclHandler = 0;
462: notationDeclHandler = 0;
1.3 kahan 463: startNamespaceDeclHandler = 0;
464: endNamespaceDeclHandler = 0;
465: notStandaloneHandler = 0;
1.1 frystyk 466: externalEntityRefHandler = 0;
1.3 kahan 467: externalEntityRefHandlerArg = parser;
1.1 frystyk 468: unknownEncodingHandler = 0;
469: buffer = 0;
470: bufferPtr = 0;
471: bufferEnd = 0;
472: parseEndByteIndex = 0;
473: parseEndPtr = 0;
474: bufferLim = 0;
475: declElementType = 0;
476: declAttributeId = 0;
477: declEntity = 0;
478: declNotationName = 0;
479: declNotationPublicId = 0;
480: memset(&position, 0, sizeof(POSITION));
481: errorCode = XML_ERROR_NONE;
482: eventPtr = 0;
483: eventEndPtr = 0;
484: positionPtr = 0;
1.3 kahan 485: openInternalEntities = 0;
1.1 frystyk 486: tagLevel = 0;
487: tagStack = 0;
488: freeTagList = 0;
1.3 kahan 489: freeBindingList = 0;
490: inheritedBindings = 0;
1.1 frystyk 491: attsSize = INIT_ATTS_SIZE;
492: atts = malloc(attsSize * sizeof(ATTRIBUTE));
1.3 kahan 493: nSpecifiedAtts = 0;
1.1 frystyk 494: dataBuf = malloc(INIT_DATA_BUF_SIZE * sizeof(XML_Char));
495: groupSize = 0;
496: groupConnector = 0;
497: hadExternalDoctype = 0;
498: unknownEncodingMem = 0;
499: unknownEncodingRelease = 0;
500: unknownEncodingData = 0;
501: unknownEncodingHandlerData = 0;
1.3 kahan 502: namespaceSeparator = '!';
1.4 kahan 503: #ifdef XML_DTD
504: parentParser = 0;
505: paramEntityParsing = XML_PARAM_ENTITY_PARSING_NEVER;
506: #endif
1.3 kahan 507: ns = 0;
1.1 frystyk 508: poolInit(&tempPool);
509: poolInit(&temp2Pool);
510: protocolEncodingName = encodingName ? poolCopyString(&tempPool, encodingName) : 0;
1.4 kahan 511: curBase = 0;
1.1 frystyk 512: if (!dtdInit(&dtd) || !atts || !dataBuf
513: || (encodingName && !protocolEncodingName)) {
514: XML_ParserFree(parser);
515: return 0;
516: }
517: dataBufEnd = dataBuf + INIT_DATA_BUF_SIZE;
518: XmlInitEncoding(&initEncoding, &encoding, 0);
1.4 kahan 519: internalEncoding = XmlGetInternalEncoding();
1.1 frystyk 520: return parser;
521: }
522:
1.3 kahan 523: XML_Parser XML_ParserCreateNS(const XML_Char *encodingName, XML_Char nsSep)
524: {
525: static
526: const XML_Char implicitContext[] = {
527: XML_T('x'), XML_T('m'), XML_T('l'), XML_T('='),
528: XML_T('h'), XML_T('t'), XML_T('t'), XML_T('p'), XML_T(':'),
529: XML_T('/'), XML_T('/'), XML_T('w'), XML_T('w'), XML_T('w'),
530: XML_T('.'), XML_T('w'), XML_T('3'),
531: XML_T('.'), XML_T('o'), XML_T('r'), XML_T('g'),
532: XML_T('/'), XML_T('X'), XML_T('M'), XML_T('L'),
533: XML_T('/'), XML_T('1'), XML_T('9'), XML_T('9'), XML_T('8'),
534: XML_T('/'), XML_T('n'), XML_T('a'), XML_T('m'), XML_T('e'),
535: XML_T('s'), XML_T('p'), XML_T('a'), XML_T('c'), XML_T('e'),
536: XML_T('\0')
537: };
538:
539: XML_Parser parser = XML_ParserCreate(encodingName);
540: if (parser) {
541: XmlInitEncodingNS(&initEncoding, &encoding, 0);
542: ns = 1;
1.4 kahan 543: internalEncoding = XmlGetInternalEncodingNS();
1.3 kahan 544: namespaceSeparator = nsSep;
545: }
546: if (!setContext(parser, implicitContext)) {
547: XML_ParserFree(parser);
548: return 0;
549: }
550: return parser;
551: }
552:
553: int XML_SetEncoding(XML_Parser parser, const XML_Char *encodingName)
554: {
555: if (!encodingName)
556: protocolEncodingName = 0;
557: else {
558: protocolEncodingName = poolCopyString(&tempPool, encodingName);
559: if (!protocolEncodingName)
560: return 0;
561: }
562: return 1;
563: }
564:
1.1 frystyk 565: XML_Parser XML_ExternalEntityParserCreate(XML_Parser oldParser,
1.3 kahan 566: const XML_Char *context,
1.1 frystyk 567: const XML_Char *encodingName)
568: {
569: XML_Parser parser = oldParser;
570: DTD *oldDtd = &dtd;
571: XML_StartElementHandler oldStartElementHandler = startElementHandler;
572: XML_EndElementHandler oldEndElementHandler = endElementHandler;
573: XML_CharacterDataHandler oldCharacterDataHandler = characterDataHandler;
574: XML_ProcessingInstructionHandler oldProcessingInstructionHandler = processingInstructionHandler;
1.3 kahan 575: XML_CommentHandler oldCommentHandler = commentHandler;
576: XML_StartCdataSectionHandler oldStartCdataSectionHandler = startCdataSectionHandler;
577: XML_EndCdataSectionHandler oldEndCdataSectionHandler = endCdataSectionHandler;
1.1 frystyk 578: XML_DefaultHandler oldDefaultHandler = defaultHandler;
1.3 kahan 579: XML_StartNamespaceDeclHandler oldStartNamespaceDeclHandler = startNamespaceDeclHandler;
580: XML_EndNamespaceDeclHandler oldEndNamespaceDeclHandler = endNamespaceDeclHandler;
581: XML_NotStandaloneHandler oldNotStandaloneHandler = notStandaloneHandler;
1.1 frystyk 582: XML_ExternalEntityRefHandler oldExternalEntityRefHandler = externalEntityRefHandler;
583: XML_UnknownEncodingHandler oldUnknownEncodingHandler = unknownEncodingHandler;
584: void *oldUserData = userData;
585: void *oldHandlerArg = handlerArg;
1.3 kahan 586: int oldDefaultExpandInternalEntities = defaultExpandInternalEntities;
587: void *oldExternalEntityRefHandlerArg = externalEntityRefHandlerArg;
1.4 kahan 588: #ifdef XML_DTD
589: int oldParamEntityParsing = paramEntityParsing;
590: #endif
1.3 kahan 591: parser = (ns
592: ? XML_ParserCreateNS(encodingName, namespaceSeparator)
593: : XML_ParserCreate(encodingName));
1.1 frystyk 594: if (!parser)
595: return 0;
596: startElementHandler = oldStartElementHandler;
597: endElementHandler = oldEndElementHandler;
598: characterDataHandler = oldCharacterDataHandler;
599: processingInstructionHandler = oldProcessingInstructionHandler;
1.3 kahan 600: commentHandler = oldCommentHandler;
601: startCdataSectionHandler = oldStartCdataSectionHandler;
602: endCdataSectionHandler = oldEndCdataSectionHandler;
1.1 frystyk 603: defaultHandler = oldDefaultHandler;
1.3 kahan 604: startNamespaceDeclHandler = oldStartNamespaceDeclHandler;
605: endNamespaceDeclHandler = oldEndNamespaceDeclHandler;
606: notStandaloneHandler = oldNotStandaloneHandler;
1.1 frystyk 607: externalEntityRefHandler = oldExternalEntityRefHandler;
608: unknownEncodingHandler = oldUnknownEncodingHandler;
609: userData = oldUserData;
610: if (oldUserData == oldHandlerArg)
611: handlerArg = userData;
612: else
613: handlerArg = parser;
1.3 kahan 614: if (oldExternalEntityRefHandlerArg != oldParser)
615: externalEntityRefHandlerArg = oldExternalEntityRefHandlerArg;
616: defaultExpandInternalEntities = oldDefaultExpandInternalEntities;
1.4 kahan 617: #ifdef XML_DTD
618: paramEntityParsing = oldParamEntityParsing;
619: if (context) {
620: #endif /* XML_DTD */
621: if (!dtdCopy(&dtd, oldDtd) || !setContext(parser, context)) {
622: XML_ParserFree(parser);
623: return 0;
624: }
625: processor = externalEntityInitProcessor;
626: #ifdef XML_DTD
627: }
628: else {
629: dtdSwap(&dtd, oldDtd);
630: parentParser = oldParser;
631: XmlPrologStateInitExternalEntity(&prologState);
632: dtd.complete = 1;
633: hadExternalDoctype = 1;
1.1 frystyk 634: }
1.4 kahan 635: #endif /* XML_DTD */
1.1 frystyk 636: return parser;
637: }
638:
1.3 kahan 639: static
640: void destroyBindings(BINDING *bindings)
641: {
642: for (;;) {
643: BINDING *b = bindings;
644: if (!b)
645: break;
646: bindings = b->nextTagBinding;
647: free(b->uri);
648: free(b);
649: }
650: }
651:
1.1 frystyk 652: void XML_ParserFree(XML_Parser parser)
653: {
654: for (;;) {
655: TAG *p;
656: if (tagStack == 0) {
657: if (freeTagList == 0)
658: break;
659: tagStack = freeTagList;
660: freeTagList = 0;
661: }
662: p = tagStack;
663: tagStack = tagStack->parent;
664: free(p->buf);
1.3 kahan 665: destroyBindings(p->bindings);
1.1 frystyk 666: free(p);
667: }
1.3 kahan 668: destroyBindings(freeBindingList);
669: destroyBindings(inheritedBindings);
1.1 frystyk 670: poolDestroy(&tempPool);
671: poolDestroy(&temp2Pool);
1.4 kahan 672: #ifdef XML_DTD
673: if (parentParser) {
674: if (hadExternalDoctype)
675: dtd.complete = 0;
676: dtdSwap(&dtd, &((Parser *)parentParser)->m_dtd);
677: }
678: #endif /* XML_DTD */
1.1 frystyk 679: dtdDestroy(&dtd);
680: free((void *)atts);
681: free(groupConnector);
682: free(buffer);
683: free(dataBuf);
684: free(unknownEncodingMem);
685: if (unknownEncodingRelease)
686: unknownEncodingRelease(unknownEncodingData);
687: free(parser);
688: }
689:
690: void XML_UseParserAsHandlerArg(XML_Parser parser)
691: {
692: handlerArg = parser;
693: }
694:
695: void XML_SetUserData(XML_Parser parser, void *p)
696: {
697: if (handlerArg == userData)
698: handlerArg = userData = p;
699: else
700: userData = p;
701: }
702:
703: int XML_SetBase(XML_Parser parser, const XML_Char *p)
704: {
705: if (p) {
706: p = poolCopyString(&dtd.pool, p);
707: if (!p)
708: return 0;
1.4 kahan 709: curBase = p;
1.1 frystyk 710: }
711: else
1.4 kahan 712: curBase = 0;
1.1 frystyk 713: return 1;
714: }
715:
716: const XML_Char *XML_GetBase(XML_Parser parser)
717: {
1.4 kahan 718: return curBase;
1.1 frystyk 719: }
720:
1.3 kahan 721: int XML_GetSpecifiedAttributeCount(XML_Parser parser)
722: {
723: return nSpecifiedAtts;
724: }
725:
1.1 frystyk 726: void XML_SetElementHandler(XML_Parser parser,
727: XML_StartElementHandler start,
728: XML_EndElementHandler end)
729: {
730: startElementHandler = start;
731: endElementHandler = end;
732: }
733:
734: void XML_SetCharacterDataHandler(XML_Parser parser,
735: XML_CharacterDataHandler handler)
736: {
737: characterDataHandler = handler;
738: }
739:
740: void XML_SetProcessingInstructionHandler(XML_Parser parser,
741: XML_ProcessingInstructionHandler handler)
742: {
743: processingInstructionHandler = handler;
744: }
745:
1.3 kahan 746: void XML_SetCommentHandler(XML_Parser parser,
747: XML_CommentHandler handler)
748: {
749: commentHandler = handler;
750: }
751:
752: void XML_SetCdataSectionHandler(XML_Parser parser,
753: XML_StartCdataSectionHandler start,
754: XML_EndCdataSectionHandler end)
755: {
756: startCdataSectionHandler = start;
757: endCdataSectionHandler = end;
758: }
759:
1.1 frystyk 760: void XML_SetDefaultHandler(XML_Parser parser,
761: XML_DefaultHandler handler)
762: {
763: defaultHandler = handler;
1.3 kahan 764: defaultExpandInternalEntities = 0;
765: }
766:
767: void XML_SetDefaultHandlerExpand(XML_Parser parser,
768: XML_DefaultHandler handler)
769: {
770: defaultHandler = handler;
771: defaultExpandInternalEntities = 1;
1.1 frystyk 772: }
773:
1.4 kahan 774: void XML_SetDoctypeDeclHandler(XML_Parser parser,
775: XML_StartDoctypeDeclHandler start,
776: XML_EndDoctypeDeclHandler end)
777: {
778: startDoctypeDeclHandler = start;
779: endDoctypeDeclHandler = end;
780: }
781:
1.1 frystyk 782: void XML_SetUnparsedEntityDeclHandler(XML_Parser parser,
783: XML_UnparsedEntityDeclHandler handler)
784: {
785: unparsedEntityDeclHandler = handler;
786: }
787:
788: void XML_SetNotationDeclHandler(XML_Parser parser,
789: XML_NotationDeclHandler handler)
790: {
791: notationDeclHandler = handler;
792: }
793:
1.3 kahan 794: void XML_SetNamespaceDeclHandler(XML_Parser parser,
795: XML_StartNamespaceDeclHandler start,
796: XML_EndNamespaceDeclHandler end)
797: {
798: startNamespaceDeclHandler = start;
799: endNamespaceDeclHandler = end;
800: }
801:
802: void XML_SetNotStandaloneHandler(XML_Parser parser,
803: XML_NotStandaloneHandler handler)
804: {
805: notStandaloneHandler = handler;
806: }
807:
1.1 frystyk 808: void XML_SetExternalEntityRefHandler(XML_Parser parser,
809: XML_ExternalEntityRefHandler handler)
810: {
811: externalEntityRefHandler = handler;
812: }
813:
1.3 kahan 814: void XML_SetExternalEntityRefHandlerArg(XML_Parser parser, void *arg)
815: {
816: if (arg)
817: externalEntityRefHandlerArg = arg;
818: else
819: externalEntityRefHandlerArg = parser;
820: }
821:
1.1 frystyk 822: void XML_SetUnknownEncodingHandler(XML_Parser parser,
823: XML_UnknownEncodingHandler handler,
824: void *data)
825: {
826: unknownEncodingHandler = handler;
827: unknownEncodingHandlerData = data;
828: }
829:
1.4 kahan 830: int XML_SetParamEntityParsing(XML_Parser parser,
831: enum XML_ParamEntityParsing parsing)
832: {
833: #ifdef XML_DTD
834: paramEntityParsing = parsing;
835: return 1;
836: #else
837: return parsing == XML_PARAM_ENTITY_PARSING_NEVER;
838: #endif
839: }
840:
1.1 frystyk 841: int XML_Parse(XML_Parser parser, const char *s, int len, int isFinal)
842: {
843: if (len == 0) {
844: if (!isFinal)
845: return 1;
1.3 kahan 846: positionPtr = bufferPtr;
1.1 frystyk 847: errorCode = processor(parser, bufferPtr, parseEndPtr = bufferEnd, 0);
848: if (errorCode == XML_ERROR_NONE)
849: return 1;
850: eventEndPtr = eventPtr;
1.4 kahan 851: processor = errorProcessor;
1.1 frystyk 852: return 0;
853: }
854: else if (bufferPtr == bufferEnd) {
855: const char *end;
856: int nLeftOver;
857: parseEndByteIndex += len;
858: positionPtr = s;
859: if (isFinal) {
860: errorCode = processor(parser, s, parseEndPtr = s + len, 0);
861: if (errorCode == XML_ERROR_NONE)
862: return 1;
863: eventEndPtr = eventPtr;
1.4 kahan 864: processor = errorProcessor;
1.1 frystyk 865: return 0;
866: }
867: errorCode = processor(parser, s, parseEndPtr = s + len, &end);
868: if (errorCode != XML_ERROR_NONE) {
869: eventEndPtr = eventPtr;
1.4 kahan 870: processor = errorProcessor;
1.1 frystyk 871: return 0;
872: }
873: XmlUpdatePosition(encoding, positionPtr, end, &position);
874: nLeftOver = s + len - end;
875: if (nLeftOver) {
876: if (buffer == 0 || nLeftOver > bufferLim - buffer) {
877: /* FIXME avoid integer overflow */
878: buffer = buffer == 0 ? malloc(len * 2) : realloc(buffer, len * 2);
1.4 kahan 879: /* FIXME storage leak if realloc fails */
1.1 frystyk 880: if (!buffer) {
881: errorCode = XML_ERROR_NO_MEMORY;
882: eventPtr = eventEndPtr = 0;
1.4 kahan 883: processor = errorProcessor;
1.1 frystyk 884: return 0;
885: }
886: bufferLim = buffer + len * 2;
887: }
888: memcpy(buffer, end, nLeftOver);
889: bufferPtr = buffer;
890: bufferEnd = buffer + nLeftOver;
891: }
892: return 1;
893: }
894: else {
895: memcpy(XML_GetBuffer(parser, len), s, len);
896: return XML_ParseBuffer(parser, len, isFinal);
897: }
898: }
899:
900: int XML_ParseBuffer(XML_Parser parser, int len, int isFinal)
901: {
902: const char *start = bufferPtr;
903: positionPtr = start;
904: bufferEnd += len;
905: parseEndByteIndex += len;
906: errorCode = processor(parser, start, parseEndPtr = bufferEnd,
907: isFinal ? (const char **)0 : &bufferPtr);
908: if (errorCode == XML_ERROR_NONE) {
909: if (!isFinal)
910: XmlUpdatePosition(encoding, positionPtr, bufferPtr, &position);
911: return 1;
912: }
913: else {
914: eventEndPtr = eventPtr;
1.4 kahan 915: processor = errorProcessor;
1.1 frystyk 916: return 0;
917: }
918: }
919:
920: void *XML_GetBuffer(XML_Parser parser, int len)
921: {
922: if (len > bufferLim - bufferEnd) {
923: /* FIXME avoid integer overflow */
924: int neededSize = len + (bufferEnd - bufferPtr);
925: if (neededSize <= bufferLim - buffer) {
926: memmove(buffer, bufferPtr, bufferEnd - bufferPtr);
927: bufferEnd = buffer + (bufferEnd - bufferPtr);
928: bufferPtr = buffer;
929: }
930: else {
931: char *newBuf;
932: int bufferSize = bufferLim - bufferPtr;
933: if (bufferSize == 0)
934: bufferSize = INIT_BUFFER_SIZE;
935: do {
936: bufferSize *= 2;
937: } while (bufferSize < neededSize);
938: newBuf = malloc(bufferSize);
939: if (newBuf == 0) {
940: errorCode = XML_ERROR_NO_MEMORY;
941: return 0;
942: }
943: bufferLim = newBuf + bufferSize;
944: if (bufferPtr) {
945: memcpy(newBuf, bufferPtr, bufferEnd - bufferPtr);
946: free(buffer);
947: }
948: bufferEnd = newBuf + (bufferEnd - bufferPtr);
949: bufferPtr = buffer = newBuf;
950: }
951: }
952: return bufferEnd;
953: }
954:
955: enum XML_Error XML_GetErrorCode(XML_Parser parser)
956: {
957: return errorCode;
958: }
959:
960: long XML_GetCurrentByteIndex(XML_Parser parser)
961: {
962: if (eventPtr)
963: return parseEndByteIndex - (parseEndPtr - eventPtr);
964: return -1;
965: }
966:
1.3 kahan 967: int XML_GetCurrentByteCount(XML_Parser parser)
968: {
969: if (eventEndPtr && eventPtr)
970: return eventEndPtr - eventPtr;
971: return 0;
972: }
973:
1.1 frystyk 974: int XML_GetCurrentLineNumber(XML_Parser parser)
975: {
976: if (eventPtr) {
977: XmlUpdatePosition(encoding, positionPtr, eventPtr, &position);
978: positionPtr = eventPtr;
979: }
980: return position.lineNumber + 1;
981: }
982:
983: int XML_GetCurrentColumnNumber(XML_Parser parser)
984: {
985: if (eventPtr) {
986: XmlUpdatePosition(encoding, positionPtr, eventPtr, &position);
987: positionPtr = eventPtr;
988: }
989: return position.columnNumber;
990: }
991:
992: void XML_DefaultCurrent(XML_Parser parser)
993: {
1.3 kahan 994: if (defaultHandler) {
995: if (openInternalEntities)
996: reportDefault(parser,
1.4 kahan 997: internalEncoding,
1.3 kahan 998: openInternalEntities->internalEventPtr,
999: openInternalEntities->internalEventEndPtr);
1000: else
1001: reportDefault(parser, encoding, eventPtr, eventEndPtr);
1002: }
1.1 frystyk 1003: }
1004:
1005: const XML_LChar *XML_ErrorString(int code)
1006: {
1007: static const XML_LChar *message[] = {
1008: 0,
1009: XML_T("out of memory"),
1010: XML_T("syntax error"),
1011: XML_T("no element found"),
1012: XML_T("not well-formed"),
1013: XML_T("unclosed token"),
1014: XML_T("unclosed token"),
1015: XML_T("mismatched tag"),
1016: XML_T("duplicate attribute"),
1017: XML_T("junk after document element"),
1018: XML_T("illegal parameter entity reference"),
1019: XML_T("undefined entity"),
1020: XML_T("recursive entity reference"),
1021: XML_T("asynchronous entity"),
1022: XML_T("reference to invalid character number"),
1023: XML_T("reference to binary entity"),
1024: XML_T("reference to external entity in attribute"),
1025: XML_T("xml processing instruction not at start of external entity"),
1026: XML_T("unknown encoding"),
1027: XML_T("encoding specified in XML declaration is incorrect"),
1028: XML_T("unclosed CDATA section"),
1.3 kahan 1029: XML_T("error in processing external entity reference"),
1030: XML_T("document is not standalone")
1.1 frystyk 1031: };
1032: if (code > 0 && code < sizeof(message)/sizeof(message[0]))
1033: return message[code];
1034: return 0;
1035: }
1036:
1037: static
1038: enum XML_Error contentProcessor(XML_Parser parser,
1039: const char *start,
1040: const char *end,
1041: const char **endPtr)
1042: {
1043: return doContent(parser, 0, encoding, start, end, endPtr);
1044: }
1045:
1046: static
1047: enum XML_Error externalEntityInitProcessor(XML_Parser parser,
1048: const char *start,
1049: const char *end,
1050: const char **endPtr)
1051: {
1052: enum XML_Error result = initializeEncoding(parser);
1053: if (result != XML_ERROR_NONE)
1054: return result;
1055: processor = externalEntityInitProcessor2;
1056: return externalEntityInitProcessor2(parser, start, end, endPtr);
1057: }
1058:
1059: static
1060: enum XML_Error externalEntityInitProcessor2(XML_Parser parser,
1061: const char *start,
1062: const char *end,
1063: const char **endPtr)
1064: {
1065: const char *next;
1066: int tok = XmlContentTok(encoding, start, end, &next);
1067: switch (tok) {
1068: case XML_TOK_BOM:
1069: start = next;
1070: break;
1071: case XML_TOK_PARTIAL:
1072: if (endPtr) {
1073: *endPtr = start;
1074: return XML_ERROR_NONE;
1075: }
1076: eventPtr = start;
1077: return XML_ERROR_UNCLOSED_TOKEN;
1078: case XML_TOK_PARTIAL_CHAR:
1079: if (endPtr) {
1080: *endPtr = start;
1081: return XML_ERROR_NONE;
1082: }
1083: eventPtr = start;
1084: return XML_ERROR_PARTIAL_CHAR;
1085: }
1086: processor = externalEntityInitProcessor3;
1087: return externalEntityInitProcessor3(parser, start, end, endPtr);
1088: }
1089:
1090: static
1091: enum XML_Error externalEntityInitProcessor3(XML_Parser parser,
1092: const char *start,
1093: const char *end,
1094: const char **endPtr)
1095: {
1096: const char *next;
1097: int tok = XmlContentTok(encoding, start, end, &next);
1098: switch (tok) {
1099: case XML_TOK_XML_DECL:
1100: {
1101: enum XML_Error result = processXmlDecl(parser, 1, start, next);
1102: if (result != XML_ERROR_NONE)
1103: return result;
1104: start = next;
1105: }
1106: break;
1107: case XML_TOK_PARTIAL:
1108: if (endPtr) {
1109: *endPtr = start;
1110: return XML_ERROR_NONE;
1111: }
1112: eventPtr = start;
1113: return XML_ERROR_UNCLOSED_TOKEN;
1114: case XML_TOK_PARTIAL_CHAR:
1115: if (endPtr) {
1116: *endPtr = start;
1117: return XML_ERROR_NONE;
1118: }
1119: eventPtr = start;
1120: return XML_ERROR_PARTIAL_CHAR;
1121: }
1122: processor = externalEntityContentProcessor;
1123: tagLevel = 1;
1124: return doContent(parser, 1, encoding, start, end, endPtr);
1125: }
1126:
1127: static
1128: enum XML_Error externalEntityContentProcessor(XML_Parser parser,
1129: const char *start,
1130: const char *end,
1131: const char **endPtr)
1132: {
1133: return doContent(parser, 1, encoding, start, end, endPtr);
1134: }
1135:
1136: static enum XML_Error
1137: doContent(XML_Parser parser,
1138: int startTagLevel,
1139: const ENCODING *enc,
1140: const char *s,
1141: const char *end,
1142: const char **nextPtr)
1143: {
1144: const char **eventPP;
1145: const char **eventEndPP;
1146: if (enc == encoding) {
1147: eventPP = &eventPtr;
1148: eventEndPP = &eventEndPtr;
1149: }
1.3 kahan 1150: else {
1151: eventPP = &(openInternalEntities->internalEventPtr);
1152: eventEndPP = &(openInternalEntities->internalEventEndPtr);
1153: }
1154: *eventPP = s;
1.1 frystyk 1155: for (;;) {
1.3 kahan 1156: const char *next = s; /* XmlContentTok doesn't always set the last arg */
1.1 frystyk 1157: int tok = XmlContentTok(enc, s, end, &next);
1158: *eventEndPP = next;
1159: switch (tok) {
1160: case XML_TOK_TRAILING_CR:
1161: if (nextPtr) {
1162: *nextPtr = s;
1163: return XML_ERROR_NONE;
1164: }
1165: *eventEndPP = end;
1166: if (characterDataHandler) {
1.3 kahan 1167: XML_Char c = 0xA;
1.1 frystyk 1168: characterDataHandler(handlerArg, &c, 1);
1169: }
1170: else if (defaultHandler)
1171: reportDefault(parser, enc, s, end);
1172: if (startTagLevel == 0)
1173: return XML_ERROR_NO_ELEMENTS;
1174: if (tagLevel != startTagLevel)
1175: return XML_ERROR_ASYNC_ENTITY;
1176: return XML_ERROR_NONE;
1177: case XML_TOK_NONE:
1178: if (nextPtr) {
1179: *nextPtr = s;
1180: return XML_ERROR_NONE;
1181: }
1182: if (startTagLevel > 0) {
1183: if (tagLevel != startTagLevel)
1184: return XML_ERROR_ASYNC_ENTITY;
1185: return XML_ERROR_NONE;
1186: }
1187: return XML_ERROR_NO_ELEMENTS;
1188: case XML_TOK_INVALID:
1189: *eventPP = next;
1190: return XML_ERROR_INVALID_TOKEN;
1191: case XML_TOK_PARTIAL:
1192: if (nextPtr) {
1193: *nextPtr = s;
1194: return XML_ERROR_NONE;
1195: }
1196: return XML_ERROR_UNCLOSED_TOKEN;
1197: case XML_TOK_PARTIAL_CHAR:
1198: if (nextPtr) {
1199: *nextPtr = s;
1200: return XML_ERROR_NONE;
1201: }
1202: return XML_ERROR_PARTIAL_CHAR;
1203: case XML_TOK_ENTITY_REF:
1204: {
1205: const XML_Char *name;
1206: ENTITY *entity;
1207: XML_Char ch = XmlPredefinedEntityName(enc,
1208: s + enc->minBytesPerChar,
1209: next - enc->minBytesPerChar);
1210: if (ch) {
1211: if (characterDataHandler)
1212: characterDataHandler(handlerArg, &ch, 1);
1213: else if (defaultHandler)
1214: reportDefault(parser, enc, s, next);
1215: break;
1216: }
1217: name = poolStoreString(&dtd.pool, enc,
1218: s + enc->minBytesPerChar,
1219: next - enc->minBytesPerChar);
1220: if (!name)
1221: return XML_ERROR_NO_MEMORY;
1222: entity = (ENTITY *)lookup(&dtd.generalEntities, name, 0);
1223: poolDiscard(&dtd.pool);
1224: if (!entity) {
1225: if (dtd.complete || dtd.standalone)
1226: return XML_ERROR_UNDEFINED_ENTITY;
1227: if (defaultHandler)
1228: reportDefault(parser, enc, s, next);
1229: break;
1230: }
1231: if (entity->open)
1232: return XML_ERROR_RECURSIVE_ENTITY_REF;
1233: if (entity->notation)
1234: return XML_ERROR_BINARY_ENTITY_REF;
1235: if (entity) {
1236: if (entity->textPtr) {
1237: enum XML_Error result;
1.3 kahan 1238: OPEN_INTERNAL_ENTITY openEntity;
1239: if (defaultHandler && !defaultExpandInternalEntities) {
1.1 frystyk 1240: reportDefault(parser, enc, s, next);
1241: break;
1242: }
1243: entity->open = 1;
1.3 kahan 1244: openEntity.next = openInternalEntities;
1245: openInternalEntities = &openEntity;
1246: openEntity.entity = entity;
1247: openEntity.internalEventPtr = 0;
1248: openEntity.internalEventEndPtr = 0;
1.1 frystyk 1249: result = doContent(parser,
1250: tagLevel,
1.4 kahan 1251: internalEncoding,
1.1 frystyk 1252: (char *)entity->textPtr,
1253: (char *)(entity->textPtr + entity->textLen),
1254: 0);
1255: entity->open = 0;
1.3 kahan 1256: openInternalEntities = openEntity.next;
1.1 frystyk 1257: if (result)
1258: return result;
1259: }
1260: else if (externalEntityRefHandler) {
1.3 kahan 1261: const XML_Char *context;
1.1 frystyk 1262: entity->open = 1;
1.3 kahan 1263: context = getContext(parser);
1.1 frystyk 1264: entity->open = 0;
1.3 kahan 1265: if (!context)
1.1 frystyk 1266: return XML_ERROR_NO_MEMORY;
1.3 kahan 1267: if (!externalEntityRefHandler(externalEntityRefHandlerArg,
1268: context,
1.4 kahan 1269: entity->base,
1.3 kahan 1270: entity->systemId,
1271: entity->publicId))
1.1 frystyk 1272: return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
1273: poolDiscard(&tempPool);
1274: }
1275: else if (defaultHandler)
1276: reportDefault(parser, enc, s, next);
1277: }
1278: break;
1279: }
1280: case XML_TOK_START_TAG_WITH_ATTS:
1281: if (!startElementHandler) {
1.3 kahan 1282: enum XML_Error result = storeAtts(parser, enc, s, 0, 0);
1.1 frystyk 1283: if (result)
1284: return result;
1285: }
1286: /* fall through */
1287: case XML_TOK_START_TAG_NO_ATTS:
1288: {
1289: TAG *tag;
1290: if (freeTagList) {
1291: tag = freeTagList;
1292: freeTagList = freeTagList->parent;
1293: }
1294: else {
1295: tag = malloc(sizeof(TAG));
1296: if (!tag)
1297: return XML_ERROR_NO_MEMORY;
1298: tag->buf = malloc(INIT_TAG_BUF_SIZE);
1299: if (!tag->buf)
1300: return XML_ERROR_NO_MEMORY;
1301: tag->bufEnd = tag->buf + INIT_TAG_BUF_SIZE;
1302: }
1.3 kahan 1303: tag->bindings = 0;
1.1 frystyk 1304: tag->parent = tagStack;
1305: tagStack = tag;
1.3 kahan 1306: tag->name.localPart = 0;
1.1 frystyk 1307: tag->rawName = s + enc->minBytesPerChar;
1308: tag->rawNameLength = XmlNameLength(enc, tag->rawName);
1309: if (nextPtr) {
1.3 kahan 1310: /* Need to guarantee that:
1311: tag->buf + ROUND_UP(tag->rawNameLength, sizeof(XML_Char)) <= tag->bufEnd - sizeof(XML_Char) */
1312: if (tag->rawNameLength + (int)(sizeof(XML_Char) - 1) + (int)sizeof(XML_Char) > tag->bufEnd - tag->buf) {
1.1 frystyk 1313: int bufSize = tag->rawNameLength * 4;
1314: bufSize = ROUND_UP(bufSize, sizeof(XML_Char));
1315: tag->buf = realloc(tag->buf, bufSize);
1316: if (!tag->buf)
1317: return XML_ERROR_NO_MEMORY;
1318: tag->bufEnd = tag->buf + bufSize;
1319: }
1320: memcpy(tag->buf, tag->rawName, tag->rawNameLength);
1321: tag->rawName = tag->buf;
1322: }
1323: ++tagLevel;
1324: if (startElementHandler) {
1325: enum XML_Error result;
1326: XML_Char *toPtr;
1327: for (;;) {
1328: const char *rawNameEnd = tag->rawName + tag->rawNameLength;
1329: const char *fromPtr = tag->rawName;
1330: int bufSize;
1331: if (nextPtr)
1332: toPtr = (XML_Char *)(tag->buf + ROUND_UP(tag->rawNameLength, sizeof(XML_Char)));
1333: else
1334: toPtr = (XML_Char *)tag->buf;
1.3 kahan 1335: tag->name.str = toPtr;
1.1 frystyk 1336: XmlConvert(enc,
1337: &fromPtr, rawNameEnd,
1338: (ICHAR **)&toPtr, (ICHAR *)tag->bufEnd - 1);
1339: if (fromPtr == rawNameEnd)
1340: break;
1341: bufSize = (tag->bufEnd - tag->buf) << 1;
1342: tag->buf = realloc(tag->buf, bufSize);
1343: if (!tag->buf)
1344: return XML_ERROR_NO_MEMORY;
1345: tag->bufEnd = tag->buf + bufSize;
1346: if (nextPtr)
1347: tag->rawName = tag->buf;
1348: }
1349: *toPtr = XML_T('\0');
1.3 kahan 1350: result = storeAtts(parser, enc, s, &(tag->name), &(tag->bindings));
1.1 frystyk 1351: if (result)
1352: return result;
1.3 kahan 1353: startElementHandler(handlerArg, tag->name.str, (const XML_Char **)atts);
1.1 frystyk 1354: poolClear(&tempPool);
1355: }
1356: else {
1.3 kahan 1357: tag->name.str = 0;
1.1 frystyk 1358: if (defaultHandler)
1359: reportDefault(parser, enc, s, next);
1360: }
1361: break;
1362: }
1363: case XML_TOK_EMPTY_ELEMENT_WITH_ATTS:
1364: if (!startElementHandler) {
1.3 kahan 1365: enum XML_Error result = storeAtts(parser, enc, s, 0, 0);
1.1 frystyk 1366: if (result)
1367: return result;
1368: }
1369: /* fall through */
1370: case XML_TOK_EMPTY_ELEMENT_NO_ATTS:
1371: if (startElementHandler || endElementHandler) {
1372: const char *rawName = s + enc->minBytesPerChar;
1.3 kahan 1373: enum XML_Error result;
1374: BINDING *bindings = 0;
1375: TAG_NAME name;
1376: name.str = poolStoreString(&tempPool, enc, rawName,
1377: rawName + XmlNameLength(enc, rawName));
1378: if (!name.str)
1.1 frystyk 1379: return XML_ERROR_NO_MEMORY;
1380: poolFinish(&tempPool);
1.3 kahan 1381: result = storeAtts(parser, enc, s, &name, &bindings);
1382: if (result)
1383: return result;
1384: poolFinish(&tempPool);
1385: if (startElementHandler)
1386: startElementHandler(handlerArg, name.str, (const XML_Char **)atts);
1.1 frystyk 1387: if (endElementHandler) {
1388: if (startElementHandler)
1389: *eventPP = *eventEndPP;
1.3 kahan 1390: endElementHandler(handlerArg, name.str);
1.1 frystyk 1391: }
1392: poolClear(&tempPool);
1.3 kahan 1393: while (bindings) {
1394: BINDING *b = bindings;
1395: if (endNamespaceDeclHandler)
1396: endNamespaceDeclHandler(handlerArg, b->prefix->name);
1397: bindings = bindings->nextTagBinding;
1398: b->nextTagBinding = freeBindingList;
1399: freeBindingList = b;
1400: b->prefix->binding = b->prevPrefixBinding;
1401: }
1.1 frystyk 1402: }
1403: else if (defaultHandler)
1404: reportDefault(parser, enc, s, next);
1405: if (tagLevel == 0)
1406: return epilogProcessor(parser, next, end, nextPtr);
1407: break;
1408: case XML_TOK_END_TAG:
1409: if (tagLevel == startTagLevel)
1410: return XML_ERROR_ASYNC_ENTITY;
1411: else {
1412: int len;
1413: const char *rawName;
1414: TAG *tag = tagStack;
1415: tagStack = tag->parent;
1416: tag->parent = freeTagList;
1417: freeTagList = tag;
1418: rawName = s + enc->minBytesPerChar*2;
1419: len = XmlNameLength(enc, rawName);
1420: if (len != tag->rawNameLength
1421: || memcmp(tag->rawName, rawName, len) != 0) {
1422: *eventPP = rawName;
1423: return XML_ERROR_TAG_MISMATCH;
1424: }
1425: --tagLevel;
1.3 kahan 1426: if (endElementHandler && tag->name.str) {
1427: if (tag->name.localPart) {
1428: XML_Char *to = (XML_Char *)tag->name.str + tag->name.uriLen;
1429: const XML_Char *from = tag->name.localPart;
1430: while ((*to++ = *from++) != 0)
1431: ;
1.1 frystyk 1432: }
1.3 kahan 1433: endElementHandler(handlerArg, tag->name.str);
1.1 frystyk 1434: }
1435: else if (defaultHandler)
1436: reportDefault(parser, enc, s, next);
1.3 kahan 1437: while (tag->bindings) {
1438: BINDING *b = tag->bindings;
1439: if (endNamespaceDeclHandler)
1440: endNamespaceDeclHandler(handlerArg, b->prefix->name);
1441: tag->bindings = tag->bindings->nextTagBinding;
1442: b->nextTagBinding = freeBindingList;
1443: freeBindingList = b;
1444: b->prefix->binding = b->prevPrefixBinding;
1445: }
1.1 frystyk 1446: if (tagLevel == 0)
1447: return epilogProcessor(parser, next, end, nextPtr);
1448: }
1449: break;
1450: case XML_TOK_CHAR_REF:
1451: {
1452: int n = XmlCharRefNumber(enc, s);
1453: if (n < 0)
1454: return XML_ERROR_BAD_CHAR_REF;
1455: if (characterDataHandler) {
1456: XML_Char buf[XML_ENCODE_MAX];
1457: characterDataHandler(handlerArg, buf, XmlEncode(n, (ICHAR *)buf));
1458: }
1459: else if (defaultHandler)
1460: reportDefault(parser, enc, s, next);
1461: }
1462: break;
1463: case XML_TOK_XML_DECL:
1464: return XML_ERROR_MISPLACED_XML_PI;
1465: case XML_TOK_DATA_NEWLINE:
1466: if (characterDataHandler) {
1.3 kahan 1467: XML_Char c = 0xA;
1.1 frystyk 1468: characterDataHandler(handlerArg, &c, 1);
1469: }
1470: else if (defaultHandler)
1471: reportDefault(parser, enc, s, next);
1472: break;
1473: case XML_TOK_CDATA_SECT_OPEN:
1474: {
1475: enum XML_Error result;
1.3 kahan 1476: if (startCdataSectionHandler)
1477: startCdataSectionHandler(handlerArg);
1478: #if 0
1479: /* Suppose you doing a transformation on a document that involves
1480: changing only the character data. You set up a defaultHandler
1481: and a characterDataHandler. The defaultHandler simply copies
1482: characters through. The characterDataHandler does the transformation
1483: and writes the characters out escaping them as necessary. This case
1484: will fail to work if we leave out the following two lines (because &
1485: and < inside CDATA sections will be incorrectly escaped).
1486:
1487: However, now we have a start/endCdataSectionHandler, so it seems
1488: easier to let the user deal with this. */
1489:
1490: else if (characterDataHandler)
1.1 frystyk 1491: characterDataHandler(handlerArg, dataBuf, 0);
1.3 kahan 1492: #endif
1.1 frystyk 1493: else if (defaultHandler)
1494: reportDefault(parser, enc, s, next);
1495: result = doCdataSection(parser, enc, &next, end, nextPtr);
1496: if (!next) {
1497: processor = cdataSectionProcessor;
1498: return result;
1499: }
1500: }
1501: break;
1502: case XML_TOK_TRAILING_RSQB:
1503: if (nextPtr) {
1504: *nextPtr = s;
1505: return XML_ERROR_NONE;
1506: }
1507: if (characterDataHandler) {
1508: if (MUST_CONVERT(enc, s)) {
1509: ICHAR *dataPtr = (ICHAR *)dataBuf;
1510: XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)dataBufEnd);
1511: characterDataHandler(handlerArg, dataBuf, dataPtr - (ICHAR *)dataBuf);
1512: }
1513: else
1514: characterDataHandler(handlerArg,
1515: (XML_Char *)s,
1516: (XML_Char *)end - (XML_Char *)s);
1517: }
1518: else if (defaultHandler)
1519: reportDefault(parser, enc, s, end);
1520: if (startTagLevel == 0) {
1521: *eventPP = end;
1522: return XML_ERROR_NO_ELEMENTS;
1523: }
1524: if (tagLevel != startTagLevel) {
1525: *eventPP = end;
1526: return XML_ERROR_ASYNC_ENTITY;
1527: }
1528: return XML_ERROR_NONE;
1529: case XML_TOK_DATA_CHARS:
1530: if (characterDataHandler) {
1531: if (MUST_CONVERT(enc, s)) {
1532: for (;;) {
1533: ICHAR *dataPtr = (ICHAR *)dataBuf;
1534: XmlConvert(enc, &s, next, &dataPtr, (ICHAR *)dataBufEnd);
1535: *eventEndPP = s;
1536: characterDataHandler(handlerArg, dataBuf, dataPtr - (ICHAR *)dataBuf);
1537: if (s == next)
1538: break;
1539: *eventPP = s;
1540: }
1541: }
1542: else
1543: characterDataHandler(handlerArg,
1544: (XML_Char *)s,
1545: (XML_Char *)next - (XML_Char *)s);
1546: }
1547: else if (defaultHandler)
1548: reportDefault(parser, enc, s, next);
1549: break;
1550: case XML_TOK_PI:
1551: if (!reportProcessingInstruction(parser, enc, s, next))
1552: return XML_ERROR_NO_MEMORY;
1553: break;
1.3 kahan 1554: case XML_TOK_COMMENT:
1555: if (!reportComment(parser, enc, s, next))
1556: return XML_ERROR_NO_MEMORY;
1557: break;
1.1 frystyk 1558: default:
1559: if (defaultHandler)
1560: reportDefault(parser, enc, s, next);
1561: break;
1562: }
1563: *eventPP = s = next;
1564: }
1565: /* not reached */
1566: }
1567:
1.3 kahan 1568: /* If tagNamePtr is non-null, build a real list of attributes,
1.1 frystyk 1569: otherwise just check the attributes for well-formedness. */
1570:
1571: static enum XML_Error storeAtts(XML_Parser parser, const ENCODING *enc,
1.4 kahan 1572: const char *attStr, TAG_NAME *tagNamePtr,
1.3 kahan 1573: BINDING **bindingsPtr)
1.1 frystyk 1574: {
1575: ELEMENT_TYPE *elementType = 0;
1576: int nDefaultAtts = 0;
1.4 kahan 1577: const XML_Char **appAtts; /* the attribute list to pass to the application */
1.3 kahan 1578: int attIndex = 0;
1.1 frystyk 1579: int i;
1580: int n;
1.3 kahan 1581: int nPrefixes = 0;
1582: BINDING *binding;
1583: const XML_Char *localPart;
1584:
1.4 kahan 1585: /* lookup the element type name */
1.3 kahan 1586: if (tagNamePtr) {
1587: elementType = (ELEMENT_TYPE *)lookup(&dtd.elementTypes, tagNamePtr->str, 0);
1588: if (!elementType) {
1589: tagNamePtr->str = poolCopyString(&dtd.pool, tagNamePtr->str);
1590: if (!tagNamePtr->str)
1591: return XML_ERROR_NO_MEMORY;
1592: elementType = (ELEMENT_TYPE *)lookup(&dtd.elementTypes, tagNamePtr->str, sizeof(ELEMENT_TYPE));
1593: if (!elementType)
1594: return XML_ERROR_NO_MEMORY;
1595: if (ns && !setElementTypePrefix(parser, elementType))
1596: return XML_ERROR_NO_MEMORY;
1597: }
1598: nDefaultAtts = elementType->nDefaultAtts;
1.1 frystyk 1599: }
1.4 kahan 1600: /* get the attributes from the tokenizer */
1601: n = XmlGetAttributes(enc, attStr, attsSize, atts);
1.1 frystyk 1602: if (n + nDefaultAtts > attsSize) {
1603: int oldAttsSize = attsSize;
1604: attsSize = n + nDefaultAtts + INIT_ATTS_SIZE;
1605: atts = realloc((void *)atts, attsSize * sizeof(ATTRIBUTE));
1606: if (!atts)
1607: return XML_ERROR_NO_MEMORY;
1608: if (n > oldAttsSize)
1.4 kahan 1609: XmlGetAttributes(enc, attStr, n, atts);
1.1 frystyk 1610: }
1611: appAtts = (const XML_Char **)atts;
1612: for (i = 0; i < n; i++) {
1.4 kahan 1613: /* add the name and value to the attribute list */
1.1 frystyk 1614: ATTRIBUTE_ID *attId = getAttributeId(parser, enc, atts[i].name,
1.3 kahan 1615: atts[i].name
1616: + XmlNameLength(enc, atts[i].name));
1.1 frystyk 1617: if (!attId)
1618: return XML_ERROR_NO_MEMORY;
1.4 kahan 1619: /* detect duplicate attributes */
1.1 frystyk 1620: if ((attId->name)[-1]) {
1621: if (enc == encoding)
1622: eventPtr = atts[i].name;
1623: return XML_ERROR_DUPLICATE_ATTRIBUTE;
1624: }
1625: (attId->name)[-1] = 1;
1.3 kahan 1626: appAtts[attIndex++] = attId->name;
1.1 frystyk 1627: if (!atts[i].normalized) {
1628: enum XML_Error result;
1629: int isCdata = 1;
1630:
1.4 kahan 1631: /* figure out whether declared as other than CDATA */
1.1 frystyk 1632: if (attId->maybeTokenized) {
1633: int j;
1634: for (j = 0; j < nDefaultAtts; j++) {
1635: if (attId == elementType->defaultAtts[j].id) {
1636: isCdata = elementType->defaultAtts[j].isCdata;
1637: break;
1638: }
1639: }
1640: }
1641:
1.4 kahan 1642: /* normalize the attribute value */
1.1 frystyk 1643: result = storeAttributeValue(parser, enc, isCdata,
1644: atts[i].valuePtr, atts[i].valueEnd,
1645: &tempPool);
1646: if (result)
1647: return result;
1.3 kahan 1648: if (tagNamePtr) {
1649: appAtts[attIndex] = poolStart(&tempPool);
1.1 frystyk 1650: poolFinish(&tempPool);
1651: }
1652: else
1653: poolDiscard(&tempPool);
1654: }
1.3 kahan 1655: else if (tagNamePtr) {
1.4 kahan 1656: /* the value did not need normalizing */
1.3 kahan 1657: appAtts[attIndex] = poolStoreString(&tempPool, enc, atts[i].valuePtr, atts[i].valueEnd);
1658: if (appAtts[attIndex] == 0)
1.1 frystyk 1659: return XML_ERROR_NO_MEMORY;
1660: poolFinish(&tempPool);
1661: }
1.4 kahan 1662: /* handle prefixed attribute names */
1.3 kahan 1663: if (attId->prefix && tagNamePtr) {
1664: if (attId->xmlns) {
1.4 kahan 1665: /* deal with namespace declarations here */
1.3 kahan 1666: if (!addBinding(parser, attId->prefix, attId, appAtts[attIndex], bindingsPtr))
1667: return XML_ERROR_NO_MEMORY;
1668: --attIndex;
1669: }
1670: else {
1.4 kahan 1671: /* deal with other prefixed names later */
1.3 kahan 1672: attIndex++;
1673: nPrefixes++;
1674: (attId->name)[-1] = 2;
1675: }
1676: }
1677: else
1678: attIndex++;
1.1 frystyk 1679: }
1.3 kahan 1680: nSpecifiedAtts = attIndex;
1.4 kahan 1681: /* do attribute defaulting */
1.3 kahan 1682: if (tagNamePtr) {
1.1 frystyk 1683: int j;
1684: for (j = 0; j < nDefaultAtts; j++) {
1685: const DEFAULT_ATTRIBUTE *da = elementType->defaultAtts + j;
1686: if (!(da->id->name)[-1] && da->value) {
1.3 kahan 1687: if (da->id->prefix) {
1688: if (da->id->xmlns) {
1689: if (!addBinding(parser, da->id->prefix, da->id, da->value, bindingsPtr))
1690: return XML_ERROR_NO_MEMORY;
1691: }
1692: else {
1693: (da->id->name)[-1] = 2;
1694: nPrefixes++;
1695: appAtts[attIndex++] = da->id->name;
1696: appAtts[attIndex++] = da->value;
1697: }
1698: }
1699: else {
1700: (da->id->name)[-1] = 1;
1701: appAtts[attIndex++] = da->id->name;
1702: appAtts[attIndex++] = da->value;
1703: }
1704: }
1705: }
1706: appAtts[attIndex] = 0;
1707: }
1708: i = 0;
1709: if (nPrefixes) {
1.4 kahan 1710: /* expand prefixed attribute names */
1.3 kahan 1711: for (; i < attIndex; i += 2) {
1712: if (appAtts[i][-1] == 2) {
1713: ATTRIBUTE_ID *id;
1714: ((XML_Char *)(appAtts[i]))[-1] = 0;
1715: id = (ATTRIBUTE_ID *)lookup(&dtd.attributeIds, appAtts[i], 0);
1716: if (id->prefix->binding) {
1717: int j;
1718: const BINDING *b = id->prefix->binding;
1719: const XML_Char *s = appAtts[i];
1720: for (j = 0; j < b->uriLen; j++) {
1721: if (!poolAppendChar(&tempPool, b->uri[j]))
1722: return XML_ERROR_NO_MEMORY;
1723: }
1724: while (*s++ != ':')
1725: ;
1726: do {
1727: if (!poolAppendChar(&tempPool, *s))
1728: return XML_ERROR_NO_MEMORY;
1729: } while (*s++);
1730: appAtts[i] = poolStart(&tempPool);
1731: poolFinish(&tempPool);
1732: }
1733: if (!--nPrefixes)
1734: break;
1.1 frystyk 1735: }
1.3 kahan 1736: else
1737: ((XML_Char *)(appAtts[i]))[-1] = 0;
1.1 frystyk 1738: }
1739: }
1.4 kahan 1740: /* clear the flags that say whether attributes were specified */
1.3 kahan 1741: for (; i < attIndex; i += 2)
1742: ((XML_Char *)(appAtts[i]))[-1] = 0;
1743: if (!tagNamePtr)
1744: return XML_ERROR_NONE;
1745: for (binding = *bindingsPtr; binding; binding = binding->nextTagBinding)
1746: binding->attId->name[-1] = 0;
1.4 kahan 1747: /* expand the element type name */
1.3 kahan 1748: if (elementType->prefix) {
1749: binding = elementType->prefix->binding;
1750: if (!binding)
1751: return XML_ERROR_NONE;
1752: localPart = tagNamePtr->str;
1753: while (*localPart++ != XML_T(':'))
1754: ;
1755: }
1756: else if (dtd.defaultPrefix.binding) {
1757: binding = dtd.defaultPrefix.binding;
1758: localPart = tagNamePtr->str;
1759: }
1760: else
1761: return XML_ERROR_NONE;
1762: tagNamePtr->localPart = localPart;
1763: tagNamePtr->uriLen = binding->uriLen;
1764: i = binding->uriLen;
1765: do {
1766: if (i == binding->uriAlloc) {
1.4 kahan 1767: binding->uri = realloc(binding->uri, (binding->uriAlloc *= 2) * sizeof(XML_Char));
1.3 kahan 1768: if (!binding->uri)
1769: return XML_ERROR_NO_MEMORY;
1770: }
1771: binding->uri[i++] = *localPart;
1772: } while (*localPart++);
1773: tagNamePtr->str = binding->uri;
1.1 frystyk 1774: return XML_ERROR_NONE;
1775: }
1776:
1.3 kahan 1777: static
1778: int addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId, const XML_Char *uri, BINDING **bindingsPtr)
1779: {
1780: BINDING *b;
1781: int len;
1782: for (len = 0; uri[len]; len++)
1783: ;
1784: if (namespaceSeparator)
1785: len++;
1786: if (freeBindingList) {
1787: b = freeBindingList;
1788: if (len > b->uriAlloc) {
1.4 kahan 1789: b->uri = realloc(b->uri, sizeof(XML_Char) * (len + EXPAND_SPARE));
1.3 kahan 1790: if (!b->uri)
1791: return 0;
1792: b->uriAlloc = len + EXPAND_SPARE;
1793: }
1794: freeBindingList = b->nextTagBinding;
1795: }
1796: else {
1797: b = malloc(sizeof(BINDING));
1798: if (!b)
1799: return 0;
1.4 kahan 1800: b->uri = malloc(sizeof(XML_Char) * (len + EXPAND_SPARE));
1.3 kahan 1801: if (!b->uri) {
1802: free(b);
1803: return 0;
1804: }
1.4 kahan 1805: b->uriAlloc = len + EXPAND_SPARE;
1.3 kahan 1806: }
1807: b->uriLen = len;
1808: memcpy(b->uri, uri, len * sizeof(XML_Char));
1809: if (namespaceSeparator)
1810: b->uri[len - 1] = namespaceSeparator;
1811: b->prefix = prefix;
1812: b->attId = attId;
1813: b->prevPrefixBinding = prefix->binding;
1814: if (*uri == XML_T('\0') && prefix == &dtd.defaultPrefix)
1815: prefix->binding = 0;
1816: else
1817: prefix->binding = b;
1818: b->nextTagBinding = *bindingsPtr;
1819: *bindingsPtr = b;
1820: if (startNamespaceDeclHandler)
1821: startNamespaceDeclHandler(handlerArg, prefix->name,
1822: prefix->binding ? uri : 0);
1823: return 1;
1824: }
1825:
1.1 frystyk 1826: /* The idea here is to avoid using stack for each CDATA section when
1827: the whole file is parsed with one call. */
1828:
1829: static
1830: enum XML_Error cdataSectionProcessor(XML_Parser parser,
1831: const char *start,
1832: const char *end,
1833: const char **endPtr)
1834: {
1835: enum XML_Error result = doCdataSection(parser, encoding, &start, end, endPtr);
1836: if (start) {
1837: processor = contentProcessor;
1838: return contentProcessor(parser, start, end, endPtr);
1839: }
1840: return result;
1841: }
1842:
1843: /* startPtr gets set to non-null if the section is closed, and to null if
1844: the section is not yet closed. */
1845:
1846: static
1847: enum XML_Error doCdataSection(XML_Parser parser,
1848: const ENCODING *enc,
1849: const char **startPtr,
1850: const char *end,
1851: const char **nextPtr)
1852: {
1853: const char *s = *startPtr;
1854: const char **eventPP;
1855: const char **eventEndPP;
1856: if (enc == encoding) {
1857: eventPP = &eventPtr;
1858: *eventPP = s;
1859: eventEndPP = &eventEndPtr;
1860: }
1.3 kahan 1861: else {
1862: eventPP = &(openInternalEntities->internalEventPtr);
1863: eventEndPP = &(openInternalEntities->internalEventEndPtr);
1864: }
1865: *eventPP = s;
1.1 frystyk 1866: *startPtr = 0;
1867: for (;;) {
1868: const char *next;
1869: int tok = XmlCdataSectionTok(enc, s, end, &next);
1870: *eventEndPP = next;
1871: switch (tok) {
1872: case XML_TOK_CDATA_SECT_CLOSE:
1.3 kahan 1873: if (endCdataSectionHandler)
1874: endCdataSectionHandler(handlerArg);
1875: #if 0
1876: /* see comment under XML_TOK_CDATA_SECT_OPEN */
1877: else if (characterDataHandler)
1.1 frystyk 1878: characterDataHandler(handlerArg, dataBuf, 0);
1.3 kahan 1879: #endif
1.1 frystyk 1880: else if (defaultHandler)
1881: reportDefault(parser, enc, s, next);
1882: *startPtr = next;
1883: return XML_ERROR_NONE;
1884: case XML_TOK_DATA_NEWLINE:
1885: if (characterDataHandler) {
1.3 kahan 1886: XML_Char c = 0xA;
1.1 frystyk 1887: characterDataHandler(handlerArg, &c, 1);
1888: }
1889: else if (defaultHandler)
1890: reportDefault(parser, enc, s, next);
1891: break;
1892: case XML_TOK_DATA_CHARS:
1893: if (characterDataHandler) {
1894: if (MUST_CONVERT(enc, s)) {
1895: for (;;) {
1896: ICHAR *dataPtr = (ICHAR *)dataBuf;
1897: XmlConvert(enc, &s, next, &dataPtr, (ICHAR *)dataBufEnd);
1898: *eventEndPP = next;
1899: characterDataHandler(handlerArg, dataBuf, dataPtr - (ICHAR *)dataBuf);
1900: if (s == next)
1901: break;
1902: *eventPP = s;
1903: }
1904: }
1905: else
1906: characterDataHandler(handlerArg,
1907: (XML_Char *)s,
1908: (XML_Char *)next - (XML_Char *)s);
1909: }
1910: else if (defaultHandler)
1911: reportDefault(parser, enc, s, next);
1912: break;
1913: case XML_TOK_INVALID:
1914: *eventPP = next;
1915: return XML_ERROR_INVALID_TOKEN;
1916: case XML_TOK_PARTIAL_CHAR:
1917: if (nextPtr) {
1918: *nextPtr = s;
1919: return XML_ERROR_NONE;
1920: }
1921: return XML_ERROR_PARTIAL_CHAR;
1922: case XML_TOK_PARTIAL:
1923: case XML_TOK_NONE:
1924: if (nextPtr) {
1925: *nextPtr = s;
1926: return XML_ERROR_NONE;
1927: }
1928: return XML_ERROR_UNCLOSED_CDATA_SECTION;
1929: default:
1930: abort();
1931: }
1932: *eventPP = s = next;
1933: }
1934: /* not reached */
1935: }
1936:
1.4 kahan 1937: #ifdef XML_DTD
1938:
1939: /* The idea here is to avoid using stack for each IGNORE section when
1940: the whole file is parsed with one call. */
1941:
1942: static
1943: enum XML_Error ignoreSectionProcessor(XML_Parser parser,
1944: const char *start,
1945: const char *end,
1946: const char **endPtr)
1947: {
1948: enum XML_Error result = doIgnoreSection(parser, encoding, &start, end, endPtr);
1949: if (start) {
1950: processor = prologProcessor;
1951: return prologProcessor(parser, start, end, endPtr);
1952: }
1953: return result;
1954: }
1955:
1956: /* startPtr gets set to non-null is the section is closed, and to null if
1957: the section is not yet closed. */
1958:
1959: static
1960: enum XML_Error doIgnoreSection(XML_Parser parser,
1961: const ENCODING *enc,
1962: const char **startPtr,
1963: const char *end,
1964: const char **nextPtr)
1965: {
1966: const char *next;
1967: int tok;
1968: const char *s = *startPtr;
1969: const char **eventPP;
1970: const char **eventEndPP;
1971: if (enc == encoding) {
1972: eventPP = &eventPtr;
1973: *eventPP = s;
1974: eventEndPP = &eventEndPtr;
1975: }
1976: else {
1977: eventPP = &(openInternalEntities->internalEventPtr);
1978: eventEndPP = &(openInternalEntities->internalEventEndPtr);
1979: }
1980: *eventPP = s;
1981: *startPtr = 0;
1982: tok = XmlIgnoreSectionTok(enc, s, end, &next);
1983: *eventEndPP = next;
1984: switch (tok) {
1985: case XML_TOK_IGNORE_SECT:
1986: if (defaultHandler)
1987: reportDefault(parser, enc, s, next);
1988: *startPtr = next;
1989: return XML_ERROR_NONE;
1990: case XML_TOK_INVALID:
1991: *eventPP = next;
1992: return XML_ERROR_INVALID_TOKEN;
1993: case XML_TOK_PARTIAL_CHAR:
1994: if (nextPtr) {
1995: *nextPtr = s;
1996: return XML_ERROR_NONE;
1997: }
1998: return XML_ERROR_PARTIAL_CHAR;
1999: case XML_TOK_PARTIAL:
2000: case XML_TOK_NONE:
2001: if (nextPtr) {
2002: *nextPtr = s;
2003: return XML_ERROR_NONE;
2004: }
2005: return XML_ERROR_SYNTAX; /* XML_ERROR_UNCLOSED_IGNORE_SECTION */
2006: default:
2007: abort();
2008: }
2009: /* not reached */
2010: }
2011:
2012: #endif /* XML_DTD */
2013:
1.1 frystyk 2014: static enum XML_Error
2015: initializeEncoding(XML_Parser parser)
2016: {
2017: const char *s;
2018: #ifdef XML_UNICODE
2019: char encodingBuf[128];
2020: if (!protocolEncodingName)
2021: s = 0;
2022: else {
2023: int i;
2024: for (i = 0; protocolEncodingName[i]; i++) {
2025: if (i == sizeof(encodingBuf) - 1
2026: || protocolEncodingName[i] >= 0x80
2027: || protocolEncodingName[i] < 0) {
2028: encodingBuf[0] = '\0';
2029: break;
2030: }
2031: encodingBuf[i] = (char)protocolEncodingName[i];
2032: }
2033: encodingBuf[i] = '\0';
2034: s = encodingBuf;
2035: }
2036: #else
2037: s = protocolEncodingName;
2038: #endif
1.3 kahan 2039: if ((ns ? XmlInitEncodingNS : XmlInitEncoding)(&initEncoding, &encoding, s))
1.1 frystyk 2040: return XML_ERROR_NONE;
2041: return handleUnknownEncoding(parser, protocolEncodingName);
2042: }
2043:
2044: static enum XML_Error
2045: processXmlDecl(XML_Parser parser, int isGeneralTextEntity,
2046: const char *s, const char *next)
2047: {
2048: const char *encodingName = 0;
2049: const ENCODING *newEncoding = 0;
2050: const char *version;
2051: int standalone = -1;
1.3 kahan 2052: if (!(ns
2053: ? XmlParseXmlDeclNS
2054: : XmlParseXmlDecl)(isGeneralTextEntity,
2055: encoding,
2056: s,
2057: next,
2058: &eventPtr,
2059: &version,
2060: &encodingName,
2061: &newEncoding,
2062: &standalone))
1.1 frystyk 2063: return XML_ERROR_SYNTAX;
1.4 kahan 2064: if (!isGeneralTextEntity && standalone == 1) {
1.1 frystyk 2065: dtd.standalone = 1;
1.4 kahan 2066: #ifdef XML_DTD
2067: if (paramEntityParsing == XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE)
2068: paramEntityParsing = XML_PARAM_ENTITY_PARSING_NEVER;
2069: #endif /* XML_DTD */
2070: }
1.1 frystyk 2071: if (defaultHandler)
2072: reportDefault(parser, encoding, s, next);
2073: if (!protocolEncodingName) {
2074: if (newEncoding) {
2075: if (newEncoding->minBytesPerChar != encoding->minBytesPerChar) {
2076: eventPtr = encodingName;
2077: return XML_ERROR_INCORRECT_ENCODING;
2078: }
2079: encoding = newEncoding;
2080: }
2081: else if (encodingName) {
2082: enum XML_Error result;
2083: const XML_Char *s = poolStoreString(&tempPool,
2084: encoding,
2085: encodingName,
2086: encodingName
2087: + XmlNameLength(encoding, encodingName));
2088: if (!s)
2089: return XML_ERROR_NO_MEMORY;
2090: result = handleUnknownEncoding(parser, s);
2091: poolDiscard(&tempPool);
2092: if (result == XML_ERROR_UNKNOWN_ENCODING)
2093: eventPtr = encodingName;
2094: return result;
2095: }
2096: }
2097: return XML_ERROR_NONE;
2098: }
2099:
2100: static enum XML_Error
2101: handleUnknownEncoding(XML_Parser parser, const XML_Char *encodingName)
2102: {
2103: if (unknownEncodingHandler) {
2104: XML_Encoding info;
2105: int i;
2106: for (i = 0; i < 256; i++)
2107: info.map[i] = -1;
2108: info.convert = 0;
2109: info.data = 0;
2110: info.release = 0;
2111: if (unknownEncodingHandler(unknownEncodingHandlerData, encodingName, &info)) {
2112: ENCODING *enc;
2113: unknownEncodingMem = malloc(XmlSizeOfUnknownEncoding());
2114: if (!unknownEncodingMem) {
2115: if (info.release)
2116: info.release(info.data);
2117: return XML_ERROR_NO_MEMORY;
2118: }
1.3 kahan 2119: enc = (ns
2120: ? XmlInitUnknownEncodingNS
2121: : XmlInitUnknownEncoding)(unknownEncodingMem,
2122: info.map,
2123: info.convert,
2124: info.data);
1.1 frystyk 2125: if (enc) {
2126: unknownEncodingData = info.data;
2127: unknownEncodingRelease = info.release;
2128: encoding = enc;
2129: return XML_ERROR_NONE;
2130: }
2131: }
2132: if (info.release)
2133: info.release(info.data);
2134: }
2135: return XML_ERROR_UNKNOWN_ENCODING;
2136: }
2137:
2138: static enum XML_Error
2139: prologInitProcessor(XML_Parser parser,
2140: const char *s,
2141: const char *end,
2142: const char **nextPtr)
2143: {
2144: enum XML_Error result = initializeEncoding(parser);
2145: if (result != XML_ERROR_NONE)
2146: return result;
2147: processor = prologProcessor;
2148: return prologProcessor(parser, s, end, nextPtr);
2149: }
2150:
2151: static enum XML_Error
2152: prologProcessor(XML_Parser parser,
2153: const char *s,
2154: const char *end,
2155: const char **nextPtr)
2156: {
1.4 kahan 2157: const char *next;
2158: int tok = XmlPrologTok(encoding, s, end, &next);
2159: return doProlog(parser, encoding, s, end, tok, next, nextPtr);
2160: }
2161:
2162: static enum XML_Error
2163: doProlog(XML_Parser parser,
2164: const ENCODING *enc,
2165: const char *s,
2166: const char *end,
2167: int tok,
2168: const char *next,
2169: const char **nextPtr)
2170: {
2171: #ifdef XML_DTD
2172: static const XML_Char externalSubsetName[] = { '#' , '\0' };
2173: #endif /* XML_DTD */
2174:
2175: const char **eventPP;
2176: const char **eventEndPP;
2177: if (enc == encoding) {
2178: eventPP = &eventPtr;
2179: eventEndPP = &eventEndPtr;
2180: }
2181: else {
2182: eventPP = &(openInternalEntities->internalEventPtr);
2183: eventEndPP = &(openInternalEntities->internalEventEndPtr);
2184: }
1.1 frystyk 2185: for (;;) {
1.4 kahan 2186: int role;
2187: *eventPP = s;
2188: *eventEndPP = next;
1.1 frystyk 2189: if (tok <= 0) {
2190: if (nextPtr != 0 && tok != XML_TOK_INVALID) {
2191: *nextPtr = s;
2192: return XML_ERROR_NONE;
2193: }
2194: switch (tok) {
2195: case XML_TOK_INVALID:
1.4 kahan 2196: *eventPP = next;
1.1 frystyk 2197: return XML_ERROR_INVALID_TOKEN;
2198: case XML_TOK_PARTIAL:
2199: return XML_ERROR_UNCLOSED_TOKEN;
2200: case XML_TOK_PARTIAL_CHAR:
2201: return XML_ERROR_PARTIAL_CHAR;
1.4 kahan 2202: case XML_TOK_NONE:
2203: #ifdef XML_DTD
2204: if (enc != encoding)
2205: return XML_ERROR_NONE;
2206: if (parentParser) {
2207: if (XmlTokenRole(&prologState, XML_TOK_NONE, end, end, enc)
2208: == XML_ROLE_ERROR)
2209: return XML_ERROR_SYNTAX;
2210: hadExternalDoctype = 0;
2211: return XML_ERROR_NONE;
2212: }
2213: #endif /* XML_DTD */
1.1 frystyk 2214: return XML_ERROR_NO_ELEMENTS;
2215: default:
1.4 kahan 2216: tok = -tok;
2217: next = end;
2218: break;
1.1 frystyk 2219: }
2220: }
1.4 kahan 2221: role = XmlTokenRole(&prologState, tok, s, next, enc);
2222: switch (role) {
1.1 frystyk 2223: case XML_ROLE_XML_DECL:
2224: {
2225: enum XML_Error result = processXmlDecl(parser, 0, s, next);
2226: if (result != XML_ERROR_NONE)
2227: return result;
1.4 kahan 2228: enc = encoding;
2229: }
2230: break;
2231: case XML_ROLE_DOCTYPE_NAME:
2232: if (startDoctypeDeclHandler) {
2233: const XML_Char *name = poolStoreString(&tempPool, enc, s, next);
2234: if (!name)
2235: return XML_ERROR_NO_MEMORY;
2236: startDoctypeDeclHandler(handlerArg, name);
2237: poolClear(&tempPool);
1.1 frystyk 2238: }
2239: break;
1.4 kahan 2240: #ifdef XML_DTD
2241: case XML_ROLE_TEXT_DECL:
2242: {
2243: enum XML_Error result = processXmlDecl(parser, 1, s, next);
2244: if (result != XML_ERROR_NONE)
2245: return result;
2246: enc = encoding;
2247: }
1.1 frystyk 2248: break;
1.4 kahan 2249: #endif /* XML_DTD */
1.1 frystyk 2250: case XML_ROLE_DOCTYPE_PUBLIC_ID:
1.4 kahan 2251: #ifdef XML_DTD
2252: declEntity = (ENTITY *)lookup(&dtd.paramEntities,
2253: externalSubsetName,
2254: sizeof(ENTITY));
2255: if (!declEntity)
2256: return XML_ERROR_NO_MEMORY;
2257: #endif /* XML_DTD */
2258: /* fall through */
1.1 frystyk 2259: case XML_ROLE_ENTITY_PUBLIC_ID:
1.4 kahan 2260: if (!XmlIsPublicId(enc, s, next, eventPP))
1.1 frystyk 2261: return XML_ERROR_SYNTAX;
2262: if (declEntity) {
2263: XML_Char *tem = poolStoreString(&dtd.pool,
1.4 kahan 2264: enc,
2265: s + enc->minBytesPerChar,
2266: next - enc->minBytesPerChar);
1.1 frystyk 2267: if (!tem)
2268: return XML_ERROR_NO_MEMORY;
2269: normalizePublicId(tem);
2270: declEntity->publicId = tem;
2271: poolFinish(&dtd.pool);
2272: }
2273: break;
1.4 kahan 2274: case XML_ROLE_DOCTYPE_CLOSE:
2275: if (dtd.complete && hadExternalDoctype) {
2276: dtd.complete = 0;
2277: #ifdef XML_DTD
2278: if (paramEntityParsing && externalEntityRefHandler) {
2279: ENTITY *entity = (ENTITY *)lookup(&dtd.paramEntities,
2280: externalSubsetName,
2281: 0);
2282: if (!externalEntityRefHandler(externalEntityRefHandlerArg,
2283: 0,
2284: entity->base,
2285: entity->systemId,
2286: entity->publicId))
2287: return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
2288: }
2289: #endif /* XML_DTD */
2290: if (!dtd.complete
2291: && !dtd.standalone
2292: && notStandaloneHandler
2293: && !notStandaloneHandler(handlerArg))
2294: return XML_ERROR_NOT_STANDALONE;
2295: }
2296: if (endDoctypeDeclHandler)
2297: endDoctypeDeclHandler(handlerArg);
2298: break;
1.1 frystyk 2299: case XML_ROLE_INSTANCE_START:
2300: processor = contentProcessor;
2301: return contentProcessor(parser, s, end, nextPtr);
2302: case XML_ROLE_ATTLIST_ELEMENT_NAME:
2303: {
1.4 kahan 2304: const XML_Char *name = poolStoreString(&dtd.pool, enc, s, next);
1.1 frystyk 2305: if (!name)
2306: return XML_ERROR_NO_MEMORY;
2307: declElementType = (ELEMENT_TYPE *)lookup(&dtd.elementTypes, name, sizeof(ELEMENT_TYPE));
2308: if (!declElementType)
2309: return XML_ERROR_NO_MEMORY;
2310: if (declElementType->name != name)
2311: poolDiscard(&dtd.pool);
1.3 kahan 2312: else {
1.1 frystyk 2313: poolFinish(&dtd.pool);
1.3 kahan 2314: if (!setElementTypePrefix(parser, declElementType))
2315: return XML_ERROR_NO_MEMORY;
2316: }
1.1 frystyk 2317: break;
2318: }
2319: case XML_ROLE_ATTRIBUTE_NAME:
1.4 kahan 2320: declAttributeId = getAttributeId(parser, enc, s, next);
1.1 frystyk 2321: if (!declAttributeId)
2322: return XML_ERROR_NO_MEMORY;
2323: declAttributeIsCdata = 0;
2324: break;
2325: case XML_ROLE_ATTRIBUTE_TYPE_CDATA:
2326: declAttributeIsCdata = 1;
2327: break;
2328: case XML_ROLE_IMPLIED_ATTRIBUTE_VALUE:
2329: case XML_ROLE_REQUIRED_ATTRIBUTE_VALUE:
2330: if (dtd.complete
2331: && !defineAttribute(declElementType, declAttributeId, declAttributeIsCdata, 0))
2332: return XML_ERROR_NO_MEMORY;
2333: break;
2334: case XML_ROLE_DEFAULT_ATTRIBUTE_VALUE:
2335: case XML_ROLE_FIXED_ATTRIBUTE_VALUE:
2336: {
2337: const XML_Char *attVal;
2338: enum XML_Error result
1.4 kahan 2339: = storeAttributeValue(parser, enc, declAttributeIsCdata,
2340: s + enc->minBytesPerChar,
2341: next - enc->minBytesPerChar,
1.1 frystyk 2342: &dtd.pool);
2343: if (result)
2344: return result;
2345: attVal = poolStart(&dtd.pool);
2346: poolFinish(&dtd.pool);
2347: if (dtd.complete
2348: && !defineAttribute(declElementType, declAttributeId, declAttributeIsCdata, attVal))
2349: return XML_ERROR_NO_MEMORY;
2350: break;
2351: }
2352: case XML_ROLE_ENTITY_VALUE:
2353: {
1.4 kahan 2354: enum XML_Error result = storeEntityValue(parser, enc,
2355: s + enc->minBytesPerChar,
2356: next - enc->minBytesPerChar);
2357: if (declEntity) {
2358: declEntity->textPtr = poolStart(&dtd.pool);
2359: declEntity->textLen = poolLength(&dtd.pool);
2360: poolFinish(&dtd.pool);
2361: }
2362: else
2363: poolDiscard(&dtd.pool);
1.1 frystyk 2364: if (result != XML_ERROR_NONE)
2365: return result;
2366: }
2367: break;
1.4 kahan 2368: case XML_ROLE_DOCTYPE_SYSTEM_ID:
2369: if (!dtd.standalone
2370: #ifdef XML_DTD
2371: && !paramEntityParsing
2372: #endif /* XML_DTD */
2373: && notStandaloneHandler
2374: && !notStandaloneHandler(handlerArg))
2375: return XML_ERROR_NOT_STANDALONE;
2376: hadExternalDoctype = 1;
2377: #ifndef XML_DTD
2378: break;
2379: #else /* XML_DTD */
2380: if (!declEntity) {
2381: declEntity = (ENTITY *)lookup(&dtd.paramEntities,
2382: externalSubsetName,
2383: sizeof(ENTITY));
2384: if (!declEntity)
2385: return XML_ERROR_NO_MEMORY;
2386: }
2387: /* fall through */
2388: #endif /* XML_DTD */
1.1 frystyk 2389: case XML_ROLE_ENTITY_SYSTEM_ID:
2390: if (declEntity) {
1.4 kahan 2391: declEntity->systemId = poolStoreString(&dtd.pool, enc,
2392: s + enc->minBytesPerChar,
2393: next - enc->minBytesPerChar);
1.1 frystyk 2394: if (!declEntity->systemId)
2395: return XML_ERROR_NO_MEMORY;
1.4 kahan 2396: declEntity->base = curBase;
1.1 frystyk 2397: poolFinish(&dtd.pool);
2398: }
2399: break;
2400: case XML_ROLE_ENTITY_NOTATION_NAME:
2401: if (declEntity) {
1.4 kahan 2402: declEntity->notation = poolStoreString(&dtd.pool, enc, s, next);
1.1 frystyk 2403: if (!declEntity->notation)
2404: return XML_ERROR_NO_MEMORY;
2405: poolFinish(&dtd.pool);
2406: if (unparsedEntityDeclHandler) {
1.4 kahan 2407: *eventEndPP = s;
1.1 frystyk 2408: unparsedEntityDeclHandler(handlerArg,
2409: declEntity->name,
2410: declEntity->base,
2411: declEntity->systemId,
2412: declEntity->publicId,
2413: declEntity->notation);
2414: }
2415:
2416: }
2417: break;
2418: case XML_ROLE_GENERAL_ENTITY_NAME:
2419: {
2420: const XML_Char *name;
1.4 kahan 2421: if (XmlPredefinedEntityName(enc, s, next)) {
1.1 frystyk 2422: declEntity = 0;
2423: break;
2424: }
1.4 kahan 2425: name = poolStoreString(&dtd.pool, enc, s, next);
1.1 frystyk 2426: if (!name)
2427: return XML_ERROR_NO_MEMORY;
2428: if (dtd.complete) {
2429: declEntity = (ENTITY *)lookup(&dtd.generalEntities, name, sizeof(ENTITY));
2430: if (!declEntity)
2431: return XML_ERROR_NO_MEMORY;
2432: if (declEntity->name != name) {
2433: poolDiscard(&dtd.pool);
2434: declEntity = 0;
2435: }
2436: else
2437: poolFinish(&dtd.pool);
2438: }
2439: else {
2440: poolDiscard(&dtd.pool);
2441: declEntity = 0;
2442: }
2443: }
2444: break;
2445: case XML_ROLE_PARAM_ENTITY_NAME:
1.4 kahan 2446: #ifdef XML_DTD
2447: if (dtd.complete) {
2448: const XML_Char *name = poolStoreString(&dtd.pool, enc, s, next);
2449: if (!name)
2450: return XML_ERROR_NO_MEMORY;
2451: declEntity = (ENTITY *)lookup(&dtd.paramEntities, name, sizeof(ENTITY));
2452: if (!declEntity)
2453: return XML_ERROR_NO_MEMORY;
2454: if (declEntity->name != name) {
2455: poolDiscard(&dtd.pool);
2456: declEntity = 0;
2457: }
2458: else
2459: poolFinish(&dtd.pool);
2460: }
2461: #else /* not XML_DTD */
1.1 frystyk 2462: declEntity = 0;
1.4 kahan 2463: #endif /* not XML_DTD */
1.1 frystyk 2464: break;
2465: case XML_ROLE_NOTATION_NAME:
2466: declNotationPublicId = 0;
2467: declNotationName = 0;
2468: if (notationDeclHandler) {
1.4 kahan 2469: declNotationName = poolStoreString(&tempPool, enc, s, next);
1.1 frystyk 2470: if (!declNotationName)
2471: return XML_ERROR_NO_MEMORY;
2472: poolFinish(&tempPool);
2473: }
2474: break;
2475: case XML_ROLE_NOTATION_PUBLIC_ID:
1.4 kahan 2476: if (!XmlIsPublicId(enc, s, next, eventPP))
1.1 frystyk 2477: return XML_ERROR_SYNTAX;
2478: if (declNotationName) {
2479: XML_Char *tem = poolStoreString(&tempPool,
1.4 kahan 2480: enc,
2481: s + enc->minBytesPerChar,
2482: next - enc->minBytesPerChar);
1.1 frystyk 2483: if (!tem)
2484: return XML_ERROR_NO_MEMORY;
2485: normalizePublicId(tem);
2486: declNotationPublicId = tem;
2487: poolFinish(&tempPool);
2488: }
2489: break;
2490: case XML_ROLE_NOTATION_SYSTEM_ID:
2491: if (declNotationName && notationDeclHandler) {
2492: const XML_Char *systemId
1.4 kahan 2493: = poolStoreString(&tempPool, enc,
2494: s + enc->minBytesPerChar,
2495: next - enc->minBytesPerChar);
1.1 frystyk 2496: if (!systemId)
2497: return XML_ERROR_NO_MEMORY;
1.4 kahan 2498: *eventEndPP = s;
1.1 frystyk 2499: notationDeclHandler(handlerArg,
2500: declNotationName,
1.4 kahan 2501: curBase,
1.1 frystyk 2502: systemId,
2503: declNotationPublicId);
2504: }
2505: poolClear(&tempPool);
2506: break;
2507: case XML_ROLE_NOTATION_NO_SYSTEM_ID:
2508: if (declNotationPublicId && notationDeclHandler) {
1.4 kahan 2509: *eventEndPP = s;
1.1 frystyk 2510: notationDeclHandler(handlerArg,
2511: declNotationName,
1.4 kahan 2512: curBase,
1.1 frystyk 2513: 0,
2514: declNotationPublicId);
2515: }
2516: poolClear(&tempPool);
2517: break;
2518: case XML_ROLE_ERROR:
2519: switch (tok) {
2520: case XML_TOK_PARAM_ENTITY_REF:
2521: return XML_ERROR_PARAM_ENTITY_REF;
2522: case XML_TOK_XML_DECL:
2523: return XML_ERROR_MISPLACED_XML_PI;
2524: default:
2525: return XML_ERROR_SYNTAX;
2526: }
1.4 kahan 2527: #ifdef XML_DTD
2528: case XML_ROLE_IGNORE_SECT:
2529: {
2530: enum XML_Error result;
2531: if (defaultHandler)
2532: reportDefault(parser, enc, s, next);
2533: result = doIgnoreSection(parser, enc, &next, end, nextPtr);
2534: if (!next) {
2535: processor = ignoreSectionProcessor;
2536: return result;
2537: }
2538: }
2539: break;
2540: #endif /* XML_DTD */
1.1 frystyk 2541: case XML_ROLE_GROUP_OPEN:
2542: if (prologState.level >= groupSize) {
2543: if (groupSize)
2544: groupConnector = realloc(groupConnector, groupSize *= 2);
2545: else
2546: groupConnector = malloc(groupSize = 32);
2547: if (!groupConnector)
2548: return XML_ERROR_NO_MEMORY;
2549: }
2550: groupConnector[prologState.level] = 0;
2551: break;
2552: case XML_ROLE_GROUP_SEQUENCE:
1.4 kahan 2553: if (groupConnector[prologState.level] == '|')
1.1 frystyk 2554: return XML_ERROR_SYNTAX;
2555: groupConnector[prologState.level] = ',';
2556: break;
2557: case XML_ROLE_GROUP_CHOICE:
1.4 kahan 2558: if (groupConnector[prologState.level] == ',')
1.1 frystyk 2559: return XML_ERROR_SYNTAX;
2560: groupConnector[prologState.level] = '|';
2561: break;
2562: case XML_ROLE_PARAM_ENTITY_REF:
1.4 kahan 2563: #ifdef XML_DTD
2564: case XML_ROLE_INNER_PARAM_ENTITY_REF:
2565: if (paramEntityParsing
2566: && (dtd.complete || role == XML_ROLE_INNER_PARAM_ENTITY_REF)) {
2567: const XML_Char *name;
2568: ENTITY *entity;
2569: name = poolStoreString(&dtd.pool, enc,
2570: s + enc->minBytesPerChar,
2571: next - enc->minBytesPerChar);
2572: if (!name)
2573: return XML_ERROR_NO_MEMORY;
2574: entity = (ENTITY *)lookup(&dtd.paramEntities, name, 0);
2575: poolDiscard(&dtd.pool);
2576: if (!entity) {
2577: /* FIXME what to do if !dtd.complete? */
2578: return XML_ERROR_UNDEFINED_ENTITY;
2579: }
2580: if (entity->open)
2581: return XML_ERROR_RECURSIVE_ENTITY_REF;
2582: if (entity->textPtr) {
2583: enum XML_Error result;
2584: result = processInternalParamEntity(parser, entity);
2585: if (result != XML_ERROR_NONE)
2586: return result;
2587: break;
2588: }
2589: if (role == XML_ROLE_INNER_PARAM_ENTITY_REF)
2590: return XML_ERROR_PARAM_ENTITY_REF;
2591: if (externalEntityRefHandler) {
2592: dtd.complete = 0;
2593: entity->open = 1;
2594: if (!externalEntityRefHandler(externalEntityRefHandlerArg,
2595: 0,
2596: entity->base,
2597: entity->systemId,
2598: entity->publicId)) {
2599: entity->open = 0;
2600: return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
2601: }
2602: entity->open = 0;
2603: if (dtd.complete)
2604: break;
2605: }
2606: }
2607: #endif /* XML_DTD */
1.3 kahan 2608: if (!dtd.standalone
2609: && notStandaloneHandler
2610: && !notStandaloneHandler(handlerArg))
2611: return XML_ERROR_NOT_STANDALONE;
1.1 frystyk 2612: dtd.complete = 0;
1.4 kahan 2613: if (defaultHandler)
2614: reportDefault(parser, enc, s, next);
1.1 frystyk 2615: break;
2616: case XML_ROLE_NONE:
2617: switch (tok) {
2618: case XML_TOK_PI:
1.4 kahan 2619: if (!reportProcessingInstruction(parser, enc, s, next))
1.1 frystyk 2620: return XML_ERROR_NO_MEMORY;
2621: break;
1.3 kahan 2622: case XML_TOK_COMMENT:
1.4 kahan 2623: if (!reportComment(parser, enc, s, next))
1.3 kahan 2624: return XML_ERROR_NO_MEMORY;
2625: break;
1.1 frystyk 2626: }
2627: break;
2628: }
2629: if (defaultHandler) {
2630: switch (tok) {
2631: case XML_TOK_PI:
1.3 kahan 2632: case XML_TOK_COMMENT:
1.1 frystyk 2633: case XML_TOK_BOM:
2634: case XML_TOK_XML_DECL:
1.4 kahan 2635: #ifdef XML_DTD
2636: case XML_TOK_IGNORE_SECT:
2637: #endif /* XML_DTD */
2638: case XML_TOK_PARAM_ENTITY_REF:
1.1 frystyk 2639: break;
2640: default:
1.4 kahan 2641: #ifdef XML_DTD
2642: if (role != XML_ROLE_IGNORE_SECT)
2643: #endif /* XML_DTD */
2644: reportDefault(parser, enc, s, next);
1.1 frystyk 2645: }
2646: }
2647: s = next;
1.4 kahan 2648: tok = XmlPrologTok(enc, s, end, &next);
1.1 frystyk 2649: }
2650: /* not reached */
2651: }
2652:
2653: static
2654: enum XML_Error epilogProcessor(XML_Parser parser,
2655: const char *s,
2656: const char *end,
2657: const char **nextPtr)
2658: {
2659: processor = epilogProcessor;
2660: eventPtr = s;
2661: for (;;) {
2662: const char *next;
2663: int tok = XmlPrologTok(encoding, s, end, &next);
2664: eventEndPtr = next;
2665: switch (tok) {
1.4 kahan 2666: case -XML_TOK_PROLOG_S:
1.1 frystyk 2667: if (defaultHandler) {
2668: eventEndPtr = end;
2669: reportDefault(parser, encoding, s, end);
2670: }
2671: /* fall through */
2672: case XML_TOK_NONE:
2673: if (nextPtr)
2674: *nextPtr = end;
2675: return XML_ERROR_NONE;
2676: case XML_TOK_PROLOG_S:
2677: if (defaultHandler)
2678: reportDefault(parser, encoding, s, next);
2679: break;
2680: case XML_TOK_PI:
2681: if (!reportProcessingInstruction(parser, encoding, s, next))
2682: return XML_ERROR_NO_MEMORY;
2683: break;
1.3 kahan 2684: case XML_TOK_COMMENT:
2685: if (!reportComment(parser, encoding, s, next))
2686: return XML_ERROR_NO_MEMORY;
2687: break;
1.1 frystyk 2688: case XML_TOK_INVALID:
2689: eventPtr = next;
2690: return XML_ERROR_INVALID_TOKEN;
2691: case XML_TOK_PARTIAL:
2692: if (nextPtr) {
2693: *nextPtr = s;
2694: return XML_ERROR_NONE;
2695: }
2696: return XML_ERROR_UNCLOSED_TOKEN;
2697: case XML_TOK_PARTIAL_CHAR:
2698: if (nextPtr) {
2699: *nextPtr = s;
2700: return XML_ERROR_NONE;
2701: }
2702: return XML_ERROR_PARTIAL_CHAR;
2703: default:
2704: return XML_ERROR_JUNK_AFTER_DOC_ELEMENT;
2705: }
2706: eventPtr = s = next;
2707: }
2708: }
2709:
1.4 kahan 2710: #ifdef XML_DTD
2711:
2712: static enum XML_Error
2713: processInternalParamEntity(XML_Parser parser, ENTITY *entity)
2714: {
2715: const char *s, *end, *next;
2716: int tok;
2717: enum XML_Error result;
2718: OPEN_INTERNAL_ENTITY openEntity;
2719: entity->open = 1;
2720: openEntity.next = openInternalEntities;
2721: openInternalEntities = &openEntity;
2722: openEntity.entity = entity;
2723: openEntity.internalEventPtr = 0;
2724: openEntity.internalEventEndPtr = 0;
2725: s = (char *)entity->textPtr;
2726: end = (char *)(entity->textPtr + entity->textLen);
2727: tok = XmlPrologTok(internalEncoding, s, end, &next);
2728: result = doProlog(parser, internalEncoding, s, end, tok, next, 0);
2729: entity->open = 0;
2730: openInternalEntities = openEntity.next;
2731: return result;
2732: }
2733:
2734: #endif /* XML_DTD */
2735:
1.1 frystyk 2736: static
2737: enum XML_Error errorProcessor(XML_Parser parser,
2738: const char *s,
2739: const char *end,
2740: const char **nextPtr)
2741: {
2742: return errorCode;
2743: }
2744:
2745: static enum XML_Error
2746: storeAttributeValue(XML_Parser parser, const ENCODING *enc, int isCdata,
2747: const char *ptr, const char *end,
2748: STRING_POOL *pool)
2749: {
2750: enum XML_Error result = appendAttributeValue(parser, enc, isCdata, ptr, end, pool);
2751: if (result)
2752: return result;
1.3 kahan 2753: if (!isCdata && poolLength(pool) && poolLastChar(pool) == 0x20)
1.1 frystyk 2754: poolChop(pool);
2755: if (!poolAppendChar(pool, XML_T('\0')))
2756: return XML_ERROR_NO_MEMORY;
2757: return XML_ERROR_NONE;
2758: }
2759:
2760: static enum XML_Error
2761: appendAttributeValue(XML_Parser parser, const ENCODING *enc, int isCdata,
2762: const char *ptr, const char *end,
2763: STRING_POOL *pool)
2764: {
2765: for (;;) {
2766: const char *next;
2767: int tok = XmlAttributeValueTok(enc, ptr, end, &next);
2768: switch (tok) {
2769: case XML_TOK_NONE:
2770: return XML_ERROR_NONE;
2771: case XML_TOK_INVALID:
2772: if (enc == encoding)
2773: eventPtr = next;
2774: return XML_ERROR_INVALID_TOKEN;
2775: case XML_TOK_PARTIAL:
2776: if (enc == encoding)
2777: eventPtr = ptr;
2778: return XML_ERROR_INVALID_TOKEN;
2779: case XML_TOK_CHAR_REF:
2780: {
2781: XML_Char buf[XML_ENCODE_MAX];
2782: int i;
2783: int n = XmlCharRefNumber(enc, ptr);
2784: if (n < 0) {
2785: if (enc == encoding)
2786: eventPtr = ptr;
2787: return XML_ERROR_BAD_CHAR_REF;
2788: }
2789: if (!isCdata
2790: && n == 0x20 /* space */
1.3 kahan 2791: && (poolLength(pool) == 0 || poolLastChar(pool) == 0x20))
1.1 frystyk 2792: break;
2793: n = XmlEncode(n, (ICHAR *)buf);
2794: if (!n) {
2795: if (enc == encoding)
2796: eventPtr = ptr;
2797: return XML_ERROR_BAD_CHAR_REF;
2798: }
2799: for (i = 0; i < n; i++) {
2800: if (!poolAppendChar(pool, buf[i]))
2801: return XML_ERROR_NO_MEMORY;
2802: }
2803: }
2804: break;
2805: case XML_TOK_DATA_CHARS:
2806: if (!poolAppend(pool, enc, ptr, next))
2807: return XML_ERROR_NO_MEMORY;
2808: break;
2809: break;
2810: case XML_TOK_TRAILING_CR:
2811: next = ptr + enc->minBytesPerChar;
2812: /* fall through */
2813: case XML_TOK_ATTRIBUTE_VALUE_S:
2814: case XML_TOK_DATA_NEWLINE:
1.3 kahan 2815: if (!isCdata && (poolLength(pool) == 0 || poolLastChar(pool) == 0x20))
1.1 frystyk 2816: break;
1.3 kahan 2817: if (!poolAppendChar(pool, 0x20))
1.1 frystyk 2818: return XML_ERROR_NO_MEMORY;
2819: break;
2820: case XML_TOK_ENTITY_REF:
2821: {
2822: const XML_Char *name;
2823: ENTITY *entity;
2824: XML_Char ch = XmlPredefinedEntityName(enc,
2825: ptr + enc->minBytesPerChar,
2826: next - enc->minBytesPerChar);
2827: if (ch) {
2828: if (!poolAppendChar(pool, ch))
2829: return XML_ERROR_NO_MEMORY;
2830: break;
2831: }
2832: name = poolStoreString(&temp2Pool, enc,
2833: ptr + enc->minBytesPerChar,
2834: next - enc->minBytesPerChar);
2835: if (!name)
2836: return XML_ERROR_NO_MEMORY;
2837: entity = (ENTITY *)lookup(&dtd.generalEntities, name, 0);
2838: poolDiscard(&temp2Pool);
2839: if (!entity) {
2840: if (dtd.complete) {
2841: if (enc == encoding)
2842: eventPtr = ptr;
2843: return XML_ERROR_UNDEFINED_ENTITY;
2844: }
1.4.2.1.2.2! kahan 2845: #ifdef XML_AMAYA
! 2846: /* Laurent Carcone (lc@w3.org) 25/June/2001
1.4.2.1 kahan 2847: ** For attributes, Expat is skipping the entity names it
2848: ** doesn't know. This patch keeps those names untranslated.
2849: **/
2850: else {
2851: int i;
1.4.2.1.2.2! kahan 2852: if (!poolAppendChar(pool, (unsigned char) (26)))
1.4.2.1 kahan 2853: return XML_ERROR_NO_MEMORY;
2854: for (i = 0; name[i] != XML_T('\0'); i++) {
2855: if (!poolAppendChar(pool,name[i]))
2856: return XML_ERROR_NO_MEMORY;
2857: }
2858: if (!poolAppendChar(pool, ';'))
2859: return XML_ERROR_NO_MEMORY;
2860: }
2861: /* End of patch */
1.4.2.1.2.2! kahan 2862: #endif /* XML_AMAYA */
1.1 frystyk 2863: }
2864: else if (entity->open) {
2865: if (enc == encoding)
2866: eventPtr = ptr;
2867: return XML_ERROR_RECURSIVE_ENTITY_REF;
2868: }
2869: else if (entity->notation) {
2870: if (enc == encoding)
2871: eventPtr = ptr;
2872: return XML_ERROR_BINARY_ENTITY_REF;
2873: }
2874: else if (!entity->textPtr) {
2875: if (enc == encoding)
2876: eventPtr = ptr;
2877: return XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF;
2878: }
2879: else {
2880: enum XML_Error result;
2881: const XML_Char *textEnd = entity->textPtr + entity->textLen;
2882: entity->open = 1;
1.4 kahan 2883: result = appendAttributeValue(parser, internalEncoding, isCdata, (char *)entity->textPtr, (char *)textEnd, pool);
1.1 frystyk 2884: entity->open = 0;
2885: if (result)
2886: return result;
2887: }
2888: }
2889: break;
2890: default:
2891: abort();
2892: }
2893: ptr = next;
2894: }
2895: /* not reached */
2896: }
2897:
2898: static
2899: enum XML_Error storeEntityValue(XML_Parser parser,
1.4 kahan 2900: const ENCODING *enc,
1.1 frystyk 2901: const char *entityTextPtr,
2902: const char *entityTextEnd)
2903: {
2904: STRING_POOL *pool = &(dtd.pool);
2905: for (;;) {
2906: const char *next;
1.4 kahan 2907: int tok = XmlEntityValueTok(enc, entityTextPtr, entityTextEnd, &next);
1.1 frystyk 2908: switch (tok) {
2909: case XML_TOK_PARAM_ENTITY_REF:
1.4 kahan 2910: #ifdef XML_DTD
2911: if (parentParser || enc != encoding) {
2912: enum XML_Error result;
2913: const XML_Char *name;
2914: ENTITY *entity;
2915: name = poolStoreString(&tempPool, enc,
2916: entityTextPtr + enc->minBytesPerChar,
2917: next - enc->minBytesPerChar);
2918: if (!name)
2919: return XML_ERROR_NO_MEMORY;
2920: entity = (ENTITY *)lookup(&dtd.paramEntities, name, 0);
2921: poolDiscard(&tempPool);
2922: if (!entity) {
2923: if (enc == encoding)
2924: eventPtr = entityTextPtr;
2925: return XML_ERROR_UNDEFINED_ENTITY;
2926: }
2927: if (entity->open) {
2928: if (enc == encoding)
2929: eventPtr = entityTextPtr;
2930: return XML_ERROR_RECURSIVE_ENTITY_REF;
2931: }
2932: if (entity->systemId) {
2933: if (enc == encoding)
2934: eventPtr = entityTextPtr;
2935: return XML_ERROR_PARAM_ENTITY_REF;
2936: }
2937: entity->open = 1;
2938: result = storeEntityValue(parser,
2939: internalEncoding,
2940: (char *)entity->textPtr,
2941: (char *)(entity->textPtr + entity->textLen));
2942: entity->open = 0;
2943: if (result)
2944: return result;
2945: break;
2946: }
2947: #endif /* XML_DTD */
1.1 frystyk 2948: eventPtr = entityTextPtr;
2949: return XML_ERROR_SYNTAX;
2950: case XML_TOK_NONE:
2951: return XML_ERROR_NONE;
2952: case XML_TOK_ENTITY_REF:
2953: case XML_TOK_DATA_CHARS:
1.4 kahan 2954: if (!poolAppend(pool, enc, entityTextPtr, next))
1.1 frystyk 2955: return XML_ERROR_NO_MEMORY;
2956: break;
2957: case XML_TOK_TRAILING_CR:
1.4 kahan 2958: next = entityTextPtr + enc->minBytesPerChar;
1.1 frystyk 2959: /* fall through */
2960: case XML_TOK_DATA_NEWLINE:
2961: if (pool->end == pool->ptr && !poolGrow(pool))
2962: return XML_ERROR_NO_MEMORY;
1.3 kahan 2963: *(pool->ptr)++ = 0xA;
1.1 frystyk 2964: break;
2965: case XML_TOK_CHAR_REF:
2966: {
2967: XML_Char buf[XML_ENCODE_MAX];
2968: int i;
1.4 kahan 2969: int n = XmlCharRefNumber(enc, entityTextPtr);
1.1 frystyk 2970: if (n < 0) {
1.4 kahan 2971: if (enc == encoding)
2972: eventPtr = entityTextPtr;
1.1 frystyk 2973: return XML_ERROR_BAD_CHAR_REF;
2974: }
2975: n = XmlEncode(n, (ICHAR *)buf);
2976: if (!n) {
1.4 kahan 2977: if (enc == encoding)
2978: eventPtr = entityTextPtr;
1.1 frystyk 2979: return XML_ERROR_BAD_CHAR_REF;
2980: }
2981: for (i = 0; i < n; i++) {
2982: if (pool->end == pool->ptr && !poolGrow(pool))
2983: return XML_ERROR_NO_MEMORY;
2984: *(pool->ptr)++ = buf[i];
2985: }
2986: }
2987: break;
2988: case XML_TOK_PARTIAL:
1.4 kahan 2989: if (enc == encoding)
2990: eventPtr = entityTextPtr;
1.1 frystyk 2991: return XML_ERROR_INVALID_TOKEN;
2992: case XML_TOK_INVALID:
1.4 kahan 2993: if (enc == encoding)
2994: eventPtr = next;
1.1 frystyk 2995: return XML_ERROR_INVALID_TOKEN;
2996: default:
2997: abort();
2998: }
2999: entityTextPtr = next;
3000: }
3001: /* not reached */
3002: }
3003:
3004: static void
3005: normalizeLines(XML_Char *s)
3006: {
3007: XML_Char *p;
3008: for (;; s++) {
3009: if (*s == XML_T('\0'))
3010: return;
1.3 kahan 3011: if (*s == 0xD)
1.1 frystyk 3012: break;
3013: }
3014: p = s;
3015: do {
1.3 kahan 3016: if (*s == 0xD) {
3017: *p++ = 0xA;
3018: if (*++s == 0xA)
1.1 frystyk 3019: s++;
3020: }
3021: else
3022: *p++ = *s++;
3023: } while (*s);
3024: *p = XML_T('\0');
3025: }
3026:
3027: static int
3028: reportProcessingInstruction(XML_Parser parser, const ENCODING *enc, const char *start, const char *end)
3029: {
3030: const XML_Char *target;
3031: XML_Char *data;
3032: const char *tem;
3033: if (!processingInstructionHandler) {
3034: if (defaultHandler)
3035: reportDefault(parser, enc, start, end);
3036: return 1;
3037: }
3038: start += enc->minBytesPerChar * 2;
3039: tem = start + XmlNameLength(enc, start);
3040: target = poolStoreString(&tempPool, enc, start, tem);
3041: if (!target)
3042: return 0;
3043: poolFinish(&tempPool);
3044: data = poolStoreString(&tempPool, enc,
3045: XmlSkipS(enc, tem),
3046: end - enc->minBytesPerChar*2);
3047: if (!data)
3048: return 0;
3049: normalizeLines(data);
3050: processingInstructionHandler(handlerArg, target, data);
3051: poolClear(&tempPool);
3052: return 1;
3053: }
3054:
1.3 kahan 3055: static int
3056: reportComment(XML_Parser parser, const ENCODING *enc, const char *start, const char *end)
3057: {
3058: XML_Char *data;
3059: if (!commentHandler) {
3060: if (defaultHandler)
3061: reportDefault(parser, enc, start, end);
3062: return 1;
3063: }
3064: data = poolStoreString(&tempPool,
3065: enc,
3066: start + enc->minBytesPerChar * 4,
3067: end - enc->minBytesPerChar * 3);
3068: if (!data)
3069: return 0;
3070: normalizeLines(data);
3071: commentHandler(handlerArg, data);
3072: poolClear(&tempPool);
3073: return 1;
3074: }
3075:
1.1 frystyk 3076: static void
3077: reportDefault(XML_Parser parser, const ENCODING *enc, const char *s, const char *end)
3078: {
3079: if (MUST_CONVERT(enc, s)) {
1.3 kahan 3080: const char **eventPP;
3081: const char **eventEndPP;
3082: if (enc == encoding) {
3083: eventPP = &eventPtr;
3084: eventEndPP = &eventEndPtr;
3085: }
3086: else {
3087: eventPP = &(openInternalEntities->internalEventPtr);
3088: eventEndPP = &(openInternalEntities->internalEventEndPtr);
3089: }
3090: do {
1.1 frystyk 3091: ICHAR *dataPtr = (ICHAR *)dataBuf;
3092: XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)dataBufEnd);
1.3 kahan 3093: *eventEndPP = s;
3094: defaultHandler(handlerArg, dataBuf, dataPtr - (ICHAR *)dataBuf);
3095: *eventPP = s;
3096: } while (s != end);
1.1 frystyk 3097: }
3098: else
3099: defaultHandler(handlerArg, (XML_Char *)s, (XML_Char *)end - (XML_Char *)s);
3100: }
3101:
3102:
3103: static int
3104: defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *attId, int isCdata, const XML_Char *value)
3105: {
3106: DEFAULT_ATTRIBUTE *att;
1.4 kahan 3107: if (value) {
3108: /* The handling of default attributes gets messed up if we have
3109: a default which duplicates a non-default. */
3110: int i;
3111: for (i = 0; i < type->nDefaultAtts; i++)
3112: if (attId == type->defaultAtts[i].id)
3113: return 1;
3114: }
1.1 frystyk 3115: if (type->nDefaultAtts == type->allocDefaultAtts) {
3116: if (type->allocDefaultAtts == 0) {
3117: type->allocDefaultAtts = 8;
3118: type->defaultAtts = malloc(type->allocDefaultAtts*sizeof(DEFAULT_ATTRIBUTE));
3119: }
3120: else {
3121: type->allocDefaultAtts *= 2;
3122: type->defaultAtts = realloc(type->defaultAtts,
3123: type->allocDefaultAtts*sizeof(DEFAULT_ATTRIBUTE));
3124: }
3125: if (!type->defaultAtts)
3126: return 0;
3127: }
3128: att = type->defaultAtts + type->nDefaultAtts;
3129: att->id = attId;
3130: att->value = value;
3131: att->isCdata = isCdata;
3132: if (!isCdata)
3133: attId->maybeTokenized = 1;
3134: type->nDefaultAtts += 1;
3135: return 1;
3136: }
3137:
1.3 kahan 3138: static int setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *elementType)
3139: {
3140: const XML_Char *name;
3141: for (name = elementType->name; *name; name++) {
3142: if (*name == XML_T(':')) {
3143: PREFIX *prefix;
3144: const XML_Char *s;
3145: for (s = elementType->name; s != name; s++) {
3146: if (!poolAppendChar(&dtd.pool, *s))
3147: return 0;
3148: }
3149: if (!poolAppendChar(&dtd.pool, XML_T('\0')))
3150: return 0;
3151: prefix = (PREFIX *)lookup(&dtd.prefixes, poolStart(&dtd.pool), sizeof(PREFIX));
3152: if (!prefix)
3153: return 0;
3154: if (prefix->name == poolStart(&dtd.pool))
3155: poolFinish(&dtd.pool);
3156: else
3157: poolDiscard(&dtd.pool);
3158: elementType->prefix = prefix;
3159:
3160: }
3161: }
3162: return 1;
3163: }
3164:
1.1 frystyk 3165: static ATTRIBUTE_ID *
3166: getAttributeId(XML_Parser parser, const ENCODING *enc, const char *start, const char *end)
3167: {
3168: ATTRIBUTE_ID *id;
3169: const XML_Char *name;
3170: if (!poolAppendChar(&dtd.pool, XML_T('\0')))
3171: return 0;
3172: name = poolStoreString(&dtd.pool, enc, start, end);
3173: if (!name)
3174: return 0;
3175: ++name;
3176: id = (ATTRIBUTE_ID *)lookup(&dtd.attributeIds, name, sizeof(ATTRIBUTE_ID));
3177: if (!id)
3178: return 0;
3179: if (id->name != name)
3180: poolDiscard(&dtd.pool);
1.3 kahan 3181: else {
1.1 frystyk 3182: poolFinish(&dtd.pool);
1.3 kahan 3183: if (!ns)
3184: ;
3185: else if (name[0] == 'x'
3186: && name[1] == 'm'
3187: && name[2] == 'l'
3188: && name[3] == 'n'
3189: && name[4] == 's'
3190: && (name[5] == XML_T('\0') || name[5] == XML_T(':'))) {
3191: if (name[5] == '\0')
3192: id->prefix = &dtd.defaultPrefix;
3193: else
3194: id->prefix = (PREFIX *)lookup(&dtd.prefixes, name + 6, sizeof(PREFIX));
3195: id->xmlns = 1;
3196: }
3197: else {
3198: int i;
3199: for (i = 0; name[i]; i++) {
3200: if (name[i] == XML_T(':')) {
3201: int j;
3202: for (j = 0; j < i; j++) {
3203: if (!poolAppendChar(&dtd.pool, name[j]))
3204: return 0;
3205: }
3206: if (!poolAppendChar(&dtd.pool, XML_T('\0')))
3207: return 0;
3208: id->prefix = (PREFIX *)lookup(&dtd.prefixes, poolStart(&dtd.pool), sizeof(PREFIX));
3209: if (id->prefix->name == poolStart(&dtd.pool))
3210: poolFinish(&dtd.pool);
3211: else
3212: poolDiscard(&dtd.pool);
3213: break;
3214: }
3215: }
3216: }
3217: }
1.1 frystyk 3218: return id;
3219: }
3220:
1.3 kahan 3221: #define CONTEXT_SEP XML_T('\f')
3222:
1.1 frystyk 3223: static
1.3 kahan 3224: const XML_Char *getContext(XML_Parser parser)
1.1 frystyk 3225: {
3226: HASH_TABLE_ITER iter;
1.3 kahan 3227: int needSep = 0;
3228:
3229: if (dtd.defaultPrefix.binding) {
3230: int i;
3231: int len;
3232: if (!poolAppendChar(&tempPool, XML_T('=')))
3233: return 0;
3234: len = dtd.defaultPrefix.binding->uriLen;
3235: if (namespaceSeparator != XML_T('\0'))
3236: len--;
3237: for (i = 0; i < len; i++)
3238: if (!poolAppendChar(&tempPool, dtd.defaultPrefix.binding->uri[i]))
3239: return 0;
3240: needSep = 1;
3241: }
3242:
3243: hashTableIterInit(&iter, &(dtd.prefixes));
3244: for (;;) {
3245: int i;
3246: int len;
3247: const XML_Char *s;
3248: PREFIX *prefix = (PREFIX *)hashTableIterNext(&iter);
3249: if (!prefix)
3250: break;
3251: if (!prefix->binding)
3252: continue;
3253: if (needSep && !poolAppendChar(&tempPool, CONTEXT_SEP))
3254: return 0;
3255: for (s = prefix->name; *s; s++)
3256: if (!poolAppendChar(&tempPool, *s))
3257: return 0;
3258: if (!poolAppendChar(&tempPool, XML_T('=')))
3259: return 0;
3260: len = prefix->binding->uriLen;
3261: if (namespaceSeparator != XML_T('\0'))
3262: len--;
3263: for (i = 0; i < len; i++)
3264: if (!poolAppendChar(&tempPool, prefix->binding->uri[i]))
3265: return 0;
3266: needSep = 1;
3267: }
3268:
1.1 frystyk 3269:
3270: hashTableIterInit(&iter, &(dtd.generalEntities));
3271: for (;;) {
3272: const XML_Char *s;
3273: ENTITY *e = (ENTITY *)hashTableIterNext(&iter);
3274: if (!e)
3275: break;
3276: if (!e->open)
3277: continue;
1.3 kahan 3278: if (needSep && !poolAppendChar(&tempPool, CONTEXT_SEP))
1.1 frystyk 3279: return 0;
3280: for (s = e->name; *s; s++)
3281: if (!poolAppendChar(&tempPool, *s))
3282: return 0;
1.3 kahan 3283: needSep = 1;
1.1 frystyk 3284: }
3285:
3286: if (!poolAppendChar(&tempPool, XML_T('\0')))
3287: return 0;
3288: return tempPool.start;
3289: }
3290:
3291: static
1.3 kahan 3292: int setContext(XML_Parser parser, const XML_Char *context)
1.1 frystyk 3293: {
1.3 kahan 3294: const XML_Char *s = context;
3295:
3296: while (*context != XML_T('\0')) {
3297: if (*s == CONTEXT_SEP || *s == XML_T('\0')) {
1.1 frystyk 3298: ENTITY *e;
3299: if (!poolAppendChar(&tempPool, XML_T('\0')))
3300: return 0;
3301: e = (ENTITY *)lookup(&dtd.generalEntities, poolStart(&tempPool), 0);
3302: if (e)
3303: e->open = 1;
1.3 kahan 3304: if (*s != XML_T('\0'))
1.1 frystyk 3305: s++;
1.3 kahan 3306: context = s;
3307: poolDiscard(&tempPool);
3308: }
3309: else if (*s == '=') {
3310: PREFIX *prefix;
3311: if (poolLength(&tempPool) == 0)
3312: prefix = &dtd.defaultPrefix;
3313: else {
3314: if (!poolAppendChar(&tempPool, XML_T('\0')))
3315: return 0;
3316: prefix = (PREFIX *)lookup(&dtd.prefixes, poolStart(&tempPool), sizeof(PREFIX));
3317: if (!prefix)
3318: return 0;
3319: if (prefix->name == poolStart(&tempPool))
3320: poolFinish(&tempPool);
3321: else
3322: poolDiscard(&tempPool);
3323: }
3324: for (context = s + 1; *context != CONTEXT_SEP && *context != XML_T('\0'); context++)
3325: if (!poolAppendChar(&tempPool, *context))
3326: return 0;
3327: if (!poolAppendChar(&tempPool, XML_T('\0')))
3328: return 0;
3329: if (!addBinding(parser, prefix, 0, poolStart(&tempPool), &inheritedBindings))
3330: return 0;
1.1 frystyk 3331: poolDiscard(&tempPool);
1.3 kahan 3332: if (*context != XML_T('\0'))
3333: ++context;
3334: s = context;
1.1 frystyk 3335: }
3336: else {
3337: if (!poolAppendChar(&tempPool, *s))
3338: return 0;
3339: s++;
3340: }
3341: }
3342: return 1;
3343: }
3344:
3345:
3346: static
3347: void normalizePublicId(XML_Char *publicId)
3348: {
3349: XML_Char *p = publicId;
3350: XML_Char *s;
3351: for (s = publicId; *s; s++) {
3352: switch (*s) {
1.3 kahan 3353: case 0x20:
3354: case 0xD:
3355: case 0xA:
3356: if (p != publicId && p[-1] != 0x20)
3357: *p++ = 0x20;
1.1 frystyk 3358: break;
3359: default:
3360: *p++ = *s;
3361: }
3362: }
1.3 kahan 3363: if (p != publicId && p[-1] == 0x20)
1.1 frystyk 3364: --p;
3365: *p = XML_T('\0');
3366: }
3367:
3368: static int dtdInit(DTD *p)
3369: {
3370: poolInit(&(p->pool));
3371: hashTableInit(&(p->generalEntities));
3372: hashTableInit(&(p->elementTypes));
3373: hashTableInit(&(p->attributeIds));
1.3 kahan 3374: hashTableInit(&(p->prefixes));
1.1 frystyk 3375: p->complete = 1;
3376: p->standalone = 0;
1.4 kahan 3377: #ifdef XML_DTD
3378: hashTableInit(&(p->paramEntities));
3379: #endif /* XML_DTD */
1.3 kahan 3380: p->defaultPrefix.name = 0;
3381: p->defaultPrefix.binding = 0;
1.1 frystyk 3382: return 1;
3383: }
3384:
1.4 kahan 3385: #ifdef XML_DTD
3386:
3387: static void dtdSwap(DTD *p1, DTD *p2)
3388: {
3389: DTD tem;
3390: memcpy(&tem, p1, sizeof(DTD));
3391: memcpy(p1, p2, sizeof(DTD));
3392: memcpy(p2, &tem, sizeof(DTD));
3393: }
3394:
3395: #endif /* XML_DTD */
3396:
1.1 frystyk 3397: static void dtdDestroy(DTD *p)
3398: {
3399: HASH_TABLE_ITER iter;
3400: hashTableIterInit(&iter, &(p->elementTypes));
3401: for (;;) {
3402: ELEMENT_TYPE *e = (ELEMENT_TYPE *)hashTableIterNext(&iter);
3403: if (!e)
3404: break;
3405: if (e->allocDefaultAtts != 0)
3406: free(e->defaultAtts);
3407: }
3408: hashTableDestroy(&(p->generalEntities));
1.4 kahan 3409: #ifdef XML_DTD
3410: hashTableDestroy(&(p->paramEntities));
3411: #endif /* XML_DTD */
1.1 frystyk 3412: hashTableDestroy(&(p->elementTypes));
3413: hashTableDestroy(&(p->attributeIds));
1.3 kahan 3414: hashTableDestroy(&(p->prefixes));
1.1 frystyk 3415: poolDestroy(&(p->pool));
3416: }
3417:
3418: /* Do a deep copy of the DTD. Return 0 for out of memory; non-zero otherwise.
3419: The new DTD has already been initialized. */
3420:
3421: static int dtdCopy(DTD *newDtd, const DTD *oldDtd)
3422: {
3423: HASH_TABLE_ITER iter;
3424:
1.3 kahan 3425: /* Copy the prefix table. */
3426:
3427: hashTableIterInit(&iter, &(oldDtd->prefixes));
3428: for (;;) {
3429: const XML_Char *name;
3430: const PREFIX *oldP = (PREFIX *)hashTableIterNext(&iter);
3431: if (!oldP)
3432: break;
3433: name = poolCopyString(&(newDtd->pool), oldP->name);
3434: if (!name)
3435: return 0;
3436: if (!lookup(&(newDtd->prefixes), name, sizeof(PREFIX)))
3437: return 0;
3438: }
3439:
1.1 frystyk 3440: hashTableIterInit(&iter, &(oldDtd->attributeIds));
3441:
3442: /* Copy the attribute id table. */
3443:
3444: for (;;) {
3445: ATTRIBUTE_ID *newA;
3446: const XML_Char *name;
3447: const ATTRIBUTE_ID *oldA = (ATTRIBUTE_ID *)hashTableIterNext(&iter);
3448:
3449: if (!oldA)
3450: break;
3451: /* Remember to allocate the scratch byte before the name. */
3452: if (!poolAppendChar(&(newDtd->pool), XML_T('\0')))
3453: return 0;
3454: name = poolCopyString(&(newDtd->pool), oldA->name);
3455: if (!name)
3456: return 0;
3457: ++name;
3458: newA = (ATTRIBUTE_ID *)lookup(&(newDtd->attributeIds), name, sizeof(ATTRIBUTE_ID));
3459: if (!newA)
3460: return 0;
3461: newA->maybeTokenized = oldA->maybeTokenized;
1.3 kahan 3462: if (oldA->prefix) {
3463: newA->xmlns = oldA->xmlns;
3464: if (oldA->prefix == &oldDtd->defaultPrefix)
3465: newA->prefix = &newDtd->defaultPrefix;
3466: else
3467: newA->prefix = (PREFIX *)lookup(&(newDtd->prefixes), oldA->prefix->name, 0);
3468: }
1.1 frystyk 3469: }
3470:
3471: /* Copy the element type table. */
3472:
3473: hashTableIterInit(&iter, &(oldDtd->elementTypes));
3474:
3475: for (;;) {
3476: int i;
3477: ELEMENT_TYPE *newE;
3478: const XML_Char *name;
3479: const ELEMENT_TYPE *oldE = (ELEMENT_TYPE *)hashTableIterNext(&iter);
3480: if (!oldE)
3481: break;
3482: name = poolCopyString(&(newDtd->pool), oldE->name);
3483: if (!name)
3484: return 0;
3485: newE = (ELEMENT_TYPE *)lookup(&(newDtd->elementTypes), name, sizeof(ELEMENT_TYPE));
3486: if (!newE)
3487: return 0;
1.3 kahan 3488: if (oldE->nDefaultAtts) {
3489: newE->defaultAtts = (DEFAULT_ATTRIBUTE *)malloc(oldE->nDefaultAtts * sizeof(DEFAULT_ATTRIBUTE));
3490: if (!newE->defaultAtts)
3491: return 0;
3492: }
1.1 frystyk 3493: newE->allocDefaultAtts = newE->nDefaultAtts = oldE->nDefaultAtts;
1.3 kahan 3494: if (oldE->prefix)
3495: newE->prefix = (PREFIX *)lookup(&(newDtd->prefixes), oldE->prefix->name, 0);
1.1 frystyk 3496: for (i = 0; i < newE->nDefaultAtts; i++) {
3497: newE->defaultAtts[i].id = (ATTRIBUTE_ID *)lookup(&(newDtd->attributeIds), oldE->defaultAtts[i].id->name, 0);
3498: newE->defaultAtts[i].isCdata = oldE->defaultAtts[i].isCdata;
3499: if (oldE->defaultAtts[i].value) {
3500: newE->defaultAtts[i].value = poolCopyString(&(newDtd->pool), oldE->defaultAtts[i].value);
3501: if (!newE->defaultAtts[i].value)
3502: return 0;
3503: }
3504: else
3505: newE->defaultAtts[i].value = 0;
3506: }
3507: }
3508:
1.4 kahan 3509: /* Copy the entity tables. */
3510: if (!copyEntityTable(&(newDtd->generalEntities),
3511: &(newDtd->pool),
3512: &(oldDtd->generalEntities)))
3513: return 0;
1.1 frystyk 3514:
1.4 kahan 3515: #ifdef XML_DTD
3516: if (!copyEntityTable(&(newDtd->paramEntities),
3517: &(newDtd->pool),
3518: &(oldDtd->paramEntities)))
3519: return 0;
3520: #endif /* XML_DTD */
3521:
3522: newDtd->complete = oldDtd->complete;
3523: newDtd->standalone = oldDtd->standalone;
3524: return 1;
3525: }
3526:
3527: static int copyEntityTable(HASH_TABLE *newTable,
3528: STRING_POOL *newPool,
3529: const HASH_TABLE *oldTable)
3530: {
3531: HASH_TABLE_ITER iter;
3532: const XML_Char *cachedOldBase = 0;
3533: const XML_Char *cachedNewBase = 0;
3534:
3535: hashTableIterInit(&iter, oldTable);
1.1 frystyk 3536:
3537: for (;;) {
3538: ENTITY *newE;
3539: const XML_Char *name;
3540: const ENTITY *oldE = (ENTITY *)hashTableIterNext(&iter);
3541: if (!oldE)
3542: break;
1.4 kahan 3543: name = poolCopyString(newPool, oldE->name);
1.1 frystyk 3544: if (!name)
3545: return 0;
1.4 kahan 3546: newE = (ENTITY *)lookup(newTable, name, sizeof(ENTITY));
1.1 frystyk 3547: if (!newE)
3548: return 0;
3549: if (oldE->systemId) {
1.4 kahan 3550: const XML_Char *tem = poolCopyString(newPool, oldE->systemId);
1.1 frystyk 3551: if (!tem)
3552: return 0;
3553: newE->systemId = tem;
3554: if (oldE->base) {
1.4 kahan 3555: if (oldE->base == cachedOldBase)
3556: newE->base = cachedNewBase;
3557: else {
3558: cachedOldBase = oldE->base;
3559: tem = poolCopyString(newPool, cachedOldBase);
3560: if (!tem)
3561: return 0;
3562: cachedNewBase = newE->base = tem;
3563: }
1.1 frystyk 3564: }
3565: }
3566: else {
1.4 kahan 3567: const XML_Char *tem = poolCopyStringN(newPool, oldE->textPtr, oldE->textLen);
1.1 frystyk 3568: if (!tem)
3569: return 0;
3570: newE->textPtr = tem;
3571: newE->textLen = oldE->textLen;
3572: }
3573: if (oldE->notation) {
1.4 kahan 3574: const XML_Char *tem = poolCopyString(newPool, oldE->notation);
1.1 frystyk 3575: if (!tem)
3576: return 0;
3577: newE->notation = tem;
3578: }
3579: }
3580: return 1;
3581: }
3582:
3583: static
3584: void poolInit(STRING_POOL *pool)
3585: {
3586: pool->blocks = 0;
3587: pool->freeBlocks = 0;
3588: pool->start = 0;
3589: pool->ptr = 0;
3590: pool->end = 0;
3591: }
3592:
3593: static
3594: void poolClear(STRING_POOL *pool)
3595: {
3596: if (!pool->freeBlocks)
3597: pool->freeBlocks = pool->blocks;
3598: else {
3599: BLOCK *p = pool->blocks;
3600: while (p) {
3601: BLOCK *tem = p->next;
3602: p->next = pool->freeBlocks;
3603: pool->freeBlocks = p;
3604: p = tem;
3605: }
3606: }
3607: pool->blocks = 0;
3608: pool->start = 0;
3609: pool->ptr = 0;
3610: pool->end = 0;
3611: }
3612:
3613: static
3614: void poolDestroy(STRING_POOL *pool)
3615: {
3616: BLOCK *p = pool->blocks;
3617: while (p) {
3618: BLOCK *tem = p->next;
3619: free(p);
3620: p = tem;
3621: }
3622: pool->blocks = 0;
3623: p = pool->freeBlocks;
3624: while (p) {
3625: BLOCK *tem = p->next;
3626: free(p);
3627: p = tem;
3628: }
3629: pool->freeBlocks = 0;
3630: pool->ptr = 0;
3631: pool->start = 0;
3632: pool->end = 0;
3633: }
3634:
3635: static
3636: XML_Char *poolAppend(STRING_POOL *pool, const ENCODING *enc,
3637: const char *ptr, const char *end)
3638: {
3639: if (!pool->ptr && !poolGrow(pool))
3640: return 0;
3641: for (;;) {
3642: XmlConvert(enc, &ptr, end, (ICHAR **)&(pool->ptr), (ICHAR *)pool->end);
3643: if (ptr == end)
3644: break;
3645: if (!poolGrow(pool))
3646: return 0;
3647: }
3648: return pool->start;
3649: }
3650:
3651: static const XML_Char *poolCopyString(STRING_POOL *pool, const XML_Char *s)
3652: {
3653: do {
3654: if (!poolAppendChar(pool, *s))
3655: return 0;
3656: } while (*s++);
3657: s = pool->start;
3658: poolFinish(pool);
3659: return s;
3660: }
3661:
3662: static const XML_Char *poolCopyStringN(STRING_POOL *pool, const XML_Char *s, int n)
3663: {
3664: if (!pool->ptr && !poolGrow(pool))
3665: return 0;
3666: for (; n > 0; --n, s++) {
3667: if (!poolAppendChar(pool, *s))
3668: return 0;
3669:
3670: }
3671: s = pool->start;
3672: poolFinish(pool);
3673: return s;
3674: }
3675:
3676: static
3677: XML_Char *poolStoreString(STRING_POOL *pool, const ENCODING *enc,
3678: const char *ptr, const char *end)
3679: {
3680: if (!poolAppend(pool, enc, ptr, end))
3681: return 0;
3682: if (pool->ptr == pool->end && !poolGrow(pool))
3683: return 0;
3684: *(pool->ptr)++ = 0;
3685: return pool->start;
3686: }
3687:
3688: static
3689: int poolGrow(STRING_POOL *pool)
3690: {
3691: if (pool->freeBlocks) {
3692: if (pool->start == 0) {
3693: pool->blocks = pool->freeBlocks;
3694: pool->freeBlocks = pool->freeBlocks->next;
3695: pool->blocks->next = 0;
3696: pool->start = pool->blocks->s;
3697: pool->end = pool->start + pool->blocks->size;
3698: pool->ptr = pool->start;
3699: return 1;
3700: }
3701: if (pool->end - pool->start < pool->freeBlocks->size) {
3702: BLOCK *tem = pool->freeBlocks->next;
3703: pool->freeBlocks->next = pool->blocks;
3704: pool->blocks = pool->freeBlocks;
3705: pool->freeBlocks = tem;
3706: memcpy(pool->blocks->s, pool->start, (pool->end - pool->start) * sizeof(XML_Char));
3707: pool->ptr = pool->blocks->s + (pool->ptr - pool->start);
3708: pool->start = pool->blocks->s;
3709: pool->end = pool->start + pool->blocks->size;
3710: return 1;
3711: }
3712: }
3713: if (pool->blocks && pool->start == pool->blocks->s) {
3714: int blockSize = (pool->end - pool->start)*2;
3715: pool->blocks = realloc(pool->blocks, offsetof(BLOCK, s) + blockSize * sizeof(XML_Char));
3716: if (!pool->blocks)
3717: return 0;
3718: pool->blocks->size = blockSize;
3719: pool->ptr = pool->blocks->s + (pool->ptr - pool->start);
3720: pool->start = pool->blocks->s;
3721: pool->end = pool->start + blockSize;
3722: }
3723: else {
3724: BLOCK *tem;
3725: int blockSize = pool->end - pool->start;
3726: if (blockSize < INIT_BLOCK_SIZE)
3727: blockSize = INIT_BLOCK_SIZE;
3728: else
3729: blockSize *= 2;
3730: tem = malloc(offsetof(BLOCK, s) + blockSize * sizeof(XML_Char));
3731: if (!tem)
3732: return 0;
3733: tem->size = blockSize;
3734: tem->next = pool->blocks;
3735: pool->blocks = tem;
3736: memcpy(tem->s, pool->start, (pool->ptr - pool->start) * sizeof(XML_Char));
3737: pool->ptr = tem->s + (pool->ptr - pool->start);
3738: pool->start = tem->s;
3739: pool->end = tem->s + blockSize;
3740: }
3741: return 1;
3742: }
Webmaster