Annotation of libwww/modules/expat/xmlparse/xmlparse.c, revision 1.4
1.1 frystyk 1: /*
2: The contents of this file are subject to the Mozilla Public License
1.3 kahan 3: Version 1.1 (the "License"); you may not use this file except in
1.1 frystyk 4: compliance with the License. You may obtain a copy of the License at
5: http://www.mozilla.org/MPL/
6:
7: Software distributed under the License is distributed on an "AS IS"
8: basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
9: License for the specific language governing rights and limitations
10: under the License.
11:
12: The Original Code is expat.
13:
14: The Initial Developer of the Original Code is James Clark.
1.3 kahan 15: Portions created by James Clark are Copyright (C) 1998, 1999
1.1 frystyk 16: James Clark. All Rights Reserved.
17:
18: Contributor(s):
1.3 kahan 19:
20: Alternatively, the contents of this file may be used under the terms
21: of the GNU General Public License (the "GPL"), in which case the
22: provisions of the GPL are applicable instead of those above. If you
23: wish to allow use of your version of this file only under the terms of
24: the GPL and not to allow others to use your version of this file under
25: the MPL, indicate your decision by deleting the provisions above and
26: replace them with the notice and other provisions required by the
27: GPL. If you do not delete the provisions above, a recipient may use
28: your version of this file under either the MPL or the GPL.
1.1 frystyk 29: */
30:
31: #include "xmldef.h"
1.3 kahan 32: #include "xmlparse.h"
1.1 frystyk 33:
34: #ifdef XML_UNICODE
35: #define XML_ENCODE_MAX XML_UTF16_ENCODE_MAX
36: #define XmlConvert XmlUtf16Convert
37: #define XmlGetInternalEncoding XmlGetUtf16InternalEncoding
1.3 kahan 38: #define XmlGetInternalEncodingNS XmlGetUtf16InternalEncodingNS
1.1 frystyk 39: #define XmlEncode XmlUtf16Encode
40: #define MUST_CONVERT(enc, s) (!(enc)->isUtf16 || (((unsigned long)s) & 1))
41: typedef unsigned short ICHAR;
42: #else
43: #define XML_ENCODE_MAX XML_UTF8_ENCODE_MAX
44: #define XmlConvert XmlUtf8Convert
45: #define XmlGetInternalEncoding XmlGetUtf8InternalEncoding
1.3 kahan 46: #define XmlGetInternalEncodingNS XmlGetUtf8InternalEncodingNS
1.1 frystyk 47: #define XmlEncode XmlUtf8Encode
48: #define MUST_CONVERT(enc, s) (!(enc)->isUtf8)
49: typedef char ICHAR;
50: #endif
51:
1.3 kahan 52:
53: #ifndef XML_NS
54:
55: #define XmlInitEncodingNS XmlInitEncoding
56: #define XmlInitUnknownEncodingNS XmlInitUnknownEncoding
57: #undef XmlGetInternalEncodingNS
58: #define XmlGetInternalEncodingNS XmlGetInternalEncoding
59: #define XmlParseXmlDeclNS XmlParseXmlDecl
60:
61: #endif
62:
/* XML_T(x) turns a character or string literal into the literal form
   used for XML_Char: wide (L-prefixed) when XML_UNICODE_WCHAR_T is
   defined, unchanged otherwise. */
#ifdef XML_UNICODE_WCHAR_T
#define XML_T(x) L ## x
#else
#define XML_T(x) x
#endif

/* Round n up to the next multiple of sz.  sz must be a power of 2. */
#define ROUND_UP(n, sz) (((n) + ((sz) - 1)) & ~((sz) - 1))
71:
72: #include "xmltok.h"
73: #include "xmlrole.h"
74: #include "hashtable.h"
75:
/* Initial allocation sizes. */
#define INIT_TAG_BUF_SIZE   32    /* must be a multiple of sizeof(XML_Char) */
#define INIT_DATA_BUF_SIZE  1024  /* in XML_Char units (see XML_ParserCreate) */
#define INIT_ATTS_SIZE      16    /* in ATTRIBUTE entries (see XML_ParserCreate) */
#define INIT_BLOCK_SIZE     1024
#define INIT_BUFFER_SIZE    1024

#define EXPAND_SPARE        24
83:
84: typedef struct binding {
85: struct prefix *prefix;
86: struct binding *nextTagBinding;
87: struct binding *prevPrefixBinding;
88: const struct attribute_id *attId;
89: XML_Char *uri;
90: int uriLen;
91: int uriAlloc;
92: } BINDING;
93:
94: typedef struct prefix {
95: const XML_Char *name;
96: BINDING *binding;
97: } PREFIX;
98:
99: typedef struct {
100: const XML_Char *str;
101: const XML_Char *localPart;
102: int uriLen;
103: } TAG_NAME;
104:
1.1 frystyk 105: typedef struct tag {
106: struct tag *parent;
107: const char *rawName;
108: int rawNameLength;
1.3 kahan 109: TAG_NAME name;
1.1 frystyk 110: char *buf;
111: char *bufEnd;
1.3 kahan 112: BINDING *bindings;
1.1 frystyk 113: } TAG;
114:
115: typedef struct {
116: const XML_Char *name;
117: const XML_Char *textPtr;
118: int textLen;
119: const XML_Char *systemId;
120: const XML_Char *base;
121: const XML_Char *publicId;
122: const XML_Char *notation;
123: char open;
124: } ENTITY;
125:
126: typedef struct block {
127: struct block *next;
128: int size;
129: XML_Char s[1];
130: } BLOCK;
131:
132: typedef struct {
133: BLOCK *blocks;
134: BLOCK *freeBlocks;
135: const XML_Char *end;
136: XML_Char *ptr;
137: XML_Char *start;
138: } STRING_POOL;
139:
140: /* The XML_Char before the name is used to determine whether
141: an attribute has been specified. */
1.3 kahan 142: typedef struct attribute_id {
1.1 frystyk 143: XML_Char *name;
1.3 kahan 144: PREFIX *prefix;
1.1 frystyk 145: char maybeTokenized;
1.3 kahan 146: char xmlns;
1.1 frystyk 147: } ATTRIBUTE_ID;
148:
149: typedef struct {
150: const ATTRIBUTE_ID *id;
151: char isCdata;
152: const XML_Char *value;
153: } DEFAULT_ATTRIBUTE;
154:
155: typedef struct {
156: const XML_Char *name;
1.3 kahan 157: PREFIX *prefix;
1.1 frystyk 158: int nDefaultAtts;
159: int allocDefaultAtts;
160: DEFAULT_ATTRIBUTE *defaultAtts;
161: } ELEMENT_TYPE;
162:
163: typedef struct {
164: HASH_TABLE generalEntities;
165: HASH_TABLE elementTypes;
166: HASH_TABLE attributeIds;
1.3 kahan 167: HASH_TABLE prefixes;
1.1 frystyk 168: STRING_POOL pool;
169: int complete;
170: int standalone;
1.4 ! kahan 171: #ifdef XML_DTD
! 172: HASH_TABLE paramEntities;
! 173: #endif /* XML_DTD */
1.3 kahan 174: PREFIX defaultPrefix;
1.1 frystyk 175: } DTD;
176:
1.3 kahan 177: typedef struct open_internal_entity {
178: const char *internalEventPtr;
179: const char *internalEventEndPtr;
180: struct open_internal_entity *next;
181: ENTITY *entity;
182: } OPEN_INTERNAL_ENTITY;
183:
1.1 frystyk 184: typedef enum XML_Error Processor(XML_Parser parser,
185: const char *start,
186: const char *end,
187: const char **endPtr);
188:
189: static Processor prologProcessor;
190: static Processor prologInitProcessor;
191: static Processor contentProcessor;
192: static Processor cdataSectionProcessor;
1.4 ! kahan 193: #ifdef XML_DTD
! 194: static Processor ignoreSectionProcessor;
! 195: #endif /* XML_DTD */
1.1 frystyk 196: static Processor epilogProcessor;
197: static Processor errorProcessor;
198: static Processor externalEntityInitProcessor;
199: static Processor externalEntityInitProcessor2;
200: static Processor externalEntityInitProcessor3;
201: static Processor externalEntityContentProcessor;
202:
203: static enum XML_Error
204: handleUnknownEncoding(XML_Parser parser, const XML_Char *encodingName);
205: static enum XML_Error
206: processXmlDecl(XML_Parser parser, int isGeneralTextEntity, const char *, const char *);
207: static enum XML_Error
208: initializeEncoding(XML_Parser parser);
209: static enum XML_Error
1.4 ! kahan 210: doProlog(XML_Parser parser, const ENCODING *enc, const char *s,
! 211: const char *end, int tok, const char *next, const char **nextPtr);
! 212: static enum XML_Error
! 213: processInternalParamEntity(XML_Parser parser, ENTITY *entity);
! 214: static enum XML_Error
1.1 frystyk 215: doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc,
216: const char *start, const char *end, const char **endPtr);
217: static enum XML_Error
218: doCdataSection(XML_Parser parser, const ENCODING *, const char **startPtr, const char *end, const char **nextPtr);
1.4 ! kahan 219: #ifdef XML_DTD
! 220: static enum XML_Error
! 221: doIgnoreSection(XML_Parser parser, const ENCODING *, const char **startPtr, const char *end, const char **nextPtr);
! 222: #endif /* XML_DTD */
1.3 kahan 223: static enum XML_Error storeAtts(XML_Parser parser, const ENCODING *, const char *s,
224: TAG_NAME *tagNamePtr, BINDING **bindingsPtr);
225: static
226: int addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId, const XML_Char *uri, BINDING **bindingsPtr);
1.1 frystyk 227: static int
228: defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *, int isCdata, const XML_Char *dfltValue);
229: static enum XML_Error
230: storeAttributeValue(XML_Parser parser, const ENCODING *, int isCdata, const char *, const char *,
231: STRING_POOL *);
232: static enum XML_Error
233: appendAttributeValue(XML_Parser parser, const ENCODING *, int isCdata, const char *, const char *,
234: STRING_POOL *);
235: static ATTRIBUTE_ID *
236: getAttributeId(XML_Parser parser, const ENCODING *enc, const char *start, const char *end);
1.3 kahan 237: static int setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *);
1.1 frystyk 238: static enum XML_Error
1.4 ! kahan 239: storeEntityValue(XML_Parser parser, const ENCODING *enc, const char *start, const char *end);
1.1 frystyk 240: static int
241: reportProcessingInstruction(XML_Parser parser, const ENCODING *enc, const char *start, const char *end);
1.3 kahan 242: static int
243: reportComment(XML_Parser parser, const ENCODING *enc, const char *start, const char *end);
1.1 frystyk 244: static void
245: reportDefault(XML_Parser parser, const ENCODING *enc, const char *start, const char *end);
246:
1.3 kahan 247: static const XML_Char *getContext(XML_Parser parser);
248: static int setContext(XML_Parser parser, const XML_Char *context);
1.1 frystyk 249: static void normalizePublicId(XML_Char *s);
250: static int dtdInit(DTD *);
251: static void dtdDestroy(DTD *);
252: static int dtdCopy(DTD *newDtd, const DTD *oldDtd);
1.4 ! kahan 253: static int copyEntityTable(HASH_TABLE *, STRING_POOL *, const HASH_TABLE *);
! 254: #ifdef XML_DTD
! 255: static void dtdSwap(DTD *, DTD *);
! 256: #endif /* XML_DTD */
1.1 frystyk 257: static void poolInit(STRING_POOL *);
258: static void poolClear(STRING_POOL *);
259: static void poolDestroy(STRING_POOL *);
260: static XML_Char *poolAppend(STRING_POOL *pool, const ENCODING *enc,
261: const char *ptr, const char *end);
262: static XML_Char *poolStoreString(STRING_POOL *pool, const ENCODING *enc,
263: const char *ptr, const char *end);
264: static int poolGrow(STRING_POOL *pool);
265: static const XML_Char *poolCopyString(STRING_POOL *pool, const XML_Char *s);
266: static const XML_Char *poolCopyStringN(STRING_POOL *pool, const XML_Char *s, int n);
267:
/* Inline STRING_POOL operations.  `pool` is a pointer to a pool whose
   current string occupies [start, ptr).  Every macro argument is
   parenthesised so that expression arguments such as &somePool expand
   correctly (poolChop previously did not parenthesise `pool`). */
#define poolStart(pool) ((pool)->start)
#define poolEnd(pool) ((pool)->ptr)
#define poolLength(pool) ((pool)->ptr - (pool)->start)
/* Drop the last character of the current string. */
#define poolChop(pool) ((void)--((pool)->ptr))
#define poolLastChar(pool) (((pool)->ptr)[-1])
/* Abandon the current string: rewind ptr to start. */
#define poolDiscard(pool) ((pool)->ptr = (pool)->start)
/* Commit the current string: the next one begins at ptr. */
#define poolFinish(pool) ((pool)->start = (pool)->ptr)
/* Append c, calling poolGrow() first when ptr has reached end.
   Evaluates to 0 if poolGrow() fails, 1 otherwise. */
#define poolAppendChar(pool, c) \
  (((pool)->ptr == (pool)->end && !poolGrow(pool)) \
   ? 0 \
   : ((*((pool)->ptr)++ = (c)), 1))
279:
280: typedef struct {
281: /* The first member must be userData so that the XML_GetUserData macro works. */
1.3 kahan 282: void *m_userData;
283: void *m_handlerArg;
284: char *m_buffer;
1.1 frystyk 285: /* first character to be parsed */
1.3 kahan 286: const char *m_bufferPtr;
1.1 frystyk 287: /* past last character to be parsed */
1.3 kahan 288: char *m_bufferEnd;
1.1 frystyk 289: /* allocated end of buffer */
1.3 kahan 290: const char *m_bufferLim;
291: long m_parseEndByteIndex;
292: const char *m_parseEndPtr;
293: XML_Char *m_dataBuf;
294: XML_Char *m_dataBufEnd;
295: XML_StartElementHandler m_startElementHandler;
296: XML_EndElementHandler m_endElementHandler;
297: XML_CharacterDataHandler m_characterDataHandler;
298: XML_ProcessingInstructionHandler m_processingInstructionHandler;
299: XML_CommentHandler m_commentHandler;
300: XML_StartCdataSectionHandler m_startCdataSectionHandler;
301: XML_EndCdataSectionHandler m_endCdataSectionHandler;
302: XML_DefaultHandler m_defaultHandler;
1.4 ! kahan 303: XML_StartDoctypeDeclHandler m_startDoctypeDeclHandler;
! 304: XML_EndDoctypeDeclHandler m_endDoctypeDeclHandler;
1.3 kahan 305: XML_UnparsedEntityDeclHandler m_unparsedEntityDeclHandler;
306: XML_NotationDeclHandler m_notationDeclHandler;
307: XML_StartNamespaceDeclHandler m_startNamespaceDeclHandler;
308: XML_EndNamespaceDeclHandler m_endNamespaceDeclHandler;
309: XML_NotStandaloneHandler m_notStandaloneHandler;
310: XML_ExternalEntityRefHandler m_externalEntityRefHandler;
311: void *m_externalEntityRefHandlerArg;
312: XML_UnknownEncodingHandler m_unknownEncodingHandler;
313: const ENCODING *m_encoding;
314: INIT_ENCODING m_initEncoding;
1.4 ! kahan 315: const ENCODING *m_internalEncoding;
1.3 kahan 316: const XML_Char *m_protocolEncodingName;
317: int m_ns;
318: void *m_unknownEncodingMem;
319: void *m_unknownEncodingData;
320: void *m_unknownEncodingHandlerData;
321: void (*m_unknownEncodingRelease)(void *);
322: PROLOG_STATE m_prologState;
323: Processor *m_processor;
324: enum XML_Error m_errorCode;
325: const char *m_eventPtr;
326: const char *m_eventEndPtr;
327: const char *m_positionPtr;
328: OPEN_INTERNAL_ENTITY *m_openInternalEntities;
329: int m_defaultExpandInternalEntities;
330: int m_tagLevel;
331: ENTITY *m_declEntity;
332: const XML_Char *m_declNotationName;
333: const XML_Char *m_declNotationPublicId;
334: ELEMENT_TYPE *m_declElementType;
335: ATTRIBUTE_ID *m_declAttributeId;
336: char m_declAttributeIsCdata;
337: DTD m_dtd;
1.4 ! kahan 338: const XML_Char *m_curBase;
1.3 kahan 339: TAG *m_tagStack;
340: TAG *m_freeTagList;
341: BINDING *m_inheritedBindings;
342: BINDING *m_freeBindingList;
343: int m_attsSize;
344: int m_nSpecifiedAtts;
345: ATTRIBUTE *m_atts;
346: POSITION m_position;
347: STRING_POOL m_tempPool;
348: STRING_POOL m_temp2Pool;
349: char *m_groupConnector;
350: unsigned m_groupSize;
351: int m_hadExternalDoctype;
352: XML_Char m_namespaceSeparator;
1.4 ! kahan 353: #ifdef XML_DTD
! 354: enum XML_ParamEntityParsing m_paramEntityParsing;
! 355: XML_Parser m_parentParser;
! 356: #endif
1.1 frystyk 357: } Parser;
358:
/* Field accessors.  Each macro expands to the named field of the Parser
   behind the variable `parser`, so it may only be used where a variable
   of that name is in scope; rebinding `parser` redirects every macro. */

/* user data and callbacks */
#define userData (((Parser *)parser)->m_userData)
#define handlerArg (((Parser *)parser)->m_handlerArg)
#define startElementHandler (((Parser *)parser)->m_startElementHandler)
#define endElementHandler (((Parser *)parser)->m_endElementHandler)
#define characterDataHandler (((Parser *)parser)->m_characterDataHandler)
#define processingInstructionHandler (((Parser *)parser)->m_processingInstructionHandler)
#define commentHandler (((Parser *)parser)->m_commentHandler)
#define startCdataSectionHandler (((Parser *)parser)->m_startCdataSectionHandler)
#define endCdataSectionHandler (((Parser *)parser)->m_endCdataSectionHandler)
#define defaultHandler (((Parser *)parser)->m_defaultHandler)
#define startDoctypeDeclHandler (((Parser *)parser)->m_startDoctypeDeclHandler)
#define endDoctypeDeclHandler (((Parser *)parser)->m_endDoctypeDeclHandler)
#define unparsedEntityDeclHandler (((Parser *)parser)->m_unparsedEntityDeclHandler)
#define notationDeclHandler (((Parser *)parser)->m_notationDeclHandler)
#define startNamespaceDeclHandler (((Parser *)parser)->m_startNamespaceDeclHandler)
#define endNamespaceDeclHandler (((Parser *)parser)->m_endNamespaceDeclHandler)
#define notStandaloneHandler (((Parser *)parser)->m_notStandaloneHandler)
#define externalEntityRefHandler (((Parser *)parser)->m_externalEntityRefHandler)
#define externalEntityRefHandlerArg (((Parser *)parser)->m_externalEntityRefHandlerArg)
#define unknownEncodingHandler (((Parser *)parser)->m_unknownEncodingHandler)

/* encoding */
#define encoding (((Parser *)parser)->m_encoding)
#define initEncoding (((Parser *)parser)->m_initEncoding)
#define internalEncoding (((Parser *)parser)->m_internalEncoding)
#define unknownEncodingMem (((Parser *)parser)->m_unknownEncodingMem)
#define unknownEncodingData (((Parser *)parser)->m_unknownEncodingData)
#define unknownEncodingHandlerData \
  (((Parser *)parser)->m_unknownEncodingHandlerData)
#define unknownEncodingRelease (((Parser *)parser)->m_unknownEncodingRelease)
#define protocolEncodingName (((Parser *)parser)->m_protocolEncodingName)
#define ns (((Parser *)parser)->m_ns)

/* parse state */
#define prologState (((Parser *)parser)->m_prologState)
#define processor (((Parser *)parser)->m_processor)
#define errorCode (((Parser *)parser)->m_errorCode)
#define eventPtr (((Parser *)parser)->m_eventPtr)
#define eventEndPtr (((Parser *)parser)->m_eventEndPtr)
#define positionPtr (((Parser *)parser)->m_positionPtr)
#define position (((Parser *)parser)->m_position)
#define openInternalEntities (((Parser *)parser)->m_openInternalEntities)
#define defaultExpandInternalEntities (((Parser *)parser)->m_defaultExpandInternalEntities)
#define tagLevel (((Parser *)parser)->m_tagLevel)

/* buffers */
#define buffer (((Parser *)parser)->m_buffer)
#define bufferPtr (((Parser *)parser)->m_bufferPtr)
#define bufferEnd (((Parser *)parser)->m_bufferEnd)
#define parseEndByteIndex (((Parser *)parser)->m_parseEndByteIndex)
#define parseEndPtr (((Parser *)parser)->m_parseEndPtr)
#define bufferLim (((Parser *)parser)->m_bufferLim)
#define dataBuf (((Parser *)parser)->m_dataBuf)
#define dataBufEnd (((Parser *)parser)->m_dataBufEnd)

/* DTD and declarations */
#define dtd (((Parser *)parser)->m_dtd)
#define curBase (((Parser *)parser)->m_curBase)
#define declEntity (((Parser *)parser)->m_declEntity)
#define declNotationName (((Parser *)parser)->m_declNotationName)
#define declNotationPublicId (((Parser *)parser)->m_declNotationPublicId)
#define declElementType (((Parser *)parser)->m_declElementType)
#define declAttributeId (((Parser *)parser)->m_declAttributeId)
#define declAttributeIsCdata (((Parser *)parser)->m_declAttributeIsCdata)

/* tags, bindings, attributes, scratch pools */
#define freeTagList (((Parser *)parser)->m_freeTagList)
#define freeBindingList (((Parser *)parser)->m_freeBindingList)
#define inheritedBindings (((Parser *)parser)->m_inheritedBindings)
#define tagStack (((Parser *)parser)->m_tagStack)
#define atts (((Parser *)parser)->m_atts)
#define attsSize (((Parser *)parser)->m_attsSize)
#define nSpecifiedAtts (((Parser *)parser)->m_nSpecifiedAtts)
#define tempPool (((Parser *)parser)->m_tempPool)
#define temp2Pool (((Parser *)parser)->m_temp2Pool)
#define groupConnector (((Parser *)parser)->m_groupConnector)
#define groupSize (((Parser *)parser)->m_groupSize)
#define hadExternalDoctype (((Parser *)parser)->m_hadExternalDoctype)
#define namespaceSeparator (((Parser *)parser)->m_namespaceSeparator)
#ifdef XML_DTD
#define parentParser (((Parser *)parser)->m_parentParser)
#define paramEntityParsing (((Parser *)parser)->m_paramEntityParsing)
#endif /* XML_DTD */
1.3 kahan 432:
#if defined(_MSC_VER) && defined(_DEBUG)
/* Returns the opaque handle typed as Parser *.  Compiled only for MSVC
   debug builds (presumably as a debugger convenience). */
Parser *asParser(XML_Parser parser)
{
  return parser;
}
#endif
1.1 frystyk 441:
442: XML_Parser XML_ParserCreate(const XML_Char *encodingName)
443: {
444: XML_Parser parser = malloc(sizeof(Parser));
445: if (!parser)
446: return parser;
447: processor = prologInitProcessor;
448: XmlPrologStateInit(&prologState);
449: userData = 0;
450: handlerArg = 0;
451: startElementHandler = 0;
452: endElementHandler = 0;
453: characterDataHandler = 0;
454: processingInstructionHandler = 0;
1.3 kahan 455: commentHandler = 0;
456: startCdataSectionHandler = 0;
457: endCdataSectionHandler = 0;
1.1 frystyk 458: defaultHandler = 0;
1.4 ! kahan 459: startDoctypeDeclHandler = 0;
! 460: endDoctypeDeclHandler = 0;
1.1 frystyk 461: unparsedEntityDeclHandler = 0;
462: notationDeclHandler = 0;
1.3 kahan 463: startNamespaceDeclHandler = 0;
464: endNamespaceDeclHandler = 0;
465: notStandaloneHandler = 0;
1.1 frystyk 466: externalEntityRefHandler = 0;
1.3 kahan 467: externalEntityRefHandlerArg = parser;
1.1 frystyk 468: unknownEncodingHandler = 0;
469: buffer = 0;
470: bufferPtr = 0;
471: bufferEnd = 0;
472: parseEndByteIndex = 0;
473: parseEndPtr = 0;
474: bufferLim = 0;
475: declElementType = 0;
476: declAttributeId = 0;
477: declEntity = 0;
478: declNotationName = 0;
479: declNotationPublicId = 0;
480: memset(&position, 0, sizeof(POSITION));
481: errorCode = XML_ERROR_NONE;
482: eventPtr = 0;
483: eventEndPtr = 0;
484: positionPtr = 0;
1.3 kahan 485: openInternalEntities = 0;
1.1 frystyk 486: tagLevel = 0;
487: tagStack = 0;
488: freeTagList = 0;
1.3 kahan 489: freeBindingList = 0;
490: inheritedBindings = 0;
1.1 frystyk 491: attsSize = INIT_ATTS_SIZE;
492: atts = malloc(attsSize * sizeof(ATTRIBUTE));
1.3 kahan 493: nSpecifiedAtts = 0;
1.1 frystyk 494: dataBuf = malloc(INIT_DATA_BUF_SIZE * sizeof(XML_Char));
495: groupSize = 0;
496: groupConnector = 0;
497: hadExternalDoctype = 0;
498: unknownEncodingMem = 0;
499: unknownEncodingRelease = 0;
500: unknownEncodingData = 0;
501: unknownEncodingHandlerData = 0;
1.3 kahan 502: namespaceSeparator = '!';
1.4 ! kahan 503: #ifdef XML_DTD
! 504: parentParser = 0;
! 505: paramEntityParsing = XML_PARAM_ENTITY_PARSING_NEVER;
! 506: #endif
1.3 kahan 507: ns = 0;
1.1 frystyk 508: poolInit(&tempPool);
509: poolInit(&temp2Pool);
510: protocolEncodingName = encodingName ? poolCopyString(&tempPool, encodingName) : 0;
1.4 ! kahan 511: curBase = 0;
1.1 frystyk 512: if (!dtdInit(&dtd) || !atts || !dataBuf
513: || (encodingName && !protocolEncodingName)) {
514: XML_ParserFree(parser);
515: return 0;
516: }
517: dataBufEnd = dataBuf + INIT_DATA_BUF_SIZE;
518: XmlInitEncoding(&initEncoding, &encoding, 0);
1.4 ! kahan 519: internalEncoding = XmlGetInternalEncoding();
1.1 frystyk 520: return parser;
521: }
522:
1.3 kahan 523: XML_Parser XML_ParserCreateNS(const XML_Char *encodingName, XML_Char nsSep)
524: {
525: static
526: const XML_Char implicitContext[] = {
527: XML_T('x'), XML_T('m'), XML_T('l'), XML_T('='),
528: XML_T('h'), XML_T('t'), XML_T('t'), XML_T('p'), XML_T(':'),
529: XML_T('/'), XML_T('/'), XML_T('w'), XML_T('w'), XML_T('w'),
530: XML_T('.'), XML_T('w'), XML_T('3'),
531: XML_T('.'), XML_T('o'), XML_T('r'), XML_T('g'),
532: XML_T('/'), XML_T('X'), XML_T('M'), XML_T('L'),
533: XML_T('/'), XML_T('1'), XML_T('9'), XML_T('9'), XML_T('8'),
534: XML_T('/'), XML_T('n'), XML_T('a'), XML_T('m'), XML_T('e'),
535: XML_T('s'), XML_T('p'), XML_T('a'), XML_T('c'), XML_T('e'),
536: XML_T('\0')
537: };
538:
539: XML_Parser parser = XML_ParserCreate(encodingName);
540: if (parser) {
541: XmlInitEncodingNS(&initEncoding, &encoding, 0);
542: ns = 1;
1.4 ! kahan 543: internalEncoding = XmlGetInternalEncodingNS();
1.3 kahan 544: namespaceSeparator = nsSep;
545: }
546: if (!setContext(parser, implicitContext)) {
547: XML_ParserFree(parser);
548: return 0;
549: }
550: return parser;
551: }
552:
553: int XML_SetEncoding(XML_Parser parser, const XML_Char *encodingName)
554: {
555: if (!encodingName)
556: protocolEncodingName = 0;
557: else {
558: protocolEncodingName = poolCopyString(&tempPool, encodingName);
559: if (!protocolEncodingName)
560: return 0;
561: }
562: return 1;
563: }
564:
1.1 frystyk 565: XML_Parser XML_ExternalEntityParserCreate(XML_Parser oldParser,
1.3 kahan 566: const XML_Char *context,
1.1 frystyk 567: const XML_Char *encodingName)
568: {
569: XML_Parser parser = oldParser;
570: DTD *oldDtd = &dtd;
571: XML_StartElementHandler oldStartElementHandler = startElementHandler;
572: XML_EndElementHandler oldEndElementHandler = endElementHandler;
573: XML_CharacterDataHandler oldCharacterDataHandler = characterDataHandler;
574: XML_ProcessingInstructionHandler oldProcessingInstructionHandler = processingInstructionHandler;
1.3 kahan 575: XML_CommentHandler oldCommentHandler = commentHandler;
576: XML_StartCdataSectionHandler oldStartCdataSectionHandler = startCdataSectionHandler;
577: XML_EndCdataSectionHandler oldEndCdataSectionHandler = endCdataSectionHandler;
1.1 frystyk 578: XML_DefaultHandler oldDefaultHandler = defaultHandler;
1.3 kahan 579: XML_StartNamespaceDeclHandler oldStartNamespaceDeclHandler = startNamespaceDeclHandler;
580: XML_EndNamespaceDeclHandler oldEndNamespaceDeclHandler = endNamespaceDeclHandler;
581: XML_NotStandaloneHandler oldNotStandaloneHandler = notStandaloneHandler;
1.1 frystyk 582: XML_ExternalEntityRefHandler oldExternalEntityRefHandler = externalEntityRefHandler;
583: XML_UnknownEncodingHandler oldUnknownEncodingHandler = unknownEncodingHandler;
584: void *oldUserData = userData;
585: void *oldHandlerArg = handlerArg;
1.3 kahan 586: int oldDefaultExpandInternalEntities = defaultExpandInternalEntities;
587: void *oldExternalEntityRefHandlerArg = externalEntityRefHandlerArg;
1.4 ! kahan 588: #ifdef XML_DTD
! 589: int oldParamEntityParsing = paramEntityParsing;
! 590: #endif
1.3 kahan 591: parser = (ns
592: ? XML_ParserCreateNS(encodingName, namespaceSeparator)
593: : XML_ParserCreate(encodingName));
1.1 frystyk 594: if (!parser)
595: return 0;
596: startElementHandler = oldStartElementHandler;
597: endElementHandler = oldEndElementHandler;
598: characterDataHandler = oldCharacterDataHandler;
599: processingInstructionHandler = oldProcessingInstructionHandler;
1.3 kahan 600: commentHandler = oldCommentHandler;
601: startCdataSectionHandler = oldStartCdataSectionHandler;
602: endCdataSectionHandler = oldEndCdataSectionHandler;
1.1 frystyk 603: defaultHandler = oldDefaultHandler;
1.3 kahan 604: startNamespaceDeclHandler = oldStartNamespaceDeclHandler;
605: endNamespaceDeclHandler = oldEndNamespaceDeclHandler;
606: notStandaloneHandler = oldNotStandaloneHandler;
1.1 frystyk 607: externalEntityRefHandler = oldExternalEntityRefHandler;
608: unknownEncodingHandler = oldUnknownEncodingHandler;
609: userData = oldUserData;
610: if (oldUserData == oldHandlerArg)
611: handlerArg = userData;
612: else
613: handlerArg = parser;
1.3 kahan 614: if (oldExternalEntityRefHandlerArg != oldParser)
615: externalEntityRefHandlerArg = oldExternalEntityRefHandlerArg;
616: defaultExpandInternalEntities = oldDefaultExpandInternalEntities;
1.4 ! kahan 617: #ifdef XML_DTD
! 618: paramEntityParsing = oldParamEntityParsing;
! 619: if (context) {
! 620: #endif /* XML_DTD */
! 621: if (!dtdCopy(&dtd, oldDtd) || !setContext(parser, context)) {
! 622: XML_ParserFree(parser);
! 623: return 0;
! 624: }
! 625: processor = externalEntityInitProcessor;
! 626: #ifdef XML_DTD
! 627: }
! 628: else {
! 629: dtdSwap(&dtd, oldDtd);
! 630: parentParser = oldParser;
! 631: XmlPrologStateInitExternalEntity(&prologState);
! 632: dtd.complete = 1;
! 633: hadExternalDoctype = 1;
1.1 frystyk 634: }
1.4 ! kahan 635: #endif /* XML_DTD */
1.1 frystyk 636: return parser;
637: }
638:
1.3 kahan 639: static
640: void destroyBindings(BINDING *bindings)
641: {
642: for (;;) {
643: BINDING *b = bindings;
644: if (!b)
645: break;
646: bindings = b->nextTagBinding;
647: free(b->uri);
648: free(b);
649: }
650: }
651:
1.1 frystyk 652: void XML_ParserFree(XML_Parser parser)
653: {
654: for (;;) {
655: TAG *p;
656: if (tagStack == 0) {
657: if (freeTagList == 0)
658: break;
659: tagStack = freeTagList;
660: freeTagList = 0;
661: }
662: p = tagStack;
663: tagStack = tagStack->parent;
664: free(p->buf);
1.3 kahan 665: destroyBindings(p->bindings);
1.1 frystyk 666: free(p);
667: }
1.3 kahan 668: destroyBindings(freeBindingList);
669: destroyBindings(inheritedBindings);
1.1 frystyk 670: poolDestroy(&tempPool);
671: poolDestroy(&temp2Pool);
1.4 ! kahan 672: #ifdef XML_DTD
! 673: if (parentParser) {
! 674: if (hadExternalDoctype)
! 675: dtd.complete = 0;
! 676: dtdSwap(&dtd, &((Parser *)parentParser)->m_dtd);
! 677: }
! 678: #endif /* XML_DTD */
1.1 frystyk 679: dtdDestroy(&dtd);
680: free((void *)atts);
681: free(groupConnector);
682: free(buffer);
683: free(dataBuf);
684: free(unknownEncodingMem);
685: if (unknownEncodingRelease)
686: unknownEncodingRelease(unknownEncodingData);
687: free(parser);
688: }
689:
690: void XML_UseParserAsHandlerArg(XML_Parser parser)
691: {
692: handlerArg = parser;
693: }
694:
695: void XML_SetUserData(XML_Parser parser, void *p)
696: {
697: if (handlerArg == userData)
698: handlerArg = userData = p;
699: else
700: userData = p;
701: }
702:
703: int XML_SetBase(XML_Parser parser, const XML_Char *p)
704: {
705: if (p) {
706: p = poolCopyString(&dtd.pool, p);
707: if (!p)
708: return 0;
1.4 ! kahan 709: curBase = p;
1.1 frystyk 710: }
711: else
1.4 ! kahan 712: curBase = 0;
1.1 frystyk 713: return 1;
714: }
715:
716: const XML_Char *XML_GetBase(XML_Parser parser)
717: {
1.4 ! kahan 718: return curBase;
1.1 frystyk 719: }
720:
1.3 kahan 721: int XML_GetSpecifiedAttributeCount(XML_Parser parser)
722: {
723: return nSpecifiedAtts;
724: }
725:
1.1 frystyk 726: void XML_SetElementHandler(XML_Parser parser,
727: XML_StartElementHandler start,
728: XML_EndElementHandler end)
729: {
730: startElementHandler = start;
731: endElementHandler = end;
732: }
733:
734: void XML_SetCharacterDataHandler(XML_Parser parser,
735: XML_CharacterDataHandler handler)
736: {
737: characterDataHandler = handler;
738: }
739:
740: void XML_SetProcessingInstructionHandler(XML_Parser parser,
741: XML_ProcessingInstructionHandler handler)
742: {
743: processingInstructionHandler = handler;
744: }
745:
1.3 kahan 746: void XML_SetCommentHandler(XML_Parser parser,
747: XML_CommentHandler handler)
748: {
749: commentHandler = handler;
750: }
751:
752: void XML_SetCdataSectionHandler(XML_Parser parser,
753: XML_StartCdataSectionHandler start,
754: XML_EndCdataSectionHandler end)
755: {
756: startCdataSectionHandler = start;
757: endCdataSectionHandler = end;
758: }
759:
1.1 frystyk 760: void XML_SetDefaultHandler(XML_Parser parser,
761: XML_DefaultHandler handler)
762: {
763: defaultHandler = handler;
1.3 kahan 764: defaultExpandInternalEntities = 0;
765: }
766:
767: void XML_SetDefaultHandlerExpand(XML_Parser parser,
768: XML_DefaultHandler handler)
769: {
770: defaultHandler = handler;
771: defaultExpandInternalEntities = 1;
1.1 frystyk 772: }
773:
1.4 ! kahan 774: void XML_SetDoctypeDeclHandler(XML_Parser parser,
! 775: XML_StartDoctypeDeclHandler start,
! 776: XML_EndDoctypeDeclHandler end)
! 777: {
! 778: startDoctypeDeclHandler = start;
! 779: endDoctypeDeclHandler = end;
! 780: }
! 781:
1.1 frystyk 782: void XML_SetUnparsedEntityDeclHandler(XML_Parser parser,
783: XML_UnparsedEntityDeclHandler handler)
784: {
785: unparsedEntityDeclHandler = handler;
786: }
787:
788: void XML_SetNotationDeclHandler(XML_Parser parser,
789: XML_NotationDeclHandler handler)
790: {
791: notationDeclHandler = handler;
792: }
793:
1.3 kahan 794: void XML_SetNamespaceDeclHandler(XML_Parser parser,
795: XML_StartNamespaceDeclHandler start,
796: XML_EndNamespaceDeclHandler end)
797: {
798: startNamespaceDeclHandler = start;
799: endNamespaceDeclHandler = end;
800: }
801:
802: void XML_SetNotStandaloneHandler(XML_Parser parser,
803: XML_NotStandaloneHandler handler)
804: {
805: notStandaloneHandler = handler;
806: }
807:
1.1 frystyk 808: void XML_SetExternalEntityRefHandler(XML_Parser parser,
809: XML_ExternalEntityRefHandler handler)
810: {
811: externalEntityRefHandler = handler;
812: }
813:
1.3 kahan 814: void XML_SetExternalEntityRefHandlerArg(XML_Parser parser, void *arg)
815: {
816: if (arg)
817: externalEntityRefHandlerArg = arg;
818: else
819: externalEntityRefHandlerArg = parser;
820: }
821:
1.1 frystyk 822: void XML_SetUnknownEncodingHandler(XML_Parser parser,
823: XML_UnknownEncodingHandler handler,
824: void *data)
825: {
826: unknownEncodingHandler = handler;
827: unknownEncodingHandlerData = data;
828: }
829:
1.4 ! kahan 830: int XML_SetParamEntityParsing(XML_Parser parser,
! 831: enum XML_ParamEntityParsing parsing)
! 832: {
! 833: #ifdef XML_DTD
! 834: paramEntityParsing = parsing;
! 835: return 1;
! 836: #else
! 837: return parsing == XML_PARAM_ENTITY_PARSING_NEVER;
! 838: #endif
! 839: }
! 840:
1.1 frystyk 841: int XML_Parse(XML_Parser parser, const char *s, int len, int isFinal)
842: {
843: if (len == 0) {
844: if (!isFinal)
845: return 1;
1.3 kahan 846: positionPtr = bufferPtr;
1.1 frystyk 847: errorCode = processor(parser, bufferPtr, parseEndPtr = bufferEnd, 0);
848: if (errorCode == XML_ERROR_NONE)
849: return 1;
850: eventEndPtr = eventPtr;
1.4 ! kahan 851: processor = errorProcessor;
1.1 frystyk 852: return 0;
853: }
854: else if (bufferPtr == bufferEnd) {
855: const char *end;
856: int nLeftOver;
857: parseEndByteIndex += len;
858: positionPtr = s;
859: if (isFinal) {
860: errorCode = processor(parser, s, parseEndPtr = s + len, 0);
861: if (errorCode == XML_ERROR_NONE)
862: return 1;
863: eventEndPtr = eventPtr;
1.4 ! kahan 864: processor = errorProcessor;
1.1 frystyk 865: return 0;
866: }
867: errorCode = processor(parser, s, parseEndPtr = s + len, &end);
868: if (errorCode != XML_ERROR_NONE) {
869: eventEndPtr = eventPtr;
1.4 ! kahan 870: processor = errorProcessor;
1.1 frystyk 871: return 0;
872: }
873: XmlUpdatePosition(encoding, positionPtr, end, &position);
874: nLeftOver = s + len - end;
875: if (nLeftOver) {
876: if (buffer == 0 || nLeftOver > bufferLim - buffer) {
877: /* FIXME avoid integer overflow */
878: buffer = buffer == 0 ? malloc(len * 2) : realloc(buffer, len * 2);
1.4 ! kahan 879: /* FIXME storage leak if realloc fails */
1.1 frystyk 880: if (!buffer) {
881: errorCode = XML_ERROR_NO_MEMORY;
882: eventPtr = eventEndPtr = 0;
1.4 ! kahan 883: processor = errorProcessor;
1.1 frystyk 884: return 0;
885: }
886: bufferLim = buffer + len * 2;
887: }
888: memcpy(buffer, end, nLeftOver);
889: bufferPtr = buffer;
890: bufferEnd = buffer + nLeftOver;
891: }
892: return 1;
893: }
894: else {
895: memcpy(XML_GetBuffer(parser, len), s, len);
896: return XML_ParseBuffer(parser, len, isFinal);
897: }
898: }
899:
900: int XML_ParseBuffer(XML_Parser parser, int len, int isFinal)
901: {
902: const char *start = bufferPtr;
903: positionPtr = start;
904: bufferEnd += len;
905: parseEndByteIndex += len;
906: errorCode = processor(parser, start, parseEndPtr = bufferEnd,
907: isFinal ? (const char **)0 : &bufferPtr);
908: if (errorCode == XML_ERROR_NONE) {
909: if (!isFinal)
910: XmlUpdatePosition(encoding, positionPtr, bufferPtr, &position);
911: return 1;
912: }
913: else {
914: eventEndPtr = eventPtr;
1.4 ! kahan 915: processor = errorProcessor;
1.1 frystyk 916: return 0;
917: }
918: }
919:
920: void *XML_GetBuffer(XML_Parser parser, int len)
921: {
922: if (len > bufferLim - bufferEnd) {
923: /* FIXME avoid integer overflow */
924: int neededSize = len + (bufferEnd - bufferPtr);
925: if (neededSize <= bufferLim - buffer) {
926: memmove(buffer, bufferPtr, bufferEnd - bufferPtr);
927: bufferEnd = buffer + (bufferEnd - bufferPtr);
928: bufferPtr = buffer;
929: }
930: else {
931: char *newBuf;
932: int bufferSize = bufferLim - bufferPtr;
933: if (bufferSize == 0)
934: bufferSize = INIT_BUFFER_SIZE;
935: do {
936: bufferSize *= 2;
937: } while (bufferSize < neededSize);
938: newBuf = malloc(bufferSize);
939: if (newBuf == 0) {
940: errorCode = XML_ERROR_NO_MEMORY;
941: return 0;
942: }
943: bufferLim = newBuf + bufferSize;
944: if (bufferPtr) {
945: memcpy(newBuf, bufferPtr, bufferEnd - bufferPtr);
946: free(buffer);
947: }
948: bufferEnd = newBuf + (bufferEnd - bufferPtr);
949: bufferPtr = buffer = newBuf;
950: }
951: }
952: return bufferEnd;
953: }
954:
955: enum XML_Error XML_GetErrorCode(XML_Parser parser)
956: {
957: return errorCode;
958: }
959:
960: long XML_GetCurrentByteIndex(XML_Parser parser)
961: {
962: if (eventPtr)
963: return parseEndByteIndex - (parseEndPtr - eventPtr);
964: return -1;
965: }
966:
1.3 kahan 967: int XML_GetCurrentByteCount(XML_Parser parser)
968: {
969: if (eventEndPtr && eventPtr)
970: return eventEndPtr - eventPtr;
971: return 0;
972: }
973:
1.1 frystyk 974: int XML_GetCurrentLineNumber(XML_Parser parser)
975: {
976: if (eventPtr) {
977: XmlUpdatePosition(encoding, positionPtr, eventPtr, &position);
978: positionPtr = eventPtr;
979: }
980: return position.lineNumber + 1;
981: }
982:
983: int XML_GetCurrentColumnNumber(XML_Parser parser)
984: {
985: if (eventPtr) {
986: XmlUpdatePosition(encoding, positionPtr, eventPtr, &position);
987: positionPtr = eventPtr;
988: }
989: return position.columnNumber;
990: }
991:
992: void XML_DefaultCurrent(XML_Parser parser)
993: {
1.3 kahan 994: if (defaultHandler) {
995: if (openInternalEntities)
996: reportDefault(parser,
1.4 ! kahan 997: internalEncoding,
1.3 kahan 998: openInternalEntities->internalEventPtr,
999: openInternalEntities->internalEventEndPtr);
1000: else
1001: reportDefault(parser, encoding, eventPtr, eventEndPtr);
1002: }
1.1 frystyk 1003: }
1004:
1005: const XML_LChar *XML_ErrorString(int code)
1006: {
1007: static const XML_LChar *message[] = {
1008: 0,
1009: XML_T("out of memory"),
1010: XML_T("syntax error"),
1011: XML_T("no element found"),
1012: XML_T("not well-formed"),
1013: XML_T("unclosed token"),
1014: XML_T("unclosed token"),
1015: XML_T("mismatched tag"),
1016: XML_T("duplicate attribute"),
1017: XML_T("junk after document element"),
1018: XML_T("illegal parameter entity reference"),
1019: XML_T("undefined entity"),
1020: XML_T("recursive entity reference"),
1021: XML_T("asynchronous entity"),
1022: XML_T("reference to invalid character number"),
1023: XML_T("reference to binary entity"),
1024: XML_T("reference to external entity in attribute"),
1025: XML_T("xml processing instruction not at start of external entity"),
1026: XML_T("unknown encoding"),
1027: XML_T("encoding specified in XML declaration is incorrect"),
1028: XML_T("unclosed CDATA section"),
1.3 kahan 1029: XML_T("error in processing external entity reference"),
1030: XML_T("document is not standalone")
1.1 frystyk 1031: };
1032: if (code > 0 && code < sizeof(message)/sizeof(message[0]))
1033: return message[code];
1034: return 0;
1035: }
1036:
1037: static
1038: enum XML_Error contentProcessor(XML_Parser parser,
1039: const char *start,
1040: const char *end,
1041: const char **endPtr)
1042: {
1043: return doContent(parser, 0, encoding, start, end, endPtr);
1044: }
1045:
1046: static
1047: enum XML_Error externalEntityInitProcessor(XML_Parser parser,
1048: const char *start,
1049: const char *end,
1050: const char **endPtr)
1051: {
1052: enum XML_Error result = initializeEncoding(parser);
1053: if (result != XML_ERROR_NONE)
1054: return result;
1055: processor = externalEntityInitProcessor2;
1056: return externalEntityInitProcessor2(parser, start, end, endPtr);
1057: }
1058:
1059: static
1060: enum XML_Error externalEntityInitProcessor2(XML_Parser parser,
1061: const char *start,
1062: const char *end,
1063: const char **endPtr)
1064: {
1065: const char *next;
1066: int tok = XmlContentTok(encoding, start, end, &next);
1067: switch (tok) {
1068: case XML_TOK_BOM:
1069: start = next;
1070: break;
1071: case XML_TOK_PARTIAL:
1072: if (endPtr) {
1073: *endPtr = start;
1074: return XML_ERROR_NONE;
1075: }
1076: eventPtr = start;
1077: return XML_ERROR_UNCLOSED_TOKEN;
1078: case XML_TOK_PARTIAL_CHAR:
1079: if (endPtr) {
1080: *endPtr = start;
1081: return XML_ERROR_NONE;
1082: }
1083: eventPtr = start;
1084: return XML_ERROR_PARTIAL_CHAR;
1085: }
1086: processor = externalEntityInitProcessor3;
1087: return externalEntityInitProcessor3(parser, start, end, endPtr);
1088: }
1089:
1090: static
1091: enum XML_Error externalEntityInitProcessor3(XML_Parser parser,
1092: const char *start,
1093: const char *end,
1094: const char **endPtr)
1095: {
1096: const char *next;
1097: int tok = XmlContentTok(encoding, start, end, &next);
1098: switch (tok) {
1099: case XML_TOK_XML_DECL:
1100: {
1101: enum XML_Error result = processXmlDecl(parser, 1, start, next);
1102: if (result != XML_ERROR_NONE)
1103: return result;
1104: start = next;
1105: }
1106: break;
1107: case XML_TOK_PARTIAL:
1108: if (endPtr) {
1109: *endPtr = start;
1110: return XML_ERROR_NONE;
1111: }
1112: eventPtr = start;
1113: return XML_ERROR_UNCLOSED_TOKEN;
1114: case XML_TOK_PARTIAL_CHAR:
1115: if (endPtr) {
1116: *endPtr = start;
1117: return XML_ERROR_NONE;
1118: }
1119: eventPtr = start;
1120: return XML_ERROR_PARTIAL_CHAR;
1121: }
1122: processor = externalEntityContentProcessor;
1123: tagLevel = 1;
1124: return doContent(parser, 1, encoding, start, end, endPtr);
1125: }
1126:
1127: static
1128: enum XML_Error externalEntityContentProcessor(XML_Parser parser,
1129: const char *start,
1130: const char *end,
1131: const char **endPtr)
1132: {
1133: return doContent(parser, 1, encoding, start, end, endPtr);
1134: }
1135:
1136: static enum XML_Error
1137: doContent(XML_Parser parser,
1138: int startTagLevel,
1139: const ENCODING *enc,
1140: const char *s,
1141: const char *end,
1142: const char **nextPtr)
1143: {
1144: const char **eventPP;
1145: const char **eventEndPP;
1146: if (enc == encoding) {
1147: eventPP = &eventPtr;
1148: eventEndPP = &eventEndPtr;
1149: }
1.3 kahan 1150: else {
1151: eventPP = &(openInternalEntities->internalEventPtr);
1152: eventEndPP = &(openInternalEntities->internalEventEndPtr);
1153: }
1154: *eventPP = s;
1.1 frystyk 1155: for (;;) {
1.3 kahan 1156: const char *next = s; /* XmlContentTok doesn't always set the last arg */
1.1 frystyk 1157: int tok = XmlContentTok(enc, s, end, &next);
1158: *eventEndPP = next;
1159: switch (tok) {
1160: case XML_TOK_TRAILING_CR:
1161: if (nextPtr) {
1162: *nextPtr = s;
1163: return XML_ERROR_NONE;
1164: }
1165: *eventEndPP = end;
1166: if (characterDataHandler) {
1.3 kahan 1167: XML_Char c = 0xA;
1.1 frystyk 1168: characterDataHandler(handlerArg, &c, 1);
1169: }
1170: else if (defaultHandler)
1171: reportDefault(parser, enc, s, end);
1172: if (startTagLevel == 0)
1173: return XML_ERROR_NO_ELEMENTS;
1174: if (tagLevel != startTagLevel)
1175: return XML_ERROR_ASYNC_ENTITY;
1176: return XML_ERROR_NONE;
1177: case XML_TOK_NONE:
1178: if (nextPtr) {
1179: *nextPtr = s;
1180: return XML_ERROR_NONE;
1181: }
1182: if (startTagLevel > 0) {
1183: if (tagLevel != startTagLevel)
1184: return XML_ERROR_ASYNC_ENTITY;
1185: return XML_ERROR_NONE;
1186: }
1187: return XML_ERROR_NO_ELEMENTS;
1188: case XML_TOK_INVALID:
1189: *eventPP = next;
1190: return XML_ERROR_INVALID_TOKEN;
1191: case XML_TOK_PARTIAL:
1192: if (nextPtr) {
1193: *nextPtr = s;
1194: return XML_ERROR_NONE;
1195: }
1196: return XML_ERROR_UNCLOSED_TOKEN;
1197: case XML_TOK_PARTIAL_CHAR:
1198: if (nextPtr) {
1199: *nextPtr = s;
1200: return XML_ERROR_NONE;
1201: }
1202: return XML_ERROR_PARTIAL_CHAR;
1203: case XML_TOK_ENTITY_REF:
1204: {
1205: const XML_Char *name;
1206: ENTITY *entity;
1207: XML_Char ch = XmlPredefinedEntityName(enc,
1208: s + enc->minBytesPerChar,
1209: next - enc->minBytesPerChar);
1210: if (ch) {
1211: if (characterDataHandler)
1212: characterDataHandler(handlerArg, &ch, 1);
1213: else if (defaultHandler)
1214: reportDefault(parser, enc, s, next);
1215: break;
1216: }
1217: name = poolStoreString(&dtd.pool, enc,
1218: s + enc->minBytesPerChar,
1219: next - enc->minBytesPerChar);
1220: if (!name)
1221: return XML_ERROR_NO_MEMORY;
1222: entity = (ENTITY *)lookup(&dtd.generalEntities, name, 0);
1223: poolDiscard(&dtd.pool);
1224: if (!entity) {
1225: if (dtd.complete || dtd.standalone)
1226: return XML_ERROR_UNDEFINED_ENTITY;
1227: if (defaultHandler)
1228: reportDefault(parser, enc, s, next);
1229: break;
1230: }
1231: if (entity->open)
1232: return XML_ERROR_RECURSIVE_ENTITY_REF;
1233: if (entity->notation)
1234: return XML_ERROR_BINARY_ENTITY_REF;
1235: if (entity) {
1236: if (entity->textPtr) {
1237: enum XML_Error result;
1.3 kahan 1238: OPEN_INTERNAL_ENTITY openEntity;
1239: if (defaultHandler && !defaultExpandInternalEntities) {
1.1 frystyk 1240: reportDefault(parser, enc, s, next);
1241: break;
1242: }
1243: entity->open = 1;
1.3 kahan 1244: openEntity.next = openInternalEntities;
1245: openInternalEntities = &openEntity;
1246: openEntity.entity = entity;
1247: openEntity.internalEventPtr = 0;
1248: openEntity.internalEventEndPtr = 0;
1.1 frystyk 1249: result = doContent(parser,
1250: tagLevel,
1.4 ! kahan 1251: internalEncoding,
1.1 frystyk 1252: (char *)entity->textPtr,
1253: (char *)(entity->textPtr + entity->textLen),
1254: 0);
1255: entity->open = 0;
1.3 kahan 1256: openInternalEntities = openEntity.next;
1.1 frystyk 1257: if (result)
1258: return result;
1259: }
1260: else if (externalEntityRefHandler) {
1.3 kahan 1261: const XML_Char *context;
1.1 frystyk 1262: entity->open = 1;
1.3 kahan 1263: context = getContext(parser);
1.1 frystyk 1264: entity->open = 0;
1.3 kahan 1265: if (!context)
1.1 frystyk 1266: return XML_ERROR_NO_MEMORY;
1.3 kahan 1267: if (!externalEntityRefHandler(externalEntityRefHandlerArg,
1268: context,
1.4 ! kahan 1269: entity->base,
1.3 kahan 1270: entity->systemId,
1271: entity->publicId))
1.1 frystyk 1272: return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
1273: poolDiscard(&tempPool);
1274: }
1275: else if (defaultHandler)
1276: reportDefault(parser, enc, s, next);
1277: }
1278: break;
1279: }
1280: case XML_TOK_START_TAG_WITH_ATTS:
1281: if (!startElementHandler) {
1.3 kahan 1282: enum XML_Error result = storeAtts(parser, enc, s, 0, 0);
1.1 frystyk 1283: if (result)
1284: return result;
1285: }
1286: /* fall through */
1287: case XML_TOK_START_TAG_NO_ATTS:
1288: {
1289: TAG *tag;
1290: if (freeTagList) {
1291: tag = freeTagList;
1292: freeTagList = freeTagList->parent;
1293: }
1294: else {
1295: tag = malloc(sizeof(TAG));
1296: if (!tag)
1297: return XML_ERROR_NO_MEMORY;
1298: tag->buf = malloc(INIT_TAG_BUF_SIZE);
1299: if (!tag->buf)
1300: return XML_ERROR_NO_MEMORY;
1301: tag->bufEnd = tag->buf + INIT_TAG_BUF_SIZE;
1302: }
1.3 kahan 1303: tag->bindings = 0;
1.1 frystyk 1304: tag->parent = tagStack;
1305: tagStack = tag;
1.3 kahan 1306: tag->name.localPart = 0;
1.1 frystyk 1307: tag->rawName = s + enc->minBytesPerChar;
1308: tag->rawNameLength = XmlNameLength(enc, tag->rawName);
1309: if (nextPtr) {
1.3 kahan 1310: /* Need to guarantee that:
1311: tag->buf + ROUND_UP(tag->rawNameLength, sizeof(XML_Char)) <= tag->bufEnd - sizeof(XML_Char) */
1312: if (tag->rawNameLength + (int)(sizeof(XML_Char) - 1) + (int)sizeof(XML_Char) > tag->bufEnd - tag->buf) {
1.1 frystyk 1313: int bufSize = tag->rawNameLength * 4;
1314: bufSize = ROUND_UP(bufSize, sizeof(XML_Char));
1315: tag->buf = realloc(tag->buf, bufSize);
1316: if (!tag->buf)
1317: return XML_ERROR_NO_MEMORY;
1318: tag->bufEnd = tag->buf + bufSize;
1319: }
1320: memcpy(tag->buf, tag->rawName, tag->rawNameLength);
1321: tag->rawName = tag->buf;
1322: }
1323: ++tagLevel;
1324: if (startElementHandler) {
1325: enum XML_Error result;
1326: XML_Char *toPtr;
1327: for (;;) {
1328: const char *rawNameEnd = tag->rawName + tag->rawNameLength;
1329: const char *fromPtr = tag->rawName;
1330: int bufSize;
1331: if (nextPtr)
1332: toPtr = (XML_Char *)(tag->buf + ROUND_UP(tag->rawNameLength, sizeof(XML_Char)));
1333: else
1334: toPtr = (XML_Char *)tag->buf;
1.3 kahan 1335: tag->name.str = toPtr;
1.1 frystyk 1336: XmlConvert(enc,
1337: &fromPtr, rawNameEnd,
1338: (ICHAR **)&toPtr, (ICHAR *)tag->bufEnd - 1);
1339: if (fromPtr == rawNameEnd)
1340: break;
1341: bufSize = (tag->bufEnd - tag->buf) << 1;
1342: tag->buf = realloc(tag->buf, bufSize);
1343: if (!tag->buf)
1344: return XML_ERROR_NO_MEMORY;
1345: tag->bufEnd = tag->buf + bufSize;
1346: if (nextPtr)
1347: tag->rawName = tag->buf;
1348: }
1349: *toPtr = XML_T('\0');
1.3 kahan 1350: result = storeAtts(parser, enc, s, &(tag->name), &(tag->bindings));
1.1 frystyk 1351: if (result)
1352: return result;
1.3 kahan 1353: startElementHandler(handlerArg, tag->name.str, (const XML_Char **)atts);
1.1 frystyk 1354: poolClear(&tempPool);
1355: }
1356: else {
1.3 kahan 1357: tag->name.str = 0;
1.1 frystyk 1358: if (defaultHandler)
1359: reportDefault(parser, enc, s, next);
1360: }
1361: break;
1362: }
1363: case XML_TOK_EMPTY_ELEMENT_WITH_ATTS:
1364: if (!startElementHandler) {
1.3 kahan 1365: enum XML_Error result = storeAtts(parser, enc, s, 0, 0);
1.1 frystyk 1366: if (result)
1367: return result;
1368: }
1369: /* fall through */
1370: case XML_TOK_EMPTY_ELEMENT_NO_ATTS:
1371: if (startElementHandler || endElementHandler) {
1372: const char *rawName = s + enc->minBytesPerChar;
1.3 kahan 1373: enum XML_Error result;
1374: BINDING *bindings = 0;
1375: TAG_NAME name;
1376: name.str = poolStoreString(&tempPool, enc, rawName,
1377: rawName + XmlNameLength(enc, rawName));
1378: if (!name.str)
1.1 frystyk 1379: return XML_ERROR_NO_MEMORY;
1380: poolFinish(&tempPool);
1.3 kahan 1381: result = storeAtts(parser, enc, s, &name, &bindings);
1382: if (result)
1383: return result;
1384: poolFinish(&tempPool);
1385: if (startElementHandler)
1386: startElementHandler(handlerArg, name.str, (const XML_Char **)atts);
1.1 frystyk 1387: if (endElementHandler) {
1388: if (startElementHandler)
1389: *eventPP = *eventEndPP;
1.3 kahan 1390: endElementHandler(handlerArg, name.str);
1.1 frystyk 1391: }
1392: poolClear(&tempPool);
1.3 kahan 1393: while (bindings) {
1394: BINDING *b = bindings;
1395: if (endNamespaceDeclHandler)
1396: endNamespaceDeclHandler(handlerArg, b->prefix->name);
1397: bindings = bindings->nextTagBinding;
1398: b->nextTagBinding = freeBindingList;
1399: freeBindingList = b;
1400: b->prefix->binding = b->prevPrefixBinding;
1401: }
1.1 frystyk 1402: }
1403: else if (defaultHandler)
1404: reportDefault(parser, enc, s, next);
1405: if (tagLevel == 0)
1406: return epilogProcessor(parser, next, end, nextPtr);
1407: break;
1408: case XML_TOK_END_TAG:
1409: if (tagLevel == startTagLevel)
1410: return XML_ERROR_ASYNC_ENTITY;
1411: else {
1412: int len;
1413: const char *rawName;
1414: TAG *tag = tagStack;
1415: tagStack = tag->parent;
1416: tag->parent = freeTagList;
1417: freeTagList = tag;
1418: rawName = s + enc->minBytesPerChar*2;
1419: len = XmlNameLength(enc, rawName);
1420: if (len != tag->rawNameLength
1421: || memcmp(tag->rawName, rawName, len) != 0) {
1422: *eventPP = rawName;
1423: return XML_ERROR_TAG_MISMATCH;
1424: }
1425: --tagLevel;
1.3 kahan 1426: if (endElementHandler && tag->name.str) {
1427: if (tag->name.localPart) {
1428: XML_Char *to = (XML_Char *)tag->name.str + tag->name.uriLen;
1429: const XML_Char *from = tag->name.localPart;
1430: while ((*to++ = *from++) != 0)
1431: ;
1.1 frystyk 1432: }
1.3 kahan 1433: endElementHandler(handlerArg, tag->name.str);
1.1 frystyk 1434: }
1435: else if (defaultHandler)
1436: reportDefault(parser, enc, s, next);
1.3 kahan 1437: while (tag->bindings) {
1438: BINDING *b = tag->bindings;
1439: if (endNamespaceDeclHandler)
1440: endNamespaceDeclHandler(handlerArg, b->prefix->name);
1441: tag->bindings = tag->bindings->nextTagBinding;
1442: b->nextTagBinding = freeBindingList;
1443: freeBindingList = b;
1444: b->prefix->binding = b->prevPrefixBinding;
1445: }
1.1 frystyk 1446: if (tagLevel == 0)
1447: return epilogProcessor(parser, next, end, nextPtr);
1448: }
1449: break;
1450: case XML_TOK_CHAR_REF:
1451: {
1452: int n = XmlCharRefNumber(enc, s);
1453: if (n < 0)
1454: return XML_ERROR_BAD_CHAR_REF;
1455: if (characterDataHandler) {
1456: XML_Char buf[XML_ENCODE_MAX];
1457: characterDataHandler(handlerArg, buf, XmlEncode(n, (ICHAR *)buf));
1458: }
1459: else if (defaultHandler)
1460: reportDefault(parser, enc, s, next);
1461: }
1462: break;
1463: case XML_TOK_XML_DECL:
1464: return XML_ERROR_MISPLACED_XML_PI;
1465: case XML_TOK_DATA_NEWLINE:
1466: if (characterDataHandler) {
1.3 kahan 1467: XML_Char c = 0xA;
1.1 frystyk 1468: characterDataHandler(handlerArg, &c, 1);
1469: }
1470: else if (defaultHandler)
1471: reportDefault(parser, enc, s, next);
1472: break;
1473: case XML_TOK_CDATA_SECT_OPEN:
1474: {
1475: enum XML_Error result;
1.3 kahan 1476: if (startCdataSectionHandler)
1477: startCdataSectionHandler(handlerArg);
1478: #if 0
1479: /* Suppose you doing a transformation on a document that involves
1480: changing only the character data. You set up a defaultHandler
1481: and a characterDataHandler. The defaultHandler simply copies
1482: characters through. The characterDataHandler does the transformation
1483: and writes the characters out escaping them as necessary. This case
1484: will fail to work if we leave out the following two lines (because &
1485: and < inside CDATA sections will be incorrectly escaped).
1486:
1487: However, now we have a start/endCdataSectionHandler, so it seems
1488: easier to let the user deal with this. */
1489:
1490: else if (characterDataHandler)
1.1 frystyk 1491: characterDataHandler(handlerArg, dataBuf, 0);
1.3 kahan 1492: #endif
1.1 frystyk 1493: else if (defaultHandler)
1494: reportDefault(parser, enc, s, next);
1495: result = doCdataSection(parser, enc, &next, end, nextPtr);
1496: if (!next) {
1497: processor = cdataSectionProcessor;
1498: return result;
1499: }
1500: }
1501: break;
1502: case XML_TOK_TRAILING_RSQB:
1503: if (nextPtr) {
1504: *nextPtr = s;
1505: return XML_ERROR_NONE;
1506: }
1507: if (characterDataHandler) {
1508: if (MUST_CONVERT(enc, s)) {
1509: ICHAR *dataPtr = (ICHAR *)dataBuf;
1510: XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)dataBufEnd);
1511: characterDataHandler(handlerArg, dataBuf, dataPtr - (ICHAR *)dataBuf);
1512: }
1513: else
1514: characterDataHandler(handlerArg,
1515: (XML_Char *)s,
1516: (XML_Char *)end - (XML_Char *)s);
1517: }
1518: else if (defaultHandler)
1519: reportDefault(parser, enc, s, end);
1520: if (startTagLevel == 0) {
1521: *eventPP = end;
1522: return XML_ERROR_NO_ELEMENTS;
1523: }
1524: if (tagLevel != startTagLevel) {
1525: *eventPP = end;
1526: return XML_ERROR_ASYNC_ENTITY;
1527: }
1528: return XML_ERROR_NONE;
1529: case XML_TOK_DATA_CHARS:
1530: if (characterDataHandler) {
1531: if (MUST_CONVERT(enc, s)) {
1532: for (;;) {
1533: ICHAR *dataPtr = (ICHAR *)dataBuf;
1534: XmlConvert(enc, &s, next, &dataPtr, (ICHAR *)dataBufEnd);
1535: *eventEndPP = s;
1536: characterDataHandler(handlerArg, dataBuf, dataPtr - (ICHAR *)dataBuf);
1537: if (s == next)
1538: break;
1539: *eventPP = s;
1540: }
1541: }
1542: else
1543: characterDataHandler(handlerArg,
1544: (XML_Char *)s,
1545: (XML_Char *)next - (XML_Char *)s);
1546: }
1547: else if (defaultHandler)
1548: reportDefault(parser, enc, s, next);
1549: break;
1550: case XML_TOK_PI:
1551: if (!reportProcessingInstruction(parser, enc, s, next))
1552: return XML_ERROR_NO_MEMORY;
1553: break;
1.3 kahan 1554: case XML_TOK_COMMENT:
1555: if (!reportComment(parser, enc, s, next))
1556: return XML_ERROR_NO_MEMORY;
1557: break;
1.1 frystyk 1558: default:
1559: if (defaultHandler)
1560: reportDefault(parser, enc, s, next);
1561: break;
1562: }
1563: *eventPP = s = next;
1564: }
1565: /* not reached */
1566: }
1567:
1.3 kahan 1568: /* If tagNamePtr is non-null, build a real list of attributes,
1.1 frystyk 1569: otherwise just check the attributes for well-formedness. */
1570:
1571: static enum XML_Error storeAtts(XML_Parser parser, const ENCODING *enc,
1.4 ! kahan 1572: const char *attStr, TAG_NAME *tagNamePtr,
1.3 kahan 1573: BINDING **bindingsPtr)
1.1 frystyk 1574: {
1575: ELEMENT_TYPE *elementType = 0;
1576: int nDefaultAtts = 0;
1.4 ! kahan 1577: const XML_Char **appAtts; /* the attribute list to pass to the application */
1.3 kahan 1578: int attIndex = 0;
1.1 frystyk 1579: int i;
1580: int n;
1.3 kahan 1581: int nPrefixes = 0;
1582: BINDING *binding;
1583: const XML_Char *localPart;
1584:
1.4 ! kahan 1585: /* lookup the element type name */
1.3 kahan 1586: if (tagNamePtr) {
1587: elementType = (ELEMENT_TYPE *)lookup(&dtd.elementTypes, tagNamePtr->str, 0);
1588: if (!elementType) {
1589: tagNamePtr->str = poolCopyString(&dtd.pool, tagNamePtr->str);
1590: if (!tagNamePtr->str)
1591: return XML_ERROR_NO_MEMORY;
1592: elementType = (ELEMENT_TYPE *)lookup(&dtd.elementTypes, tagNamePtr->str, sizeof(ELEMENT_TYPE));
1593: if (!elementType)
1594: return XML_ERROR_NO_MEMORY;
1595: if (ns && !setElementTypePrefix(parser, elementType))
1596: return XML_ERROR_NO_MEMORY;
1597: }
1598: nDefaultAtts = elementType->nDefaultAtts;
1.1 frystyk 1599: }
1.4 ! kahan 1600: /* get the attributes from the tokenizer */
! 1601: n = XmlGetAttributes(enc, attStr, attsSize, atts);
1.1 frystyk 1602: if (n + nDefaultAtts > attsSize) {
1603: int oldAttsSize = attsSize;
1604: attsSize = n + nDefaultAtts + INIT_ATTS_SIZE;
1605: atts = realloc((void *)atts, attsSize * sizeof(ATTRIBUTE));
1606: if (!atts)
1607: return XML_ERROR_NO_MEMORY;
1608: if (n > oldAttsSize)
1.4 ! kahan 1609: XmlGetAttributes(enc, attStr, n, atts);
1.1 frystyk 1610: }
1611: appAtts = (const XML_Char **)atts;
1612: for (i = 0; i < n; i++) {
1.4 ! kahan 1613: /* add the name and value to the attribute list */
1.1 frystyk 1614: ATTRIBUTE_ID *attId = getAttributeId(parser, enc, atts[i].name,
1.3 kahan 1615: atts[i].name
1616: + XmlNameLength(enc, atts[i].name));
1.1 frystyk 1617: if (!attId)
1618: return XML_ERROR_NO_MEMORY;
1.4 ! kahan 1619: /* detect duplicate attributes */
1.1 frystyk 1620: if ((attId->name)[-1]) {
1621: if (enc == encoding)
1622: eventPtr = atts[i].name;
1623: return XML_ERROR_DUPLICATE_ATTRIBUTE;
1624: }
1625: (attId->name)[-1] = 1;
1.3 kahan 1626: appAtts[attIndex++] = attId->name;
1.1 frystyk 1627: if (!atts[i].normalized) {
1628: enum XML_Error result;
1629: int isCdata = 1;
1630:
1.4 ! kahan 1631: /* figure out whether declared as other than CDATA */
1.1 frystyk 1632: if (attId->maybeTokenized) {
1633: int j;
1634: for (j = 0; j < nDefaultAtts; j++) {
1635: if (attId == elementType->defaultAtts[j].id) {
1636: isCdata = elementType->defaultAtts[j].isCdata;
1637: break;
1638: }
1639: }
1640: }
1641:
1.4 ! kahan 1642: /* normalize the attribute value */
1.1 frystyk 1643: result = storeAttributeValue(parser, enc, isCdata,
1644: atts[i].valuePtr, atts[i].valueEnd,
1645: &tempPool);
1646: if (result)
1647: return result;
1.3 kahan 1648: if (tagNamePtr) {
1649: appAtts[attIndex] = poolStart(&tempPool);
1.1 frystyk 1650: poolFinish(&tempPool);
1651: }
1652: else
1653: poolDiscard(&tempPool);
1654: }
1.3 kahan 1655: else if (tagNamePtr) {
1.4 ! kahan 1656: /* the value did not need normalizing */
1.3 kahan 1657: appAtts[attIndex] = poolStoreString(&tempPool, enc, atts[i].valuePtr, atts[i].valueEnd);
1658: if (appAtts[attIndex] == 0)
1.1 frystyk 1659: return XML_ERROR_NO_MEMORY;
1660: poolFinish(&tempPool);
1661: }
1.4 ! kahan 1662: /* handle prefixed attribute names */
1.3 kahan 1663: if (attId->prefix && tagNamePtr) {
1664: if (attId->xmlns) {
1.4 ! kahan 1665: /* deal with namespace declarations here */
1.3 kahan 1666: if (!addBinding(parser, attId->prefix, attId, appAtts[attIndex], bindingsPtr))
1667: return XML_ERROR_NO_MEMORY;
1668: --attIndex;
1669: }
1670: else {
1.4 ! kahan 1671: /* deal with other prefixed names later */
1.3 kahan 1672: attIndex++;
1673: nPrefixes++;
1674: (attId->name)[-1] = 2;
1675: }
1676: }
1677: else
1678: attIndex++;
1.1 frystyk 1679: }
1.3 kahan 1680: nSpecifiedAtts = attIndex;
1.4 ! kahan 1681: /* do attribute defaulting */
1.3 kahan 1682: if (tagNamePtr) {
1.1 frystyk 1683: int j;
1684: for (j = 0; j < nDefaultAtts; j++) {
1685: const DEFAULT_ATTRIBUTE *da = elementType->defaultAtts + j;
1686: if (!(da->id->name)[-1] && da->value) {
1.3 kahan 1687: if (da->id->prefix) {
1688: if (da->id->xmlns) {
1689: if (!addBinding(parser, da->id->prefix, da->id, da->value, bindingsPtr))
1690: return XML_ERROR_NO_MEMORY;
1691: }
1692: else {
1693: (da->id->name)[-1] = 2;
1694: nPrefixes++;
1695: appAtts[attIndex++] = da->id->name;
1696: appAtts[attIndex++] = da->value;
1697: }
1698: }
1699: else {
1700: (da->id->name)[-1] = 1;
1701: appAtts[attIndex++] = da->id->name;
1702: appAtts[attIndex++] = da->value;
1703: }
1704: }
1705: }
1706: appAtts[attIndex] = 0;
1707: }
1708: i = 0;
1709: if (nPrefixes) {
1.4 ! kahan 1710: /* expand prefixed attribute names */
1.3 kahan 1711: for (; i < attIndex; i += 2) {
1712: if (appAtts[i][-1] == 2) {
1713: ATTRIBUTE_ID *id;
1714: ((XML_Char *)(appAtts[i]))[-1] = 0;
1715: id = (ATTRIBUTE_ID *)lookup(&dtd.attributeIds, appAtts[i], 0);
1716: if (id->prefix->binding) {
1717: int j;
1718: const BINDING *b = id->prefix->binding;
1719: const XML_Char *s = appAtts[i];
1720: for (j = 0; j < b->uriLen; j++) {
1721: if (!poolAppendChar(&tempPool, b->uri[j]))
1722: return XML_ERROR_NO_MEMORY;
1723: }
1724: while (*s++ != ':')
1725: ;
1726: do {
1727: if (!poolAppendChar(&tempPool, *s))
1728: return XML_ERROR_NO_MEMORY;
1729: } while (*s++);
1730: appAtts[i] = poolStart(&tempPool);
1731: poolFinish(&tempPool);
1732: }
1733: if (!--nPrefixes)
1734: break;
1.1 frystyk 1735: }
1.3 kahan 1736: else
1737: ((XML_Char *)(appAtts[i]))[-1] = 0;
1.1 frystyk 1738: }
1739: }
1.4 ! kahan 1740: /* clear the flags that say whether attributes were specified */
1.3 kahan 1741: for (; i < attIndex; i += 2)
1742: ((XML_Char *)(appAtts[i]))[-1] = 0;
1743: if (!tagNamePtr)
1744: return XML_ERROR_NONE;
1745: for (binding = *bindingsPtr; binding; binding = binding->nextTagBinding)
1746: binding->attId->name[-1] = 0;
1.4 ! kahan 1747: /* expand the element type name */
1.3 kahan 1748: if (elementType->prefix) {
1749: binding = elementType->prefix->binding;
1750: if (!binding)
1751: return XML_ERROR_NONE;
1752: localPart = tagNamePtr->str;
1753: while (*localPart++ != XML_T(':'))
1754: ;
1755: }
1756: else if (dtd.defaultPrefix.binding) {
1757: binding = dtd.defaultPrefix.binding;
1758: localPart = tagNamePtr->str;
1759: }
1760: else
1761: return XML_ERROR_NONE;
1762: tagNamePtr->localPart = localPart;
1763: tagNamePtr->uriLen = binding->uriLen;
1764: i = binding->uriLen;
1765: do {
1766: if (i == binding->uriAlloc) {
1.4 ! kahan 1767: binding->uri = realloc(binding->uri, (binding->uriAlloc *= 2) * sizeof(XML_Char));
1.3 kahan 1768: if (!binding->uri)
1769: return XML_ERROR_NO_MEMORY;
1770: }
1771: binding->uri[i++] = *localPart;
1772: } while (*localPart++);
1773: tagNamePtr->str = binding->uri;
1.1 frystyk 1774: return XML_ERROR_NONE;
1775: }
1776:
1.3 kahan 1777: static
1778: int addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId, const XML_Char *uri, BINDING **bindingsPtr)
1779: {
1780: BINDING *b;
1781: int len;
1782: for (len = 0; uri[len]; len++)
1783: ;
1784: if (namespaceSeparator)
1785: len++;
1786: if (freeBindingList) {
1787: b = freeBindingList;
1788: if (len > b->uriAlloc) {
1.4 ! kahan 1789: b->uri = realloc(b->uri, sizeof(XML_Char) * (len + EXPAND_SPARE));
1.3 kahan 1790: if (!b->uri)
1791: return 0;
1792: b->uriAlloc = len + EXPAND_SPARE;
1793: }
1794: freeBindingList = b->nextTagBinding;
1795: }
1796: else {
1797: b = malloc(sizeof(BINDING));
1798: if (!b)
1799: return 0;
1.4 ! kahan 1800: b->uri = malloc(sizeof(XML_Char) * (len + EXPAND_SPARE));
1.3 kahan 1801: if (!b->uri) {
1802: free(b);
1803: return 0;
1804: }
1.4 ! kahan 1805: b->uriAlloc = len + EXPAND_SPARE;
1.3 kahan 1806: }
1807: b->uriLen = len;
1808: memcpy(b->uri, uri, len * sizeof(XML_Char));
1809: if (namespaceSeparator)
1810: b->uri[len - 1] = namespaceSeparator;
1811: b->prefix = prefix;
1812: b->attId = attId;
1813: b->prevPrefixBinding = prefix->binding;
1814: if (*uri == XML_T('\0') && prefix == &dtd.defaultPrefix)
1815: prefix->binding = 0;
1816: else
1817: prefix->binding = b;
1818: b->nextTagBinding = *bindingsPtr;
1819: *bindingsPtr = b;
1820: if (startNamespaceDeclHandler)
1821: startNamespaceDeclHandler(handlerArg, prefix->name,
1822: prefix->binding ? uri : 0);
1823: return 1;
1824: }
1825:
1.1 frystyk 1826: /* The idea here is to avoid using stack for each CDATA section when
1827: the whole file is parsed with one call. */
1828:
1829: static
1830: enum XML_Error cdataSectionProcessor(XML_Parser parser,
1831: const char *start,
1832: const char *end,
1833: const char **endPtr)
1834: {
1835: enum XML_Error result = doCdataSection(parser, encoding, &start, end, endPtr);
1836: if (start) {
1837: processor = contentProcessor;
1838: return contentProcessor(parser, start, end, endPtr);
1839: }
1840: return result;
1841: }
1842:
1843: /* startPtr gets set to non-null if the section is closed, and to null if
1844: the section is not yet closed. */
1845:
1846: static
1847: enum XML_Error doCdataSection(XML_Parser parser,
1848: const ENCODING *enc,
1849: const char **startPtr,
1850: const char *end,
1851: const char **nextPtr)
1852: {
1853: const char *s = *startPtr;
1854: const char **eventPP;
1855: const char **eventEndPP;
1856: if (enc == encoding) {
1857: eventPP = &eventPtr;
1858: *eventPP = s;
1859: eventEndPP = &eventEndPtr;
1860: }
1.3 kahan 1861: else {
1862: eventPP = &(openInternalEntities->internalEventPtr);
1863: eventEndPP = &(openInternalEntities->internalEventEndPtr);
1864: }
1865: *eventPP = s;
1.1 frystyk 1866: *startPtr = 0;
1867: for (;;) {
1868: const char *next;
1869: int tok = XmlCdataSectionTok(enc, s, end, &next);
1870: *eventEndPP = next;
1871: switch (tok) {
1872: case XML_TOK_CDATA_SECT_CLOSE:
1.3 kahan 1873: if (endCdataSectionHandler)
1874: endCdataSectionHandler(handlerArg);
1875: #if 0
1876: /* see comment under XML_TOK_CDATA_SECT_OPEN */
1877: else if (characterDataHandler)
1.1 frystyk 1878: characterDataHandler(handlerArg, dataBuf, 0);
1.3 kahan 1879: #endif
1.1 frystyk 1880: else if (defaultHandler)
1881: reportDefault(parser, enc, s, next);
1882: *startPtr = next;
1883: return XML_ERROR_NONE;
1884: case XML_TOK_DATA_NEWLINE:
1885: if (characterDataHandler) {
1.3 kahan 1886: XML_Char c = 0xA;
1.1 frystyk 1887: characterDataHandler(handlerArg, &c, 1);
1888: }
1889: else if (defaultHandler)
1890: reportDefault(parser, enc, s, next);
1891: break;
1892: case XML_TOK_DATA_CHARS:
1893: if (characterDataHandler) {
1894: if (MUST_CONVERT(enc, s)) {
1895: for (;;) {
1896: ICHAR *dataPtr = (ICHAR *)dataBuf;
1897: XmlConvert(enc, &s, next, &dataPtr, (ICHAR *)dataBufEnd);
1898: *eventEndPP = next;
1899: characterDataHandler(handlerArg, dataBuf, dataPtr - (ICHAR *)dataBuf);
1900: if (s == next)
1901: break;
1902: *eventPP = s;
1903: }
1904: }
1905: else
1906: characterDataHandler(handlerArg,
1907: (XML_Char *)s,
1908: (XML_Char *)next - (XML_Char *)s);
1909: }
1910: else if (defaultHandler)
1911: reportDefault(parser, enc, s, next);
1912: break;
1913: case XML_TOK_INVALID:
1914: *eventPP = next;
1915: return XML_ERROR_INVALID_TOKEN;
1916: case XML_TOK_PARTIAL_CHAR:
1917: if (nextPtr) {
1918: *nextPtr = s;
1919: return XML_ERROR_NONE;
1920: }
1921: return XML_ERROR_PARTIAL_CHAR;
1922: case XML_TOK_PARTIAL:
1923: case XML_TOK_NONE:
1924: if (nextPtr) {
1925: *nextPtr = s;
1926: return XML_ERROR_NONE;
1927: }
1928: return XML_ERROR_UNCLOSED_CDATA_SECTION;
1929: default:
1930: abort();
1931: }
1932: *eventPP = s = next;
1933: }
1934: /* not reached */
1935: }
1936:
1.4 ! kahan 1937: #ifdef XML_DTD
! 1938:
! 1939: /* The idea here is to avoid using stack for each IGNORE section when
! 1940: the whole file is parsed with one call. */
! 1941:
! 1942: static
! 1943: enum XML_Error ignoreSectionProcessor(XML_Parser parser,
! 1944: const char *start,
! 1945: const char *end,
! 1946: const char **endPtr)
! 1947: {
! 1948: enum XML_Error result = doIgnoreSection(parser, encoding, &start, end, endPtr);
! 1949: if (start) {
! 1950: processor = prologProcessor;
! 1951: return prologProcessor(parser, start, end, endPtr);
! 1952: }
! 1953: return result;
! 1954: }
! 1955:
! 1956: /* startPtr gets set to non-null is the section is closed, and to null if
! 1957: the section is not yet closed. */
! 1958:
! 1959: static
! 1960: enum XML_Error doIgnoreSection(XML_Parser parser,
! 1961: const ENCODING *enc,
! 1962: const char **startPtr,
! 1963: const char *end,
! 1964: const char **nextPtr)
! 1965: {
! 1966: const char *next;
! 1967: int tok;
! 1968: const char *s = *startPtr;
! 1969: const char **eventPP;
! 1970: const char **eventEndPP;
! 1971: if (enc == encoding) {
! 1972: eventPP = &eventPtr;
! 1973: *eventPP = s;
! 1974: eventEndPP = &eventEndPtr;
! 1975: }
! 1976: else {
! 1977: eventPP = &(openInternalEntities->internalEventPtr);
! 1978: eventEndPP = &(openInternalEntities->internalEventEndPtr);
! 1979: }
! 1980: *eventPP = s;
! 1981: *startPtr = 0;
! 1982: tok = XmlIgnoreSectionTok(enc, s, end, &next);
! 1983: *eventEndPP = next;
! 1984: switch (tok) {
! 1985: case XML_TOK_IGNORE_SECT:
! 1986: if (defaultHandler)
! 1987: reportDefault(parser, enc, s, next);
! 1988: *startPtr = next;
! 1989: return XML_ERROR_NONE;
! 1990: case XML_TOK_INVALID:
! 1991: *eventPP = next;
! 1992: return XML_ERROR_INVALID_TOKEN;
! 1993: case XML_TOK_PARTIAL_CHAR:
! 1994: if (nextPtr) {
! 1995: *nextPtr = s;
! 1996: return XML_ERROR_NONE;
! 1997: }
! 1998: return XML_ERROR_PARTIAL_CHAR;
! 1999: case XML_TOK_PARTIAL:
! 2000: case XML_TOK_NONE:
! 2001: if (nextPtr) {
! 2002: *nextPtr = s;
! 2003: return XML_ERROR_NONE;
! 2004: }
! 2005: return XML_ERROR_SYNTAX; /* XML_ERROR_UNCLOSED_IGNORE_SECTION */
! 2006: default:
! 2007: abort();
! 2008: }
! 2009: /* not reached */
! 2010: }
! 2011:
! 2012: #endif /* XML_DTD */
! 2013:
1.1 frystyk 2014: static enum XML_Error
2015: initializeEncoding(XML_Parser parser)
2016: {
2017: const char *s;
2018: #ifdef XML_UNICODE
2019: char encodingBuf[128];
2020: if (!protocolEncodingName)
2021: s = 0;
2022: else {
2023: int i;
2024: for (i = 0; protocolEncodingName[i]; i++) {
2025: if (i == sizeof(encodingBuf) - 1
2026: || protocolEncodingName[i] >= 0x80
2027: || protocolEncodingName[i] < 0) {
2028: encodingBuf[0] = '\0';
2029: break;
2030: }
2031: encodingBuf[i] = (char)protocolEncodingName[i];
2032: }
2033: encodingBuf[i] = '\0';
2034: s = encodingBuf;
2035: }
2036: #else
2037: s = protocolEncodingName;
2038: #endif
1.3 kahan 2039: if ((ns ? XmlInitEncodingNS : XmlInitEncoding)(&initEncoding, &encoding, s))
1.1 frystyk 2040: return XML_ERROR_NONE;
2041: return handleUnknownEncoding(parser, protocolEncodingName);
2042: }
2043:
2044: static enum XML_Error
2045: processXmlDecl(XML_Parser parser, int isGeneralTextEntity,
2046: const char *s, const char *next)
2047: {
2048: const char *encodingName = 0;
2049: const ENCODING *newEncoding = 0;
2050: const char *version;
2051: int standalone = -1;
1.3 kahan 2052: if (!(ns
2053: ? XmlParseXmlDeclNS
2054: : XmlParseXmlDecl)(isGeneralTextEntity,
2055: encoding,
2056: s,
2057: next,
2058: &eventPtr,
2059: &version,
2060: &encodingName,
2061: &newEncoding,
2062: &standalone))
1.1 frystyk 2063: return XML_ERROR_SYNTAX;
1.4 ! kahan 2064: if (!isGeneralTextEntity && standalone == 1) {
1.1 frystyk 2065: dtd.standalone = 1;
1.4 ! kahan 2066: #ifdef XML_DTD
! 2067: if (paramEntityParsing == XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE)
! 2068: paramEntityParsing = XML_PARAM_ENTITY_PARSING_NEVER;
! 2069: #endif /* XML_DTD */
! 2070: }
1.1 frystyk 2071: if (defaultHandler)
2072: reportDefault(parser, encoding, s, next);
2073: if (!protocolEncodingName) {
2074: if (newEncoding) {
2075: if (newEncoding->minBytesPerChar != encoding->minBytesPerChar) {
2076: eventPtr = encodingName;
2077: return XML_ERROR_INCORRECT_ENCODING;
2078: }
2079: encoding = newEncoding;
2080: }
2081: else if (encodingName) {
2082: enum XML_Error result;
2083: const XML_Char *s = poolStoreString(&tempPool,
2084: encoding,
2085: encodingName,
2086: encodingName
2087: + XmlNameLength(encoding, encodingName));
2088: if (!s)
2089: return XML_ERROR_NO_MEMORY;
2090: result = handleUnknownEncoding(parser, s);
2091: poolDiscard(&tempPool);
2092: if (result == XML_ERROR_UNKNOWN_ENCODING)
2093: eventPtr = encodingName;
2094: return result;
2095: }
2096: }
2097: return XML_ERROR_NONE;
2098: }
2099:
2100: static enum XML_Error
2101: handleUnknownEncoding(XML_Parser parser, const XML_Char *encodingName)
2102: {
2103: if (unknownEncodingHandler) {
2104: XML_Encoding info;
2105: int i;
2106: for (i = 0; i < 256; i++)
2107: info.map[i] = -1;
2108: info.convert = 0;
2109: info.data = 0;
2110: info.release = 0;
2111: if (unknownEncodingHandler(unknownEncodingHandlerData, encodingName, &info)) {
2112: ENCODING *enc;
2113: unknownEncodingMem = malloc(XmlSizeOfUnknownEncoding());
2114: if (!unknownEncodingMem) {
2115: if (info.release)
2116: info.release(info.data);
2117: return XML_ERROR_NO_MEMORY;
2118: }
1.3 kahan 2119: enc = (ns
2120: ? XmlInitUnknownEncodingNS
2121: : XmlInitUnknownEncoding)(unknownEncodingMem,
2122: info.map,
2123: info.convert,
2124: info.data);
1.1 frystyk 2125: if (enc) {
2126: unknownEncodingData = info.data;
2127: unknownEncodingRelease = info.release;
2128: encoding = enc;
2129: return XML_ERROR_NONE;
2130: }
2131: }
2132: if (info.release)
2133: info.release(info.data);
2134: }
2135: return XML_ERROR_UNKNOWN_ENCODING;
2136: }
2137:
2138: static enum XML_Error
2139: prologInitProcessor(XML_Parser parser,
2140: const char *s,
2141: const char *end,
2142: const char **nextPtr)
2143: {
2144: enum XML_Error result = initializeEncoding(parser);
2145: if (result != XML_ERROR_NONE)
2146: return result;
2147: processor = prologProcessor;
2148: return prologProcessor(parser, s, end, nextPtr);
2149: }
2150:
2151: static enum XML_Error
2152: prologProcessor(XML_Parser parser,
2153: const char *s,
2154: const char *end,
2155: const char **nextPtr)
2156: {
1.4 ! kahan 2157: const char *next;
! 2158: int tok = XmlPrologTok(encoding, s, end, &next);
! 2159: return doProlog(parser, encoding, s, end, tok, next, nextPtr);
! 2160: }
! 2161:
! 2162: static enum XML_Error
! 2163: doProlog(XML_Parser parser,
! 2164: const ENCODING *enc,
! 2165: const char *s,
! 2166: const char *end,
! 2167: int tok,
! 2168: const char *next,
! 2169: const char **nextPtr)
! 2170: {
! 2171: #ifdef XML_DTD
! 2172: static const XML_Char externalSubsetName[] = { '#' , '\0' };
! 2173: #endif /* XML_DTD */
! 2174:
! 2175: const char **eventPP;
! 2176: const char **eventEndPP;
! 2177: if (enc == encoding) {
! 2178: eventPP = &eventPtr;
! 2179: eventEndPP = &eventEndPtr;
! 2180: }
! 2181: else {
! 2182: eventPP = &(openInternalEntities->internalEventPtr);
! 2183: eventEndPP = &(openInternalEntities->internalEventEndPtr);
! 2184: }
1.1 frystyk 2185: for (;;) {
1.4 ! kahan 2186: int role;
! 2187: *eventPP = s;
! 2188: *eventEndPP = next;
1.1 frystyk 2189: if (tok <= 0) {
2190: if (nextPtr != 0 && tok != XML_TOK_INVALID) {
2191: *nextPtr = s;
2192: return XML_ERROR_NONE;
2193: }
2194: switch (tok) {
2195: case XML_TOK_INVALID:
1.4 ! kahan 2196: *eventPP = next;
1.1 frystyk 2197: return XML_ERROR_INVALID_TOKEN;
2198: case XML_TOK_PARTIAL:
2199: return XML_ERROR_UNCLOSED_TOKEN;
2200: case XML_TOK_PARTIAL_CHAR:
2201: return XML_ERROR_PARTIAL_CHAR;
1.4 ! kahan 2202: case XML_TOK_NONE:
! 2203: #ifdef XML_DTD
! 2204: if (enc != encoding)
! 2205: return XML_ERROR_NONE;
! 2206: if (parentParser) {
! 2207: if (XmlTokenRole(&prologState, XML_TOK_NONE, end, end, enc)
! 2208: == XML_ROLE_ERROR)
! 2209: return XML_ERROR_SYNTAX;
! 2210: hadExternalDoctype = 0;
! 2211: return XML_ERROR_NONE;
! 2212: }
! 2213: #endif /* XML_DTD */
1.1 frystyk 2214: return XML_ERROR_NO_ELEMENTS;
2215: default:
1.4 ! kahan 2216: tok = -tok;
! 2217: next = end;
! 2218: break;
1.1 frystyk 2219: }
2220: }
1.4 ! kahan 2221: role = XmlTokenRole(&prologState, tok, s, next, enc);
! 2222: switch (role) {
1.1 frystyk 2223: case XML_ROLE_XML_DECL:
2224: {
2225: enum XML_Error result = processXmlDecl(parser, 0, s, next);
2226: if (result != XML_ERROR_NONE)
2227: return result;
1.4 ! kahan 2228: enc = encoding;
! 2229: }
! 2230: break;
! 2231: case XML_ROLE_DOCTYPE_NAME:
! 2232: if (startDoctypeDeclHandler) {
! 2233: const XML_Char *name = poolStoreString(&tempPool, enc, s, next);
! 2234: if (!name)
! 2235: return XML_ERROR_NO_MEMORY;
! 2236: startDoctypeDeclHandler(handlerArg, name);
! 2237: poolClear(&tempPool);
1.1 frystyk 2238: }
2239: break;
1.4 ! kahan 2240: #ifdef XML_DTD
! 2241: case XML_ROLE_TEXT_DECL:
! 2242: {
! 2243: enum XML_Error result = processXmlDecl(parser, 1, s, next);
! 2244: if (result != XML_ERROR_NONE)
! 2245: return result;
! 2246: enc = encoding;
! 2247: }
1.1 frystyk 2248: break;
1.4 ! kahan 2249: #endif /* XML_DTD */
1.1 frystyk 2250: case XML_ROLE_DOCTYPE_PUBLIC_ID:
1.4 ! kahan 2251: #ifdef XML_DTD
! 2252: declEntity = (ENTITY *)lookup(&dtd.paramEntities,
! 2253: externalSubsetName,
! 2254: sizeof(ENTITY));
! 2255: if (!declEntity)
! 2256: return XML_ERROR_NO_MEMORY;
! 2257: #endif /* XML_DTD */
! 2258: /* fall through */
1.1 frystyk 2259: case XML_ROLE_ENTITY_PUBLIC_ID:
1.4 ! kahan 2260: if (!XmlIsPublicId(enc, s, next, eventPP))
1.1 frystyk 2261: return XML_ERROR_SYNTAX;
2262: if (declEntity) {
2263: XML_Char *tem = poolStoreString(&dtd.pool,
1.4 ! kahan 2264: enc,
! 2265: s + enc->minBytesPerChar,
! 2266: next - enc->minBytesPerChar);
1.1 frystyk 2267: if (!tem)
2268: return XML_ERROR_NO_MEMORY;
2269: normalizePublicId(tem);
2270: declEntity->publicId = tem;
2271: poolFinish(&dtd.pool);
2272: }
2273: break;
1.4 ! kahan 2274: case XML_ROLE_DOCTYPE_CLOSE:
! 2275: if (dtd.complete && hadExternalDoctype) {
! 2276: dtd.complete = 0;
! 2277: #ifdef XML_DTD
! 2278: if (paramEntityParsing && externalEntityRefHandler) {
! 2279: ENTITY *entity = (ENTITY *)lookup(&dtd.paramEntities,
! 2280: externalSubsetName,
! 2281: 0);
! 2282: if (!externalEntityRefHandler(externalEntityRefHandlerArg,
! 2283: 0,
! 2284: entity->base,
! 2285: entity->systemId,
! 2286: entity->publicId))
! 2287: return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
! 2288: }
! 2289: #endif /* XML_DTD */
! 2290: if (!dtd.complete
! 2291: && !dtd.standalone
! 2292: && notStandaloneHandler
! 2293: && !notStandaloneHandler(handlerArg))
! 2294: return XML_ERROR_NOT_STANDALONE;
! 2295: }
! 2296: if (endDoctypeDeclHandler)
! 2297: endDoctypeDeclHandler(handlerArg);
! 2298: break;
1.1 frystyk 2299: case XML_ROLE_INSTANCE_START:
2300: processor = contentProcessor;
2301: return contentProcessor(parser, s, end, nextPtr);
2302: case XML_ROLE_ATTLIST_ELEMENT_NAME:
2303: {
1.4 ! kahan 2304: const XML_Char *name = poolStoreString(&dtd.pool, enc, s, next);
1.1 frystyk 2305: if (!name)
2306: return XML_ERROR_NO_MEMORY;
2307: declElementType = (ELEMENT_TYPE *)lookup(&dtd.elementTypes, name, sizeof(ELEMENT_TYPE));
2308: if (!declElementType)
2309: return XML_ERROR_NO_MEMORY;
2310: if (declElementType->name != name)
2311: poolDiscard(&dtd.pool);
1.3 kahan 2312: else {
1.1 frystyk 2313: poolFinish(&dtd.pool);
1.3 kahan 2314: if (!setElementTypePrefix(parser, declElementType))
2315: return XML_ERROR_NO_MEMORY;
2316: }
1.1 frystyk 2317: break;
2318: }
2319: case XML_ROLE_ATTRIBUTE_NAME:
1.4 ! kahan 2320: declAttributeId = getAttributeId(parser, enc, s, next);
1.1 frystyk 2321: if (!declAttributeId)
2322: return XML_ERROR_NO_MEMORY;
2323: declAttributeIsCdata = 0;
2324: break;
2325: case XML_ROLE_ATTRIBUTE_TYPE_CDATA:
2326: declAttributeIsCdata = 1;
2327: break;
2328: case XML_ROLE_IMPLIED_ATTRIBUTE_VALUE:
2329: case XML_ROLE_REQUIRED_ATTRIBUTE_VALUE:
2330: if (dtd.complete
2331: && !defineAttribute(declElementType, declAttributeId, declAttributeIsCdata, 0))
2332: return XML_ERROR_NO_MEMORY;
2333: break;
2334: case XML_ROLE_DEFAULT_ATTRIBUTE_VALUE:
2335: case XML_ROLE_FIXED_ATTRIBUTE_VALUE:
2336: {
2337: const XML_Char *attVal;
2338: enum XML_Error result
1.4 ! kahan 2339: = storeAttributeValue(parser, enc, declAttributeIsCdata,
! 2340: s + enc->minBytesPerChar,
! 2341: next - enc->minBytesPerChar,
1.1 frystyk 2342: &dtd.pool);
2343: if (result)
2344: return result;
2345: attVal = poolStart(&dtd.pool);
2346: poolFinish(&dtd.pool);
2347: if (dtd.complete
2348: && !defineAttribute(declElementType, declAttributeId, declAttributeIsCdata, attVal))
2349: return XML_ERROR_NO_MEMORY;
2350: break;
2351: }
2352: case XML_ROLE_ENTITY_VALUE:
2353: {
1.4 ! kahan 2354: enum XML_Error result = storeEntityValue(parser, enc,
! 2355: s + enc->minBytesPerChar,
! 2356: next - enc->minBytesPerChar);
! 2357: if (declEntity) {
! 2358: declEntity->textPtr = poolStart(&dtd.pool);
! 2359: declEntity->textLen = poolLength(&dtd.pool);
! 2360: poolFinish(&dtd.pool);
! 2361: }
! 2362: else
! 2363: poolDiscard(&dtd.pool);
1.1 frystyk 2364: if (result != XML_ERROR_NONE)
2365: return result;
2366: }
2367: break;
1.4 ! kahan 2368: case XML_ROLE_DOCTYPE_SYSTEM_ID:
! 2369: if (!dtd.standalone
! 2370: #ifdef XML_DTD
! 2371: && !paramEntityParsing
! 2372: #endif /* XML_DTD */
! 2373: && notStandaloneHandler
! 2374: && !notStandaloneHandler(handlerArg))
! 2375: return XML_ERROR_NOT_STANDALONE;
! 2376: hadExternalDoctype = 1;
! 2377: #ifndef XML_DTD
! 2378: break;
! 2379: #else /* XML_DTD */
! 2380: if (!declEntity) {
! 2381: declEntity = (ENTITY *)lookup(&dtd.paramEntities,
! 2382: externalSubsetName,
! 2383: sizeof(ENTITY));
! 2384: if (!declEntity)
! 2385: return XML_ERROR_NO_MEMORY;
! 2386: }
! 2387: /* fall through */
! 2388: #endif /* XML_DTD */
1.1 frystyk 2389: case XML_ROLE_ENTITY_SYSTEM_ID:
2390: if (declEntity) {
1.4 ! kahan 2391: declEntity->systemId = poolStoreString(&dtd.pool, enc,
! 2392: s + enc->minBytesPerChar,
! 2393: next - enc->minBytesPerChar);
1.1 frystyk 2394: if (!declEntity->systemId)
2395: return XML_ERROR_NO_MEMORY;
1.4 ! kahan 2396: declEntity->base = curBase;
1.1 frystyk 2397: poolFinish(&dtd.pool);
2398: }
2399: break;
2400: case XML_ROLE_ENTITY_NOTATION_NAME:
2401: if (declEntity) {
1.4 ! kahan 2402: declEntity->notation = poolStoreString(&dtd.pool, enc, s, next);
1.1 frystyk 2403: if (!declEntity->notation)
2404: return XML_ERROR_NO_MEMORY;
2405: poolFinish(&dtd.pool);
2406: if (unparsedEntityDeclHandler) {
1.4 ! kahan 2407: *eventEndPP = s;
1.1 frystyk 2408: unparsedEntityDeclHandler(handlerArg,
2409: declEntity->name,
2410: declEntity->base,
2411: declEntity->systemId,
2412: declEntity->publicId,
2413: declEntity->notation);
2414: }
2415:
2416: }
2417: break;
2418: case XML_ROLE_GENERAL_ENTITY_NAME:
2419: {
2420: const XML_Char *name;
1.4 ! kahan 2421: if (XmlPredefinedEntityName(enc, s, next)) {
1.1 frystyk 2422: declEntity = 0;
2423: break;
2424: }
1.4 ! kahan 2425: name = poolStoreString(&dtd.pool, enc, s, next);
1.1 frystyk 2426: if (!name)
2427: return XML_ERROR_NO_MEMORY;
2428: if (dtd.complete) {
2429: declEntity = (ENTITY *)lookup(&dtd.generalEntities, name, sizeof(ENTITY));
2430: if (!declEntity)
2431: return XML_ERROR_NO_MEMORY;
2432: if (declEntity->name != name) {
2433: poolDiscard(&dtd.pool);
2434: declEntity = 0;
2435: }
2436: else
2437: poolFinish(&dtd.pool);
2438: }
2439: else {
2440: poolDiscard(&dtd.pool);
2441: declEntity = 0;
2442: }
2443: }
2444: break;
2445: case XML_ROLE_PARAM_ENTITY_NAME:
1.4 ! kahan 2446: #ifdef XML_DTD
! 2447: if (dtd.complete) {
! 2448: const XML_Char *name = poolStoreString(&dtd.pool, enc, s, next);
! 2449: if (!name)
! 2450: return XML_ERROR_NO_MEMORY;
! 2451: declEntity = (ENTITY *)lookup(&dtd.paramEntities, name, sizeof(ENTITY));
! 2452: if (!declEntity)
! 2453: return XML_ERROR_NO_MEMORY;
! 2454: if (declEntity->name != name) {
! 2455: poolDiscard(&dtd.pool);
! 2456: declEntity = 0;
! 2457: }
! 2458: else
! 2459: poolFinish(&dtd.pool);
! 2460: }
! 2461: #else /* not XML_DTD */
1.1 frystyk 2462: declEntity = 0;
1.4 ! kahan 2463: #endif /* not XML_DTD */
1.1 frystyk 2464: break;
2465: case XML_ROLE_NOTATION_NAME:
2466: declNotationPublicId = 0;
2467: declNotationName = 0;
2468: if (notationDeclHandler) {
1.4 ! kahan 2469: declNotationName = poolStoreString(&tempPool, enc, s, next);
1.1 frystyk 2470: if (!declNotationName)
2471: return XML_ERROR_NO_MEMORY;
2472: poolFinish(&tempPool);
2473: }
2474: break;
2475: case XML_ROLE_NOTATION_PUBLIC_ID:
1.4 ! kahan 2476: if (!XmlIsPublicId(enc, s, next, eventPP))
1.1 frystyk 2477: return XML_ERROR_SYNTAX;
2478: if (declNotationName) {
2479: XML_Char *tem = poolStoreString(&tempPool,
1.4 ! kahan 2480: enc,
! 2481: s + enc->minBytesPerChar,
! 2482: next - enc->minBytesPerChar);
1.1 frystyk 2483: if (!tem)
2484: return XML_ERROR_NO_MEMORY;
2485: normalizePublicId(tem);
2486: declNotationPublicId = tem;
2487: poolFinish(&tempPool);
2488: }
2489: break;
2490: case XML_ROLE_NOTATION_SYSTEM_ID:
2491: if (declNotationName && notationDeclHandler) {
2492: const XML_Char *systemId
1.4 ! kahan 2493: = poolStoreString(&tempPool, enc,
! 2494: s + enc->minBytesPerChar,
! 2495: next - enc->minBytesPerChar);
1.1 frystyk 2496: if (!systemId)
2497: return XML_ERROR_NO_MEMORY;
1.4 ! kahan 2498: *eventEndPP = s;
1.1 frystyk 2499: notationDeclHandler(handlerArg,
2500: declNotationName,
1.4 ! kahan 2501: curBase,
1.1 frystyk 2502: systemId,
2503: declNotationPublicId);
2504: }
2505: poolClear(&tempPool);
2506: break;
2507: case XML_ROLE_NOTATION_NO_SYSTEM_ID:
2508: if (declNotationPublicId && notationDeclHandler) {
1.4 ! kahan 2509: *eventEndPP = s;
1.1 frystyk 2510: notationDeclHandler(handlerArg,
2511: declNotationName,
1.4 ! kahan 2512: curBase,
1.1 frystyk 2513: 0,
2514: declNotationPublicId);
2515: }
2516: poolClear(&tempPool);
2517: break;
2518: case XML_ROLE_ERROR:
2519: switch (tok) {
2520: case XML_TOK_PARAM_ENTITY_REF:
2521: return XML_ERROR_PARAM_ENTITY_REF;
2522: case XML_TOK_XML_DECL:
2523: return XML_ERROR_MISPLACED_XML_PI;
2524: default:
2525: return XML_ERROR_SYNTAX;
2526: }
1.4 ! kahan 2527: #ifdef XML_DTD
! 2528: case XML_ROLE_IGNORE_SECT:
! 2529: {
! 2530: enum XML_Error result;
! 2531: if (defaultHandler)
! 2532: reportDefault(parser, enc, s, next);
! 2533: result = doIgnoreSection(parser, enc, &next, end, nextPtr);
! 2534: if (!next) {
! 2535: processor = ignoreSectionProcessor;
! 2536: return result;
! 2537: }
! 2538: }
! 2539: break;
! 2540: #endif /* XML_DTD */
1.1 frystyk 2541: case XML_ROLE_GROUP_OPEN:
2542: if (prologState.level >= groupSize) {
2543: if (groupSize)
2544: groupConnector = realloc(groupConnector, groupSize *= 2);
2545: else
2546: groupConnector = malloc(groupSize = 32);
2547: if (!groupConnector)
2548: return XML_ERROR_NO_MEMORY;
2549: }
2550: groupConnector[prologState.level] = 0;
2551: break;
2552: case XML_ROLE_GROUP_SEQUENCE:
1.4 ! kahan 2553: if (groupConnector[prologState.level] == '|')
1.1 frystyk 2554: return XML_ERROR_SYNTAX;
2555: groupConnector[prologState.level] = ',';
2556: break;
2557: case XML_ROLE_GROUP_CHOICE:
1.4 ! kahan 2558: if (groupConnector[prologState.level] == ',')
1.1 frystyk 2559: return XML_ERROR_SYNTAX;
2560: groupConnector[prologState.level] = '|';
2561: break;
2562: case XML_ROLE_PARAM_ENTITY_REF:
1.4 ! kahan 2563: #ifdef XML_DTD
! 2564: case XML_ROLE_INNER_PARAM_ENTITY_REF:
! 2565: if (paramEntityParsing
! 2566: && (dtd.complete || role == XML_ROLE_INNER_PARAM_ENTITY_REF)) {
! 2567: const XML_Char *name;
! 2568: ENTITY *entity;
! 2569: name = poolStoreString(&dtd.pool, enc,
! 2570: s + enc->minBytesPerChar,
! 2571: next - enc->minBytesPerChar);
! 2572: if (!name)
! 2573: return XML_ERROR_NO_MEMORY;
! 2574: entity = (ENTITY *)lookup(&dtd.paramEntities, name, 0);
! 2575: poolDiscard(&dtd.pool);
! 2576: if (!entity) {
! 2577: /* FIXME what to do if !dtd.complete? */
! 2578: return XML_ERROR_UNDEFINED_ENTITY;
! 2579: }
! 2580: if (entity->open)
! 2581: return XML_ERROR_RECURSIVE_ENTITY_REF;
! 2582: if (entity->textPtr) {
! 2583: enum XML_Error result;
! 2584: result = processInternalParamEntity(parser, entity);
! 2585: if (result != XML_ERROR_NONE)
! 2586: return result;
! 2587: break;
! 2588: }
! 2589: if (role == XML_ROLE_INNER_PARAM_ENTITY_REF)
! 2590: return XML_ERROR_PARAM_ENTITY_REF;
! 2591: if (externalEntityRefHandler) {
! 2592: dtd.complete = 0;
! 2593: entity->open = 1;
! 2594: if (!externalEntityRefHandler(externalEntityRefHandlerArg,
! 2595: 0,
! 2596: entity->base,
! 2597: entity->systemId,
! 2598: entity->publicId)) {
! 2599: entity->open = 0;
! 2600: return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
! 2601: }
! 2602: entity->open = 0;
! 2603: if (dtd.complete)
! 2604: break;
! 2605: }
! 2606: }
! 2607: #endif /* XML_DTD */
1.3 kahan 2608: if (!dtd.standalone
2609: && notStandaloneHandler
2610: && !notStandaloneHandler(handlerArg))
2611: return XML_ERROR_NOT_STANDALONE;
1.1 frystyk 2612: dtd.complete = 0;
1.4 ! kahan 2613: if (defaultHandler)
! 2614: reportDefault(parser, enc, s, next);
1.1 frystyk 2615: break;
2616: case XML_ROLE_NONE:
2617: switch (tok) {
2618: case XML_TOK_PI:
1.4 ! kahan 2619: if (!reportProcessingInstruction(parser, enc, s, next))
1.1 frystyk 2620: return XML_ERROR_NO_MEMORY;
2621: break;
1.3 kahan 2622: case XML_TOK_COMMENT:
1.4 ! kahan 2623: if (!reportComment(parser, enc, s, next))
1.3 kahan 2624: return XML_ERROR_NO_MEMORY;
2625: break;
1.1 frystyk 2626: }
2627: break;
2628: }
2629: if (defaultHandler) {
2630: switch (tok) {
2631: case XML_TOK_PI:
1.3 kahan 2632: case XML_TOK_COMMENT:
1.1 frystyk 2633: case XML_TOK_BOM:
2634: case XML_TOK_XML_DECL:
1.4 ! kahan 2635: #ifdef XML_DTD
! 2636: case XML_TOK_IGNORE_SECT:
! 2637: #endif /* XML_DTD */
! 2638: case XML_TOK_PARAM_ENTITY_REF:
1.1 frystyk 2639: break;
2640: default:
1.4 ! kahan 2641: #ifdef XML_DTD
! 2642: if (role != XML_ROLE_IGNORE_SECT)
! 2643: #endif /* XML_DTD */
! 2644: reportDefault(parser, enc, s, next);
1.1 frystyk 2645: }
2646: }
2647: s = next;
1.4 ! kahan 2648: tok = XmlPrologTok(enc, s, end, &next);
1.1 frystyk 2649: }
2650: /* not reached */
2651: }
2652:
2653: static
2654: enum XML_Error epilogProcessor(XML_Parser parser,
2655: const char *s,
2656: const char *end,
2657: const char **nextPtr)
2658: {
2659: processor = epilogProcessor;
2660: eventPtr = s;
2661: for (;;) {
2662: const char *next;
2663: int tok = XmlPrologTok(encoding, s, end, &next);
2664: eventEndPtr = next;
2665: switch (tok) {
1.4 ! kahan 2666: case -XML_TOK_PROLOG_S:
1.1 frystyk 2667: if (defaultHandler) {
2668: eventEndPtr = end;
2669: reportDefault(parser, encoding, s, end);
2670: }
2671: /* fall through */
2672: case XML_TOK_NONE:
2673: if (nextPtr)
2674: *nextPtr = end;
2675: return XML_ERROR_NONE;
2676: case XML_TOK_PROLOG_S:
2677: if (defaultHandler)
2678: reportDefault(parser, encoding, s, next);
2679: break;
2680: case XML_TOK_PI:
2681: if (!reportProcessingInstruction(parser, encoding, s, next))
2682: return XML_ERROR_NO_MEMORY;
2683: break;
1.3 kahan 2684: case XML_TOK_COMMENT:
2685: if (!reportComment(parser, encoding, s, next))
2686: return XML_ERROR_NO_MEMORY;
2687: break;
1.1 frystyk 2688: case XML_TOK_INVALID:
2689: eventPtr = next;
2690: return XML_ERROR_INVALID_TOKEN;
2691: case XML_TOK_PARTIAL:
2692: if (nextPtr) {
2693: *nextPtr = s;
2694: return XML_ERROR_NONE;
2695: }
2696: return XML_ERROR_UNCLOSED_TOKEN;
2697: case XML_TOK_PARTIAL_CHAR:
2698: if (nextPtr) {
2699: *nextPtr = s;
2700: return XML_ERROR_NONE;
2701: }
2702: return XML_ERROR_PARTIAL_CHAR;
2703: default:
2704: return XML_ERROR_JUNK_AFTER_DOC_ELEMENT;
2705: }
2706: eventPtr = s = next;
2707: }
2708: }
2709:
1.4 ! kahan 2710: #ifdef XML_DTD
! 2711:
! 2712: static enum XML_Error
! 2713: processInternalParamEntity(XML_Parser parser, ENTITY *entity)
! 2714: {
! 2715: const char *s, *end, *next;
! 2716: int tok;
! 2717: enum XML_Error result;
! 2718: OPEN_INTERNAL_ENTITY openEntity;
! 2719: entity->open = 1;
! 2720: openEntity.next = openInternalEntities;
! 2721: openInternalEntities = &openEntity;
! 2722: openEntity.entity = entity;
! 2723: openEntity.internalEventPtr = 0;
! 2724: openEntity.internalEventEndPtr = 0;
! 2725: s = (char *)entity->textPtr;
! 2726: end = (char *)(entity->textPtr + entity->textLen);
! 2727: tok = XmlPrologTok(internalEncoding, s, end, &next);
! 2728: result = doProlog(parser, internalEncoding, s, end, tok, next, 0);
! 2729: entity->open = 0;
! 2730: openInternalEntities = openEntity.next;
! 2731: return result;
! 2732: }
! 2733:
! 2734: #endif /* XML_DTD */
! 2735:
1.1 frystyk 2736: static
2737: enum XML_Error errorProcessor(XML_Parser parser,
2738: const char *s,
2739: const char *end,
2740: const char **nextPtr)
2741: {
2742: return errorCode;
2743: }
2744:
2745: static enum XML_Error
2746: storeAttributeValue(XML_Parser parser, const ENCODING *enc, int isCdata,
2747: const char *ptr, const char *end,
2748: STRING_POOL *pool)
2749: {
2750: enum XML_Error result = appendAttributeValue(parser, enc, isCdata, ptr, end, pool);
2751: if (result)
2752: return result;
1.3 kahan 2753: if (!isCdata && poolLength(pool) && poolLastChar(pool) == 0x20)
1.1 frystyk 2754: poolChop(pool);
2755: if (!poolAppendChar(pool, XML_T('\0')))
2756: return XML_ERROR_NO_MEMORY;
2757: return XML_ERROR_NONE;
2758: }
2759:
2760: static enum XML_Error
2761: appendAttributeValue(XML_Parser parser, const ENCODING *enc, int isCdata,
2762: const char *ptr, const char *end,
2763: STRING_POOL *pool)
2764: {
2765: for (;;) {
2766: const char *next;
2767: int tok = XmlAttributeValueTok(enc, ptr, end, &next);
2768: switch (tok) {
2769: case XML_TOK_NONE:
2770: return XML_ERROR_NONE;
2771: case XML_TOK_INVALID:
2772: if (enc == encoding)
2773: eventPtr = next;
2774: return XML_ERROR_INVALID_TOKEN;
2775: case XML_TOK_PARTIAL:
2776: if (enc == encoding)
2777: eventPtr = ptr;
2778: return XML_ERROR_INVALID_TOKEN;
2779: case XML_TOK_CHAR_REF:
2780: {
2781: XML_Char buf[XML_ENCODE_MAX];
2782: int i;
2783: int n = XmlCharRefNumber(enc, ptr);
2784: if (n < 0) {
2785: if (enc == encoding)
2786: eventPtr = ptr;
2787: return XML_ERROR_BAD_CHAR_REF;
2788: }
2789: if (!isCdata
2790: && n == 0x20 /* space */
1.3 kahan 2791: && (poolLength(pool) == 0 || poolLastChar(pool) == 0x20))
1.1 frystyk 2792: break;
2793: n = XmlEncode(n, (ICHAR *)buf);
2794: if (!n) {
2795: if (enc == encoding)
2796: eventPtr = ptr;
2797: return XML_ERROR_BAD_CHAR_REF;
2798: }
2799: for (i = 0; i < n; i++) {
2800: if (!poolAppendChar(pool, buf[i]))
2801: return XML_ERROR_NO_MEMORY;
2802: }
2803: }
2804: break;
2805: case XML_TOK_DATA_CHARS:
2806: if (!poolAppend(pool, enc, ptr, next))
2807: return XML_ERROR_NO_MEMORY;
2808: break;
2809: break;
2810: case XML_TOK_TRAILING_CR:
2811: next = ptr + enc->minBytesPerChar;
2812: /* fall through */
2813: case XML_TOK_ATTRIBUTE_VALUE_S:
2814: case XML_TOK_DATA_NEWLINE:
1.3 kahan 2815: if (!isCdata && (poolLength(pool) == 0 || poolLastChar(pool) == 0x20))
1.1 frystyk 2816: break;
1.3 kahan 2817: if (!poolAppendChar(pool, 0x20))
1.1 frystyk 2818: return XML_ERROR_NO_MEMORY;
2819: break;
2820: case XML_TOK_ENTITY_REF:
2821: {
2822: const XML_Char *name;
2823: ENTITY *entity;
2824: XML_Char ch = XmlPredefinedEntityName(enc,
2825: ptr + enc->minBytesPerChar,
2826: next - enc->minBytesPerChar);
2827: if (ch) {
2828: if (!poolAppendChar(pool, ch))
2829: return XML_ERROR_NO_MEMORY;
2830: break;
2831: }
2832: name = poolStoreString(&temp2Pool, enc,
2833: ptr + enc->minBytesPerChar,
2834: next - enc->minBytesPerChar);
2835: if (!name)
2836: return XML_ERROR_NO_MEMORY;
2837: entity = (ENTITY *)lookup(&dtd.generalEntities, name, 0);
2838: poolDiscard(&temp2Pool);
2839: if (!entity) {
2840: if (dtd.complete) {
2841: if (enc == encoding)
2842: eventPtr = ptr;
2843: return XML_ERROR_UNDEFINED_ENTITY;
2844: }
2845: }
2846: else if (entity->open) {
2847: if (enc == encoding)
2848: eventPtr = ptr;
2849: return XML_ERROR_RECURSIVE_ENTITY_REF;
2850: }
2851: else if (entity->notation) {
2852: if (enc == encoding)
2853: eventPtr = ptr;
2854: return XML_ERROR_BINARY_ENTITY_REF;
2855: }
2856: else if (!entity->textPtr) {
2857: if (enc == encoding)
2858: eventPtr = ptr;
2859: return XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF;
2860: }
2861: else {
2862: enum XML_Error result;
2863: const XML_Char *textEnd = entity->textPtr + entity->textLen;
2864: entity->open = 1;
1.4 ! kahan 2865: result = appendAttributeValue(parser, internalEncoding, isCdata, (char *)entity->textPtr, (char *)textEnd, pool);
1.1 frystyk 2866: entity->open = 0;
2867: if (result)
2868: return result;
2869: }
2870: }
2871: break;
2872: default:
2873: abort();
2874: }
2875: ptr = next;
2876: }
2877: /* not reached */
2878: }
2879:
2880: static
2881: enum XML_Error storeEntityValue(XML_Parser parser,
1.4 ! kahan 2882: const ENCODING *enc,
1.1 frystyk 2883: const char *entityTextPtr,
2884: const char *entityTextEnd)
2885: {
2886: STRING_POOL *pool = &(dtd.pool);
2887: for (;;) {
2888: const char *next;
1.4 ! kahan 2889: int tok = XmlEntityValueTok(enc, entityTextPtr, entityTextEnd, &next);
1.1 frystyk 2890: switch (tok) {
2891: case XML_TOK_PARAM_ENTITY_REF:
1.4 ! kahan 2892: #ifdef XML_DTD
! 2893: if (parentParser || enc != encoding) {
! 2894: enum XML_Error result;
! 2895: const XML_Char *name;
! 2896: ENTITY *entity;
! 2897: name = poolStoreString(&tempPool, enc,
! 2898: entityTextPtr + enc->minBytesPerChar,
! 2899: next - enc->minBytesPerChar);
! 2900: if (!name)
! 2901: return XML_ERROR_NO_MEMORY;
! 2902: entity = (ENTITY *)lookup(&dtd.paramEntities, name, 0);
! 2903: poolDiscard(&tempPool);
! 2904: if (!entity) {
! 2905: if (enc == encoding)
! 2906: eventPtr = entityTextPtr;
! 2907: return XML_ERROR_UNDEFINED_ENTITY;
! 2908: }
! 2909: if (entity->open) {
! 2910: if (enc == encoding)
! 2911: eventPtr = entityTextPtr;
! 2912: return XML_ERROR_RECURSIVE_ENTITY_REF;
! 2913: }
! 2914: if (entity->systemId) {
! 2915: if (enc == encoding)
! 2916: eventPtr = entityTextPtr;
! 2917: return XML_ERROR_PARAM_ENTITY_REF;
! 2918: }
! 2919: entity->open = 1;
! 2920: result = storeEntityValue(parser,
! 2921: internalEncoding,
! 2922: (char *)entity->textPtr,
! 2923: (char *)(entity->textPtr + entity->textLen));
! 2924: entity->open = 0;
! 2925: if (result)
! 2926: return result;
! 2927: break;
! 2928: }
! 2929: #endif /* XML_DTD */
1.1 frystyk 2930: eventPtr = entityTextPtr;
2931: return XML_ERROR_SYNTAX;
2932: case XML_TOK_NONE:
2933: return XML_ERROR_NONE;
2934: case XML_TOK_ENTITY_REF:
2935: case XML_TOK_DATA_CHARS:
1.4 ! kahan 2936: if (!poolAppend(pool, enc, entityTextPtr, next))
1.1 frystyk 2937: return XML_ERROR_NO_MEMORY;
2938: break;
2939: case XML_TOK_TRAILING_CR:
1.4 ! kahan 2940: next = entityTextPtr + enc->minBytesPerChar;
1.1 frystyk 2941: /* fall through */
2942: case XML_TOK_DATA_NEWLINE:
2943: if (pool->end == pool->ptr && !poolGrow(pool))
2944: return XML_ERROR_NO_MEMORY;
1.3 kahan 2945: *(pool->ptr)++ = 0xA;
1.1 frystyk 2946: break;
2947: case XML_TOK_CHAR_REF:
2948: {
2949: XML_Char buf[XML_ENCODE_MAX];
2950: int i;
1.4 ! kahan 2951: int n = XmlCharRefNumber(enc, entityTextPtr);
1.1 frystyk 2952: if (n < 0) {
1.4 ! kahan 2953: if (enc == encoding)
! 2954: eventPtr = entityTextPtr;
1.1 frystyk 2955: return XML_ERROR_BAD_CHAR_REF;
2956: }
2957: n = XmlEncode(n, (ICHAR *)buf);
2958: if (!n) {
1.4 ! kahan 2959: if (enc == encoding)
! 2960: eventPtr = entityTextPtr;
1.1 frystyk 2961: return XML_ERROR_BAD_CHAR_REF;
2962: }
2963: for (i = 0; i < n; i++) {
2964: if (pool->end == pool->ptr && !poolGrow(pool))
2965: return XML_ERROR_NO_MEMORY;
2966: *(pool->ptr)++ = buf[i];
2967: }
2968: }
2969: break;
2970: case XML_TOK_PARTIAL:
1.4 ! kahan 2971: if (enc == encoding)
! 2972: eventPtr = entityTextPtr;
1.1 frystyk 2973: return XML_ERROR_INVALID_TOKEN;
2974: case XML_TOK_INVALID:
1.4 ! kahan 2975: if (enc == encoding)
! 2976: eventPtr = next;
1.1 frystyk 2977: return XML_ERROR_INVALID_TOKEN;
2978: default:
2979: abort();
2980: }
2981: entityTextPtr = next;
2982: }
2983: /* not reached */
2984: }
2985:
2986: static void
2987: normalizeLines(XML_Char *s)
2988: {
2989: XML_Char *p;
2990: for (;; s++) {
2991: if (*s == XML_T('\0'))
2992: return;
1.3 kahan 2993: if (*s == 0xD)
1.1 frystyk 2994: break;
2995: }
2996: p = s;
2997: do {
1.3 kahan 2998: if (*s == 0xD) {
2999: *p++ = 0xA;
3000: if (*++s == 0xA)
1.1 frystyk 3001: s++;
3002: }
3003: else
3004: *p++ = *s++;
3005: } while (*s);
3006: *p = XML_T('\0');
3007: }
3008:
3009: static int
3010: reportProcessingInstruction(XML_Parser parser, const ENCODING *enc, const char *start, const char *end)
3011: {
3012: const XML_Char *target;
3013: XML_Char *data;
3014: const char *tem;
3015: if (!processingInstructionHandler) {
3016: if (defaultHandler)
3017: reportDefault(parser, enc, start, end);
3018: return 1;
3019: }
3020: start += enc->minBytesPerChar * 2;
3021: tem = start + XmlNameLength(enc, start);
3022: target = poolStoreString(&tempPool, enc, start, tem);
3023: if (!target)
3024: return 0;
3025: poolFinish(&tempPool);
3026: data = poolStoreString(&tempPool, enc,
3027: XmlSkipS(enc, tem),
3028: end - enc->minBytesPerChar*2);
3029: if (!data)
3030: return 0;
3031: normalizeLines(data);
3032: processingInstructionHandler(handlerArg, target, data);
3033: poolClear(&tempPool);
3034: return 1;
3035: }
3036:
1.3 kahan 3037: static int
3038: reportComment(XML_Parser parser, const ENCODING *enc, const char *start, const char *end)
3039: {
3040: XML_Char *data;
3041: if (!commentHandler) {
3042: if (defaultHandler)
3043: reportDefault(parser, enc, start, end);
3044: return 1;
3045: }
3046: data = poolStoreString(&tempPool,
3047: enc,
3048: start + enc->minBytesPerChar * 4,
3049: end - enc->minBytesPerChar * 3);
3050: if (!data)
3051: return 0;
3052: normalizeLines(data);
3053: commentHandler(handlerArg, data);
3054: poolClear(&tempPool);
3055: return 1;
3056: }
3057:
1.1 frystyk 3058: static void
3059: reportDefault(XML_Parser parser, const ENCODING *enc, const char *s, const char *end)
3060: {
3061: if (MUST_CONVERT(enc, s)) {
1.3 kahan 3062: const char **eventPP;
3063: const char **eventEndPP;
3064: if (enc == encoding) {
3065: eventPP = &eventPtr;
3066: eventEndPP = &eventEndPtr;
3067: }
3068: else {
3069: eventPP = &(openInternalEntities->internalEventPtr);
3070: eventEndPP = &(openInternalEntities->internalEventEndPtr);
3071: }
3072: do {
1.1 frystyk 3073: ICHAR *dataPtr = (ICHAR *)dataBuf;
3074: XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)dataBufEnd);
1.3 kahan 3075: *eventEndPP = s;
3076: defaultHandler(handlerArg, dataBuf, dataPtr - (ICHAR *)dataBuf);
3077: *eventPP = s;
3078: } while (s != end);
1.1 frystyk 3079: }
3080: else
3081: defaultHandler(handlerArg, (XML_Char *)s, (XML_Char *)end - (XML_Char *)s);
3082: }
3083:
3084:
3085: static int
3086: defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *attId, int isCdata, const XML_Char *value)
3087: {
3088: DEFAULT_ATTRIBUTE *att;
1.4 ! kahan 3089: if (value) {
! 3090: /* The handling of default attributes gets messed up if we have
! 3091: a default which duplicates a non-default. */
! 3092: int i;
! 3093: for (i = 0; i < type->nDefaultAtts; i++)
! 3094: if (attId == type->defaultAtts[i].id)
! 3095: return 1;
! 3096: }
1.1 frystyk 3097: if (type->nDefaultAtts == type->allocDefaultAtts) {
3098: if (type->allocDefaultAtts == 0) {
3099: type->allocDefaultAtts = 8;
3100: type->defaultAtts = malloc(type->allocDefaultAtts*sizeof(DEFAULT_ATTRIBUTE));
3101: }
3102: else {
3103: type->allocDefaultAtts *= 2;
3104: type->defaultAtts = realloc(type->defaultAtts,
3105: type->allocDefaultAtts*sizeof(DEFAULT_ATTRIBUTE));
3106: }
3107: if (!type->defaultAtts)
3108: return 0;
3109: }
3110: att = type->defaultAtts + type->nDefaultAtts;
3111: att->id = attId;
3112: att->value = value;
3113: att->isCdata = isCdata;
3114: if (!isCdata)
3115: attId->maybeTokenized = 1;
3116: type->nDefaultAtts += 1;
3117: return 1;
3118: }
3119:
1.3 kahan 3120: static int setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *elementType)
3121: {
3122: const XML_Char *name;
3123: for (name = elementType->name; *name; name++) {
3124: if (*name == XML_T(':')) {
3125: PREFIX *prefix;
3126: const XML_Char *s;
3127: for (s = elementType->name; s != name; s++) {
3128: if (!poolAppendChar(&dtd.pool, *s))
3129: return 0;
3130: }
3131: if (!poolAppendChar(&dtd.pool, XML_T('\0')))
3132: return 0;
3133: prefix = (PREFIX *)lookup(&dtd.prefixes, poolStart(&dtd.pool), sizeof(PREFIX));
3134: if (!prefix)
3135: return 0;
3136: if (prefix->name == poolStart(&dtd.pool))
3137: poolFinish(&dtd.pool);
3138: else
3139: poolDiscard(&dtd.pool);
3140: elementType->prefix = prefix;
3141:
3142: }
3143: }
3144: return 1;
3145: }
3146:
1.1 frystyk 3147: static ATTRIBUTE_ID *
3148: getAttributeId(XML_Parser parser, const ENCODING *enc, const char *start, const char *end)
3149: {
3150: ATTRIBUTE_ID *id;
3151: const XML_Char *name;
3152: if (!poolAppendChar(&dtd.pool, XML_T('\0')))
3153: return 0;
3154: name = poolStoreString(&dtd.pool, enc, start, end);
3155: if (!name)
3156: return 0;
3157: ++name;
3158: id = (ATTRIBUTE_ID *)lookup(&dtd.attributeIds, name, sizeof(ATTRIBUTE_ID));
3159: if (!id)
3160: return 0;
3161: if (id->name != name)
3162: poolDiscard(&dtd.pool);
1.3 kahan 3163: else {
1.1 frystyk 3164: poolFinish(&dtd.pool);
1.3 kahan 3165: if (!ns)
3166: ;
3167: else if (name[0] == 'x'
3168: && name[1] == 'm'
3169: && name[2] == 'l'
3170: && name[3] == 'n'
3171: && name[4] == 's'
3172: && (name[5] == XML_T('\0') || name[5] == XML_T(':'))) {
3173: if (name[5] == '\0')
3174: id->prefix = &dtd.defaultPrefix;
3175: else
3176: id->prefix = (PREFIX *)lookup(&dtd.prefixes, name + 6, sizeof(PREFIX));
3177: id->xmlns = 1;
3178: }
3179: else {
3180: int i;
3181: for (i = 0; name[i]; i++) {
3182: if (name[i] == XML_T(':')) {
3183: int j;
3184: for (j = 0; j < i; j++) {
3185: if (!poolAppendChar(&dtd.pool, name[j]))
3186: return 0;
3187: }
3188: if (!poolAppendChar(&dtd.pool, XML_T('\0')))
3189: return 0;
3190: id->prefix = (PREFIX *)lookup(&dtd.prefixes, poolStart(&dtd.pool), sizeof(PREFIX));
3191: if (id->prefix->name == poolStart(&dtd.pool))
3192: poolFinish(&dtd.pool);
3193: else
3194: poolDiscard(&dtd.pool);
3195: break;
3196: }
3197: }
3198: }
3199: }
1.1 frystyk 3200: return id;
3201: }
3202:
1.3 kahan 3203: #define CONTEXT_SEP XML_T('\f')
3204:
1.1 frystyk 3205: static
1.3 kahan 3206: const XML_Char *getContext(XML_Parser parser)
1.1 frystyk 3207: {
3208: HASH_TABLE_ITER iter;
1.3 kahan 3209: int needSep = 0;
3210:
3211: if (dtd.defaultPrefix.binding) {
3212: int i;
3213: int len;
3214: if (!poolAppendChar(&tempPool, XML_T('=')))
3215: return 0;
3216: len = dtd.defaultPrefix.binding->uriLen;
3217: if (namespaceSeparator != XML_T('\0'))
3218: len--;
3219: for (i = 0; i < len; i++)
3220: if (!poolAppendChar(&tempPool, dtd.defaultPrefix.binding->uri[i]))
3221: return 0;
3222: needSep = 1;
3223: }
3224:
3225: hashTableIterInit(&iter, &(dtd.prefixes));
3226: for (;;) {
3227: int i;
3228: int len;
3229: const XML_Char *s;
3230: PREFIX *prefix = (PREFIX *)hashTableIterNext(&iter);
3231: if (!prefix)
3232: break;
3233: if (!prefix->binding)
3234: continue;
3235: if (needSep && !poolAppendChar(&tempPool, CONTEXT_SEP))
3236: return 0;
3237: for (s = prefix->name; *s; s++)
3238: if (!poolAppendChar(&tempPool, *s))
3239: return 0;
3240: if (!poolAppendChar(&tempPool, XML_T('=')))
3241: return 0;
3242: len = prefix->binding->uriLen;
3243: if (namespaceSeparator != XML_T('\0'))
3244: len--;
3245: for (i = 0; i < len; i++)
3246: if (!poolAppendChar(&tempPool, prefix->binding->uri[i]))
3247: return 0;
3248: needSep = 1;
3249: }
3250:
1.1 frystyk 3251:
3252: hashTableIterInit(&iter, &(dtd.generalEntities));
3253: for (;;) {
3254: const XML_Char *s;
3255: ENTITY *e = (ENTITY *)hashTableIterNext(&iter);
3256: if (!e)
3257: break;
3258: if (!e->open)
3259: continue;
1.3 kahan 3260: if (needSep && !poolAppendChar(&tempPool, CONTEXT_SEP))
1.1 frystyk 3261: return 0;
3262: for (s = e->name; *s; s++)
3263: if (!poolAppendChar(&tempPool, *s))
3264: return 0;
1.3 kahan 3265: needSep = 1;
1.1 frystyk 3266: }
3267:
3268: if (!poolAppendChar(&tempPool, XML_T('\0')))
3269: return 0;
3270: return tempPool.start;
3271: }
3272:
3273: static
1.3 kahan 3274: int setContext(XML_Parser parser, const XML_Char *context)
1.1 frystyk 3275: {
1.3 kahan 3276: const XML_Char *s = context;
3277:
3278: while (*context != XML_T('\0')) {
3279: if (*s == CONTEXT_SEP || *s == XML_T('\0')) {
1.1 frystyk 3280: ENTITY *e;
3281: if (!poolAppendChar(&tempPool, XML_T('\0')))
3282: return 0;
3283: e = (ENTITY *)lookup(&dtd.generalEntities, poolStart(&tempPool), 0);
3284: if (e)
3285: e->open = 1;
1.3 kahan 3286: if (*s != XML_T('\0'))
1.1 frystyk 3287: s++;
1.3 kahan 3288: context = s;
3289: poolDiscard(&tempPool);
3290: }
3291: else if (*s == '=') {
3292: PREFIX *prefix;
3293: if (poolLength(&tempPool) == 0)
3294: prefix = &dtd.defaultPrefix;
3295: else {
3296: if (!poolAppendChar(&tempPool, XML_T('\0')))
3297: return 0;
3298: prefix = (PREFIX *)lookup(&dtd.prefixes, poolStart(&tempPool), sizeof(PREFIX));
3299: if (!prefix)
3300: return 0;
3301: if (prefix->name == poolStart(&tempPool))
3302: poolFinish(&tempPool);
3303: else
3304: poolDiscard(&tempPool);
3305: }
3306: for (context = s + 1; *context != CONTEXT_SEP && *context != XML_T('\0'); context++)
3307: if (!poolAppendChar(&tempPool, *context))
3308: return 0;
3309: if (!poolAppendChar(&tempPool, XML_T('\0')))
3310: return 0;
3311: if (!addBinding(parser, prefix, 0, poolStart(&tempPool), &inheritedBindings))
3312: return 0;
1.1 frystyk 3313: poolDiscard(&tempPool);
1.3 kahan 3314: if (*context != XML_T('\0'))
3315: ++context;
3316: s = context;
1.1 frystyk 3317: }
3318: else {
3319: if (!poolAppendChar(&tempPool, *s))
3320: return 0;
3321: s++;
3322: }
3323: }
3324: return 1;
3325: }
3326:
3327:
3328: static
3329: void normalizePublicId(XML_Char *publicId)
3330: {
3331: XML_Char *p = publicId;
3332: XML_Char *s;
3333: for (s = publicId; *s; s++) {
3334: switch (*s) {
1.3 kahan 3335: case 0x20:
3336: case 0xD:
3337: case 0xA:
3338: if (p != publicId && p[-1] != 0x20)
3339: *p++ = 0x20;
1.1 frystyk 3340: break;
3341: default:
3342: *p++ = *s;
3343: }
3344: }
1.3 kahan 3345: if (p != publicId && p[-1] == 0x20)
1.1 frystyk 3346: --p;
3347: *p = XML_T('\0');
3348: }
3349:
3350: static int dtdInit(DTD *p)
3351: {
3352: poolInit(&(p->pool));
3353: hashTableInit(&(p->generalEntities));
3354: hashTableInit(&(p->elementTypes));
3355: hashTableInit(&(p->attributeIds));
1.3 kahan 3356: hashTableInit(&(p->prefixes));
1.1 frystyk 3357: p->complete = 1;
3358: p->standalone = 0;
1.4 ! kahan 3359: #ifdef XML_DTD
! 3360: hashTableInit(&(p->paramEntities));
! 3361: #endif /* XML_DTD */
1.3 kahan 3362: p->defaultPrefix.name = 0;
3363: p->defaultPrefix.binding = 0;
1.1 frystyk 3364: return 1;
3365: }
3366:
1.4 ! kahan 3367: #ifdef XML_DTD
! 3368:
! 3369: static void dtdSwap(DTD *p1, DTD *p2)
! 3370: {
! 3371: DTD tem;
! 3372: memcpy(&tem, p1, sizeof(DTD));
! 3373: memcpy(p1, p2, sizeof(DTD));
! 3374: memcpy(p2, &tem, sizeof(DTD));
! 3375: }
! 3376:
! 3377: #endif /* XML_DTD */
! 3378:
1.1 frystyk 3379: static void dtdDestroy(DTD *p)
3380: {
3381: HASH_TABLE_ITER iter;
3382: hashTableIterInit(&iter, &(p->elementTypes));
3383: for (;;) {
3384: ELEMENT_TYPE *e = (ELEMENT_TYPE *)hashTableIterNext(&iter);
3385: if (!e)
3386: break;
3387: if (e->allocDefaultAtts != 0)
3388: free(e->defaultAtts);
3389: }
3390: hashTableDestroy(&(p->generalEntities));
1.4 ! kahan 3391: #ifdef XML_DTD
! 3392: hashTableDestroy(&(p->paramEntities));
! 3393: #endif /* XML_DTD */
1.1 frystyk 3394: hashTableDestroy(&(p->elementTypes));
3395: hashTableDestroy(&(p->attributeIds));
1.3 kahan 3396: hashTableDestroy(&(p->prefixes));
1.1 frystyk 3397: poolDestroy(&(p->pool));
3398: }
3399:
3400: /* Do a deep copy of the DTD. Return 0 for out of memory; non-zero otherwise.
3401: The new DTD has already been initialized. */
3402:
3403: static int dtdCopy(DTD *newDtd, const DTD *oldDtd)
3404: {
3405: HASH_TABLE_ITER iter;
3406:
1.3 kahan 3407: /* Copy the prefix table. */
3408:
3409: hashTableIterInit(&iter, &(oldDtd->prefixes));
3410: for (;;) {
3411: const XML_Char *name;
3412: const PREFIX *oldP = (PREFIX *)hashTableIterNext(&iter);
3413: if (!oldP)
3414: break;
3415: name = poolCopyString(&(newDtd->pool), oldP->name);
3416: if (!name)
3417: return 0;
3418: if (!lookup(&(newDtd->prefixes), name, sizeof(PREFIX)))
3419: return 0;
3420: }
3421:
1.1 frystyk 3422: hashTableIterInit(&iter, &(oldDtd->attributeIds));
3423:
3424: /* Copy the attribute id table. */
3425:
3426: for (;;) {
3427: ATTRIBUTE_ID *newA;
3428: const XML_Char *name;
3429: const ATTRIBUTE_ID *oldA = (ATTRIBUTE_ID *)hashTableIterNext(&iter);
3430:
3431: if (!oldA)
3432: break;
3433: /* Remember to allocate the scratch byte before the name. */
3434: if (!poolAppendChar(&(newDtd->pool), XML_T('\0')))
3435: return 0;
3436: name = poolCopyString(&(newDtd->pool), oldA->name);
3437: if (!name)
3438: return 0;
3439: ++name;
3440: newA = (ATTRIBUTE_ID *)lookup(&(newDtd->attributeIds), name, sizeof(ATTRIBUTE_ID));
3441: if (!newA)
3442: return 0;
3443: newA->maybeTokenized = oldA->maybeTokenized;
1.3 kahan 3444: if (oldA->prefix) {
3445: newA->xmlns = oldA->xmlns;
3446: if (oldA->prefix == &oldDtd->defaultPrefix)
3447: newA->prefix = &newDtd->defaultPrefix;
3448: else
3449: newA->prefix = (PREFIX *)lookup(&(newDtd->prefixes), oldA->prefix->name, 0);
3450: }
1.1 frystyk 3451: }
3452:
3453: /* Copy the element type table. */
3454:
3455: hashTableIterInit(&iter, &(oldDtd->elementTypes));
3456:
3457: for (;;) {
3458: int i;
3459: ELEMENT_TYPE *newE;
3460: const XML_Char *name;
3461: const ELEMENT_TYPE *oldE = (ELEMENT_TYPE *)hashTableIterNext(&iter);
3462: if (!oldE)
3463: break;
3464: name = poolCopyString(&(newDtd->pool), oldE->name);
3465: if (!name)
3466: return 0;
3467: newE = (ELEMENT_TYPE *)lookup(&(newDtd->elementTypes), name, sizeof(ELEMENT_TYPE));
3468: if (!newE)
3469: return 0;
1.3 kahan 3470: if (oldE->nDefaultAtts) {
3471: newE->defaultAtts = (DEFAULT_ATTRIBUTE *)malloc(oldE->nDefaultAtts * sizeof(DEFAULT_ATTRIBUTE));
3472: if (!newE->defaultAtts)
3473: return 0;
3474: }
1.1 frystyk 3475: newE->allocDefaultAtts = newE->nDefaultAtts = oldE->nDefaultAtts;
1.3 kahan 3476: if (oldE->prefix)
3477: newE->prefix = (PREFIX *)lookup(&(newDtd->prefixes), oldE->prefix->name, 0);
1.1 frystyk 3478: for (i = 0; i < newE->nDefaultAtts; i++) {
3479: newE->defaultAtts[i].id = (ATTRIBUTE_ID *)lookup(&(newDtd->attributeIds), oldE->defaultAtts[i].id->name, 0);
3480: newE->defaultAtts[i].isCdata = oldE->defaultAtts[i].isCdata;
3481: if (oldE->defaultAtts[i].value) {
3482: newE->defaultAtts[i].value = poolCopyString(&(newDtd->pool), oldE->defaultAtts[i].value);
3483: if (!newE->defaultAtts[i].value)
3484: return 0;
3485: }
3486: else
3487: newE->defaultAtts[i].value = 0;
3488: }
3489: }
3490:
1.4 ! kahan 3491: /* Copy the entity tables. */
! 3492: if (!copyEntityTable(&(newDtd->generalEntities),
! 3493: &(newDtd->pool),
! 3494: &(oldDtd->generalEntities)))
! 3495: return 0;
1.1 frystyk 3496:
1.4 ! kahan 3497: #ifdef XML_DTD
! 3498: if (!copyEntityTable(&(newDtd->paramEntities),
! 3499: &(newDtd->pool),
! 3500: &(oldDtd->paramEntities)))
! 3501: return 0;
! 3502: #endif /* XML_DTD */
! 3503:
! 3504: newDtd->complete = oldDtd->complete;
! 3505: newDtd->standalone = oldDtd->standalone;
! 3506: return 1;
! 3507: }
! 3508:
! 3509: static int copyEntityTable(HASH_TABLE *newTable,
! 3510: STRING_POOL *newPool,
! 3511: const HASH_TABLE *oldTable)
! 3512: {
! 3513: HASH_TABLE_ITER iter;
! 3514: const XML_Char *cachedOldBase = 0;
! 3515: const XML_Char *cachedNewBase = 0;
! 3516:
! 3517: hashTableIterInit(&iter, oldTable);
1.1 frystyk 3518:
3519: for (;;) {
3520: ENTITY *newE;
3521: const XML_Char *name;
3522: const ENTITY *oldE = (ENTITY *)hashTableIterNext(&iter);
3523: if (!oldE)
3524: break;
1.4 ! kahan 3525: name = poolCopyString(newPool, oldE->name);
1.1 frystyk 3526: if (!name)
3527: return 0;
1.4 ! kahan 3528: newE = (ENTITY *)lookup(newTable, name, sizeof(ENTITY));
1.1 frystyk 3529: if (!newE)
3530: return 0;
3531: if (oldE->systemId) {
1.4 ! kahan 3532: const XML_Char *tem = poolCopyString(newPool, oldE->systemId);
1.1 frystyk 3533: if (!tem)
3534: return 0;
3535: newE->systemId = tem;
3536: if (oldE->base) {
1.4 ! kahan 3537: if (oldE->base == cachedOldBase)
! 3538: newE->base = cachedNewBase;
! 3539: else {
! 3540: cachedOldBase = oldE->base;
! 3541: tem = poolCopyString(newPool, cachedOldBase);
! 3542: if (!tem)
! 3543: return 0;
! 3544: cachedNewBase = newE->base = tem;
! 3545: }
1.1 frystyk 3546: }
3547: }
3548: else {
1.4 ! kahan 3549: const XML_Char *tem = poolCopyStringN(newPool, oldE->textPtr, oldE->textLen);
1.1 frystyk 3550: if (!tem)
3551: return 0;
3552: newE->textPtr = tem;
3553: newE->textLen = oldE->textLen;
3554: }
3555: if (oldE->notation) {
1.4 ! kahan 3556: const XML_Char *tem = poolCopyString(newPool, oldE->notation);
1.1 frystyk 3557: if (!tem)
3558: return 0;
3559: newE->notation = tem;
3560: }
3561: }
3562: return 1;
3563: }
3564:
3565: static
3566: void poolInit(STRING_POOL *pool)
3567: {
3568: pool->blocks = 0;
3569: pool->freeBlocks = 0;
3570: pool->start = 0;
3571: pool->ptr = 0;
3572: pool->end = 0;
3573: }
3574:
3575: static
3576: void poolClear(STRING_POOL *pool)
3577: {
3578: if (!pool->freeBlocks)
3579: pool->freeBlocks = pool->blocks;
3580: else {
3581: BLOCK *p = pool->blocks;
3582: while (p) {
3583: BLOCK *tem = p->next;
3584: p->next = pool->freeBlocks;
3585: pool->freeBlocks = p;
3586: p = tem;
3587: }
3588: }
3589: pool->blocks = 0;
3590: pool->start = 0;
3591: pool->ptr = 0;
3592: pool->end = 0;
3593: }
3594:
3595: static
3596: void poolDestroy(STRING_POOL *pool)
3597: {
3598: BLOCK *p = pool->blocks;
3599: while (p) {
3600: BLOCK *tem = p->next;
3601: free(p);
3602: p = tem;
3603: }
3604: pool->blocks = 0;
3605: p = pool->freeBlocks;
3606: while (p) {
3607: BLOCK *tem = p->next;
3608: free(p);
3609: p = tem;
3610: }
3611: pool->freeBlocks = 0;
3612: pool->ptr = 0;
3613: pool->start = 0;
3614: pool->end = 0;
3615: }
3616:
3617: static
3618: XML_Char *poolAppend(STRING_POOL *pool, const ENCODING *enc,
3619: const char *ptr, const char *end)
3620: {
3621: if (!pool->ptr && !poolGrow(pool))
3622: return 0;
3623: for (;;) {
3624: XmlConvert(enc, &ptr, end, (ICHAR **)&(pool->ptr), (ICHAR *)pool->end);
3625: if (ptr == end)
3626: break;
3627: if (!poolGrow(pool))
3628: return 0;
3629: }
3630: return pool->start;
3631: }
3632:
3633: static const XML_Char *poolCopyString(STRING_POOL *pool, const XML_Char *s)
3634: {
3635: do {
3636: if (!poolAppendChar(pool, *s))
3637: return 0;
3638: } while (*s++);
3639: s = pool->start;
3640: poolFinish(pool);
3641: return s;
3642: }
3643:
3644: static const XML_Char *poolCopyStringN(STRING_POOL *pool, const XML_Char *s, int n)
3645: {
3646: if (!pool->ptr && !poolGrow(pool))
3647: return 0;
3648: for (; n > 0; --n, s++) {
3649: if (!poolAppendChar(pool, *s))
3650: return 0;
3651:
3652: }
3653: s = pool->start;
3654: poolFinish(pool);
3655: return s;
3656: }
3657:
3658: static
3659: XML_Char *poolStoreString(STRING_POOL *pool, const ENCODING *enc,
3660: const char *ptr, const char *end)
3661: {
3662: if (!poolAppend(pool, enc, ptr, end))
3663: return 0;
3664: if (pool->ptr == pool->end && !poolGrow(pool))
3665: return 0;
3666: *(pool->ptr)++ = 0;
3667: return pool->start;
3668: }
3669:
3670: static
3671: int poolGrow(STRING_POOL *pool)
3672: {
3673: if (pool->freeBlocks) {
3674: if (pool->start == 0) {
3675: pool->blocks = pool->freeBlocks;
3676: pool->freeBlocks = pool->freeBlocks->next;
3677: pool->blocks->next = 0;
3678: pool->start = pool->blocks->s;
3679: pool->end = pool->start + pool->blocks->size;
3680: pool->ptr = pool->start;
3681: return 1;
3682: }
3683: if (pool->end - pool->start < pool->freeBlocks->size) {
3684: BLOCK *tem = pool->freeBlocks->next;
3685: pool->freeBlocks->next = pool->blocks;
3686: pool->blocks = pool->freeBlocks;
3687: pool->freeBlocks = tem;
3688: memcpy(pool->blocks->s, pool->start, (pool->end - pool->start) * sizeof(XML_Char));
3689: pool->ptr = pool->blocks->s + (pool->ptr - pool->start);
3690: pool->start = pool->blocks->s;
3691: pool->end = pool->start + pool->blocks->size;
3692: return 1;
3693: }
3694: }
3695: if (pool->blocks && pool->start == pool->blocks->s) {
3696: int blockSize = (pool->end - pool->start)*2;
3697: pool->blocks = realloc(pool->blocks, offsetof(BLOCK, s) + blockSize * sizeof(XML_Char));
3698: if (!pool->blocks)
3699: return 0;
3700: pool->blocks->size = blockSize;
3701: pool->ptr = pool->blocks->s + (pool->ptr - pool->start);
3702: pool->start = pool->blocks->s;
3703: pool->end = pool->start + blockSize;
3704: }
3705: else {
3706: BLOCK *tem;
3707: int blockSize = pool->end - pool->start;
3708: if (blockSize < INIT_BLOCK_SIZE)
3709: blockSize = INIT_BLOCK_SIZE;
3710: else
3711: blockSize *= 2;
3712: tem = malloc(offsetof(BLOCK, s) + blockSize * sizeof(XML_Char));
3713: if (!tem)
3714: return 0;
3715: tem->size = blockSize;
3716: tem->next = pool->blocks;
3717: pool->blocks = tem;
3718: memcpy(tem->s, pool->start, (pool->ptr - pool->start) * sizeof(XML_Char));
3719: pool->ptr = tem->s + (pool->ptr - pool->start);
3720: pool->start = tem->s;
3721: pool->end = tem->s + blockSize;
3722: }
3723: return 1;
3724: }
Webmaster