Annotation of libwww/modules/expat/xmlparse/xmlparse.c, revision 1.3
1.1 frystyk 1: /*
2: The contents of this file are subject to the Mozilla Public License
1.3 ! kahan 3: Version 1.1 (the "License"); you may not use this file except in
1.1 frystyk 4: compliance with the License. You may obtain a copy of the License at
5: http://www.mozilla.org/MPL/
6:
7: Software distributed under the License is distributed on an "AS IS"
8: basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
9: License for the specific language governing rights and limitations
10: under the License.
11:
12: The Original Code is expat.
13:
14: The Initial Developer of the Original Code is James Clark.
1.3 ! kahan 15: Portions created by James Clark are Copyright (C) 1998, 1999
1.1 frystyk 16: James Clark. All Rights Reserved.
17:
18: Contributor(s):
1.3 ! kahan 19:
! 20: Alternatively, the contents of this file may be used under the terms
! 21: of the GNU General Public License (the "GPL"), in which case the
! 22: provisions of the GPL are applicable instead of those above. If you
! 23: wish to allow use of your version of this file only under the terms of
! 24: the GPL and not to allow others to use your version of this file under
! 25: the MPL, indicate your decision by deleting the provisions above and
! 26: replace them with the notice and other provisions required by the
! 27: GPL. If you do not delete the provisions above, a recipient may use
! 28: your version of this file under either the MPL or the GPL.
1.1 frystyk 29: */
30:
#include "xmldef.h"
#include "xmlparse.h"

#include <limits.h>
1.1 frystyk 33:
/* Select the tokenizer entry points and the internal character type
   according to whether the build defines XML_UNICODE.  With XML_UNICODE
   the UTF-16 variants are used and ICHAR is unsigned short; otherwise
   the UTF-8 variants are used and ICHAR is char.

   MUST_CONVERT(enc, s) is non-zero when input at `s` in encoding `enc`
   cannot be used as-is.  In the UTF-16 build this is the case when the
   encoding is not UTF-16 or when the address `s` is odd.  The `s`
   argument is parenthesised so that an expression argument is cast as
   a whole. */
#ifdef XML_UNICODE
#define XML_ENCODE_MAX XML_UTF16_ENCODE_MAX
#define XmlConvert XmlUtf16Convert
#define XmlGetInternalEncoding XmlGetUtf16InternalEncoding
#define XmlGetInternalEncodingNS XmlGetUtf16InternalEncodingNS
#define XmlEncode XmlUtf16Encode
#define MUST_CONVERT(enc, s) (!(enc)->isUtf16 || (((unsigned long)(s)) & 1))
typedef unsigned short ICHAR;
#else
#define XML_ENCODE_MAX XML_UTF8_ENCODE_MAX
#define XmlConvert XmlUtf8Convert
#define XmlGetInternalEncoding XmlGetUtf8InternalEncoding
#define XmlGetInternalEncodingNS XmlGetUtf8InternalEncodingNS
#define XmlEncode XmlUtf8Encode
#define MUST_CONVERT(enc, s) (!(enc)->isUtf8)
typedef char ICHAR;
#endif
51:
1.3 ! kahan 52:
/* When the build does not define XML_NS, alias every "NS" tokenizer
   entry point used in this file to its plain counterpart.
   XmlGetInternalEncodingNS was already defined by the XML_UNICODE
   selection above, hence the #undef before redefining it. */
! 53: #ifndef XML_NS
! 54:
! 55: #define XmlInitEncodingNS XmlInitEncoding
! 56: #define XmlInitUnknownEncodingNS XmlInitUnknownEncoding
! 57: #undef XmlGetInternalEncodingNS
! 58: #define XmlGetInternalEncodingNS XmlGetInternalEncoding
! 59: #define XmlParseXmlDeclNS XmlParseXmlDecl
! 60:
! 61: #endif
! 62:
! 63:
/* XML_T(x) wraps a character or string literal: with
   XML_UNICODE_WCHAR_T defined it pastes an L prefix onto the literal
   (making it a wide literal); otherwise the literal is left as-is. */
1.1 frystyk 64: #ifdef XML_UNICODE_WCHAR_T
65: #define XML_T(x) L ## x
66: #else
67: #define XML_T(x) x
68: #endif
69:
70: /* Round up n to be a multiple of sz, where sz is a power of 2. */
/* Note: sz appears twice in the expansion, so it must be an expression
   without side effects. */
71: #define ROUND_UP(n, sz) (((n) + ((sz) - 1)) & ~((sz) - 1))
72:
73: #include "xmltok.h"
74: #include "xmlrole.h"
75: #include "hashtable.h"
76:
/* Initial sizes for the parser's growable storage.
   INIT_DATA_BUF_SIZE is the element count (in XML_Char units) of the
   data buffer allocated by XML_ParserCreate; INIT_ATTS_SIZE is the
   initial number of ATTRIBUTE slots allocated there; INIT_BUFFER_SIZE
   is the starting size XML_GetBuffer doubles from when no input buffer
   exists yet.  The users of INIT_TAG_BUF_SIZE, INIT_BLOCK_SIZE and
   EXPAND_SPARE are not in this part of the file. */
77: #define INIT_TAG_BUF_SIZE 32 /* must be a multiple of sizeof(XML_Char) */
78: #define INIT_DATA_BUF_SIZE 1024
79: #define INIT_ATTS_SIZE 16
80: #define INIT_BLOCK_SIZE 1024
81: #define INIT_BUFFER_SIZE 1024
82:
1.3 ! kahan 83: #define EXPAND_SPARE 24
! 84:
/* One prefix-to-URI binding record.
   Records are chained through nextTagBinding; destroyBindings() walks
   that link and releases both the record and its uri with free(), so
   uri is heap-allocated.  uriLen / uriAlloc presumably hold the used
   and allocated length of uri -- confirm against addBinding(). */
! 85: typedef struct binding {
! 86: struct prefix *prefix;
! 87: struct binding *nextTagBinding;
! 88: struct binding *prevPrefixBinding;
! 89: const struct attribute_id *attId;
! 90: XML_Char *uri;
! 91: int uriLen;
! 92: int uriAlloc;
! 93: } BINDING;
! 94:
/* A namespace prefix: its name and a pointer to a BINDING
   (presumably the binding currently in effect -- confirm against
   addBinding()). */
! 95: typedef struct prefix {
! 96: const XML_Char *name;
! 97: BINDING *binding;
! 98: } PREFIX;
! 99:
/* Name of an element as stored in a TAG: the name string, a pointer to
   its local part, and a URI length.  NOTE(review): how str, localPart
   and uriLen relate is established in storeAtts(), which is not in this
   part of the file. */
! 100: typedef struct {
! 101: const XML_Char *str;
! 102: const XML_Char *localPart;
! 103: int uriLen;
! 104: } TAG_NAME;
! 105:
/* One entry of the parser's tag stack / free tag list.
   Entries are linked through parent (XML_ParserFree follows that link).
   buf is released with free() and bindings with destroyBindings() when
   the parser is freed. */
1.1 frystyk 106: typedef struct tag {
107: struct tag *parent;
108: const char *rawName;
109: int rawNameLength;
1.3 ! kahan 110: TAG_NAME name;
1.1 frystyk 111: char *buf;
112: char *bufEnd;
1.3 ! kahan 113: BINDING *bindings;
1.1 frystyk 114: } TAG;
115:
/* A declared entity: name, replacement text pointer and length, and the
   system/public identifiers, base and notation strings.
   NOTE(review): `open` is presumably a flag marking an entity that is
   currently being expanded -- confirm at its use sites. */
116: typedef struct {
117: const XML_Char *name;
118: const XML_Char *textPtr;
119: int textLen;
120: const XML_Char *systemId;
121: const XML_Char *base;
122: const XML_Char *publicId;
123: const XML_Char *notation;
124: char open;
125: } ENTITY;
126:
/* One storage block of a STRING_POOL, linked through next.
   NOTE(review): s[1] looks like the trailing-array idiom, with `size`
   giving the real element count -- confirm in poolGrow(). */
127: typedef struct block {
128: struct block *next;
129: int size;
130: XML_Char s[1];
131: } BLOCK;
132:
/* String pool state.  As used by the pool* macros in this file:
   start..ptr is the string currently being built (poolLength is
   ptr - start), ptr is where the next character is appended, and
   end is the limit at which poolAppendChar calls poolGrow().
   blocks / freeBlocks are lists of BLOCKs. */
133: typedef struct {
134: BLOCK *blocks;
135: BLOCK *freeBlocks;
136: const XML_Char *end;
137: XML_Char *ptr;
138: XML_Char *start;
139: } STRING_POOL;
140:
/* Identity of an attribute name, with an optional namespace PREFIX.
   NOTE(review): maybeTokenized and xmlns are char-sized flags; their
   exact meaning is set where attribute ids are created, outside this
   part of the file. */
141: /* The XML_Char before the name is used to determine whether
142: an attribute has been specified. */
1.3 ! kahan 143: typedef struct attribute_id {
1.1 frystyk 144: XML_Char *name;
1.3 ! kahan 145: PREFIX *prefix;
1.1 frystyk 146: char maybeTokenized;
1.3 ! kahan 147: char xmlns;
1.1 frystyk 148: } ATTRIBUTE_ID;
149:
/* A default value declared for an attribute: which attribute (id),
   whether it is CDATA (isCdata), and the default value string. */
150: typedef struct {
151: const ATTRIBUTE_ID *id;
152: char isCdata;
153: const XML_Char *value;
154: } DEFAULT_ATTRIBUTE;
155:
/* An element type: its name, optional namespace PREFIX, and an array of
   DEFAULT_ATTRIBUTE entries.  nDefaultAtts / allocDefaultAtts
   presumably are the used and allocated counts of defaultAtts --
   confirm against defineAttribute(). */
156: typedef struct {
157: const XML_Char *name;
1.3 ! kahan 158: PREFIX *prefix;
1.1 frystyk 159: int nDefaultAtts;
160: int allocDefaultAtts;
161: DEFAULT_ATTRIBUTE *defaultAtts;
162: } ELEMENT_TYPE;
163:
/* Per-parser DTD state: hash tables for general entities, element
   types, attribute ids and prefixes, plus a string pool.
   base is set by XML_SetBase() to a copy made in `pool` (or to 0).
   NOTE(review): `complete` and `standalone` are int flags whose use is
   outside this part of the file. */
164: typedef struct {
165: HASH_TABLE generalEntities;
166: HASH_TABLE elementTypes;
167: HASH_TABLE attributeIds;
1.3 ! kahan 168: HASH_TABLE prefixes;
1.1 frystyk 169: STRING_POOL pool;
170: int complete;
171: int standalone;
172: const XML_Char *base;
1.3 ! kahan 173: PREFIX defaultPrefix;
1.1 frystyk 174: } DTD;
175:
/* Node of the parser's list of open internal entities (linked through
   next).  XML_DefaultCurrent() reports the range
   internalEventPtr..internalEventEndPtr of the first node to the
   default handler when the list is non-empty. */
1.3 ! kahan 176: typedef struct open_internal_entity {
! 177: const char *internalEventPtr;
! 178: const char *internalEventEndPtr;
! 179: struct open_internal_entity *next;
! 180: ENTITY *entity;
! 181: } OPEN_INTERNAL_ENTITY;
! 182:
/* Function type of a parse "processor".  The parser stores a pointer
   to one in m_processor, and XML_Parse / XML_ParseBuffer call it with
   the input range start..end.  Those callers pass endPtr as 0 for the
   final chunk; otherwise the processor's result through *endPtr is
   used as the start of the input still to be parsed.  Returns an
   XML_Error code (XML_ERROR_NONE on success).

   The declarations that follow name the individual processors defined
   in this file. */
1.1 frystyk 183: typedef enum XML_Error Processor(XML_Parser parser,
184: const char *start,
185: const char *end,
186: const char **endPtr);
187:
188: static Processor prologProcessor;
189: static Processor prologInitProcessor;
190: static Processor contentProcessor;
191: static Processor cdataSectionProcessor;
192: static Processor epilogProcessor;
193: static Processor errorProcessor;
194: static Processor externalEntityInitProcessor;
195: static Processor externalEntityInitProcessor2;
196: static Processor externalEntityInitProcessor3;
197: static Processor externalEntityContentProcessor;
198:
/* Forward declarations of the file-local helpers: encoding setup,
   content / CDATA processing, attribute and namespace-binding storage,
   event reporting, namespace context handling, DTD lifecycle, and the
   string-pool primitives. */
199: static enum XML_Error
200: handleUnknownEncoding(XML_Parser parser, const XML_Char *encodingName);
201: static enum XML_Error
202: processXmlDecl(XML_Parser parser, int isGeneralTextEntity, const char *, const char *);
203: static enum XML_Error
204: initializeEncoding(XML_Parser parser);
205: static enum XML_Error
206: doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc,
207: const char *start, const char *end, const char **endPtr);
208: static enum XML_Error
209: doCdataSection(XML_Parser parser, const ENCODING *, const char **startPtr, const char *end, const char **nextPtr);
1.3 ! kahan 210: static enum XML_Error storeAtts(XML_Parser parser, const ENCODING *, const char *s,
! 211: TAG_NAME *tagNamePtr, BINDING **bindingsPtr);
! 212: static
! 213: int addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId, const XML_Char *uri, BINDING **bindingsPtr);
1.1 frystyk 214: static int
215: defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *, int isCdata, const XML_Char *dfltValue);
216: static enum XML_Error
217: storeAttributeValue(XML_Parser parser, const ENCODING *, int isCdata, const char *, const char *,
218: STRING_POOL *);
219: static enum XML_Error
220: appendAttributeValue(XML_Parser parser, const ENCODING *, int isCdata, const char *, const char *,
221: STRING_POOL *);
222: static ATTRIBUTE_ID *
223: getAttributeId(XML_Parser parser, const ENCODING *enc, const char *start, const char *end);
1.3 ! kahan 224: static int setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *);
1.1 frystyk 225: static enum XML_Error
226: storeEntityValue(XML_Parser parser, const char *start, const char *end);
227: static int
228: reportProcessingInstruction(XML_Parser parser, const ENCODING *enc, const char *start, const char *end);
1.3 ! kahan 229: static int
! 230: reportComment(XML_Parser parser, const ENCODING *enc, const char *start, const char *end);
1.1 frystyk 231: static void
232: reportDefault(XML_Parser parser, const ENCODING *enc, const char *start, const char *end);
233:
1.3 ! kahan 234: static const XML_Char *getContext(XML_Parser parser);
! 235: static int setContext(XML_Parser parser, const XML_Char *context);
1.1 frystyk 236: static void normalizePublicId(XML_Char *s);
237: static int dtdInit(DTD *);
238: static void dtdDestroy(DTD *);
239: static int dtdCopy(DTD *newDtd, const DTD *oldDtd);
240: static void poolInit(STRING_POOL *);
241: static void poolClear(STRING_POOL *);
242: static void poolDestroy(STRING_POOL *);
243: static XML_Char *poolAppend(STRING_POOL *pool, const ENCODING *enc,
244: const char *ptr, const char *end);
245: static XML_Char *poolStoreString(STRING_POOL *pool, const ENCODING *enc,
246: const char *ptr, const char *end);
247: static int poolGrow(STRING_POOL *pool);
248: static const XML_Char *poolCopyString(STRING_POOL *pool, const XML_Char *s);
249: static const XML_Char *poolCopyStringN(STRING_POOL *pool, const XML_Char *s, int n);
250:
/* Inline accessors for a STRING_POOL (`pool` is a pointer to one).

   poolStart      - first character of the string being built
   poolEnd        - one past the last character appended so far
   poolLength     - number of characters appended so far
   poolChop       - drop the last appended character
   poolLastChar   - the last appended character (lvalue)
   poolDiscard    - abandon the string being built (ptr = start)
   poolFinish     - commit the string being built (start = ptr)
   poolAppendChar - append c, growing the pool via poolGrow() when it
                    is full; evaluates to 1 on success, 0 if poolGrow()
                    fails.

   Every use of the `pool` and `c` arguments is parenthesised so that
   expression arguments such as `&somePool` expand correctly.  `pool`
   is expanded more than once and must have no side effects. */
#define poolStart(pool) ((pool)->start)
#define poolEnd(pool) ((pool)->ptr)
#define poolLength(pool) ((pool)->ptr - (pool)->start)
#define poolChop(pool) ((void)--((pool)->ptr))
#define poolLastChar(pool) (((pool)->ptr)[-1])
#define poolDiscard(pool) ((pool)->ptr = (pool)->start)
#define poolFinish(pool) ((pool)->start = (pool)->ptr)
#define poolAppendChar(pool, c) \
  (((pool)->ptr == (pool)->end && !poolGrow(pool)) \
   ? 0 \
   : ((*((pool)->ptr)++ = (c)), 1))
262:
/* Complete state of one parser instance.  An XML_Parser handle points
   to one of these; the accessor macros defined after this struct cast
   the handle back to Parser *.  XML_ParserCreate() allocates and
   initialises it and XML_ParserFree() releases it together with the
   storage it owns (tag lists, bindings, pools, DTD, attribute array,
   group connector, input buffer, data buffer, unknown-encoding
   memory). */
263: typedef struct {
264: /* The first member must be userData so that the XML_GetUserData macro works. */
1.3 ! kahan 265: void *m_userData;
! 266: void *m_handlerArg;
! 267: char *m_buffer;
1.1 frystyk 268: /* first character to be parsed */
1.3 ! kahan 269: const char *m_bufferPtr;
1.1 frystyk 270: /* past last character to be parsed */
1.3 ! kahan 271: char *m_bufferEnd;
1.1 frystyk 272: /* allocated end of buffer */
1.3 ! kahan 273: const char *m_bufferLim;
! 274: long m_parseEndByteIndex;
! 275: const char *m_parseEndPtr;
! 276: XML_Char *m_dataBuf;
! 277: XML_Char *m_dataBufEnd;
! 278: XML_StartElementHandler m_startElementHandler;
! 279: XML_EndElementHandler m_endElementHandler;
! 280: XML_CharacterDataHandler m_characterDataHandler;
! 281: XML_ProcessingInstructionHandler m_processingInstructionHandler;
! 282: XML_CommentHandler m_commentHandler;
! 283: XML_StartCdataSectionHandler m_startCdataSectionHandler;
! 284: XML_EndCdataSectionHandler m_endCdataSectionHandler;
! 285: XML_DefaultHandler m_defaultHandler;
! 286: XML_UnparsedEntityDeclHandler m_unparsedEntityDeclHandler;
! 287: XML_NotationDeclHandler m_notationDeclHandler;
! 288: XML_StartNamespaceDeclHandler m_startNamespaceDeclHandler;
! 289: XML_EndNamespaceDeclHandler m_endNamespaceDeclHandler;
! 290: XML_NotStandaloneHandler m_notStandaloneHandler;
! 291: XML_ExternalEntityRefHandler m_externalEntityRefHandler;
! 292: void *m_externalEntityRefHandlerArg;
! 293: XML_UnknownEncodingHandler m_unknownEncodingHandler;
! 294: const ENCODING *m_encoding;
! 295: INIT_ENCODING m_initEncoding;
! 296: const XML_Char *m_protocolEncodingName;
! 297: int m_ns;
! 298: void *m_unknownEncodingMem;
! 299: void *m_unknownEncodingData;
! 300: void *m_unknownEncodingHandlerData;
! 301: void (*m_unknownEncodingRelease)(void *);
! 302: PROLOG_STATE m_prologState;
! 303: Processor *m_processor;
! 304: enum XML_Error m_errorCode;
! 305: const char *m_eventPtr;
! 306: const char *m_eventEndPtr;
! 307: const char *m_positionPtr;
! 308: OPEN_INTERNAL_ENTITY *m_openInternalEntities;
! 309: int m_defaultExpandInternalEntities;
! 310: int m_tagLevel;
! 311: ENTITY *m_declEntity;
! 312: const XML_Char *m_declNotationName;
! 313: const XML_Char *m_declNotationPublicId;
! 314: ELEMENT_TYPE *m_declElementType;
! 315: ATTRIBUTE_ID *m_declAttributeId;
! 316: char m_declAttributeIsCdata;
! 317: DTD m_dtd;
! 318: TAG *m_tagStack;
! 319: TAG *m_freeTagList;
! 320: BINDING *m_inheritedBindings;
! 321: BINDING *m_freeBindingList;
! 322: int m_attsSize;
! 323: int m_nSpecifiedAtts;
! 324: ATTRIBUTE *m_atts;
! 325: POSITION m_position;
! 326: STRING_POOL m_tempPool;
! 327: STRING_POOL m_temp2Pool;
! 328: char *m_groupConnector;
! 329: unsigned m_groupSize;
! 330: int m_hadExternalDoctype;
! 331: XML_Char m_namespaceSeparator;
1.1 frystyk 332: } Parser;
333:
/* Field accessors.  Each macro expands to the like-named m_ member of
   `(Parser *)parser`, so it can only be used where a variable named
   `parser` (the XML_Parser handle) is in scope.  Because these are
   plain identifiers, local variables in this file must not reuse any
   of these names (buffer, position, encoding, ns, ...). */
1.3 ! kahan 334: #define userData (((Parser *)parser)->m_userData)
! 335: #define handlerArg (((Parser *)parser)->m_handlerArg)
! 336: #define startElementHandler (((Parser *)parser)->m_startElementHandler)
! 337: #define endElementHandler (((Parser *)parser)->m_endElementHandler)
! 338: #define characterDataHandler (((Parser *)parser)->m_characterDataHandler)
! 339: #define processingInstructionHandler (((Parser *)parser)->m_processingInstructionHandler)
! 340: #define commentHandler (((Parser *)parser)->m_commentHandler)
! 341: #define startCdataSectionHandler (((Parser *)parser)->m_startCdataSectionHandler)
! 342: #define endCdataSectionHandler (((Parser *)parser)->m_endCdataSectionHandler)
! 343: #define defaultHandler (((Parser *)parser)->m_defaultHandler)
! 344: #define unparsedEntityDeclHandler (((Parser *)parser)->m_unparsedEntityDeclHandler)
! 345: #define notationDeclHandler (((Parser *)parser)->m_notationDeclHandler)
! 346: #define startNamespaceDeclHandler (((Parser *)parser)->m_startNamespaceDeclHandler)
! 347: #define endNamespaceDeclHandler (((Parser *)parser)->m_endNamespaceDeclHandler)
! 348: #define notStandaloneHandler (((Parser *)parser)->m_notStandaloneHandler)
! 349: #define externalEntityRefHandler (((Parser *)parser)->m_externalEntityRefHandler)
! 350: #define externalEntityRefHandlerArg (((Parser *)parser)->m_externalEntityRefHandlerArg)
! 351: #define unknownEncodingHandler (((Parser *)parser)->m_unknownEncodingHandler)
! 352: #define encoding (((Parser *)parser)->m_encoding)
! 353: #define initEncoding (((Parser *)parser)->m_initEncoding)
! 354: #define unknownEncodingMem (((Parser *)parser)->m_unknownEncodingMem)
! 355: #define unknownEncodingData (((Parser *)parser)->m_unknownEncodingData)
1.1 frystyk 356: #define unknownEncodingHandlerData \
1.3 ! kahan 357: (((Parser *)parser)->m_unknownEncodingHandlerData)
! 358: #define unknownEncodingRelease (((Parser *)parser)->m_unknownEncodingRelease)
! 359: #define protocolEncodingName (((Parser *)parser)->m_protocolEncodingName)
! 360: #define ns (((Parser *)parser)->m_ns)
! 361: #define prologState (((Parser *)parser)->m_prologState)
! 362: #define processor (((Parser *)parser)->m_processor)
! 363: #define errorCode (((Parser *)parser)->m_errorCode)
! 364: #define eventPtr (((Parser *)parser)->m_eventPtr)
! 365: #define eventEndPtr (((Parser *)parser)->m_eventEndPtr)
! 366: #define positionPtr (((Parser *)parser)->m_positionPtr)
! 367: #define position (((Parser *)parser)->m_position)
! 368: #define openInternalEntities (((Parser *)parser)->m_openInternalEntities)
! 369: #define defaultExpandInternalEntities (((Parser *)parser)->m_defaultExpandInternalEntities)
! 370: #define tagLevel (((Parser *)parser)->m_tagLevel)
! 371: #define buffer (((Parser *)parser)->m_buffer)
! 372: #define bufferPtr (((Parser *)parser)->m_bufferPtr)
! 373: #define bufferEnd (((Parser *)parser)->m_bufferEnd)
! 374: #define parseEndByteIndex (((Parser *)parser)->m_parseEndByteIndex)
! 375: #define parseEndPtr (((Parser *)parser)->m_parseEndPtr)
! 376: #define bufferLim (((Parser *)parser)->m_bufferLim)
! 377: #define dataBuf (((Parser *)parser)->m_dataBuf)
! 378: #define dataBufEnd (((Parser *)parser)->m_dataBufEnd)
! 379: #define dtd (((Parser *)parser)->m_dtd)
! 380: #define declEntity (((Parser *)parser)->m_declEntity)
! 381: #define declNotationName (((Parser *)parser)->m_declNotationName)
! 382: #define declNotationPublicId (((Parser *)parser)->m_declNotationPublicId)
! 383: #define declElementType (((Parser *)parser)->m_declElementType)
! 384: #define declAttributeId (((Parser *)parser)->m_declAttributeId)
! 385: #define declAttributeIsCdata (((Parser *)parser)->m_declAttributeIsCdata)
! 386: #define freeTagList (((Parser *)parser)->m_freeTagList)
! 387: #define freeBindingList (((Parser *)parser)->m_freeBindingList)
! 388: #define inheritedBindings (((Parser *)parser)->m_inheritedBindings)
! 389: #define tagStack (((Parser *)parser)->m_tagStack)
! 390: #define atts (((Parser *)parser)->m_atts)
! 391: #define attsSize (((Parser *)parser)->m_attsSize)
! 392: #define nSpecifiedAtts (((Parser *)parser)->m_nSpecifiedAtts)
! 393: #define tempPool (((Parser *)parser)->m_tempPool)
! 394: #define temp2Pool (((Parser *)parser)->m_temp2Pool)
! 395: #define groupConnector (((Parser *)parser)->m_groupConnector)
! 396: #define groupSize (((Parser *)parser)->m_groupSize)
! 397: #define hadExternalDoctype (((Parser *)parser)->m_hadExternalDoctype)
! 398: #define namespaceSeparator (((Parser *)parser)->m_namespaceSeparator)
! 399:
#if defined(_MSC_VER) && defined(_DEBUG)
/* Return the opaque handle typed as Parser *.  Only compiled for MSVC
   debug builds (presumably as an aid when inspecting a parser in the
   debugger). */
Parser *asParser(XML_Parser parser)
{
  Parser *typed = parser;
  return typed;
}
#endif
1.1 frystyk 408:
409: XML_Parser XML_ParserCreate(const XML_Char *encodingName)
410: {
411: XML_Parser parser = malloc(sizeof(Parser));
412: if (!parser)
413: return parser;
414: processor = prologInitProcessor;
415: XmlPrologStateInit(&prologState);
416: userData = 0;
417: handlerArg = 0;
418: startElementHandler = 0;
419: endElementHandler = 0;
420: characterDataHandler = 0;
421: processingInstructionHandler = 0;
1.3 ! kahan 422: commentHandler = 0;
! 423: startCdataSectionHandler = 0;
! 424: endCdataSectionHandler = 0;
1.1 frystyk 425: defaultHandler = 0;
426: unparsedEntityDeclHandler = 0;
427: notationDeclHandler = 0;
1.3 ! kahan 428: startNamespaceDeclHandler = 0;
! 429: endNamespaceDeclHandler = 0;
! 430: notStandaloneHandler = 0;
1.1 frystyk 431: externalEntityRefHandler = 0;
1.3 ! kahan 432: externalEntityRefHandlerArg = parser;
1.1 frystyk 433: unknownEncodingHandler = 0;
434: buffer = 0;
435: bufferPtr = 0;
436: bufferEnd = 0;
437: parseEndByteIndex = 0;
438: parseEndPtr = 0;
439: bufferLim = 0;
440: declElementType = 0;
441: declAttributeId = 0;
442: declEntity = 0;
443: declNotationName = 0;
444: declNotationPublicId = 0;
445: memset(&position, 0, sizeof(POSITION));
446: errorCode = XML_ERROR_NONE;
447: eventPtr = 0;
448: eventEndPtr = 0;
449: positionPtr = 0;
1.3 ! kahan 450: openInternalEntities = 0;
1.1 frystyk 451: tagLevel = 0;
452: tagStack = 0;
453: freeTagList = 0;
1.3 ! kahan 454: freeBindingList = 0;
! 455: inheritedBindings = 0;
1.1 frystyk 456: attsSize = INIT_ATTS_SIZE;
457: atts = malloc(attsSize * sizeof(ATTRIBUTE));
1.3 ! kahan 458: nSpecifiedAtts = 0;
1.1 frystyk 459: dataBuf = malloc(INIT_DATA_BUF_SIZE * sizeof(XML_Char));
460: groupSize = 0;
461: groupConnector = 0;
462: hadExternalDoctype = 0;
463: unknownEncodingMem = 0;
464: unknownEncodingRelease = 0;
465: unknownEncodingData = 0;
466: unknownEncodingHandlerData = 0;
1.3 ! kahan 467: namespaceSeparator = '!';
! 468: ns = 0;
1.1 frystyk 469: poolInit(&tempPool);
470: poolInit(&temp2Pool);
471: protocolEncodingName = encodingName ? poolCopyString(&tempPool, encodingName) : 0;
472: if (!dtdInit(&dtd) || !atts || !dataBuf
473: || (encodingName && !protocolEncodingName)) {
474: XML_ParserFree(parser);
475: return 0;
476: }
477: dataBufEnd = dataBuf + INIT_DATA_BUF_SIZE;
478: XmlInitEncoding(&initEncoding, &encoding, 0);
479: return parser;
480: }
481:
1.3 ! kahan 482: XML_Parser XML_ParserCreateNS(const XML_Char *encodingName, XML_Char nsSep)
! 483: {
! 484: static
! 485: const XML_Char implicitContext[] = {
! 486: XML_T('x'), XML_T('m'), XML_T('l'), XML_T('='),
! 487: XML_T('h'), XML_T('t'), XML_T('t'), XML_T('p'), XML_T(':'),
! 488: XML_T('/'), XML_T('/'), XML_T('w'), XML_T('w'), XML_T('w'),
! 489: XML_T('.'), XML_T('w'), XML_T('3'),
! 490: XML_T('.'), XML_T('o'), XML_T('r'), XML_T('g'),
! 491: XML_T('/'), XML_T('X'), XML_T('M'), XML_T('L'),
! 492: XML_T('/'), XML_T('1'), XML_T('9'), XML_T('9'), XML_T('8'),
! 493: XML_T('/'), XML_T('n'), XML_T('a'), XML_T('m'), XML_T('e'),
! 494: XML_T('s'), XML_T('p'), XML_T('a'), XML_T('c'), XML_T('e'),
! 495: XML_T('\0')
! 496: };
! 497:
! 498: XML_Parser parser = XML_ParserCreate(encodingName);
! 499: if (parser) {
! 500: XmlInitEncodingNS(&initEncoding, &encoding, 0);
! 501: ns = 1;
! 502: namespaceSeparator = nsSep;
! 503: }
! 504: if (!setContext(parser, implicitContext)) {
! 505: XML_ParserFree(parser);
! 506: return 0;
! 507: }
! 508: return parser;
! 509: }
! 510:
! 511: int XML_SetEncoding(XML_Parser parser, const XML_Char *encodingName)
! 512: {
! 513: if (!encodingName)
! 514: protocolEncodingName = 0;
! 515: else {
! 516: protocolEncodingName = poolCopyString(&tempPool, encodingName);
! 517: if (!protocolEncodingName)
! 518: return 0;
! 519: }
! 520: return 1;
! 521: }
! 522:
1.1 frystyk 523: XML_Parser XML_ExternalEntityParserCreate(XML_Parser oldParser,
1.3 ! kahan 524: const XML_Char *context,
1.1 frystyk 525: const XML_Char *encodingName)
526: {
527: XML_Parser parser = oldParser;
528: DTD *oldDtd = &dtd;
529: XML_StartElementHandler oldStartElementHandler = startElementHandler;
530: XML_EndElementHandler oldEndElementHandler = endElementHandler;
531: XML_CharacterDataHandler oldCharacterDataHandler = characterDataHandler;
532: XML_ProcessingInstructionHandler oldProcessingInstructionHandler = processingInstructionHandler;
1.3 ! kahan 533: XML_CommentHandler oldCommentHandler = commentHandler;
! 534: XML_StartCdataSectionHandler oldStartCdataSectionHandler = startCdataSectionHandler;
! 535: XML_EndCdataSectionHandler oldEndCdataSectionHandler = endCdataSectionHandler;
1.1 frystyk 536: XML_DefaultHandler oldDefaultHandler = defaultHandler;
1.3 ! kahan 537: XML_StartNamespaceDeclHandler oldStartNamespaceDeclHandler = startNamespaceDeclHandler;
! 538: XML_EndNamespaceDeclHandler oldEndNamespaceDeclHandler = endNamespaceDeclHandler;
! 539: XML_NotStandaloneHandler oldNotStandaloneHandler = notStandaloneHandler;
1.1 frystyk 540: XML_ExternalEntityRefHandler oldExternalEntityRefHandler = externalEntityRefHandler;
541: XML_UnknownEncodingHandler oldUnknownEncodingHandler = unknownEncodingHandler;
542: void *oldUserData = userData;
543: void *oldHandlerArg = handlerArg;
1.3 ! kahan 544: int oldDefaultExpandInternalEntities = defaultExpandInternalEntities;
! 545: void *oldExternalEntityRefHandlerArg = externalEntityRefHandlerArg;
1.1 frystyk 546:
1.3 ! kahan 547: parser = (ns
! 548: ? XML_ParserCreateNS(encodingName, namespaceSeparator)
! 549: : XML_ParserCreate(encodingName));
1.1 frystyk 550: if (!parser)
551: return 0;
552: startElementHandler = oldStartElementHandler;
553: endElementHandler = oldEndElementHandler;
554: characterDataHandler = oldCharacterDataHandler;
555: processingInstructionHandler = oldProcessingInstructionHandler;
1.3 ! kahan 556: commentHandler = oldCommentHandler;
! 557: startCdataSectionHandler = oldStartCdataSectionHandler;
! 558: endCdataSectionHandler = oldEndCdataSectionHandler;
1.1 frystyk 559: defaultHandler = oldDefaultHandler;
1.3 ! kahan 560: startNamespaceDeclHandler = oldStartNamespaceDeclHandler;
! 561: endNamespaceDeclHandler = oldEndNamespaceDeclHandler;
! 562: notStandaloneHandler = oldNotStandaloneHandler;
1.1 frystyk 563: externalEntityRefHandler = oldExternalEntityRefHandler;
564: unknownEncodingHandler = oldUnknownEncodingHandler;
565: userData = oldUserData;
566: if (oldUserData == oldHandlerArg)
567: handlerArg = userData;
568: else
569: handlerArg = parser;
1.3 ! kahan 570: if (oldExternalEntityRefHandlerArg != oldParser)
! 571: externalEntityRefHandlerArg = oldExternalEntityRefHandlerArg;
! 572: defaultExpandInternalEntities = oldDefaultExpandInternalEntities;
! 573: if (!dtdCopy(&dtd, oldDtd) || !setContext(parser, context)) {
1.1 frystyk 574: XML_ParserFree(parser);
575: return 0;
576: }
577: processor = externalEntityInitProcessor;
578: return parser;
579: }
580:
1.3 ! kahan 581: static
! 582: void destroyBindings(BINDING *bindings)
! 583: {
! 584: for (;;) {
! 585: BINDING *b = bindings;
! 586: if (!b)
! 587: break;
! 588: bindings = b->nextTagBinding;
! 589: free(b->uri);
! 590: free(b);
! 591: }
! 592: }
! 593:
1.1 frystyk 594: void XML_ParserFree(XML_Parser parser)
595: {
596: for (;;) {
597: TAG *p;
598: if (tagStack == 0) {
599: if (freeTagList == 0)
600: break;
601: tagStack = freeTagList;
602: freeTagList = 0;
603: }
604: p = tagStack;
605: tagStack = tagStack->parent;
606: free(p->buf);
1.3 ! kahan 607: destroyBindings(p->bindings);
1.1 frystyk 608: free(p);
609: }
1.3 ! kahan 610: destroyBindings(freeBindingList);
! 611: destroyBindings(inheritedBindings);
1.1 frystyk 612: poolDestroy(&tempPool);
613: poolDestroy(&temp2Pool);
614: dtdDestroy(&dtd);
615: free((void *)atts);
616: free(groupConnector);
617: free(buffer);
618: free(dataBuf);
619: free(unknownEncodingMem);
620: if (unknownEncodingRelease)
621: unknownEncodingRelease(unknownEncodingData);
622: free(parser);
623: }
624:
625: void XML_UseParserAsHandlerArg(XML_Parser parser)
626: {
627: handlerArg = parser;
628: }
629:
630: void XML_SetUserData(XML_Parser parser, void *p)
631: {
632: if (handlerArg == userData)
633: handlerArg = userData = p;
634: else
635: userData = p;
636: }
637:
638: int XML_SetBase(XML_Parser parser, const XML_Char *p)
639: {
640: if (p) {
641: p = poolCopyString(&dtd.pool, p);
642: if (!p)
643: return 0;
644: dtd.base = p;
645: }
646: else
647: dtd.base = 0;
648: return 1;
649: }
650:
651: const XML_Char *XML_GetBase(XML_Parser parser)
652: {
653: return dtd.base;
654: }
655:
1.3 ! kahan 656: int XML_GetSpecifiedAttributeCount(XML_Parser parser)
! 657: {
! 658: return nSpecifiedAtts;
! 659: }
! 660:
1.1 frystyk 661: void XML_SetElementHandler(XML_Parser parser,
662: XML_StartElementHandler start,
663: XML_EndElementHandler end)
664: {
665: startElementHandler = start;
666: endElementHandler = end;
667: }
668:
669: void XML_SetCharacterDataHandler(XML_Parser parser,
670: XML_CharacterDataHandler handler)
671: {
672: characterDataHandler = handler;
673: }
674:
675: void XML_SetProcessingInstructionHandler(XML_Parser parser,
676: XML_ProcessingInstructionHandler handler)
677: {
678: processingInstructionHandler = handler;
679: }
680:
1.3 ! kahan 681: void XML_SetCommentHandler(XML_Parser parser,
! 682: XML_CommentHandler handler)
! 683: {
! 684: commentHandler = handler;
! 685: }
! 686:
! 687: void XML_SetCdataSectionHandler(XML_Parser parser,
! 688: XML_StartCdataSectionHandler start,
! 689: XML_EndCdataSectionHandler end)
! 690: {
! 691: startCdataSectionHandler = start;
! 692: endCdataSectionHandler = end;
! 693: }
! 694:
1.1 frystyk 695: void XML_SetDefaultHandler(XML_Parser parser,
696: XML_DefaultHandler handler)
697: {
698: defaultHandler = handler;
1.3 ! kahan 699: defaultExpandInternalEntities = 0;
! 700: }
! 701:
! 702: void XML_SetDefaultHandlerExpand(XML_Parser parser,
! 703: XML_DefaultHandler handler)
! 704: {
! 705: defaultHandler = handler;
! 706: defaultExpandInternalEntities = 1;
1.1 frystyk 707: }
708:
709: void XML_SetUnparsedEntityDeclHandler(XML_Parser parser,
710: XML_UnparsedEntityDeclHandler handler)
711: {
712: unparsedEntityDeclHandler = handler;
713: }
714:
715: void XML_SetNotationDeclHandler(XML_Parser parser,
716: XML_NotationDeclHandler handler)
717: {
718: notationDeclHandler = handler;
719: }
720:
1.3 ! kahan 721: void XML_SetNamespaceDeclHandler(XML_Parser parser,
! 722: XML_StartNamespaceDeclHandler start,
! 723: XML_EndNamespaceDeclHandler end)
! 724: {
! 725: startNamespaceDeclHandler = start;
! 726: endNamespaceDeclHandler = end;
! 727: }
! 728:
! 729: void XML_SetNotStandaloneHandler(XML_Parser parser,
! 730: XML_NotStandaloneHandler handler)
! 731: {
! 732: notStandaloneHandler = handler;
! 733: }
! 734:
1.1 frystyk 735: void XML_SetExternalEntityRefHandler(XML_Parser parser,
736: XML_ExternalEntityRefHandler handler)
737: {
738: externalEntityRefHandler = handler;
739: }
740:
1.3 ! kahan 741: void XML_SetExternalEntityRefHandlerArg(XML_Parser parser, void *arg)
! 742: {
! 743: if (arg)
! 744: externalEntityRefHandlerArg = arg;
! 745: else
! 746: externalEntityRefHandlerArg = parser;
! 747: }
! 748:
1.1 frystyk 749: void XML_SetUnknownEncodingHandler(XML_Parser parser,
750: XML_UnknownEncodingHandler handler,
751: void *data)
752: {
753: unknownEncodingHandler = handler;
754: unknownEncodingHandlerData = data;
755: }
756:
757: int XML_Parse(XML_Parser parser, const char *s, int len, int isFinal)
758: {
759: if (len == 0) {
760: if (!isFinal)
761: return 1;
1.3 ! kahan 762: positionPtr = bufferPtr;
1.1 frystyk 763: errorCode = processor(parser, bufferPtr, parseEndPtr = bufferEnd, 0);
764: if (errorCode == XML_ERROR_NONE)
765: return 1;
766: eventEndPtr = eventPtr;
767: return 0;
768: }
769: else if (bufferPtr == bufferEnd) {
770: const char *end;
771: int nLeftOver;
772: parseEndByteIndex += len;
773: positionPtr = s;
774: if (isFinal) {
775: errorCode = processor(parser, s, parseEndPtr = s + len, 0);
776: if (errorCode == XML_ERROR_NONE)
777: return 1;
778: eventEndPtr = eventPtr;
779: return 0;
780: }
781: errorCode = processor(parser, s, parseEndPtr = s + len, &end);
782: if (errorCode != XML_ERROR_NONE) {
783: eventEndPtr = eventPtr;
784: return 0;
785: }
786: XmlUpdatePosition(encoding, positionPtr, end, &position);
787: nLeftOver = s + len - end;
788: if (nLeftOver) {
789: if (buffer == 0 || nLeftOver > bufferLim - buffer) {
790: /* FIXME avoid integer overflow */
791: buffer = buffer == 0 ? malloc(len * 2) : realloc(buffer, len * 2);
792: if (!buffer) {
793: errorCode = XML_ERROR_NO_MEMORY;
794: eventPtr = eventEndPtr = 0;
795: return 0;
796: }
797: bufferLim = buffer + len * 2;
798: }
799: memcpy(buffer, end, nLeftOver);
800: bufferPtr = buffer;
801: bufferEnd = buffer + nLeftOver;
802: }
803: return 1;
804: }
805: else {
806: memcpy(XML_GetBuffer(parser, len), s, len);
807: return XML_ParseBuffer(parser, len, isFinal);
808: }
809: }
810:
811: int XML_ParseBuffer(XML_Parser parser, int len, int isFinal)
812: {
813: const char *start = bufferPtr;
814: positionPtr = start;
815: bufferEnd += len;
816: parseEndByteIndex += len;
817: errorCode = processor(parser, start, parseEndPtr = bufferEnd,
818: isFinal ? (const char **)0 : &bufferPtr);
819: if (errorCode == XML_ERROR_NONE) {
820: if (!isFinal)
821: XmlUpdatePosition(encoding, positionPtr, bufferPtr, &position);
822: return 1;
823: }
824: else {
825: eventEndPtr = eventPtr;
826: return 0;
827: }
828: }
829:
830: void *XML_GetBuffer(XML_Parser parser, int len)
831: {
832: if (len > bufferLim - bufferEnd) {
833: /* FIXME avoid integer overflow */
834: int neededSize = len + (bufferEnd - bufferPtr);
835: if (neededSize <= bufferLim - buffer) {
836: memmove(buffer, bufferPtr, bufferEnd - bufferPtr);
837: bufferEnd = buffer + (bufferEnd - bufferPtr);
838: bufferPtr = buffer;
839: }
840: else {
841: char *newBuf;
842: int bufferSize = bufferLim - bufferPtr;
843: if (bufferSize == 0)
844: bufferSize = INIT_BUFFER_SIZE;
845: do {
846: bufferSize *= 2;
847: } while (bufferSize < neededSize);
848: newBuf = malloc(bufferSize);
849: if (newBuf == 0) {
850: errorCode = XML_ERROR_NO_MEMORY;
851: return 0;
852: }
853: bufferLim = newBuf + bufferSize;
854: if (bufferPtr) {
855: memcpy(newBuf, bufferPtr, bufferEnd - bufferPtr);
856: free(buffer);
857: }
858: bufferEnd = newBuf + (bufferEnd - bufferPtr);
859: bufferPtr = buffer = newBuf;
860: }
861: }
862: return bufferEnd;
863: }
864:
865: enum XML_Error XML_GetErrorCode(XML_Parser parser)
866: {
867: return errorCode;
868: }
869:
870: long XML_GetCurrentByteIndex(XML_Parser parser)
871: {
872: if (eventPtr)
873: return parseEndByteIndex - (parseEndPtr - eventPtr);
874: return -1;
875: }
876:
1.3 ! kahan 877: int XML_GetCurrentByteCount(XML_Parser parser)
! 878: {
! 879: if (eventEndPtr && eventPtr)
! 880: return eventEndPtr - eventPtr;
! 881: return 0;
! 882: }
! 883:
1.1 frystyk 884: int XML_GetCurrentLineNumber(XML_Parser parser)
885: {
886: if (eventPtr) {
887: XmlUpdatePosition(encoding, positionPtr, eventPtr, &position);
888: positionPtr = eventPtr;
889: }
890: return position.lineNumber + 1;
891: }
892:
893: int XML_GetCurrentColumnNumber(XML_Parser parser)
894: {
895: if (eventPtr) {
896: XmlUpdatePosition(encoding, positionPtr, eventPtr, &position);
897: positionPtr = eventPtr;
898: }
899: return position.columnNumber;
900: }
901:
902: void XML_DefaultCurrent(XML_Parser parser)
903: {
1.3 ! kahan 904: if (defaultHandler) {
! 905: if (openInternalEntities)
! 906: reportDefault(parser,
! 907: ns ? XmlGetInternalEncodingNS() : XmlGetInternalEncoding(),
! 908: openInternalEntities->internalEventPtr,
! 909: openInternalEntities->internalEventEndPtr);
! 910: else
! 911: reportDefault(parser, encoding, eventPtr, eventEndPtr);
! 912: }
1.1 frystyk 913: }
914:
915: const XML_LChar *XML_ErrorString(int code)
916: {
917: static const XML_LChar *message[] = {
918: 0,
919: XML_T("out of memory"),
920: XML_T("syntax error"),
921: XML_T("no element found"),
922: XML_T("not well-formed"),
923: XML_T("unclosed token"),
924: XML_T("unclosed token"),
925: XML_T("mismatched tag"),
926: XML_T("duplicate attribute"),
927: XML_T("junk after document element"),
928: XML_T("illegal parameter entity reference"),
929: XML_T("undefined entity"),
930: XML_T("recursive entity reference"),
931: XML_T("asynchronous entity"),
932: XML_T("reference to invalid character number"),
933: XML_T("reference to binary entity"),
934: XML_T("reference to external entity in attribute"),
935: XML_T("xml processing instruction not at start of external entity"),
936: XML_T("unknown encoding"),
937: XML_T("encoding specified in XML declaration is incorrect"),
938: XML_T("unclosed CDATA section"),
1.3 ! kahan 939: XML_T("error in processing external entity reference"),
! 940: XML_T("document is not standalone")
1.1 frystyk 941: };
942: if (code > 0 && code < sizeof(message)/sizeof(message[0]))
943: return message[code];
944: return 0;
945: }
946:
947: static
948: enum XML_Error contentProcessor(XML_Parser parser,
949: const char *start,
950: const char *end,
951: const char **endPtr)
952: {
953: return doContent(parser, 0, encoding, start, end, endPtr);
954: }
955:
956: static
957: enum XML_Error externalEntityInitProcessor(XML_Parser parser,
958: const char *start,
959: const char *end,
960: const char **endPtr)
961: {
962: enum XML_Error result = initializeEncoding(parser);
963: if (result != XML_ERROR_NONE)
964: return result;
965: processor = externalEntityInitProcessor2;
966: return externalEntityInitProcessor2(parser, start, end, endPtr);
967: }
968:
969: static
970: enum XML_Error externalEntityInitProcessor2(XML_Parser parser,
971: const char *start,
972: const char *end,
973: const char **endPtr)
974: {
975: const char *next;
976: int tok = XmlContentTok(encoding, start, end, &next);
977: switch (tok) {
978: case XML_TOK_BOM:
979: start = next;
980: break;
981: case XML_TOK_PARTIAL:
982: if (endPtr) {
983: *endPtr = start;
984: return XML_ERROR_NONE;
985: }
986: eventPtr = start;
987: return XML_ERROR_UNCLOSED_TOKEN;
988: case XML_TOK_PARTIAL_CHAR:
989: if (endPtr) {
990: *endPtr = start;
991: return XML_ERROR_NONE;
992: }
993: eventPtr = start;
994: return XML_ERROR_PARTIAL_CHAR;
995: }
996: processor = externalEntityInitProcessor3;
997: return externalEntityInitProcessor3(parser, start, end, endPtr);
998: }
999:
1000: static
1001: enum XML_Error externalEntityInitProcessor3(XML_Parser parser,
1002: const char *start,
1003: const char *end,
1004: const char **endPtr)
1005: {
1006: const char *next;
1007: int tok = XmlContentTok(encoding, start, end, &next);
1008: switch (tok) {
1009: case XML_TOK_XML_DECL:
1010: {
1011: enum XML_Error result = processXmlDecl(parser, 1, start, next);
1012: if (result != XML_ERROR_NONE)
1013: return result;
1014: start = next;
1015: }
1016: break;
1017: case XML_TOK_PARTIAL:
1018: if (endPtr) {
1019: *endPtr = start;
1020: return XML_ERROR_NONE;
1021: }
1022: eventPtr = start;
1023: return XML_ERROR_UNCLOSED_TOKEN;
1024: case XML_TOK_PARTIAL_CHAR:
1025: if (endPtr) {
1026: *endPtr = start;
1027: return XML_ERROR_NONE;
1028: }
1029: eventPtr = start;
1030: return XML_ERROR_PARTIAL_CHAR;
1031: }
1032: processor = externalEntityContentProcessor;
1033: tagLevel = 1;
1034: return doContent(parser, 1, encoding, start, end, endPtr);
1035: }
1036:
1037: static
1038: enum XML_Error externalEntityContentProcessor(XML_Parser parser,
1039: const char *start,
1040: const char *end,
1041: const char **endPtr)
1042: {
1043: return doContent(parser, 1, encoding, start, end, endPtr);
1044: }
1045:
1046: static enum XML_Error
1047: doContent(XML_Parser parser,
1048: int startTagLevel,
1049: const ENCODING *enc,
1050: const char *s,
1051: const char *end,
1052: const char **nextPtr)
1053: {
1.3 ! kahan 1054: const ENCODING *internalEnc = ns ? XmlGetInternalEncodingNS() : XmlGetInternalEncoding();
1.1 frystyk 1055: const char **eventPP;
1056: const char **eventEndPP;
1057: if (enc == encoding) {
1058: eventPP = &eventPtr;
1059: eventEndPP = &eventEndPtr;
1060: }
1.3 ! kahan 1061: else {
! 1062: eventPP = &(openInternalEntities->internalEventPtr);
! 1063: eventEndPP = &(openInternalEntities->internalEventEndPtr);
! 1064: }
! 1065: *eventPP = s;
1.1 frystyk 1066: for (;;) {
1.3 ! kahan 1067: const char *next = s; /* XmlContentTok doesn't always set the last arg */
1.1 frystyk 1068: int tok = XmlContentTok(enc, s, end, &next);
1069: *eventEndPP = next;
1070: switch (tok) {
1071: case XML_TOK_TRAILING_CR:
1072: if (nextPtr) {
1073: *nextPtr = s;
1074: return XML_ERROR_NONE;
1075: }
1076: *eventEndPP = end;
1077: if (characterDataHandler) {
1.3 ! kahan 1078: XML_Char c = 0xA;
1.1 frystyk 1079: characterDataHandler(handlerArg, &c, 1);
1080: }
1081: else if (defaultHandler)
1082: reportDefault(parser, enc, s, end);
1083: if (startTagLevel == 0)
1084: return XML_ERROR_NO_ELEMENTS;
1085: if (tagLevel != startTagLevel)
1086: return XML_ERROR_ASYNC_ENTITY;
1087: return XML_ERROR_NONE;
1088: case XML_TOK_NONE:
1089: if (nextPtr) {
1090: *nextPtr = s;
1091: return XML_ERROR_NONE;
1092: }
1093: if (startTagLevel > 0) {
1094: if (tagLevel != startTagLevel)
1095: return XML_ERROR_ASYNC_ENTITY;
1096: return XML_ERROR_NONE;
1097: }
1098: return XML_ERROR_NO_ELEMENTS;
1099: case XML_TOK_INVALID:
1100: *eventPP = next;
1101: return XML_ERROR_INVALID_TOKEN;
1102: case XML_TOK_PARTIAL:
1103: if (nextPtr) {
1104: *nextPtr = s;
1105: return XML_ERROR_NONE;
1106: }
1107: return XML_ERROR_UNCLOSED_TOKEN;
1108: case XML_TOK_PARTIAL_CHAR:
1109: if (nextPtr) {
1110: *nextPtr = s;
1111: return XML_ERROR_NONE;
1112: }
1113: return XML_ERROR_PARTIAL_CHAR;
1114: case XML_TOK_ENTITY_REF:
1115: {
1116: const XML_Char *name;
1117: ENTITY *entity;
1118: XML_Char ch = XmlPredefinedEntityName(enc,
1119: s + enc->minBytesPerChar,
1120: next - enc->minBytesPerChar);
1121: if (ch) {
1122: if (characterDataHandler)
1123: characterDataHandler(handlerArg, &ch, 1);
1124: else if (defaultHandler)
1125: reportDefault(parser, enc, s, next);
1126: break;
1127: }
1128: name = poolStoreString(&dtd.pool, enc,
1129: s + enc->minBytesPerChar,
1130: next - enc->minBytesPerChar);
1131: if (!name)
1132: return XML_ERROR_NO_MEMORY;
1133: entity = (ENTITY *)lookup(&dtd.generalEntities, name, 0);
1134: poolDiscard(&dtd.pool);
1135: if (!entity) {
1136: if (dtd.complete || dtd.standalone)
1137: return XML_ERROR_UNDEFINED_ENTITY;
1138: if (defaultHandler)
1139: reportDefault(parser, enc, s, next);
1140: break;
1141: }
1142: if (entity->open)
1143: return XML_ERROR_RECURSIVE_ENTITY_REF;
1144: if (entity->notation)
1145: return XML_ERROR_BINARY_ENTITY_REF;
1146: if (entity) {
1147: if (entity->textPtr) {
1148: enum XML_Error result;
1.3 ! kahan 1149: OPEN_INTERNAL_ENTITY openEntity;
! 1150: if (defaultHandler && !defaultExpandInternalEntities) {
1.1 frystyk 1151: reportDefault(parser, enc, s, next);
1152: break;
1153: }
1154: entity->open = 1;
1.3 ! kahan 1155: openEntity.next = openInternalEntities;
! 1156: openInternalEntities = &openEntity;
! 1157: openEntity.entity = entity;
! 1158: openEntity.internalEventPtr = 0;
! 1159: openEntity.internalEventEndPtr = 0;
1.1 frystyk 1160: result = doContent(parser,
1161: tagLevel,
1162: internalEnc,
1163: (char *)entity->textPtr,
1164: (char *)(entity->textPtr + entity->textLen),
1165: 0);
1166: entity->open = 0;
1.3 ! kahan 1167: openInternalEntities = openEntity.next;
1.1 frystyk 1168: if (result)
1169: return result;
1170: }
1171: else if (externalEntityRefHandler) {
1.3 ! kahan 1172: const XML_Char *context;
1.1 frystyk 1173: entity->open = 1;
1.3 ! kahan 1174: context = getContext(parser);
1.1 frystyk 1175: entity->open = 0;
1.3 ! kahan 1176: if (!context)
1.1 frystyk 1177: return XML_ERROR_NO_MEMORY;
1.3 ! kahan 1178: if (!externalEntityRefHandler(externalEntityRefHandlerArg,
! 1179: context,
! 1180: dtd.base,
! 1181: entity->systemId,
! 1182: entity->publicId))
1.1 frystyk 1183: return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
1184: poolDiscard(&tempPool);
1185: }
1186: else if (defaultHandler)
1187: reportDefault(parser, enc, s, next);
1188: }
1189: break;
1190: }
1191: case XML_TOK_START_TAG_WITH_ATTS:
1192: if (!startElementHandler) {
1.3 ! kahan 1193: enum XML_Error result = storeAtts(parser, enc, s, 0, 0);
1.1 frystyk 1194: if (result)
1195: return result;
1196: }
1197: /* fall through */
1198: case XML_TOK_START_TAG_NO_ATTS:
1199: {
1200: TAG *tag;
1201: if (freeTagList) {
1202: tag = freeTagList;
1203: freeTagList = freeTagList->parent;
1204: }
1205: else {
1206: tag = malloc(sizeof(TAG));
1207: if (!tag)
1208: return XML_ERROR_NO_MEMORY;
1209: tag->buf = malloc(INIT_TAG_BUF_SIZE);
1210: if (!tag->buf)
1211: return XML_ERROR_NO_MEMORY;
1212: tag->bufEnd = tag->buf + INIT_TAG_BUF_SIZE;
1213: }
1.3 ! kahan 1214: tag->bindings = 0;
1.1 frystyk 1215: tag->parent = tagStack;
1216: tagStack = tag;
1.3 ! kahan 1217: tag->name.localPart = 0;
1.1 frystyk 1218: tag->rawName = s + enc->minBytesPerChar;
1219: tag->rawNameLength = XmlNameLength(enc, tag->rawName);
1220: if (nextPtr) {
1.3 ! kahan 1221: /* Need to guarantee that:
! 1222: tag->buf + ROUND_UP(tag->rawNameLength, sizeof(XML_Char)) <= tag->bufEnd - sizeof(XML_Char) */
! 1223: if (tag->rawNameLength + (int)(sizeof(XML_Char) - 1) + (int)sizeof(XML_Char) > tag->bufEnd - tag->buf) {
1.1 frystyk 1224: int bufSize = tag->rawNameLength * 4;
1225: bufSize = ROUND_UP(bufSize, sizeof(XML_Char));
1226: tag->buf = realloc(tag->buf, bufSize);
1227: if (!tag->buf)
1228: return XML_ERROR_NO_MEMORY;
1229: tag->bufEnd = tag->buf + bufSize;
1230: }
1231: memcpy(tag->buf, tag->rawName, tag->rawNameLength);
1232: tag->rawName = tag->buf;
1233: }
1234: ++tagLevel;
1235: if (startElementHandler) {
1236: enum XML_Error result;
1237: XML_Char *toPtr;
1238: for (;;) {
1239: const char *rawNameEnd = tag->rawName + tag->rawNameLength;
1240: const char *fromPtr = tag->rawName;
1241: int bufSize;
1242: if (nextPtr)
1243: toPtr = (XML_Char *)(tag->buf + ROUND_UP(tag->rawNameLength, sizeof(XML_Char)));
1244: else
1245: toPtr = (XML_Char *)tag->buf;
1.3 ! kahan 1246: tag->name.str = toPtr;
1.1 frystyk 1247: XmlConvert(enc,
1248: &fromPtr, rawNameEnd,
1249: (ICHAR **)&toPtr, (ICHAR *)tag->bufEnd - 1);
1250: if (fromPtr == rawNameEnd)
1251: break;
1252: bufSize = (tag->bufEnd - tag->buf) << 1;
1253: tag->buf = realloc(tag->buf, bufSize);
1254: if (!tag->buf)
1255: return XML_ERROR_NO_MEMORY;
1256: tag->bufEnd = tag->buf + bufSize;
1257: if (nextPtr)
1258: tag->rawName = tag->buf;
1259: }
1260: *toPtr = XML_T('\0');
1.3 ! kahan 1261: result = storeAtts(parser, enc, s, &(tag->name), &(tag->bindings));
1.1 frystyk 1262: if (result)
1263: return result;
1.3 ! kahan 1264: startElementHandler(handlerArg, tag->name.str, (const XML_Char **)atts);
1.1 frystyk 1265: poolClear(&tempPool);
1266: }
1267: else {
1.3 ! kahan 1268: tag->name.str = 0;
1.1 frystyk 1269: if (defaultHandler)
1270: reportDefault(parser, enc, s, next);
1271: }
1272: break;
1273: }
1274: case XML_TOK_EMPTY_ELEMENT_WITH_ATTS:
1275: if (!startElementHandler) {
1.3 ! kahan 1276: enum XML_Error result = storeAtts(parser, enc, s, 0, 0);
1.1 frystyk 1277: if (result)
1278: return result;
1279: }
1280: /* fall through */
1281: case XML_TOK_EMPTY_ELEMENT_NO_ATTS:
1282: if (startElementHandler || endElementHandler) {
1283: const char *rawName = s + enc->minBytesPerChar;
1.3 ! kahan 1284: enum XML_Error result;
! 1285: BINDING *bindings = 0;
! 1286: TAG_NAME name;
! 1287: name.str = poolStoreString(&tempPool, enc, rawName,
! 1288: rawName + XmlNameLength(enc, rawName));
! 1289: if (!name.str)
1.1 frystyk 1290: return XML_ERROR_NO_MEMORY;
1291: poolFinish(&tempPool);
1.3 ! kahan 1292: result = storeAtts(parser, enc, s, &name, &bindings);
! 1293: if (result)
! 1294: return result;
! 1295: poolFinish(&tempPool);
! 1296: if (startElementHandler)
! 1297: startElementHandler(handlerArg, name.str, (const XML_Char **)atts);
1.1 frystyk 1298: if (endElementHandler) {
1299: if (startElementHandler)
1300: *eventPP = *eventEndPP;
1.3 ! kahan 1301: endElementHandler(handlerArg, name.str);
1.1 frystyk 1302: }
1303: poolClear(&tempPool);
1.3 ! kahan 1304: while (bindings) {
! 1305: BINDING *b = bindings;
! 1306: if (endNamespaceDeclHandler)
! 1307: endNamespaceDeclHandler(handlerArg, b->prefix->name);
! 1308: bindings = bindings->nextTagBinding;
! 1309: b->nextTagBinding = freeBindingList;
! 1310: freeBindingList = b;
! 1311: b->prefix->binding = b->prevPrefixBinding;
! 1312: }
1.1 frystyk 1313: }
1314: else if (defaultHandler)
1315: reportDefault(parser, enc, s, next);
1316: if (tagLevel == 0)
1317: return epilogProcessor(parser, next, end, nextPtr);
1318: break;
1319: case XML_TOK_END_TAG:
1320: if (tagLevel == startTagLevel)
1321: return XML_ERROR_ASYNC_ENTITY;
1322: else {
1323: int len;
1324: const char *rawName;
1325: TAG *tag = tagStack;
1326: tagStack = tag->parent;
1327: tag->parent = freeTagList;
1328: freeTagList = tag;
1329: rawName = s + enc->minBytesPerChar*2;
1330: len = XmlNameLength(enc, rawName);
1331: if (len != tag->rawNameLength
1332: || memcmp(tag->rawName, rawName, len) != 0) {
1333: *eventPP = rawName;
1334: return XML_ERROR_TAG_MISMATCH;
1335: }
1336: --tagLevel;
1.3 ! kahan 1337: if (endElementHandler && tag->name.str) {
! 1338: if (tag->name.localPart) {
! 1339: XML_Char *to = (XML_Char *)tag->name.str + tag->name.uriLen;
! 1340: const XML_Char *from = tag->name.localPart;
! 1341: while ((*to++ = *from++) != 0)
! 1342: ;
1.1 frystyk 1343: }
1.3 ! kahan 1344: endElementHandler(handlerArg, tag->name.str);
1.1 frystyk 1345: }
1346: else if (defaultHandler)
1347: reportDefault(parser, enc, s, next);
1.3 ! kahan 1348: while (tag->bindings) {
! 1349: BINDING *b = tag->bindings;
! 1350: if (endNamespaceDeclHandler)
! 1351: endNamespaceDeclHandler(handlerArg, b->prefix->name);
! 1352: tag->bindings = tag->bindings->nextTagBinding;
! 1353: b->nextTagBinding = freeBindingList;
! 1354: freeBindingList = b;
! 1355: b->prefix->binding = b->prevPrefixBinding;
! 1356: }
1.1 frystyk 1357: if (tagLevel == 0)
1358: return epilogProcessor(parser, next, end, nextPtr);
1359: }
1360: break;
1361: case XML_TOK_CHAR_REF:
1362: {
1363: int n = XmlCharRefNumber(enc, s);
1364: if (n < 0)
1365: return XML_ERROR_BAD_CHAR_REF;
1366: if (characterDataHandler) {
1367: XML_Char buf[XML_ENCODE_MAX];
1368: characterDataHandler(handlerArg, buf, XmlEncode(n, (ICHAR *)buf));
1369: }
1370: else if (defaultHandler)
1371: reportDefault(parser, enc, s, next);
1372: }
1373: break;
1374: case XML_TOK_XML_DECL:
1375: return XML_ERROR_MISPLACED_XML_PI;
1376: case XML_TOK_DATA_NEWLINE:
1377: if (characterDataHandler) {
1.3 ! kahan 1378: XML_Char c = 0xA;
1.1 frystyk 1379: characterDataHandler(handlerArg, &c, 1);
1380: }
1381: else if (defaultHandler)
1382: reportDefault(parser, enc, s, next);
1383: break;
1384: case XML_TOK_CDATA_SECT_OPEN:
1385: {
1386: enum XML_Error result;
1.3 ! kahan 1387: if (startCdataSectionHandler)
! 1388: startCdataSectionHandler(handlerArg);
! 1389: #if 0
! 1390: /* Suppose you doing a transformation on a document that involves
! 1391: changing only the character data. You set up a defaultHandler
! 1392: and a characterDataHandler. The defaultHandler simply copies
! 1393: characters through. The characterDataHandler does the transformation
! 1394: and writes the characters out escaping them as necessary. This case
! 1395: will fail to work if we leave out the following two lines (because &
! 1396: and < inside CDATA sections will be incorrectly escaped).
! 1397:
! 1398: However, now we have a start/endCdataSectionHandler, so it seems
! 1399: easier to let the user deal with this. */
! 1400:
! 1401: else if (characterDataHandler)
1.1 frystyk 1402: characterDataHandler(handlerArg, dataBuf, 0);
1.3 ! kahan 1403: #endif
1.1 frystyk 1404: else if (defaultHandler)
1405: reportDefault(parser, enc, s, next);
1406: result = doCdataSection(parser, enc, &next, end, nextPtr);
1407: if (!next) {
1408: processor = cdataSectionProcessor;
1409: return result;
1410: }
1411: }
1412: break;
1413: case XML_TOK_TRAILING_RSQB:
1414: if (nextPtr) {
1415: *nextPtr = s;
1416: return XML_ERROR_NONE;
1417: }
1418: if (characterDataHandler) {
1419: if (MUST_CONVERT(enc, s)) {
1420: ICHAR *dataPtr = (ICHAR *)dataBuf;
1421: XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)dataBufEnd);
1422: characterDataHandler(handlerArg, dataBuf, dataPtr - (ICHAR *)dataBuf);
1423: }
1424: else
1425: characterDataHandler(handlerArg,
1426: (XML_Char *)s,
1427: (XML_Char *)end - (XML_Char *)s);
1428: }
1429: else if (defaultHandler)
1430: reportDefault(parser, enc, s, end);
1431: if (startTagLevel == 0) {
1432: *eventPP = end;
1433: return XML_ERROR_NO_ELEMENTS;
1434: }
1435: if (tagLevel != startTagLevel) {
1436: *eventPP = end;
1437: return XML_ERROR_ASYNC_ENTITY;
1438: }
1439: return XML_ERROR_NONE;
1440: case XML_TOK_DATA_CHARS:
1441: if (characterDataHandler) {
1442: if (MUST_CONVERT(enc, s)) {
1443: for (;;) {
1444: ICHAR *dataPtr = (ICHAR *)dataBuf;
1445: XmlConvert(enc, &s, next, &dataPtr, (ICHAR *)dataBufEnd);
1446: *eventEndPP = s;
1447: characterDataHandler(handlerArg, dataBuf, dataPtr - (ICHAR *)dataBuf);
1448: if (s == next)
1449: break;
1450: *eventPP = s;
1451: }
1452: }
1453: else
1454: characterDataHandler(handlerArg,
1455: (XML_Char *)s,
1456: (XML_Char *)next - (XML_Char *)s);
1457: }
1458: else if (defaultHandler)
1459: reportDefault(parser, enc, s, next);
1460: break;
1461: case XML_TOK_PI:
1462: if (!reportProcessingInstruction(parser, enc, s, next))
1463: return XML_ERROR_NO_MEMORY;
1464: break;
1.3 ! kahan 1465: case XML_TOK_COMMENT:
! 1466: if (!reportComment(parser, enc, s, next))
! 1467: return XML_ERROR_NO_MEMORY;
! 1468: break;
1.1 frystyk 1469: default:
1470: if (defaultHandler)
1471: reportDefault(parser, enc, s, next);
1472: break;
1473: }
1474: *eventPP = s = next;
1475: }
1476: /* not reached */
1477: }
1478:
1.3 ! kahan 1479: /* If tagNamePtr is non-null, build a real list of attributes,
1.1 frystyk 1480: otherwise just check the attributes for well-formedness. */
1481:
1482: static enum XML_Error storeAtts(XML_Parser parser, const ENCODING *enc,
1.3 ! kahan 1483: const char *s, TAG_NAME *tagNamePtr,
! 1484: BINDING **bindingsPtr)
1.1 frystyk 1485: {
1486: ELEMENT_TYPE *elementType = 0;
1487: int nDefaultAtts = 0;
1488: const XML_Char **appAtts;
1.3 ! kahan 1489: int attIndex = 0;
1.1 frystyk 1490: int i;
1491: int n;
1.3 ! kahan 1492: int nPrefixes = 0;
! 1493: BINDING *binding;
! 1494: const XML_Char *localPart;
! 1495:
! 1496: if (tagNamePtr) {
! 1497: elementType = (ELEMENT_TYPE *)lookup(&dtd.elementTypes, tagNamePtr->str, 0);
! 1498: if (!elementType) {
! 1499: tagNamePtr->str = poolCopyString(&dtd.pool, tagNamePtr->str);
! 1500: if (!tagNamePtr->str)
! 1501: return XML_ERROR_NO_MEMORY;
! 1502: elementType = (ELEMENT_TYPE *)lookup(&dtd.elementTypes, tagNamePtr->str, sizeof(ELEMENT_TYPE));
! 1503: if (!elementType)
! 1504: return XML_ERROR_NO_MEMORY;
! 1505: if (ns && !setElementTypePrefix(parser, elementType))
! 1506: return XML_ERROR_NO_MEMORY;
! 1507: }
! 1508: nDefaultAtts = elementType->nDefaultAtts;
1.1 frystyk 1509: }
1510: n = XmlGetAttributes(enc, s, attsSize, atts);
1511: if (n + nDefaultAtts > attsSize) {
1512: int oldAttsSize = attsSize;
1513: attsSize = n + nDefaultAtts + INIT_ATTS_SIZE;
1514: atts = realloc((void *)atts, attsSize * sizeof(ATTRIBUTE));
1515: if (!atts)
1516: return XML_ERROR_NO_MEMORY;
1517: if (n > oldAttsSize)
1518: XmlGetAttributes(enc, s, n, atts);
1519: }
1520: appAtts = (const XML_Char **)atts;
1521: for (i = 0; i < n; i++) {
1522: ATTRIBUTE_ID *attId = getAttributeId(parser, enc, atts[i].name,
1.3 ! kahan 1523: atts[i].name
! 1524: + XmlNameLength(enc, atts[i].name));
1.1 frystyk 1525: if (!attId)
1526: return XML_ERROR_NO_MEMORY;
1527: if ((attId->name)[-1]) {
1528: if (enc == encoding)
1529: eventPtr = atts[i].name;
1530: return XML_ERROR_DUPLICATE_ATTRIBUTE;
1531: }
1532: (attId->name)[-1] = 1;
1.3 ! kahan 1533: appAtts[attIndex++] = attId->name;
1.1 frystyk 1534: if (!atts[i].normalized) {
1535: enum XML_Error result;
1536: int isCdata = 1;
1537:
1538: if (attId->maybeTokenized) {
1539: int j;
1540: for (j = 0; j < nDefaultAtts; j++) {
1541: if (attId == elementType->defaultAtts[j].id) {
1542: isCdata = elementType->defaultAtts[j].isCdata;
1543: break;
1544: }
1545: }
1546: }
1547:
1548: result = storeAttributeValue(parser, enc, isCdata,
1549: atts[i].valuePtr, atts[i].valueEnd,
1550: &tempPool);
1551: if (result)
1552: return result;
1.3 ! kahan 1553: if (tagNamePtr) {
! 1554: appAtts[attIndex] = poolStart(&tempPool);
1.1 frystyk 1555: poolFinish(&tempPool);
1556: }
1557: else
1558: poolDiscard(&tempPool);
1559: }
1.3 ! kahan 1560: else if (tagNamePtr) {
! 1561: appAtts[attIndex] = poolStoreString(&tempPool, enc, atts[i].valuePtr, atts[i].valueEnd);
! 1562: if (appAtts[attIndex] == 0)
1.1 frystyk 1563: return XML_ERROR_NO_MEMORY;
1564: poolFinish(&tempPool);
1565: }
1.3 ! kahan 1566: if (attId->prefix && tagNamePtr) {
! 1567: if (attId->xmlns) {
! 1568: if (!addBinding(parser, attId->prefix, attId, appAtts[attIndex], bindingsPtr))
! 1569: return XML_ERROR_NO_MEMORY;
! 1570: --attIndex;
! 1571: }
! 1572: else {
! 1573: attIndex++;
! 1574: nPrefixes++;
! 1575: (attId->name)[-1] = 2;
! 1576: }
! 1577: }
! 1578: else
! 1579: attIndex++;
1.1 frystyk 1580: }
1.3 ! kahan 1581: nSpecifiedAtts = attIndex;
! 1582: if (tagNamePtr) {
1.1 frystyk 1583: int j;
1584: for (j = 0; j < nDefaultAtts; j++) {
1585: const DEFAULT_ATTRIBUTE *da = elementType->defaultAtts + j;
1586: if (!(da->id->name)[-1] && da->value) {
1.3 ! kahan 1587: if (da->id->prefix) {
! 1588: if (da->id->xmlns) {
! 1589: if (!addBinding(parser, da->id->prefix, da->id, da->value, bindingsPtr))
! 1590: return XML_ERROR_NO_MEMORY;
! 1591: }
! 1592: else {
! 1593: (da->id->name)[-1] = 2;
! 1594: nPrefixes++;
! 1595: appAtts[attIndex++] = da->id->name;
! 1596: appAtts[attIndex++] = da->value;
! 1597: }
! 1598: }
! 1599: else {
! 1600: (da->id->name)[-1] = 1;
! 1601: appAtts[attIndex++] = da->id->name;
! 1602: appAtts[attIndex++] = da->value;
! 1603: }
! 1604: }
! 1605: }
! 1606: appAtts[attIndex] = 0;
! 1607: }
! 1608: i = 0;
! 1609: if (nPrefixes) {
! 1610: for (; i < attIndex; i += 2) {
! 1611: if (appAtts[i][-1] == 2) {
! 1612: ATTRIBUTE_ID *id;
! 1613: ((XML_Char *)(appAtts[i]))[-1] = 0;
! 1614: id = (ATTRIBUTE_ID *)lookup(&dtd.attributeIds, appAtts[i], 0);
! 1615: if (id->prefix->binding) {
! 1616: int j;
! 1617: const BINDING *b = id->prefix->binding;
! 1618: const XML_Char *s = appAtts[i];
! 1619: for (j = 0; j < b->uriLen; j++) {
! 1620: if (!poolAppendChar(&tempPool, b->uri[j]))
! 1621: return XML_ERROR_NO_MEMORY;
! 1622: }
! 1623: while (*s++ != ':')
! 1624: ;
! 1625: do {
! 1626: if (!poolAppendChar(&tempPool, *s))
! 1627: return XML_ERROR_NO_MEMORY;
! 1628: } while (*s++);
! 1629: appAtts[i] = poolStart(&tempPool);
! 1630: poolFinish(&tempPool);
! 1631: }
! 1632: if (!--nPrefixes)
! 1633: break;
1.1 frystyk 1634: }
1.3 ! kahan 1635: else
! 1636: ((XML_Char *)(appAtts[i]))[-1] = 0;
1.1 frystyk 1637: }
1638: }
1.3 ! kahan 1639: for (; i < attIndex; i += 2)
! 1640: ((XML_Char *)(appAtts[i]))[-1] = 0;
! 1641: if (!tagNamePtr)
! 1642: return XML_ERROR_NONE;
! 1643: for (binding = *bindingsPtr; binding; binding = binding->nextTagBinding)
! 1644: binding->attId->name[-1] = 0;
! 1645: if (elementType->prefix) {
! 1646: binding = elementType->prefix->binding;
! 1647: if (!binding)
! 1648: return XML_ERROR_NONE;
! 1649: localPart = tagNamePtr->str;
! 1650: while (*localPart++ != XML_T(':'))
! 1651: ;
! 1652: }
! 1653: else if (dtd.defaultPrefix.binding) {
! 1654: binding = dtd.defaultPrefix.binding;
! 1655: localPart = tagNamePtr->str;
! 1656: }
! 1657: else
! 1658: return XML_ERROR_NONE;
! 1659: tagNamePtr->localPart = localPart;
! 1660: tagNamePtr->uriLen = binding->uriLen;
! 1661: i = binding->uriLen;
! 1662: do {
! 1663: if (i == binding->uriAlloc) {
! 1664: binding->uri = realloc(binding->uri, binding->uriAlloc *= 2);
! 1665: if (!binding->uri)
! 1666: return XML_ERROR_NO_MEMORY;
! 1667: }
! 1668: binding->uri[i++] = *localPart;
! 1669: } while (*localPart++);
! 1670: tagNamePtr->str = binding->uri;
1.1 frystyk 1671: return XML_ERROR_NONE;
1672: }
1673:
1.3 ! kahan 1674: static
! 1675: int addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId, const XML_Char *uri, BINDING **bindingsPtr)
! 1676: {
! 1677: BINDING *b;
! 1678: int len;
! 1679: for (len = 0; uri[len]; len++)
! 1680: ;
! 1681: if (namespaceSeparator)
! 1682: len++;
! 1683: if (freeBindingList) {
! 1684: b = freeBindingList;
! 1685: if (len > b->uriAlloc) {
! 1686: b->uri = realloc(b->uri, len + EXPAND_SPARE);
! 1687: if (!b->uri)
! 1688: return 0;
! 1689: b->uriAlloc = len + EXPAND_SPARE;
! 1690: }
! 1691: freeBindingList = b->nextTagBinding;
! 1692: }
! 1693: else {
! 1694: b = malloc(sizeof(BINDING));
! 1695: if (!b)
! 1696: return 0;
! 1697: b->uri = malloc(sizeof(XML_Char) * len + EXPAND_SPARE);
! 1698: if (!b->uri) {
! 1699: free(b);
! 1700: return 0;
! 1701: }
! 1702: b->uriAlloc = len;
! 1703: }
! 1704: b->uriLen = len;
! 1705: memcpy(b->uri, uri, len * sizeof(XML_Char));
! 1706: if (namespaceSeparator)
! 1707: b->uri[len - 1] = namespaceSeparator;
! 1708: b->prefix = prefix;
! 1709: b->attId = attId;
! 1710: b->prevPrefixBinding = prefix->binding;
! 1711: if (*uri == XML_T('\0') && prefix == &dtd.defaultPrefix)
! 1712: prefix->binding = 0;
! 1713: else
! 1714: prefix->binding = b;
! 1715: b->nextTagBinding = *bindingsPtr;
! 1716: *bindingsPtr = b;
! 1717: if (startNamespaceDeclHandler)
! 1718: startNamespaceDeclHandler(handlerArg, prefix->name,
! 1719: prefix->binding ? uri : 0);
! 1720: return 1;
! 1721: }
! 1722:
1.1 frystyk 1723: /* The idea here is to avoid using stack for each CDATA section when
1724: the whole file is parsed with one call. */
1725:
1726: static
1727: enum XML_Error cdataSectionProcessor(XML_Parser parser,
1728: const char *start,
1729: const char *end,
1730: const char **endPtr)
1731: {
1732: enum XML_Error result = doCdataSection(parser, encoding, &start, end, endPtr);
1733: if (start) {
1734: processor = contentProcessor;
1735: return contentProcessor(parser, start, end, endPtr);
1736: }
1737: return result;
1738: }
1739:
1740: /* startPtr gets set to non-null is the section is closed, and to null if
1741: the section is not yet closed. */
1742:
1743: static
1744: enum XML_Error doCdataSection(XML_Parser parser,
1745: const ENCODING *enc,
1746: const char **startPtr,
1747: const char *end,
1748: const char **nextPtr)
1749: {
1750: const char *s = *startPtr;
1751: const char **eventPP;
1752: const char **eventEndPP;
1753: if (enc == encoding) {
1754: eventPP = &eventPtr;
1755: *eventPP = s;
1756: eventEndPP = &eventEndPtr;
1757: }
1.3 ! kahan 1758: else {
! 1759: eventPP = &(openInternalEntities->internalEventPtr);
! 1760: eventEndPP = &(openInternalEntities->internalEventEndPtr);
! 1761: }
! 1762: *eventPP = s;
1.1 frystyk 1763: *startPtr = 0;
1764: for (;;) {
1765: const char *next;
1766: int tok = XmlCdataSectionTok(enc, s, end, &next);
1767: *eventEndPP = next;
1768: switch (tok) {
1769: case XML_TOK_CDATA_SECT_CLOSE:
1.3 ! kahan 1770: if (endCdataSectionHandler)
! 1771: endCdataSectionHandler(handlerArg);
! 1772: #if 0
! 1773: /* see comment under XML_TOK_CDATA_SECT_OPEN */
! 1774: else if (characterDataHandler)
1.1 frystyk 1775: characterDataHandler(handlerArg, dataBuf, 0);
1.3 ! kahan 1776: #endif
1.1 frystyk 1777: else if (defaultHandler)
1778: reportDefault(parser, enc, s, next);
1779: *startPtr = next;
1780: return XML_ERROR_NONE;
1781: case XML_TOK_DATA_NEWLINE:
1782: if (characterDataHandler) {
1.3 ! kahan 1783: XML_Char c = 0xA;
1.1 frystyk 1784: characterDataHandler(handlerArg, &c, 1);
1785: }
1786: else if (defaultHandler)
1787: reportDefault(parser, enc, s, next);
1788: break;
1789: case XML_TOK_DATA_CHARS:
1790: if (characterDataHandler) {
1791: if (MUST_CONVERT(enc, s)) {
1792: for (;;) {
1793: ICHAR *dataPtr = (ICHAR *)dataBuf;
1794: XmlConvert(enc, &s, next, &dataPtr, (ICHAR *)dataBufEnd);
1795: *eventEndPP = next;
1796: characterDataHandler(handlerArg, dataBuf, dataPtr - (ICHAR *)dataBuf);
1797: if (s == next)
1798: break;
1799: *eventPP = s;
1800: }
1801: }
1802: else
1803: characterDataHandler(handlerArg,
1804: (XML_Char *)s,
1805: (XML_Char *)next - (XML_Char *)s);
1806: }
1807: else if (defaultHandler)
1808: reportDefault(parser, enc, s, next);
1809: break;
1810: case XML_TOK_INVALID:
1811: *eventPP = next;
1812: return XML_ERROR_INVALID_TOKEN;
1813: case XML_TOK_PARTIAL_CHAR:
1814: if (nextPtr) {
1815: *nextPtr = s;
1816: return XML_ERROR_NONE;
1817: }
1818: return XML_ERROR_PARTIAL_CHAR;
1819: case XML_TOK_PARTIAL:
1820: case XML_TOK_NONE:
1821: if (nextPtr) {
1822: *nextPtr = s;
1823: return XML_ERROR_NONE;
1824: }
1825: return XML_ERROR_UNCLOSED_CDATA_SECTION;
1826: default:
1827: abort();
1828: }
1829: *eventPP = s = next;
1830: }
1831: /* not reached */
1832: }
1833:
1834: static enum XML_Error
1835: initializeEncoding(XML_Parser parser)
1836: {
1837: const char *s;
1838: #ifdef XML_UNICODE
1839: char encodingBuf[128];
1840: if (!protocolEncodingName)
1841: s = 0;
1842: else {
1843: int i;
1844: for (i = 0; protocolEncodingName[i]; i++) {
1845: if (i == sizeof(encodingBuf) - 1
1846: || protocolEncodingName[i] >= 0x80
1847: || protocolEncodingName[i] < 0) {
1848: encodingBuf[0] = '\0';
1849: break;
1850: }
1851: encodingBuf[i] = (char)protocolEncodingName[i];
1852: }
1853: encodingBuf[i] = '\0';
1854: s = encodingBuf;
1855: }
1856: #else
1857: s = protocolEncodingName;
1858: #endif
1.3 ! kahan 1859: if ((ns ? XmlInitEncodingNS : XmlInitEncoding)(&initEncoding, &encoding, s))
1.1 frystyk 1860: return XML_ERROR_NONE;
1861: return handleUnknownEncoding(parser, protocolEncodingName);
1862: }
1863:
1864: static enum XML_Error
1865: processXmlDecl(XML_Parser parser, int isGeneralTextEntity,
1866: const char *s, const char *next)
1867: {
1868: const char *encodingName = 0;
1869: const ENCODING *newEncoding = 0;
1870: const char *version;
1871: int standalone = -1;
1.3 ! kahan 1872: if (!(ns
! 1873: ? XmlParseXmlDeclNS
! 1874: : XmlParseXmlDecl)(isGeneralTextEntity,
! 1875: encoding,
! 1876: s,
! 1877: next,
! 1878: &eventPtr,
! 1879: &version,
! 1880: &encodingName,
! 1881: &newEncoding,
! 1882: &standalone))
1.1 frystyk 1883: return XML_ERROR_SYNTAX;
1884: if (!isGeneralTextEntity && standalone == 1)
1885: dtd.standalone = 1;
1886: if (defaultHandler)
1887: reportDefault(parser, encoding, s, next);
1888: if (!protocolEncodingName) {
1889: if (newEncoding) {
1890: if (newEncoding->minBytesPerChar != encoding->minBytesPerChar) {
1891: eventPtr = encodingName;
1892: return XML_ERROR_INCORRECT_ENCODING;
1893: }
1894: encoding = newEncoding;
1895: }
1896: else if (encodingName) {
1897: enum XML_Error result;
1898: const XML_Char *s = poolStoreString(&tempPool,
1899: encoding,
1900: encodingName,
1901: encodingName
1902: + XmlNameLength(encoding, encodingName));
1903: if (!s)
1904: return XML_ERROR_NO_MEMORY;
1905: result = handleUnknownEncoding(parser, s);
1906: poolDiscard(&tempPool);
1907: if (result == XML_ERROR_UNKNOWN_ENCODING)
1908: eventPtr = encodingName;
1909: return result;
1910: }
1911: }
1912: return XML_ERROR_NONE;
1913: }
1914:
1915: static enum XML_Error
1916: handleUnknownEncoding(XML_Parser parser, const XML_Char *encodingName)
1917: {
1918: if (unknownEncodingHandler) {
1919: XML_Encoding info;
1920: int i;
1921: for (i = 0; i < 256; i++)
1922: info.map[i] = -1;
1923: info.convert = 0;
1924: info.data = 0;
1925: info.release = 0;
1926: if (unknownEncodingHandler(unknownEncodingHandlerData, encodingName, &info)) {
1927: ENCODING *enc;
1928: unknownEncodingMem = malloc(XmlSizeOfUnknownEncoding());
1929: if (!unknownEncodingMem) {
1930: if (info.release)
1931: info.release(info.data);
1932: return XML_ERROR_NO_MEMORY;
1933: }
1.3 ! kahan 1934: enc = (ns
! 1935: ? XmlInitUnknownEncodingNS
! 1936: : XmlInitUnknownEncoding)(unknownEncodingMem,
! 1937: info.map,
! 1938: info.convert,
! 1939: info.data);
1.1 frystyk 1940: if (enc) {
1941: unknownEncodingData = info.data;
1942: unknownEncodingRelease = info.release;
1943: encoding = enc;
1944: return XML_ERROR_NONE;
1945: }
1946: }
1947: if (info.release)
1948: info.release(info.data);
1949: }
1950: return XML_ERROR_UNKNOWN_ENCODING;
1951: }
1952:
1953: static enum XML_Error
1954: prologInitProcessor(XML_Parser parser,
1955: const char *s,
1956: const char *end,
1957: const char **nextPtr)
1958: {
1959: enum XML_Error result = initializeEncoding(parser);
1960: if (result != XML_ERROR_NONE)
1961: return result;
1962: processor = prologProcessor;
1963: return prologProcessor(parser, s, end, nextPtr);
1964: }
1965:
1966: static enum XML_Error
1967: prologProcessor(XML_Parser parser,
1968: const char *s,
1969: const char *end,
1970: const char **nextPtr)
1971: {
1972: for (;;) {
1973: const char *next;
1974: int tok = XmlPrologTok(encoding, s, end, &next);
1975: if (tok <= 0) {
1976: if (nextPtr != 0 && tok != XML_TOK_INVALID) {
1977: *nextPtr = s;
1978: return XML_ERROR_NONE;
1979: }
1980: switch (tok) {
1981: case XML_TOK_INVALID:
1982: eventPtr = next;
1983: return XML_ERROR_INVALID_TOKEN;
1984: case XML_TOK_NONE:
1985: return XML_ERROR_NO_ELEMENTS;
1986: case XML_TOK_PARTIAL:
1987: return XML_ERROR_UNCLOSED_TOKEN;
1988: case XML_TOK_PARTIAL_CHAR:
1989: return XML_ERROR_PARTIAL_CHAR;
1990: case XML_TOK_TRAILING_CR:
1991: eventPtr = s + encoding->minBytesPerChar;
1992: return XML_ERROR_NO_ELEMENTS;
1993: default:
1994: abort();
1995: }
1996: }
1997: switch (XmlTokenRole(&prologState, tok, s, next, encoding)) {
1998: case XML_ROLE_XML_DECL:
1999: {
2000: enum XML_Error result = processXmlDecl(parser, 0, s, next);
2001: if (result != XML_ERROR_NONE)
2002: return result;
2003: }
2004: break;
2005: case XML_ROLE_DOCTYPE_SYSTEM_ID:
1.3 ! kahan 2006: if (!dtd.standalone
! 2007: && notStandaloneHandler
! 2008: && !notStandaloneHandler(handlerArg))
! 2009: return XML_ERROR_NOT_STANDALONE;
1.1 frystyk 2010: hadExternalDoctype = 1;
2011: break;
2012: case XML_ROLE_DOCTYPE_PUBLIC_ID:
2013: case XML_ROLE_ENTITY_PUBLIC_ID:
2014: if (!XmlIsPublicId(encoding, s, next, &eventPtr))
2015: return XML_ERROR_SYNTAX;
2016: if (declEntity) {
2017: XML_Char *tem = poolStoreString(&dtd.pool,
2018: encoding,
2019: s + encoding->minBytesPerChar,
2020: next - encoding->minBytesPerChar);
2021: if (!tem)
2022: return XML_ERROR_NO_MEMORY;
2023: normalizePublicId(tem);
2024: declEntity->publicId = tem;
2025: poolFinish(&dtd.pool);
2026: }
2027: break;
2028: case XML_ROLE_INSTANCE_START:
2029: processor = contentProcessor;
2030: if (hadExternalDoctype)
2031: dtd.complete = 0;
2032: return contentProcessor(parser, s, end, nextPtr);
2033: case XML_ROLE_ATTLIST_ELEMENT_NAME:
2034: {
2035: const XML_Char *name = poolStoreString(&dtd.pool, encoding, s, next);
2036: if (!name)
2037: return XML_ERROR_NO_MEMORY;
2038: declElementType = (ELEMENT_TYPE *)lookup(&dtd.elementTypes, name, sizeof(ELEMENT_TYPE));
2039: if (!declElementType)
2040: return XML_ERROR_NO_MEMORY;
2041: if (declElementType->name != name)
2042: poolDiscard(&dtd.pool);
1.3 ! kahan 2043: else {
1.1 frystyk 2044: poolFinish(&dtd.pool);
1.3 ! kahan 2045: if (!setElementTypePrefix(parser, declElementType))
! 2046: return XML_ERROR_NO_MEMORY;
! 2047: }
1.1 frystyk 2048: break;
2049: }
2050: case XML_ROLE_ATTRIBUTE_NAME:
2051: declAttributeId = getAttributeId(parser, encoding, s, next);
2052: if (!declAttributeId)
2053: return XML_ERROR_NO_MEMORY;
2054: declAttributeIsCdata = 0;
2055: break;
2056: case XML_ROLE_ATTRIBUTE_TYPE_CDATA:
2057: declAttributeIsCdata = 1;
2058: break;
2059: case XML_ROLE_IMPLIED_ATTRIBUTE_VALUE:
2060: case XML_ROLE_REQUIRED_ATTRIBUTE_VALUE:
2061: if (dtd.complete
2062: && !defineAttribute(declElementType, declAttributeId, declAttributeIsCdata, 0))
2063: return XML_ERROR_NO_MEMORY;
2064: break;
2065: case XML_ROLE_DEFAULT_ATTRIBUTE_VALUE:
2066: case XML_ROLE_FIXED_ATTRIBUTE_VALUE:
2067: {
2068: const XML_Char *attVal;
2069: enum XML_Error result
2070: = storeAttributeValue(parser, encoding, declAttributeIsCdata,
2071: s + encoding->minBytesPerChar,
2072: next - encoding->minBytesPerChar,
2073: &dtd.pool);
2074: if (result)
2075: return result;
2076: attVal = poolStart(&dtd.pool);
2077: poolFinish(&dtd.pool);
2078: if (dtd.complete
2079: && !defineAttribute(declElementType, declAttributeId, declAttributeIsCdata, attVal))
2080: return XML_ERROR_NO_MEMORY;
2081: break;
2082: }
2083: case XML_ROLE_ENTITY_VALUE:
2084: {
2085: enum XML_Error result = storeEntityValue(parser, s, next);
2086: if (result != XML_ERROR_NONE)
2087: return result;
2088: }
2089: break;
2090: case XML_ROLE_ENTITY_SYSTEM_ID:
2091: if (declEntity) {
2092: declEntity->systemId = poolStoreString(&dtd.pool, encoding,
2093: s + encoding->minBytesPerChar,
2094: next - encoding->minBytesPerChar);
2095: if (!declEntity->systemId)
2096: return XML_ERROR_NO_MEMORY;
2097: declEntity->base = dtd.base;
2098: poolFinish(&dtd.pool);
2099: }
2100: break;
2101: case XML_ROLE_ENTITY_NOTATION_NAME:
2102: if (declEntity) {
2103: declEntity->notation = poolStoreString(&dtd.pool, encoding, s, next);
2104: if (!declEntity->notation)
2105: return XML_ERROR_NO_MEMORY;
2106: poolFinish(&dtd.pool);
2107: if (unparsedEntityDeclHandler) {
2108: eventPtr = eventEndPtr = s;
2109: unparsedEntityDeclHandler(handlerArg,
2110: declEntity->name,
2111: declEntity->base,
2112: declEntity->systemId,
2113: declEntity->publicId,
2114: declEntity->notation);
2115: }
2116:
2117: }
2118: break;
2119: case XML_ROLE_GENERAL_ENTITY_NAME:
2120: {
2121: const XML_Char *name;
2122: if (XmlPredefinedEntityName(encoding, s, next)) {
2123: declEntity = 0;
2124: break;
2125: }
2126: name = poolStoreString(&dtd.pool, encoding, s, next);
2127: if (!name)
2128: return XML_ERROR_NO_MEMORY;
2129: if (dtd.complete) {
2130: declEntity = (ENTITY *)lookup(&dtd.generalEntities, name, sizeof(ENTITY));
2131: if (!declEntity)
2132: return XML_ERROR_NO_MEMORY;
2133: if (declEntity->name != name) {
2134: poolDiscard(&dtd.pool);
2135: declEntity = 0;
2136: }
2137: else
2138: poolFinish(&dtd.pool);
2139: }
2140: else {
2141: poolDiscard(&dtd.pool);
2142: declEntity = 0;
2143: }
2144: }
2145: break;
2146: case XML_ROLE_PARAM_ENTITY_NAME:
2147: declEntity = 0;
2148: break;
2149: case XML_ROLE_NOTATION_NAME:
2150: declNotationPublicId = 0;
2151: declNotationName = 0;
2152: if (notationDeclHandler) {
2153: declNotationName = poolStoreString(&tempPool, encoding, s, next);
2154: if (!declNotationName)
2155: return XML_ERROR_NO_MEMORY;
2156: poolFinish(&tempPool);
2157: }
2158: break;
2159: case XML_ROLE_NOTATION_PUBLIC_ID:
2160: if (!XmlIsPublicId(encoding, s, next, &eventPtr))
2161: return XML_ERROR_SYNTAX;
2162: if (declNotationName) {
2163: XML_Char *tem = poolStoreString(&tempPool,
2164: encoding,
2165: s + encoding->minBytesPerChar,
2166: next - encoding->minBytesPerChar);
2167: if (!tem)
2168: return XML_ERROR_NO_MEMORY;
2169: normalizePublicId(tem);
2170: declNotationPublicId = tem;
2171: poolFinish(&tempPool);
2172: }
2173: break;
2174: case XML_ROLE_NOTATION_SYSTEM_ID:
2175: if (declNotationName && notationDeclHandler) {
2176: const XML_Char *systemId
2177: = poolStoreString(&tempPool, encoding,
2178: s + encoding->minBytesPerChar,
2179: next - encoding->minBytesPerChar);
2180: if (!systemId)
2181: return XML_ERROR_NO_MEMORY;
2182: eventPtr = eventEndPtr = s;
2183: notationDeclHandler(handlerArg,
2184: declNotationName,
2185: dtd.base,
2186: systemId,
2187: declNotationPublicId);
2188: }
2189: poolClear(&tempPool);
2190: break;
2191: case XML_ROLE_NOTATION_NO_SYSTEM_ID:
2192: if (declNotationPublicId && notationDeclHandler) {
2193: eventPtr = eventEndPtr = s;
2194: notationDeclHandler(handlerArg,
2195: declNotationName,
2196: dtd.base,
2197: 0,
2198: declNotationPublicId);
2199: }
2200: poolClear(&tempPool);
2201: break;
2202: case XML_ROLE_ERROR:
2203: eventPtr = s;
2204: switch (tok) {
2205: case XML_TOK_PARAM_ENTITY_REF:
2206: return XML_ERROR_PARAM_ENTITY_REF;
2207: case XML_TOK_XML_DECL:
2208: return XML_ERROR_MISPLACED_XML_PI;
2209: default:
2210: return XML_ERROR_SYNTAX;
2211: }
2212: case XML_ROLE_GROUP_OPEN:
2213: if (prologState.level >= groupSize) {
2214: if (groupSize)
2215: groupConnector = realloc(groupConnector, groupSize *= 2);
2216: else
2217: groupConnector = malloc(groupSize = 32);
2218: if (!groupConnector)
2219: return XML_ERROR_NO_MEMORY;
2220: }
2221: groupConnector[prologState.level] = 0;
2222: break;
2223: case XML_ROLE_GROUP_SEQUENCE:
2224: if (groupConnector[prologState.level] == '|') {
2225: eventPtr = s;
2226: return XML_ERROR_SYNTAX;
2227: }
2228: groupConnector[prologState.level] = ',';
2229: break;
2230: case XML_ROLE_GROUP_CHOICE:
2231: if (groupConnector[prologState.level] == ',') {
2232: eventPtr = s;
2233: return XML_ERROR_SYNTAX;
2234: }
2235: groupConnector[prologState.level] = '|';
2236: break;
2237: case XML_ROLE_PARAM_ENTITY_REF:
1.3 ! kahan 2238: if (!dtd.standalone
! 2239: && notStandaloneHandler
! 2240: && !notStandaloneHandler(handlerArg))
! 2241: return XML_ERROR_NOT_STANDALONE;
1.1 frystyk 2242: dtd.complete = 0;
2243: break;
2244: case XML_ROLE_NONE:
2245: switch (tok) {
2246: case XML_TOK_PI:
2247: eventPtr = s;
2248: eventEndPtr = next;
2249: if (!reportProcessingInstruction(parser, encoding, s, next))
2250: return XML_ERROR_NO_MEMORY;
2251: break;
1.3 ! kahan 2252: case XML_TOK_COMMENT:
! 2253: eventPtr = s;
! 2254: eventEndPtr = next;
! 2255: if (!reportComment(parser, encoding, s, next))
! 2256: return XML_ERROR_NO_MEMORY;
! 2257: break;
1.1 frystyk 2258: }
2259: break;
2260: }
2261: if (defaultHandler) {
2262: switch (tok) {
2263: case XML_TOK_PI:
1.3 ! kahan 2264: case XML_TOK_COMMENT:
1.1 frystyk 2265: case XML_TOK_BOM:
2266: case XML_TOK_XML_DECL:
2267: break;
2268: default:
2269: eventPtr = s;
2270: eventEndPtr = next;
2271: reportDefault(parser, encoding, s, next);
2272: }
2273: }
2274: s = next;
2275: }
2276: /* not reached */
2277: }
2278:
2279: static
2280: enum XML_Error epilogProcessor(XML_Parser parser,
2281: const char *s,
2282: const char *end,
2283: const char **nextPtr)
2284: {
2285: processor = epilogProcessor;
2286: eventPtr = s;
2287: for (;;) {
2288: const char *next;
2289: int tok = XmlPrologTok(encoding, s, end, &next);
2290: eventEndPtr = next;
2291: switch (tok) {
2292: case XML_TOK_TRAILING_CR:
2293: if (defaultHandler) {
2294: eventEndPtr = end;
2295: reportDefault(parser, encoding, s, end);
2296: }
2297: /* fall through */
2298: case XML_TOK_NONE:
2299: if (nextPtr)
2300: *nextPtr = end;
2301: return XML_ERROR_NONE;
2302: case XML_TOK_PROLOG_S:
2303: if (defaultHandler)
2304: reportDefault(parser, encoding, s, next);
2305: break;
2306: case XML_TOK_PI:
2307: if (!reportProcessingInstruction(parser, encoding, s, next))
2308: return XML_ERROR_NO_MEMORY;
2309: break;
1.3 ! kahan 2310: case XML_TOK_COMMENT:
! 2311: if (!reportComment(parser, encoding, s, next))
! 2312: return XML_ERROR_NO_MEMORY;
! 2313: break;
1.1 frystyk 2314: case XML_TOK_INVALID:
2315: eventPtr = next;
2316: return XML_ERROR_INVALID_TOKEN;
2317: case XML_TOK_PARTIAL:
2318: if (nextPtr) {
2319: *nextPtr = s;
2320: return XML_ERROR_NONE;
2321: }
2322: return XML_ERROR_UNCLOSED_TOKEN;
2323: case XML_TOK_PARTIAL_CHAR:
2324: if (nextPtr) {
2325: *nextPtr = s;
2326: return XML_ERROR_NONE;
2327: }
2328: return XML_ERROR_PARTIAL_CHAR;
2329: default:
2330: return XML_ERROR_JUNK_AFTER_DOC_ELEMENT;
2331: }
2332: eventPtr = s = next;
2333: }
2334: }
2335:
2336: static
2337: enum XML_Error errorProcessor(XML_Parser parser,
2338: const char *s,
2339: const char *end,
2340: const char **nextPtr)
2341: {
2342: return errorCode;
2343: }
2344:
2345: static enum XML_Error
2346: storeAttributeValue(XML_Parser parser, const ENCODING *enc, int isCdata,
2347: const char *ptr, const char *end,
2348: STRING_POOL *pool)
2349: {
2350: enum XML_Error result = appendAttributeValue(parser, enc, isCdata, ptr, end, pool);
2351: if (result)
2352: return result;
1.3 ! kahan 2353: if (!isCdata && poolLength(pool) && poolLastChar(pool) == 0x20)
1.1 frystyk 2354: poolChop(pool);
2355: if (!poolAppendChar(pool, XML_T('\0')))
2356: return XML_ERROR_NO_MEMORY;
2357: return XML_ERROR_NONE;
2358: }
2359:
2360: static enum XML_Error
2361: appendAttributeValue(XML_Parser parser, const ENCODING *enc, int isCdata,
2362: const char *ptr, const char *end,
2363: STRING_POOL *pool)
2364: {
1.3 ! kahan 2365: const ENCODING *internalEnc = ns ? XmlGetInternalEncodingNS() : XmlGetInternalEncoding();
1.1 frystyk 2366: for (;;) {
2367: const char *next;
2368: int tok = XmlAttributeValueTok(enc, ptr, end, &next);
2369: switch (tok) {
2370: case XML_TOK_NONE:
2371: return XML_ERROR_NONE;
2372: case XML_TOK_INVALID:
2373: if (enc == encoding)
2374: eventPtr = next;
2375: return XML_ERROR_INVALID_TOKEN;
2376: case XML_TOK_PARTIAL:
2377: if (enc == encoding)
2378: eventPtr = ptr;
2379: return XML_ERROR_INVALID_TOKEN;
2380: case XML_TOK_CHAR_REF:
2381: {
2382: XML_Char buf[XML_ENCODE_MAX];
2383: int i;
2384: int n = XmlCharRefNumber(enc, ptr);
2385: if (n < 0) {
2386: if (enc == encoding)
2387: eventPtr = ptr;
2388: return XML_ERROR_BAD_CHAR_REF;
2389: }
2390: if (!isCdata
2391: && n == 0x20 /* space */
1.3 ! kahan 2392: && (poolLength(pool) == 0 || poolLastChar(pool) == 0x20))
1.1 frystyk 2393: break;
2394: n = XmlEncode(n, (ICHAR *)buf);
2395: if (!n) {
2396: if (enc == encoding)
2397: eventPtr = ptr;
2398: return XML_ERROR_BAD_CHAR_REF;
2399: }
2400: for (i = 0; i < n; i++) {
2401: if (!poolAppendChar(pool, buf[i]))
2402: return XML_ERROR_NO_MEMORY;
2403: }
2404: }
2405: break;
2406: case XML_TOK_DATA_CHARS:
2407: if (!poolAppend(pool, enc, ptr, next))
2408: return XML_ERROR_NO_MEMORY;
2409: break;
2410: break;
2411: case XML_TOK_TRAILING_CR:
2412: next = ptr + enc->minBytesPerChar;
2413: /* fall through */
2414: case XML_TOK_ATTRIBUTE_VALUE_S:
2415: case XML_TOK_DATA_NEWLINE:
1.3 ! kahan 2416: if (!isCdata && (poolLength(pool) == 0 || poolLastChar(pool) == 0x20))
1.1 frystyk 2417: break;
1.3 ! kahan 2418: if (!poolAppendChar(pool, 0x20))
1.1 frystyk 2419: return XML_ERROR_NO_MEMORY;
2420: break;
2421: case XML_TOK_ENTITY_REF:
2422: {
2423: const XML_Char *name;
2424: ENTITY *entity;
2425: XML_Char ch = XmlPredefinedEntityName(enc,
2426: ptr + enc->minBytesPerChar,
2427: next - enc->minBytesPerChar);
2428: if (ch) {
2429: if (!poolAppendChar(pool, ch))
2430: return XML_ERROR_NO_MEMORY;
2431: break;
2432: }
2433: name = poolStoreString(&temp2Pool, enc,
2434: ptr + enc->minBytesPerChar,
2435: next - enc->minBytesPerChar);
2436: if (!name)
2437: return XML_ERROR_NO_MEMORY;
2438: entity = (ENTITY *)lookup(&dtd.generalEntities, name, 0);
2439: poolDiscard(&temp2Pool);
2440: if (!entity) {
2441: if (dtd.complete) {
2442: if (enc == encoding)
2443: eventPtr = ptr;
2444: return XML_ERROR_UNDEFINED_ENTITY;
2445: }
2446: }
2447: else if (entity->open) {
2448: if (enc == encoding)
2449: eventPtr = ptr;
2450: return XML_ERROR_RECURSIVE_ENTITY_REF;
2451: }
2452: else if (entity->notation) {
2453: if (enc == encoding)
2454: eventPtr = ptr;
2455: return XML_ERROR_BINARY_ENTITY_REF;
2456: }
2457: else if (!entity->textPtr) {
2458: if (enc == encoding)
2459: eventPtr = ptr;
2460: return XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF;
2461: }
2462: else {
2463: enum XML_Error result;
2464: const XML_Char *textEnd = entity->textPtr + entity->textLen;
2465: entity->open = 1;
2466: result = appendAttributeValue(parser, internalEnc, isCdata, (char *)entity->textPtr, (char *)textEnd, pool);
2467: entity->open = 0;
2468: if (result)
2469: return result;
2470: }
2471: }
2472: break;
2473: default:
2474: abort();
2475: }
2476: ptr = next;
2477: }
2478: /* not reached */
2479: }
2480:
2481: static
2482: enum XML_Error storeEntityValue(XML_Parser parser,
2483: const char *entityTextPtr,
2484: const char *entityTextEnd)
2485: {
1.3 ! kahan 2486: const ENCODING *internalEnc = ns ? XmlGetInternalEncodingNS() : XmlGetInternalEncoding();
1.1 frystyk 2487: STRING_POOL *pool = &(dtd.pool);
2488: entityTextPtr += encoding->minBytesPerChar;
2489: entityTextEnd -= encoding->minBytesPerChar;
2490: for (;;) {
2491: const char *next;
2492: int tok = XmlEntityValueTok(encoding, entityTextPtr, entityTextEnd, &next);
2493: switch (tok) {
2494: case XML_TOK_PARAM_ENTITY_REF:
2495: eventPtr = entityTextPtr;
2496: return XML_ERROR_SYNTAX;
2497: case XML_TOK_NONE:
2498: if (declEntity) {
2499: declEntity->textPtr = pool->start;
2500: declEntity->textLen = pool->ptr - pool->start;
2501: poolFinish(pool);
2502: }
2503: else
2504: poolDiscard(pool);
2505: return XML_ERROR_NONE;
2506: case XML_TOK_ENTITY_REF:
2507: case XML_TOK_DATA_CHARS:
2508: if (!poolAppend(pool, encoding, entityTextPtr, next))
2509: return XML_ERROR_NO_MEMORY;
2510: break;
2511: case XML_TOK_TRAILING_CR:
2512: next = entityTextPtr + encoding->minBytesPerChar;
2513: /* fall through */
2514: case XML_TOK_DATA_NEWLINE:
2515: if (pool->end == pool->ptr && !poolGrow(pool))
2516: return XML_ERROR_NO_MEMORY;
1.3 ! kahan 2517: *(pool->ptr)++ = 0xA;
1.1 frystyk 2518: break;
2519: case XML_TOK_CHAR_REF:
2520: {
2521: XML_Char buf[XML_ENCODE_MAX];
2522: int i;
2523: int n = XmlCharRefNumber(encoding, entityTextPtr);
2524: if (n < 0) {
2525: eventPtr = entityTextPtr;
2526: return XML_ERROR_BAD_CHAR_REF;
2527: }
2528: n = XmlEncode(n, (ICHAR *)buf);
2529: if (!n) {
2530: eventPtr = entityTextPtr;
2531: return XML_ERROR_BAD_CHAR_REF;
2532: }
2533: for (i = 0; i < n; i++) {
2534: if (pool->end == pool->ptr && !poolGrow(pool))
2535: return XML_ERROR_NO_MEMORY;
2536: *(pool->ptr)++ = buf[i];
2537: }
2538: }
2539: break;
2540: case XML_TOK_PARTIAL:
2541: eventPtr = entityTextPtr;
2542: return XML_ERROR_INVALID_TOKEN;
2543: case XML_TOK_INVALID:
2544: eventPtr = next;
2545: return XML_ERROR_INVALID_TOKEN;
2546: default:
2547: abort();
2548: }
2549: entityTextPtr = next;
2550: }
2551: /* not reached */
2552: }
2553:
2554: static void
2555: normalizeLines(XML_Char *s)
2556: {
2557: XML_Char *p;
2558: for (;; s++) {
2559: if (*s == XML_T('\0'))
2560: return;
1.3 ! kahan 2561: if (*s == 0xD)
1.1 frystyk 2562: break;
2563: }
2564: p = s;
2565: do {
1.3 ! kahan 2566: if (*s == 0xD) {
! 2567: *p++ = 0xA;
! 2568: if (*++s == 0xA)
1.1 frystyk 2569: s++;
2570: }
2571: else
2572: *p++ = *s++;
2573: } while (*s);
2574: *p = XML_T('\0');
2575: }
2576:
2577: static int
2578: reportProcessingInstruction(XML_Parser parser, const ENCODING *enc, const char *start, const char *end)
2579: {
2580: const XML_Char *target;
2581: XML_Char *data;
2582: const char *tem;
2583: if (!processingInstructionHandler) {
2584: if (defaultHandler)
2585: reportDefault(parser, enc, start, end);
2586: return 1;
2587: }
2588: start += enc->minBytesPerChar * 2;
2589: tem = start + XmlNameLength(enc, start);
2590: target = poolStoreString(&tempPool, enc, start, tem);
2591: if (!target)
2592: return 0;
2593: poolFinish(&tempPool);
2594: data = poolStoreString(&tempPool, enc,
2595: XmlSkipS(enc, tem),
2596: end - enc->minBytesPerChar*2);
2597: if (!data)
2598: return 0;
2599: normalizeLines(data);
2600: processingInstructionHandler(handlerArg, target, data);
2601: poolClear(&tempPool);
2602: return 1;
2603: }
2604:
1.3 ! kahan 2605: static int
! 2606: reportComment(XML_Parser parser, const ENCODING *enc, const char *start, const char *end)
! 2607: {
! 2608: XML_Char *data;
! 2609: if (!commentHandler) {
! 2610: if (defaultHandler)
! 2611: reportDefault(parser, enc, start, end);
! 2612: return 1;
! 2613: }
! 2614: data = poolStoreString(&tempPool,
! 2615: enc,
! 2616: start + enc->minBytesPerChar * 4,
! 2617: end - enc->minBytesPerChar * 3);
! 2618: if (!data)
! 2619: return 0;
! 2620: normalizeLines(data);
! 2621: commentHandler(handlerArg, data);
! 2622: poolClear(&tempPool);
! 2623: return 1;
! 2624: }
! 2625:
1.1 frystyk 2626: static void
2627: reportDefault(XML_Parser parser, const ENCODING *enc, const char *s, const char *end)
2628: {
2629: if (MUST_CONVERT(enc, s)) {
1.3 ! kahan 2630: const char **eventPP;
! 2631: const char **eventEndPP;
! 2632: if (enc == encoding) {
! 2633: eventPP = &eventPtr;
! 2634: eventEndPP = &eventEndPtr;
! 2635: }
! 2636: else {
! 2637: eventPP = &(openInternalEntities->internalEventPtr);
! 2638: eventEndPP = &(openInternalEntities->internalEventEndPtr);
! 2639: }
! 2640: do {
1.1 frystyk 2641: ICHAR *dataPtr = (ICHAR *)dataBuf;
2642: XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)dataBufEnd);
1.3 ! kahan 2643: *eventEndPP = s;
! 2644: defaultHandler(handlerArg, dataBuf, dataPtr - (ICHAR *)dataBuf);
! 2645: *eventPP = s;
! 2646: } while (s != end);
1.1 frystyk 2647: }
2648: else
2649: defaultHandler(handlerArg, (XML_Char *)s, (XML_Char *)end - (XML_Char *)s);
2650: }
2651:
2652:
2653: static int
2654: defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *attId, int isCdata, const XML_Char *value)
2655: {
2656: DEFAULT_ATTRIBUTE *att;
2657: if (type->nDefaultAtts == type->allocDefaultAtts) {
2658: if (type->allocDefaultAtts == 0) {
2659: type->allocDefaultAtts = 8;
2660: type->defaultAtts = malloc(type->allocDefaultAtts*sizeof(DEFAULT_ATTRIBUTE));
2661: }
2662: else {
2663: type->allocDefaultAtts *= 2;
2664: type->defaultAtts = realloc(type->defaultAtts,
2665: type->allocDefaultAtts*sizeof(DEFAULT_ATTRIBUTE));
2666: }
2667: if (!type->defaultAtts)
2668: return 0;
2669: }
2670: att = type->defaultAtts + type->nDefaultAtts;
2671: att->id = attId;
2672: att->value = value;
2673: att->isCdata = isCdata;
2674: if (!isCdata)
2675: attId->maybeTokenized = 1;
2676: type->nDefaultAtts += 1;
2677: return 1;
2678: }
2679:
1.3 ! kahan 2680: static int setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *elementType)
! 2681: {
! 2682: const XML_Char *name;
! 2683: for (name = elementType->name; *name; name++) {
! 2684: if (*name == XML_T(':')) {
! 2685: PREFIX *prefix;
! 2686: const XML_Char *s;
! 2687: for (s = elementType->name; s != name; s++) {
! 2688: if (!poolAppendChar(&dtd.pool, *s))
! 2689: return 0;
! 2690: }
! 2691: if (!poolAppendChar(&dtd.pool, XML_T('\0')))
! 2692: return 0;
! 2693: prefix = (PREFIX *)lookup(&dtd.prefixes, poolStart(&dtd.pool), sizeof(PREFIX));
! 2694: if (!prefix)
! 2695: return 0;
! 2696: if (prefix->name == poolStart(&dtd.pool))
! 2697: poolFinish(&dtd.pool);
! 2698: else
! 2699: poolDiscard(&dtd.pool);
! 2700: elementType->prefix = prefix;
! 2701:
! 2702: }
! 2703: }
! 2704: return 1;
! 2705: }
! 2706:
1.1 frystyk 2707: static ATTRIBUTE_ID *
2708: getAttributeId(XML_Parser parser, const ENCODING *enc, const char *start, const char *end)
2709: {
2710: ATTRIBUTE_ID *id;
2711: const XML_Char *name;
2712: if (!poolAppendChar(&dtd.pool, XML_T('\0')))
2713: return 0;
2714: name = poolStoreString(&dtd.pool, enc, start, end);
2715: if (!name)
2716: return 0;
2717: ++name;
2718: id = (ATTRIBUTE_ID *)lookup(&dtd.attributeIds, name, sizeof(ATTRIBUTE_ID));
2719: if (!id)
2720: return 0;
2721: if (id->name != name)
2722: poolDiscard(&dtd.pool);
1.3 ! kahan 2723: else {
1.1 frystyk 2724: poolFinish(&dtd.pool);
1.3 ! kahan 2725: if (!ns)
! 2726: ;
! 2727: else if (name[0] == 'x'
! 2728: && name[1] == 'm'
! 2729: && name[2] == 'l'
! 2730: && name[3] == 'n'
! 2731: && name[4] == 's'
! 2732: && (name[5] == XML_T('\0') || name[5] == XML_T(':'))) {
! 2733: if (name[5] == '\0')
! 2734: id->prefix = &dtd.defaultPrefix;
! 2735: else
! 2736: id->prefix = (PREFIX *)lookup(&dtd.prefixes, name + 6, sizeof(PREFIX));
! 2737: id->xmlns = 1;
! 2738: }
! 2739: else {
! 2740: int i;
! 2741: for (i = 0; name[i]; i++) {
! 2742: if (name[i] == XML_T(':')) {
! 2743: int j;
! 2744: for (j = 0; j < i; j++) {
! 2745: if (!poolAppendChar(&dtd.pool, name[j]))
! 2746: return 0;
! 2747: }
! 2748: if (!poolAppendChar(&dtd.pool, XML_T('\0')))
! 2749: return 0;
! 2750: id->prefix = (PREFIX *)lookup(&dtd.prefixes, poolStart(&dtd.pool), sizeof(PREFIX));
! 2751: if (id->prefix->name == poolStart(&dtd.pool))
! 2752: poolFinish(&dtd.pool);
! 2753: else
! 2754: poolDiscard(&dtd.pool);
! 2755: break;
! 2756: }
! 2757: }
! 2758: }
! 2759: }
1.1 frystyk 2760: return id;
2761: }
2762:
1.3 ! kahan 2763: #define CONTEXT_SEP XML_T('\f')
! 2764:
1.1 frystyk 2765: static
1.3 ! kahan 2766: const XML_Char *getContext(XML_Parser parser)
1.1 frystyk 2767: {
2768: HASH_TABLE_ITER iter;
1.3 ! kahan 2769: int needSep = 0;
! 2770:
! 2771: if (dtd.defaultPrefix.binding) {
! 2772: int i;
! 2773: int len;
! 2774: if (!poolAppendChar(&tempPool, XML_T('=')))
! 2775: return 0;
! 2776: len = dtd.defaultPrefix.binding->uriLen;
! 2777: if (namespaceSeparator != XML_T('\0'))
! 2778: len--;
! 2779: for (i = 0; i < len; i++)
! 2780: if (!poolAppendChar(&tempPool, dtd.defaultPrefix.binding->uri[i]))
! 2781: return 0;
! 2782: needSep = 1;
! 2783: }
! 2784:
! 2785: hashTableIterInit(&iter, &(dtd.prefixes));
! 2786: for (;;) {
! 2787: int i;
! 2788: int len;
! 2789: const XML_Char *s;
! 2790: PREFIX *prefix = (PREFIX *)hashTableIterNext(&iter);
! 2791: if (!prefix)
! 2792: break;
! 2793: if (!prefix->binding)
! 2794: continue;
! 2795: if (needSep && !poolAppendChar(&tempPool, CONTEXT_SEP))
! 2796: return 0;
! 2797: for (s = prefix->name; *s; s++)
! 2798: if (!poolAppendChar(&tempPool, *s))
! 2799: return 0;
! 2800: if (!poolAppendChar(&tempPool, XML_T('=')))
! 2801: return 0;
! 2802: len = prefix->binding->uriLen;
! 2803: if (namespaceSeparator != XML_T('\0'))
! 2804: len--;
! 2805: for (i = 0; i < len; i++)
! 2806: if (!poolAppendChar(&tempPool, prefix->binding->uri[i]))
! 2807: return 0;
! 2808: needSep = 1;
! 2809: }
! 2810:
1.1 frystyk 2811:
2812: hashTableIterInit(&iter, &(dtd.generalEntities));
2813: for (;;) {
2814: const XML_Char *s;
2815: ENTITY *e = (ENTITY *)hashTableIterNext(&iter);
2816: if (!e)
2817: break;
2818: if (!e->open)
2819: continue;
1.3 ! kahan 2820: if (needSep && !poolAppendChar(&tempPool, CONTEXT_SEP))
1.1 frystyk 2821: return 0;
2822: for (s = e->name; *s; s++)
2823: if (!poolAppendChar(&tempPool, *s))
2824: return 0;
1.3 ! kahan 2825: needSep = 1;
1.1 frystyk 2826: }
2827:
2828: if (!poolAppendChar(&tempPool, XML_T('\0')))
2829: return 0;
2830: return tempPool.start;
2831: }
2832:
2833: static
1.3 ! kahan 2834: int setContext(XML_Parser parser, const XML_Char *context)
1.1 frystyk 2835: {
1.3 ! kahan 2836: const XML_Char *s = context;
! 2837:
! 2838: while (*context != XML_T('\0')) {
! 2839: if (*s == CONTEXT_SEP || *s == XML_T('\0')) {
1.1 frystyk 2840: ENTITY *e;
2841: if (!poolAppendChar(&tempPool, XML_T('\0')))
2842: return 0;
2843: e = (ENTITY *)lookup(&dtd.generalEntities, poolStart(&tempPool), 0);
2844: if (e)
2845: e->open = 1;
1.3 ! kahan 2846: if (*s != XML_T('\0'))
1.1 frystyk 2847: s++;
1.3 ! kahan 2848: context = s;
! 2849: poolDiscard(&tempPool);
! 2850: }
! 2851: else if (*s == '=') {
! 2852: PREFIX *prefix;
! 2853: if (poolLength(&tempPool) == 0)
! 2854: prefix = &dtd.defaultPrefix;
! 2855: else {
! 2856: if (!poolAppendChar(&tempPool, XML_T('\0')))
! 2857: return 0;
! 2858: prefix = (PREFIX *)lookup(&dtd.prefixes, poolStart(&tempPool), sizeof(PREFIX));
! 2859: if (!prefix)
! 2860: return 0;
! 2861: if (prefix->name == poolStart(&tempPool))
! 2862: poolFinish(&tempPool);
! 2863: else
! 2864: poolDiscard(&tempPool);
! 2865: }
! 2866: for (context = s + 1; *context != CONTEXT_SEP && *context != XML_T('\0'); context++)
! 2867: if (!poolAppendChar(&tempPool, *context))
! 2868: return 0;
! 2869: if (!poolAppendChar(&tempPool, XML_T('\0')))
! 2870: return 0;
! 2871: if (!addBinding(parser, prefix, 0, poolStart(&tempPool), &inheritedBindings))
! 2872: return 0;
1.1 frystyk 2873: poolDiscard(&tempPool);
1.3 ! kahan 2874: if (*context != XML_T('\0'))
! 2875: ++context;
! 2876: s = context;
1.1 frystyk 2877: }
2878: else {
2879: if (!poolAppendChar(&tempPool, *s))
2880: return 0;
2881: s++;
2882: }
2883: }
2884: return 1;
2885: }
2886:
2887:
2888: static
2889: void normalizePublicId(XML_Char *publicId)
2890: {
2891: XML_Char *p = publicId;
2892: XML_Char *s;
2893: for (s = publicId; *s; s++) {
2894: switch (*s) {
1.3 ! kahan 2895: case 0x20:
! 2896: case 0xD:
! 2897: case 0xA:
! 2898: if (p != publicId && p[-1] != 0x20)
! 2899: *p++ = 0x20;
1.1 frystyk 2900: break;
2901: default:
2902: *p++ = *s;
2903: }
2904: }
1.3 ! kahan 2905: if (p != publicId && p[-1] == 0x20)
1.1 frystyk 2906: --p;
2907: *p = XML_T('\0');
2908: }
2909:
2910: static int dtdInit(DTD *p)
2911: {
2912: poolInit(&(p->pool));
2913: hashTableInit(&(p->generalEntities));
2914: hashTableInit(&(p->elementTypes));
2915: hashTableInit(&(p->attributeIds));
1.3 ! kahan 2916: hashTableInit(&(p->prefixes));
1.1 frystyk 2917: p->complete = 1;
2918: p->standalone = 0;
2919: p->base = 0;
1.3 ! kahan 2920: p->defaultPrefix.name = 0;
! 2921: p->defaultPrefix.binding = 0;
1.1 frystyk 2922: return 1;
2923: }
2924:
2925: static void dtdDestroy(DTD *p)
2926: {
2927: HASH_TABLE_ITER iter;
2928: hashTableIterInit(&iter, &(p->elementTypes));
2929: for (;;) {
2930: ELEMENT_TYPE *e = (ELEMENT_TYPE *)hashTableIterNext(&iter);
2931: if (!e)
2932: break;
2933: if (e->allocDefaultAtts != 0)
2934: free(e->defaultAtts);
2935: }
2936: hashTableDestroy(&(p->generalEntities));
2937: hashTableDestroy(&(p->elementTypes));
2938: hashTableDestroy(&(p->attributeIds));
1.3 ! kahan 2939: hashTableDestroy(&(p->prefixes));
1.1 frystyk 2940: poolDestroy(&(p->pool));
2941: }
2942:
2943: /* Do a deep copy of the DTD. Return 0 for out of memory; non-zero otherwise.
2944: The new DTD has already been initialized. */
2945:
2946: static int dtdCopy(DTD *newDtd, const DTD *oldDtd)
2947: {
2948: HASH_TABLE_ITER iter;
2949:
2950: if (oldDtd->base) {
2951: const XML_Char *tem = poolCopyString(&(newDtd->pool), oldDtd->base);
2952: if (!tem)
2953: return 0;
2954: newDtd->base = tem;
2955: }
2956:
1.3 ! kahan 2957: /* Copy the prefix table. */
! 2958:
! 2959: hashTableIterInit(&iter, &(oldDtd->prefixes));
! 2960: for (;;) {
! 2961: const XML_Char *name;
! 2962: const PREFIX *oldP = (PREFIX *)hashTableIterNext(&iter);
! 2963: if (!oldP)
! 2964: break;
! 2965: name = poolCopyString(&(newDtd->pool), oldP->name);
! 2966: if (!name)
! 2967: return 0;
! 2968: if (!lookup(&(newDtd->prefixes), name, sizeof(PREFIX)))
! 2969: return 0;
! 2970: }
! 2971:
1.1 frystyk 2972: hashTableIterInit(&iter, &(oldDtd->attributeIds));
2973:
2974: /* Copy the attribute id table. */
2975:
2976: for (;;) {
2977: ATTRIBUTE_ID *newA;
2978: const XML_Char *name;
2979: const ATTRIBUTE_ID *oldA = (ATTRIBUTE_ID *)hashTableIterNext(&iter);
2980:
2981: if (!oldA)
2982: break;
2983: /* Remember to allocate the scratch byte before the name. */
2984: if (!poolAppendChar(&(newDtd->pool), XML_T('\0')))
2985: return 0;
2986: name = poolCopyString(&(newDtd->pool), oldA->name);
2987: if (!name)
2988: return 0;
2989: ++name;
2990: newA = (ATTRIBUTE_ID *)lookup(&(newDtd->attributeIds), name, sizeof(ATTRIBUTE_ID));
2991: if (!newA)
2992: return 0;
2993: newA->maybeTokenized = oldA->maybeTokenized;
1.3 ! kahan 2994: if (oldA->prefix) {
! 2995: newA->xmlns = oldA->xmlns;
! 2996: if (oldA->prefix == &oldDtd->defaultPrefix)
! 2997: newA->prefix = &newDtd->defaultPrefix;
! 2998: else
! 2999: newA->prefix = (PREFIX *)lookup(&(newDtd->prefixes), oldA->prefix->name, 0);
! 3000: }
1.1 frystyk 3001: }
3002:
3003: /* Copy the element type table. */
3004:
3005: hashTableIterInit(&iter, &(oldDtd->elementTypes));
3006:
3007: for (;;) {
3008: int i;
3009: ELEMENT_TYPE *newE;
3010: const XML_Char *name;
3011: const ELEMENT_TYPE *oldE = (ELEMENT_TYPE *)hashTableIterNext(&iter);
3012: if (!oldE)
3013: break;
3014: name = poolCopyString(&(newDtd->pool), oldE->name);
3015: if (!name)
3016: return 0;
3017: newE = (ELEMENT_TYPE *)lookup(&(newDtd->elementTypes), name, sizeof(ELEMENT_TYPE));
3018: if (!newE)
3019: return 0;
1.3 ! kahan 3020: if (oldE->nDefaultAtts) {
! 3021: newE->defaultAtts = (DEFAULT_ATTRIBUTE *)malloc(oldE->nDefaultAtts * sizeof(DEFAULT_ATTRIBUTE));
! 3022: if (!newE->defaultAtts)
! 3023: return 0;
! 3024: }
1.1 frystyk 3025: newE->allocDefaultAtts = newE->nDefaultAtts = oldE->nDefaultAtts;
1.3 ! kahan 3026: if (oldE->prefix)
! 3027: newE->prefix = (PREFIX *)lookup(&(newDtd->prefixes), oldE->prefix->name, 0);
1.1 frystyk 3028: for (i = 0; i < newE->nDefaultAtts; i++) {
3029: newE->defaultAtts[i].id = (ATTRIBUTE_ID *)lookup(&(newDtd->attributeIds), oldE->defaultAtts[i].id->name, 0);
3030: newE->defaultAtts[i].isCdata = oldE->defaultAtts[i].isCdata;
3031: if (oldE->defaultAtts[i].value) {
3032: newE->defaultAtts[i].value = poolCopyString(&(newDtd->pool), oldE->defaultAtts[i].value);
3033: if (!newE->defaultAtts[i].value)
3034: return 0;
3035: }
3036: else
3037: newE->defaultAtts[i].value = 0;
3038: }
3039: }
3040:
3041: /* Copy the entity table. */
3042:
3043: hashTableIterInit(&iter, &(oldDtd->generalEntities));
3044:
3045: for (;;) {
3046: ENTITY *newE;
3047: const XML_Char *name;
3048: const ENTITY *oldE = (ENTITY *)hashTableIterNext(&iter);
3049: if (!oldE)
3050: break;
3051: name = poolCopyString(&(newDtd->pool), oldE->name);
3052: if (!name)
3053: return 0;
3054: newE = (ENTITY *)lookup(&(newDtd->generalEntities), name, sizeof(ENTITY));
3055: if (!newE)
3056: return 0;
3057: if (oldE->systemId) {
3058: const XML_Char *tem = poolCopyString(&(newDtd->pool), oldE->systemId);
3059: if (!tem)
3060: return 0;
3061: newE->systemId = tem;
3062: if (oldE->base) {
3063: if (oldE->base == oldDtd->base)
3064: newE->base = newDtd->base;
3065: tem = poolCopyString(&(newDtd->pool), oldE->base);
3066: if (!tem)
3067: return 0;
3068: newE->base = tem;
3069: }
3070: }
3071: else {
3072: const XML_Char *tem = poolCopyStringN(&(newDtd->pool), oldE->textPtr, oldE->textLen);
3073: if (!tem)
3074: return 0;
3075: newE->textPtr = tem;
3076: newE->textLen = oldE->textLen;
3077: }
3078: if (oldE->notation) {
3079: const XML_Char *tem = poolCopyString(&(newDtd->pool), oldE->notation);
3080: if (!tem)
3081: return 0;
3082: newE->notation = tem;
3083: }
3084: }
3085:
3086: newDtd->complete = oldDtd->complete;
3087: newDtd->standalone = oldDtd->standalone;
3088: return 1;
3089: }
3090:
3091: static
3092: void poolInit(STRING_POOL *pool)
3093: {
3094: pool->blocks = 0;
3095: pool->freeBlocks = 0;
3096: pool->start = 0;
3097: pool->ptr = 0;
3098: pool->end = 0;
3099: }
3100:
3101: static
3102: void poolClear(STRING_POOL *pool)
3103: {
3104: if (!pool->freeBlocks)
3105: pool->freeBlocks = pool->blocks;
3106: else {
3107: BLOCK *p = pool->blocks;
3108: while (p) {
3109: BLOCK *tem = p->next;
3110: p->next = pool->freeBlocks;
3111: pool->freeBlocks = p;
3112: p = tem;
3113: }
3114: }
3115: pool->blocks = 0;
3116: pool->start = 0;
3117: pool->ptr = 0;
3118: pool->end = 0;
3119: }
3120:
3121: static
3122: void poolDestroy(STRING_POOL *pool)
3123: {
3124: BLOCK *p = pool->blocks;
3125: while (p) {
3126: BLOCK *tem = p->next;
3127: free(p);
3128: p = tem;
3129: }
3130: pool->blocks = 0;
3131: p = pool->freeBlocks;
3132: while (p) {
3133: BLOCK *tem = p->next;
3134: free(p);
3135: p = tem;
3136: }
3137: pool->freeBlocks = 0;
3138: pool->ptr = 0;
3139: pool->start = 0;
3140: pool->end = 0;
3141: }
3142:
3143: static
3144: XML_Char *poolAppend(STRING_POOL *pool, const ENCODING *enc,
3145: const char *ptr, const char *end)
3146: {
3147: if (!pool->ptr && !poolGrow(pool))
3148: return 0;
3149: for (;;) {
3150: XmlConvert(enc, &ptr, end, (ICHAR **)&(pool->ptr), (ICHAR *)pool->end);
3151: if (ptr == end)
3152: break;
3153: if (!poolGrow(pool))
3154: return 0;
3155: }
3156: return pool->start;
3157: }
3158:
3159: static const XML_Char *poolCopyString(STRING_POOL *pool, const XML_Char *s)
3160: {
3161: do {
3162: if (!poolAppendChar(pool, *s))
3163: return 0;
3164: } while (*s++);
3165: s = pool->start;
3166: poolFinish(pool);
3167: return s;
3168: }
3169:
3170: static const XML_Char *poolCopyStringN(STRING_POOL *pool, const XML_Char *s, int n)
3171: {
3172: if (!pool->ptr && !poolGrow(pool))
3173: return 0;
3174: for (; n > 0; --n, s++) {
3175: if (!poolAppendChar(pool, *s))
3176: return 0;
3177:
3178: }
3179: s = pool->start;
3180: poolFinish(pool);
3181: return s;
3182: }
3183:
3184: static
3185: XML_Char *poolStoreString(STRING_POOL *pool, const ENCODING *enc,
3186: const char *ptr, const char *end)
3187: {
3188: if (!poolAppend(pool, enc, ptr, end))
3189: return 0;
3190: if (pool->ptr == pool->end && !poolGrow(pool))
3191: return 0;
3192: *(pool->ptr)++ = 0;
3193: return pool->start;
3194: }
3195:
3196: static
3197: int poolGrow(STRING_POOL *pool)
3198: {
3199: if (pool->freeBlocks) {
3200: if (pool->start == 0) {
3201: pool->blocks = pool->freeBlocks;
3202: pool->freeBlocks = pool->freeBlocks->next;
3203: pool->blocks->next = 0;
3204: pool->start = pool->blocks->s;
3205: pool->end = pool->start + pool->blocks->size;
3206: pool->ptr = pool->start;
3207: return 1;
3208: }
3209: if (pool->end - pool->start < pool->freeBlocks->size) {
3210: BLOCK *tem = pool->freeBlocks->next;
3211: pool->freeBlocks->next = pool->blocks;
3212: pool->blocks = pool->freeBlocks;
3213: pool->freeBlocks = tem;
3214: memcpy(pool->blocks->s, pool->start, (pool->end - pool->start) * sizeof(XML_Char));
3215: pool->ptr = pool->blocks->s + (pool->ptr - pool->start);
3216: pool->start = pool->blocks->s;
3217: pool->end = pool->start + pool->blocks->size;
3218: return 1;
3219: }
3220: }
3221: if (pool->blocks && pool->start == pool->blocks->s) {
3222: int blockSize = (pool->end - pool->start)*2;
3223: pool->blocks = realloc(pool->blocks, offsetof(BLOCK, s) + blockSize * sizeof(XML_Char));
3224: if (!pool->blocks)
3225: return 0;
3226: pool->blocks->size = blockSize;
3227: pool->ptr = pool->blocks->s + (pool->ptr - pool->start);
3228: pool->start = pool->blocks->s;
3229: pool->end = pool->start + blockSize;
3230: }
3231: else {
3232: BLOCK *tem;
3233: int blockSize = pool->end - pool->start;
3234: if (blockSize < INIT_BLOCK_SIZE)
3235: blockSize = INIT_BLOCK_SIZE;
3236: else
3237: blockSize *= 2;
3238: tem = malloc(offsetof(BLOCK, s) + blockSize * sizeof(XML_Char));
3239: if (!tem)
3240: return 0;
3241: tem->size = blockSize;
3242: tem->next = pool->blocks;
3243: pool->blocks = tem;
3244: memcpy(tem->s, pool->start, (pool->ptr - pool->start) * sizeof(XML_Char));
3245: pool->ptr = tem->s + (pool->ptr - pool->start);
3246: pool->start = tem->s;
3247: pool->end = tem->s + blockSize;
3248: }
3249: return 1;
3250: }
Webmaster