Annotation of libwww/modules/expat/xmlparse/xmlparse.c, revision 1.2
1.1 frystyk 1: /*
2: The contents of this file are subject to the Mozilla Public License
3: Version 1.0 (the "License"); you may not use this file except in
4: compliance with the License. You may obtain a copy of the License at
5: http://www.mozilla.org/MPL/
6:
7: Software distributed under the License is distributed on an "AS IS"
8: basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
9: License for the specific language governing rights and limitations
10: under the License.
11:
12: The Original Code is expat.
13:
14: The Initial Developer of the Original Code is James Clark.
15: Portions created by James Clark are Copyright (C) 1998
16: James Clark. All Rights Reserved.
17:
18: Contributor(s):
19: */
20:
#include <limits.h>
#include <stddef.h>
#include <stdlib.h>
#include <string.h>
24:
25: #include "xmldef.h"
26:
/*
 * Select the internal character representation used for data handed to
 * the application: UTF-16 when XML_UNICODE is defined, UTF-8 otherwise.
 * The generic names (XmlConvert, XmlEncode, ...) are mapped onto the
 * matching encoding-specific names, and ICHAR is the code unit type.
 */
#ifdef XML_UNICODE
#define XML_ENCODE_MAX XML_UTF16_ENCODE_MAX
#define XmlConvert XmlUtf16Convert
#define XmlGetInternalEncoding XmlGetUtf16InternalEncoding
#define XmlEncode XmlUtf16Encode
/* Conversion is needed unless the input is already UTF-16 and the
   address s is even (low address bit clear).  The argument is
   parenthesized so that an expression such as p + 1 is cast as a whole. */
#define MUST_CONVERT(enc, s) (!(enc)->isUtf16 || (((unsigned long)(s)) & 1))
typedef unsigned short ICHAR;
#else
#define XML_ENCODE_MAX XML_UTF8_ENCODE_MAX
#define XmlConvert XmlUtf8Convert
#define XmlGetInternalEncoding XmlGetUtf8InternalEncoding
#define XmlEncode XmlUtf8Encode
/* Conversion is needed unless the input is already UTF-8. */
#define MUST_CONVERT(enc, s) (!(enc)->isUtf8)
typedef char ICHAR;
#endif
42:
/* XML_T(x) wraps a character or string literal: when
   XML_UNICODE_WCHAR_T is defined an L prefix is pasted onto it (making
   it a wide literal); otherwise the literal is used unchanged. */
#ifndef XML_UNICODE_WCHAR_T
#define XML_T(x) x
#else
#define XML_T(x) L ## x
#endif
48:
/* ROUND_UP(n, sz): smallest multiple of sz that is >= n.
   sz must be a power of 2 (the result is formed by masking). */
#define ROUND_UP(n, sz) ((((sz) - 1) + (n)) & ~((sz) - 1))
51:
52: #include "xmlparse.h"
53: #include "xmltok.h"
54: #include "xmlrole.h"
55: #include "hashtable.h"
56:
/* Initial allocation sizes. */

/* Bytes in a freshly allocated TAG name buffer;
   must be a multiple of sizeof(XML_Char). */
#define INIT_TAG_BUF_SIZE 32
/* XML_Char units in the parser's data buffer. */
#define INIT_DATA_BUF_SIZE 1024
/* Entries in the parser's attribute array. */
#define INIT_ATTS_SIZE 16
/* Presumably the initial string pool block size; its use is not
   visible in this part of the file -- TODO confirm. */
#define INIT_BLOCK_SIZE 1024
/* Starting point for sizing the input buffer in XML_GetBuffer
   (it is doubled at least once before the first allocation). */
#define INIT_BUFFER_SIZE 1024
62:
63: typedef struct tag {
64: struct tag *parent;
65: const char *rawName;
66: int rawNameLength;
67: const XML_Char *name;
68: char *buf;
69: char *bufEnd;
70: } TAG;
71:
72: typedef struct {
73: const XML_Char *name;
74: const XML_Char *textPtr;
75: int textLen;
76: const XML_Char *systemId;
77: const XML_Char *base;
78: const XML_Char *publicId;
79: const XML_Char *notation;
80: char open;
81: } ENTITY;
82:
83: typedef struct block {
84: struct block *next;
85: int size;
86: XML_Char s[1];
87: } BLOCK;
88:
89: typedef struct {
90: BLOCK *blocks;
91: BLOCK *freeBlocks;
92: const XML_Char *end;
93: XML_Char *ptr;
94: XML_Char *start;
95: } STRING_POOL;
96:
97: /* The XML_Char before the name is used to determine whether
98: an attribute has been specified. */
99: typedef struct {
100: XML_Char *name;
101: char maybeTokenized;
102: } ATTRIBUTE_ID;
103:
104: typedef struct {
105: const ATTRIBUTE_ID *id;
106: char isCdata;
107: const XML_Char *value;
108: } DEFAULT_ATTRIBUTE;
109:
110: typedef struct {
111: const XML_Char *name;
112: int nDefaultAtts;
113: int allocDefaultAtts;
114: DEFAULT_ATTRIBUTE *defaultAtts;
115: } ELEMENT_TYPE;
116:
117: typedef struct {
118: HASH_TABLE generalEntities;
119: HASH_TABLE elementTypes;
120: HASH_TABLE attributeIds;
121: STRING_POOL pool;
122: int complete;
123: int standalone;
124: const XML_Char *base;
125: } DTD;
126:
127: typedef enum XML_Error Processor(XML_Parser parser,
128: const char *start,
129: const char *end,
130: const char **endPtr);
131:
132: static Processor prologProcessor;
133: static Processor prologInitProcessor;
134: static Processor contentProcessor;
135: static Processor cdataSectionProcessor;
136: static Processor epilogProcessor;
137: static Processor errorProcessor;
138: static Processor externalEntityInitProcessor;
139: static Processor externalEntityInitProcessor2;
140: static Processor externalEntityInitProcessor3;
141: static Processor externalEntityContentProcessor;
142:
143: static enum XML_Error
144: handleUnknownEncoding(XML_Parser parser, const XML_Char *encodingName);
145: static enum XML_Error
146: processXmlDecl(XML_Parser parser, int isGeneralTextEntity, const char *, const char *);
147: static enum XML_Error
148: initializeEncoding(XML_Parser parser);
149: static enum XML_Error
150: doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc,
151: const char *start, const char *end, const char **endPtr);
152: static enum XML_Error
153: doCdataSection(XML_Parser parser, const ENCODING *, const char **startPtr, const char *end, const char **nextPtr);
154: static enum XML_Error storeAtts(XML_Parser parser, const ENCODING *, const XML_Char *tagName, const char *s);
155: static int
156: defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *, int isCdata, const XML_Char *dfltValue);
157: static enum XML_Error
158: storeAttributeValue(XML_Parser parser, const ENCODING *, int isCdata, const char *, const char *,
159: STRING_POOL *);
160: static enum XML_Error
161: appendAttributeValue(XML_Parser parser, const ENCODING *, int isCdata, const char *, const char *,
162: STRING_POOL *);
163: static ATTRIBUTE_ID *
164: getAttributeId(XML_Parser parser, const ENCODING *enc, const char *start, const char *end);
165: static enum XML_Error
166: storeEntityValue(XML_Parser parser, const char *start, const char *end);
167: static int
168: reportProcessingInstruction(XML_Parser parser, const ENCODING *enc, const char *start, const char *end);
169: static void
170: reportDefault(XML_Parser parser, const ENCODING *enc, const char *start, const char *end);
171:
172: static const XML_Char *getOpenEntityNames(XML_Parser parser);
173: static int setOpenEntityNames(XML_Parser parser, const XML_Char *openEntityNames);
174: static void normalizePublicId(XML_Char *s);
175: static int dtdInit(DTD *);
176: static void dtdDestroy(DTD *);
177: static int dtdCopy(DTD *newDtd, const DTD *oldDtd);
178: static void poolInit(STRING_POOL *);
179: static void poolClear(STRING_POOL *);
180: static void poolDestroy(STRING_POOL *);
181: static XML_Char *poolAppend(STRING_POOL *pool, const ENCODING *enc,
182: const char *ptr, const char *end);
183: static XML_Char *poolStoreString(STRING_POOL *pool, const ENCODING *enc,
184: const char *ptr, const char *end);
185: static int poolGrow(STRING_POOL *pool);
186: static const XML_Char *poolCopyString(STRING_POOL *pool, const XML_Char *s);
187: static const XML_Char *poolCopyStringN(STRING_POOL *pool, const XML_Char *s, int n);
188:
/*
 * Inline operations on a STRING_POOL.  "pool" is a pointer to the pool;
 * every use of the argument is parenthesized so that expressions such as
 * &dtd.pool can be passed safely.
 */

/* First character of the string being built. */
#define poolStart(pool) ((pool)->start)
/* One past the last character written so far. */
#define poolEnd(pool) ((pool)->ptr)
/* Number of characters in the string being built. */
#define poolLength(pool) ((pool)->ptr - (pool)->start)
/* Drop the last character written. */
#define poolChop(pool) ((void)--((pool)->ptr))
/* The last character written (the string must not be empty). */
#define poolLastChar(pool) (((pool)->ptr)[-1])
/* Abandon the string being built; its storage will be overwritten. */
#define poolDiscard(pool) ((pool)->ptr = (pool)->start)
/* Keep the string being built; the next string starts after it. */
#define poolFinish(pool) ((pool)->start = (pool)->ptr)
/* Append character c, growing the pool when it is full.
   Evaluates to 1 on success and to 0 if poolGrow fails. */
#define poolAppendChar(pool, c) \
  (((pool)->ptr == (pool)->end && !poolGrow(pool)) \
   ? 0 \
   : ((*((pool)->ptr)++ = (c)), 1))
200:
201: typedef struct {
202: /* The first member must be userData so that the XML_GetUserData macro works. */
203: void *userData;
204: void *handlerArg;
205: char *buffer;
206: /* first character to be parsed */
207: const char *bufferPtr;
208: /* past last character to be parsed */
209: char *bufferEnd;
210: /* allocated end of buffer */
211: const char *bufferLim;
212: long parseEndByteIndex;
213: const char *parseEndPtr;
214: XML_Char *dataBuf;
215: XML_Char *dataBufEnd;
216: XML_StartElementHandler startElementHandler;
217: XML_EndElementHandler endElementHandler;
218: XML_CharacterDataHandler characterDataHandler;
219: XML_ProcessingInstructionHandler processingInstructionHandler;
220: XML_DefaultHandler defaultHandler;
221: XML_UnparsedEntityDeclHandler unparsedEntityDeclHandler;
222: XML_NotationDeclHandler notationDeclHandler;
223: XML_ExternalEntityRefHandler externalEntityRefHandler;
224: XML_UnknownEncodingHandler unknownEncodingHandler;
1.2 ! frystyk 225: const ENCODING *encodingMEMBER;
1.1 frystyk 226: INIT_ENCODING initEncoding;
227: const XML_Char *protocolEncodingName;
228: void *unknownEncodingMem;
229: void *unknownEncodingData;
230: void *unknownEncodingHandlerData;
231: void (*unknownEncodingRelease)(void *);
232: PROLOG_STATE prologState;
233: Processor *processor;
234: enum XML_Error errorCode;
235: const char *eventPtr;
236: const char *eventEndPtr;
237: const char *positionPtr;
238: int tagLevel;
239: ENTITY *declEntity;
240: const XML_Char *declNotationName;
241: const XML_Char *declNotationPublicId;
242: ELEMENT_TYPE *declElementType;
243: ATTRIBUTE_ID *declAttributeId;
244: char declAttributeIsCdata;
245: DTD dtd;
246: TAG *tagStack;
247: TAG *freeTagList;
248: int attsSize;
249: ATTRIBUTE *atts;
250: POSITION position;
251: STRING_POOL tempPool;
252: STRING_POOL temp2Pool;
253: char *groupConnector;
254: unsigned groupSize;
255: int hadExternalDoctype;
256: } Parser;
257:
/*
 * Field access macros.  Each one expands to the like-named member of the
 * Parser behind the handle held in a variable called "parser", so every
 * function using them must have such a variable in scope.  Because these
 * names are macros they cannot be reused as local identifiers.
 */

/* application data and callbacks */
#define userData (((Parser *)parser)->userData)
#define handlerArg (((Parser *)parser)->handlerArg)
#define startElementHandler (((Parser *)parser)->startElementHandler)
#define endElementHandler (((Parser *)parser)->endElementHandler)
#define characterDataHandler (((Parser *)parser)->characterDataHandler)
#define processingInstructionHandler \
  (((Parser *)parser)->processingInstructionHandler)
#define defaultHandler (((Parser *)parser)->defaultHandler)
#define unparsedEntityDeclHandler \
  (((Parser *)parser)->unparsedEntityDeclHandler)
#define notationDeclHandler (((Parser *)parser)->notationDeclHandler)
#define externalEntityRefHandler \
  (((Parser *)parser)->externalEntityRefHandler)
#define unknownEncodingHandler (((Parser *)parser)->unknownEncodingHandler)

/* encoding */
#define encoding (((Parser *)parser)->encodingMEMBER)
#define initEncoding (((Parser *)parser)->initEncoding)
#define unknownEncodingMem (((Parser *)parser)->unknownEncodingMem)
#define unknownEncodingData (((Parser *)parser)->unknownEncodingData)
#define unknownEncodingHandlerData \
  (((Parser *)parser)->unknownEncodingHandlerData)
#define unknownEncodingRelease (((Parser *)parser)->unknownEncodingRelease)
#define protocolEncodingName (((Parser *)parser)->protocolEncodingName)

/* parsing state */
#define prologState (((Parser *)parser)->prologState)
#define processor (((Parser *)parser)->processor)
#define errorCode (((Parser *)parser)->errorCode)
#define eventPtr (((Parser *)parser)->eventPtr)
#define eventEndPtr (((Parser *)parser)->eventEndPtr)
#define positionPtr (((Parser *)parser)->positionPtr)
#define position (((Parser *)parser)->position)
#define tagLevel (((Parser *)parser)->tagLevel)

/* input and data buffers */
#define buffer (((Parser *)parser)->buffer)
#define bufferPtr (((Parser *)parser)->bufferPtr)
#define bufferEnd (((Parser *)parser)->bufferEnd)
#define parseEndByteIndex (((Parser *)parser)->parseEndByteIndex)
#define parseEndPtr (((Parser *)parser)->parseEndPtr)
#define bufferLim (((Parser *)parser)->bufferLim)
#define dataBuf (((Parser *)parser)->dataBuf)
#define dataBufEnd (((Parser *)parser)->dataBufEnd)

/* DTD and the declaration in progress */
#define dtd (((Parser *)parser)->dtd)
#define declEntity (((Parser *)parser)->declEntity)
#define declNotationName (((Parser *)parser)->declNotationName)
#define declNotationPublicId (((Parser *)parser)->declNotationPublicId)
#define declElementType (((Parser *)parser)->declElementType)
#define declAttributeId (((Parser *)parser)->declAttributeId)
#define declAttributeIsCdata (((Parser *)parser)->declAttributeIsCdata)

/* tags, attributes, scratch storage */
#define freeTagList (((Parser *)parser)->freeTagList)
#define tagStack (((Parser *)parser)->tagStack)
#define atts (((Parser *)parser)->atts)
#define attsSize (((Parser *)parser)->attsSize)
#define tempPool (((Parser *)parser)->tempPool)
#define temp2Pool (((Parser *)parser)->temp2Pool)
#define groupConnector (((Parser *)parser)->groupConnector)
#define groupSize (((Parser *)parser)->groupSize)
#define hadExternalDoctype (((Parser *)parser)->hadExternalDoctype)
309:
310: XML_Parser XML_ParserCreate(const XML_Char *encodingName)
311: {
312: XML_Parser parser = malloc(sizeof(Parser));
313: if (!parser)
314: return parser;
315: processor = prologInitProcessor;
316: XmlPrologStateInit(&prologState);
317: userData = 0;
318: handlerArg = 0;
319: startElementHandler = 0;
320: endElementHandler = 0;
321: characterDataHandler = 0;
322: processingInstructionHandler = 0;
323: defaultHandler = 0;
324: unparsedEntityDeclHandler = 0;
325: notationDeclHandler = 0;
326: externalEntityRefHandler = 0;
327: unknownEncodingHandler = 0;
328: buffer = 0;
329: bufferPtr = 0;
330: bufferEnd = 0;
331: parseEndByteIndex = 0;
332: parseEndPtr = 0;
333: bufferLim = 0;
334: declElementType = 0;
335: declAttributeId = 0;
336: declEntity = 0;
337: declNotationName = 0;
338: declNotationPublicId = 0;
339: memset(&position, 0, sizeof(POSITION));
340: errorCode = XML_ERROR_NONE;
341: eventPtr = 0;
342: eventEndPtr = 0;
343: positionPtr = 0;
344: tagLevel = 0;
345: tagStack = 0;
346: freeTagList = 0;
347: attsSize = INIT_ATTS_SIZE;
348: atts = malloc(attsSize * sizeof(ATTRIBUTE));
349: dataBuf = malloc(INIT_DATA_BUF_SIZE * sizeof(XML_Char));
350: groupSize = 0;
351: groupConnector = 0;
352: hadExternalDoctype = 0;
353: unknownEncodingMem = 0;
354: unknownEncodingRelease = 0;
355: unknownEncodingData = 0;
356: unknownEncodingHandlerData = 0;
357: poolInit(&tempPool);
358: poolInit(&temp2Pool);
359: protocolEncodingName = encodingName ? poolCopyString(&tempPool, encodingName) : 0;
360: if (!dtdInit(&dtd) || !atts || !dataBuf
361: || (encodingName && !protocolEncodingName)) {
362: XML_ParserFree(parser);
363: return 0;
364: }
365: dataBufEnd = dataBuf + INIT_DATA_BUF_SIZE;
366: XmlInitEncoding(&initEncoding, &encoding, 0);
367: return parser;
368: }
369:
370: XML_Parser XML_ExternalEntityParserCreate(XML_Parser oldParser,
371: const XML_Char *openEntityNames,
372: const XML_Char *encodingName)
373: {
374: XML_Parser parser = oldParser;
375: DTD *oldDtd = &dtd;
376: XML_StartElementHandler oldStartElementHandler = startElementHandler;
377: XML_EndElementHandler oldEndElementHandler = endElementHandler;
378: XML_CharacterDataHandler oldCharacterDataHandler = characterDataHandler;
379: XML_ProcessingInstructionHandler oldProcessingInstructionHandler = processingInstructionHandler;
380: XML_DefaultHandler oldDefaultHandler = defaultHandler;
381: XML_ExternalEntityRefHandler oldExternalEntityRefHandler = externalEntityRefHandler;
382: XML_UnknownEncodingHandler oldUnknownEncodingHandler = unknownEncodingHandler;
383: void *oldUserData = userData;
384: void *oldHandlerArg = handlerArg;
385:
386: parser = XML_ParserCreate(encodingName);
387: if (!parser)
388: return 0;
389: startElementHandler = oldStartElementHandler;
390: endElementHandler = oldEndElementHandler;
391: characterDataHandler = oldCharacterDataHandler;
392: processingInstructionHandler = oldProcessingInstructionHandler;
393: defaultHandler = oldDefaultHandler;
394: externalEntityRefHandler = oldExternalEntityRefHandler;
395: unknownEncodingHandler = oldUnknownEncodingHandler;
396: userData = oldUserData;
397: if (oldUserData == oldHandlerArg)
398: handlerArg = userData;
399: else
400: handlerArg = parser;
401: if (!dtdCopy(&dtd, oldDtd) || !setOpenEntityNames(parser, openEntityNames)) {
402: XML_ParserFree(parser);
403: return 0;
404: }
405: processor = externalEntityInitProcessor;
406: return parser;
407: }
408:
409: void XML_ParserFree(XML_Parser parser)
410: {
411: for (;;) {
412: TAG *p;
413: if (tagStack == 0) {
414: if (freeTagList == 0)
415: break;
416: tagStack = freeTagList;
417: freeTagList = 0;
418: }
419: p = tagStack;
420: tagStack = tagStack->parent;
421: free(p->buf);
422: free(p);
423: }
424: poolDestroy(&tempPool);
425: poolDestroy(&temp2Pool);
426: dtdDestroy(&dtd);
427: free((void *)atts);
428: free(groupConnector);
429: free(buffer);
430: free(dataBuf);
431: free(unknownEncodingMem);
432: if (unknownEncodingRelease)
433: unknownEncodingRelease(unknownEncodingData);
434: free(parser);
435: }
436:
437: void XML_UseParserAsHandlerArg(XML_Parser parser)
438: {
439: handlerArg = parser;
440: }
441:
442: void XML_SetUserData(XML_Parser parser, void *p)
443: {
444: if (handlerArg == userData)
445: handlerArg = userData = p;
446: else
447: userData = p;
448: }
449:
450: int XML_SetBase(XML_Parser parser, const XML_Char *p)
451: {
452: if (p) {
453: p = poolCopyString(&dtd.pool, p);
454: if (!p)
455: return 0;
456: dtd.base = p;
457: }
458: else
459: dtd.base = 0;
460: return 1;
461: }
462:
463: const XML_Char *XML_GetBase(XML_Parser parser)
464: {
465: return dtd.base;
466: }
467:
468: void XML_SetElementHandler(XML_Parser parser,
469: XML_StartElementHandler start,
470: XML_EndElementHandler end)
471: {
472: startElementHandler = start;
473: endElementHandler = end;
474: }
475:
476: void XML_SetCharacterDataHandler(XML_Parser parser,
477: XML_CharacterDataHandler handler)
478: {
479: characterDataHandler = handler;
480: }
481:
482: void XML_SetProcessingInstructionHandler(XML_Parser parser,
483: XML_ProcessingInstructionHandler handler)
484: {
485: processingInstructionHandler = handler;
486: }
487:
488: void XML_SetDefaultHandler(XML_Parser parser,
489: XML_DefaultHandler handler)
490: {
491: defaultHandler = handler;
492: }
493:
494: void XML_SetUnparsedEntityDeclHandler(XML_Parser parser,
495: XML_UnparsedEntityDeclHandler handler)
496: {
497: unparsedEntityDeclHandler = handler;
498: }
499:
500: void XML_SetNotationDeclHandler(XML_Parser parser,
501: XML_NotationDeclHandler handler)
502: {
503: notationDeclHandler = handler;
504: }
505:
506: void XML_SetExternalEntityRefHandler(XML_Parser parser,
507: XML_ExternalEntityRefHandler handler)
508: {
509: externalEntityRefHandler = handler;
510: }
511:
512: void XML_SetUnknownEncodingHandler(XML_Parser parser,
513: XML_UnknownEncodingHandler handler,
514: void *data)
515: {
516: unknownEncodingHandler = handler;
517: unknownEncodingHandlerData = data;
518: }
519:
520: int XML_Parse(XML_Parser parser, const char *s, int len, int isFinal)
521: {
522: if (len == 0) {
523: if (!isFinal)
524: return 1;
525: errorCode = processor(parser, bufferPtr, parseEndPtr = bufferEnd, 0);
526: if (errorCode == XML_ERROR_NONE)
527: return 1;
528: eventEndPtr = eventPtr;
529: return 0;
530: }
531: else if (bufferPtr == bufferEnd) {
532: const char *end;
533: int nLeftOver;
534: parseEndByteIndex += len;
535: positionPtr = s;
536: if (isFinal) {
537: errorCode = processor(parser, s, parseEndPtr = s + len, 0);
538: if (errorCode == XML_ERROR_NONE)
539: return 1;
540: eventEndPtr = eventPtr;
541: return 0;
542: }
543: errorCode = processor(parser, s, parseEndPtr = s + len, &end);
544: if (errorCode != XML_ERROR_NONE) {
545: eventEndPtr = eventPtr;
546: return 0;
547: }
548: XmlUpdatePosition(encoding, positionPtr, end, &position);
549: nLeftOver = s + len - end;
550: if (nLeftOver) {
551: if (buffer == 0 || nLeftOver > bufferLim - buffer) {
552: /* FIXME avoid integer overflow */
553: buffer = buffer == 0 ? malloc(len * 2) : realloc(buffer, len * 2);
554: if (!buffer) {
555: errorCode = XML_ERROR_NO_MEMORY;
556: eventPtr = eventEndPtr = 0;
557: return 0;
558: }
559: bufferLim = buffer + len * 2;
560: }
561: memcpy(buffer, end, nLeftOver);
562: bufferPtr = buffer;
563: bufferEnd = buffer + nLeftOver;
564: }
565: return 1;
566: }
567: else {
568: memcpy(XML_GetBuffer(parser, len), s, len);
569: return XML_ParseBuffer(parser, len, isFinal);
570: }
571: }
572:
573: int XML_ParseBuffer(XML_Parser parser, int len, int isFinal)
574: {
575: const char *start = bufferPtr;
576: positionPtr = start;
577: bufferEnd += len;
578: parseEndByteIndex += len;
579: errorCode = processor(parser, start, parseEndPtr = bufferEnd,
580: isFinal ? (const char **)0 : &bufferPtr);
581: if (errorCode == XML_ERROR_NONE) {
582: if (!isFinal)
583: XmlUpdatePosition(encoding, positionPtr, bufferPtr, &position);
584: return 1;
585: }
586: else {
587: eventEndPtr = eventPtr;
588: return 0;
589: }
590: }
591:
592: void *XML_GetBuffer(XML_Parser parser, int len)
593: {
594: if (len > bufferLim - bufferEnd) {
595: /* FIXME avoid integer overflow */
596: int neededSize = len + (bufferEnd - bufferPtr);
597: if (neededSize <= bufferLim - buffer) {
598: memmove(buffer, bufferPtr, bufferEnd - bufferPtr);
599: bufferEnd = buffer + (bufferEnd - bufferPtr);
600: bufferPtr = buffer;
601: }
602: else {
603: char *newBuf;
604: int bufferSize = bufferLim - bufferPtr;
605: if (bufferSize == 0)
606: bufferSize = INIT_BUFFER_SIZE;
607: do {
608: bufferSize *= 2;
609: } while (bufferSize < neededSize);
610: newBuf = malloc(bufferSize);
611: if (newBuf == 0) {
612: errorCode = XML_ERROR_NO_MEMORY;
613: return 0;
614: }
615: bufferLim = newBuf + bufferSize;
616: if (bufferPtr) {
617: memcpy(newBuf, bufferPtr, bufferEnd - bufferPtr);
618: free(buffer);
619: }
620: bufferEnd = newBuf + (bufferEnd - bufferPtr);
621: bufferPtr = buffer = newBuf;
622: }
623: }
624: return bufferEnd;
625: }
626:
627: enum XML_Error XML_GetErrorCode(XML_Parser parser)
628: {
629: return errorCode;
630: }
631:
632: long XML_GetCurrentByteIndex(XML_Parser parser)
633: {
634: if (eventPtr)
635: return parseEndByteIndex - (parseEndPtr - eventPtr);
636: return -1;
637: }
638:
639: int XML_GetCurrentLineNumber(XML_Parser parser)
640: {
641: if (eventPtr) {
642: XmlUpdatePosition(encoding, positionPtr, eventPtr, &position);
643: positionPtr = eventPtr;
644: }
645: return position.lineNumber + 1;
646: }
647:
648: int XML_GetCurrentColumnNumber(XML_Parser parser)
649: {
650: if (eventPtr) {
651: XmlUpdatePosition(encoding, positionPtr, eventPtr, &position);
652: positionPtr = eventPtr;
653: }
654: return position.columnNumber;
655: }
656:
657: void XML_DefaultCurrent(XML_Parser parser)
658: {
659: if (defaultHandler)
660: reportDefault(parser, encoding, eventPtr, eventEndPtr);
661: }
662:
663: const XML_LChar *XML_ErrorString(int code)
664: {
665: static const XML_LChar *message[] = {
666: 0,
667: XML_T("out of memory"),
668: XML_T("syntax error"),
669: XML_T("no element found"),
670: XML_T("not well-formed"),
671: XML_T("unclosed token"),
672: XML_T("unclosed token"),
673: XML_T("mismatched tag"),
674: XML_T("duplicate attribute"),
675: XML_T("junk after document element"),
676: XML_T("illegal parameter entity reference"),
677: XML_T("undefined entity"),
678: XML_T("recursive entity reference"),
679: XML_T("asynchronous entity"),
680: XML_T("reference to invalid character number"),
681: XML_T("reference to binary entity"),
682: XML_T("reference to external entity in attribute"),
683: XML_T("xml processing instruction not at start of external entity"),
684: XML_T("unknown encoding"),
685: XML_T("encoding specified in XML declaration is incorrect"),
686: XML_T("unclosed CDATA section"),
687: XML_T("error in processing external entity reference")
688: };
689: if (code > 0 && code < sizeof(message)/sizeof(message[0]))
690: return message[code];
691: return 0;
692: }
693:
694: static
695: enum XML_Error contentProcessor(XML_Parser parser,
696: const char *start,
697: const char *end,
698: const char **endPtr)
699: {
700: return doContent(parser, 0, encoding, start, end, endPtr);
701: }
702:
703: static
704: enum XML_Error externalEntityInitProcessor(XML_Parser parser,
705: const char *start,
706: const char *end,
707: const char **endPtr)
708: {
709: enum XML_Error result = initializeEncoding(parser);
710: if (result != XML_ERROR_NONE)
711: return result;
712: processor = externalEntityInitProcessor2;
713: return externalEntityInitProcessor2(parser, start, end, endPtr);
714: }
715:
716: static
717: enum XML_Error externalEntityInitProcessor2(XML_Parser parser,
718: const char *start,
719: const char *end,
720: const char **endPtr)
721: {
722: const char *next;
723: int tok = XmlContentTok(encoding, start, end, &next);
724: switch (tok) {
725: case XML_TOK_BOM:
726: start = next;
727: break;
728: case XML_TOK_PARTIAL:
729: if (endPtr) {
730: *endPtr = start;
731: return XML_ERROR_NONE;
732: }
733: eventPtr = start;
734: return XML_ERROR_UNCLOSED_TOKEN;
735: case XML_TOK_PARTIAL_CHAR:
736: if (endPtr) {
737: *endPtr = start;
738: return XML_ERROR_NONE;
739: }
740: eventPtr = start;
741: return XML_ERROR_PARTIAL_CHAR;
742: }
743: processor = externalEntityInitProcessor3;
744: return externalEntityInitProcessor3(parser, start, end, endPtr);
745: }
746:
747: static
748: enum XML_Error externalEntityInitProcessor3(XML_Parser parser,
749: const char *start,
750: const char *end,
751: const char **endPtr)
752: {
753: const char *next;
754: int tok = XmlContentTok(encoding, start, end, &next);
755: switch (tok) {
756: case XML_TOK_XML_DECL:
757: {
758: enum XML_Error result = processXmlDecl(parser, 1, start, next);
759: if (result != XML_ERROR_NONE)
760: return result;
761: start = next;
762: }
763: break;
764: case XML_TOK_PARTIAL:
765: if (endPtr) {
766: *endPtr = start;
767: return XML_ERROR_NONE;
768: }
769: eventPtr = start;
770: return XML_ERROR_UNCLOSED_TOKEN;
771: case XML_TOK_PARTIAL_CHAR:
772: if (endPtr) {
773: *endPtr = start;
774: return XML_ERROR_NONE;
775: }
776: eventPtr = start;
777: return XML_ERROR_PARTIAL_CHAR;
778: }
779: processor = externalEntityContentProcessor;
780: tagLevel = 1;
781: return doContent(parser, 1, encoding, start, end, endPtr);
782: }
783:
784: static
785: enum XML_Error externalEntityContentProcessor(XML_Parser parser,
786: const char *start,
787: const char *end,
788: const char **endPtr)
789: {
790: return doContent(parser, 1, encoding, start, end, endPtr);
791: }
792:
793: static enum XML_Error
794: doContent(XML_Parser parser,
795: int startTagLevel,
796: const ENCODING *enc,
797: const char *s,
798: const char *end,
799: const char **nextPtr)
800: {
801: const ENCODING *internalEnc = XmlGetInternalEncoding();
802: const char *dummy;
803: const char **eventPP;
804: const char **eventEndPP;
805: if (enc == encoding) {
806: eventPP = &eventPtr;
807: *eventPP = s;
808: eventEndPP = &eventEndPtr;
809: }
810: else
811: eventPP = eventEndPP = &dummy;
812: for (;;) {
813: const char *next;
814: int tok = XmlContentTok(enc, s, end, &next);
815: *eventEndPP = next;
816: switch (tok) {
817: case XML_TOK_TRAILING_CR:
818: if (nextPtr) {
819: *nextPtr = s;
820: return XML_ERROR_NONE;
821: }
822: *eventEndPP = end;
823: if (characterDataHandler) {
824: XML_Char c = XML_T('\n');
825: characterDataHandler(handlerArg, &c, 1);
826: }
827: else if (defaultHandler)
828: reportDefault(parser, enc, s, end);
829: if (startTagLevel == 0)
830: return XML_ERROR_NO_ELEMENTS;
831: if (tagLevel != startTagLevel)
832: return XML_ERROR_ASYNC_ENTITY;
833: return XML_ERROR_NONE;
834: case XML_TOK_NONE:
835: if (nextPtr) {
836: *nextPtr = s;
837: return XML_ERROR_NONE;
838: }
839: if (startTagLevel > 0) {
840: if (tagLevel != startTagLevel)
841: return XML_ERROR_ASYNC_ENTITY;
842: return XML_ERROR_NONE;
843: }
844: return XML_ERROR_NO_ELEMENTS;
845: case XML_TOK_INVALID:
846: *eventPP = next;
847: return XML_ERROR_INVALID_TOKEN;
848: case XML_TOK_PARTIAL:
849: if (nextPtr) {
850: *nextPtr = s;
851: return XML_ERROR_NONE;
852: }
853: return XML_ERROR_UNCLOSED_TOKEN;
854: case XML_TOK_PARTIAL_CHAR:
855: if (nextPtr) {
856: *nextPtr = s;
857: return XML_ERROR_NONE;
858: }
859: return XML_ERROR_PARTIAL_CHAR;
860: case XML_TOK_ENTITY_REF:
861: {
862: const XML_Char *name;
863: ENTITY *entity;
864: XML_Char ch = XmlPredefinedEntityName(enc,
865: s + enc->minBytesPerChar,
866: next - enc->minBytesPerChar);
867: if (ch) {
868: if (characterDataHandler)
869: characterDataHandler(handlerArg, &ch, 1);
870: else if (defaultHandler)
871: reportDefault(parser, enc, s, next);
872: break;
873: }
874: name = poolStoreString(&dtd.pool, enc,
875: s + enc->minBytesPerChar,
876: next - enc->minBytesPerChar);
877: if (!name)
878: return XML_ERROR_NO_MEMORY;
879: entity = (ENTITY *)lookup(&dtd.generalEntities, name, 0);
880: poolDiscard(&dtd.pool);
881: if (!entity) {
882: if (dtd.complete || dtd.standalone)
883: return XML_ERROR_UNDEFINED_ENTITY;
884: if (defaultHandler)
885: reportDefault(parser, enc, s, next);
886: break;
887: }
888: if (entity->open)
889: return XML_ERROR_RECURSIVE_ENTITY_REF;
890: if (entity->notation)
891: return XML_ERROR_BINARY_ENTITY_REF;
892: if (entity) {
893: if (entity->textPtr) {
894: enum XML_Error result;
895: if (defaultHandler) {
896: reportDefault(parser, enc, s, next);
897: break;
898: }
899: /* Protect against the possibility that somebody sets
900: the defaultHandler from inside another handler. */
901: *eventEndPP = *eventPP;
902: entity->open = 1;
903: result = doContent(parser,
904: tagLevel,
905: internalEnc,
906: (char *)entity->textPtr,
907: (char *)(entity->textPtr + entity->textLen),
908: 0);
909: entity->open = 0;
910: if (result)
911: return result;
912: }
913: else if (externalEntityRefHandler) {
914: const XML_Char *openEntityNames;
915: entity->open = 1;
916: openEntityNames = getOpenEntityNames(parser);
917: entity->open = 0;
918: if (!openEntityNames)
919: return XML_ERROR_NO_MEMORY;
920: if (!externalEntityRefHandler(parser, openEntityNames, dtd.base, entity->systemId, entity->publicId))
921: return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
922: poolDiscard(&tempPool);
923: }
924: else if (defaultHandler)
925: reportDefault(parser, enc, s, next);
926: }
927: break;
928: }
929: case XML_TOK_START_TAG_WITH_ATTS:
930: if (!startElementHandler) {
931: enum XML_Error result = storeAtts(parser, enc, 0, s);
932: if (result)
933: return result;
934: }
935: /* fall through */
936: case XML_TOK_START_TAG_NO_ATTS:
937: {
938: TAG *tag;
939: if (freeTagList) {
940: tag = freeTagList;
941: freeTagList = freeTagList->parent;
942: }
943: else {
944: tag = malloc(sizeof(TAG));
945: if (!tag)
946: return XML_ERROR_NO_MEMORY;
947: tag->buf = malloc(INIT_TAG_BUF_SIZE);
948: if (!tag->buf)
949: return XML_ERROR_NO_MEMORY;
950: tag->bufEnd = tag->buf + INIT_TAG_BUF_SIZE;
951: }
952: tag->parent = tagStack;
953: tagStack = tag;
954: tag->rawName = s + enc->minBytesPerChar;
955: tag->rawNameLength = XmlNameLength(enc, tag->rawName);
956: if (nextPtr) {
957: if (tag->rawNameLength > tag->bufEnd - tag->buf) {
958: int bufSize = tag->rawNameLength * 4;
959: bufSize = ROUND_UP(bufSize, sizeof(XML_Char));
960: tag->buf = realloc(tag->buf, bufSize);
961: if (!tag->buf)
962: return XML_ERROR_NO_MEMORY;
963: tag->bufEnd = tag->buf + bufSize;
964: }
965: memcpy(tag->buf, tag->rawName, tag->rawNameLength);
966: tag->rawName = tag->buf;
967: }
968: ++tagLevel;
969: if (startElementHandler) {
970: enum XML_Error result;
971: XML_Char *toPtr;
972: for (;;) {
973: const char *rawNameEnd = tag->rawName + tag->rawNameLength;
974: const char *fromPtr = tag->rawName;
975: int bufSize;
976: if (nextPtr)
977: toPtr = (XML_Char *)(tag->buf + ROUND_UP(tag->rawNameLength, sizeof(XML_Char)));
978: else
979: toPtr = (XML_Char *)tag->buf;
980: tag->name = toPtr;
981: XmlConvert(enc,
982: &fromPtr, rawNameEnd,
983: (ICHAR **)&toPtr, (ICHAR *)tag->bufEnd - 1);
984: if (fromPtr == rawNameEnd)
985: break;
986: bufSize = (tag->bufEnd - tag->buf) << 1;
987: tag->buf = realloc(tag->buf, bufSize);
988: if (!tag->buf)
989: return XML_ERROR_NO_MEMORY;
990: tag->bufEnd = tag->buf + bufSize;
991: if (nextPtr)
992: tag->rawName = tag->buf;
993: }
994: *toPtr = XML_T('\0');
995: result = storeAtts(parser, enc, tag->name, s);
996: if (result)
997: return result;
998: startElementHandler(handlerArg, tag->name, (const XML_Char **)atts);
999: poolClear(&tempPool);
1000: }
1001: else {
1002: tag->name = 0;
1003: if (defaultHandler)
1004: reportDefault(parser, enc, s, next);
1005: }
1006: break;
1007: }
1008: case XML_TOK_EMPTY_ELEMENT_WITH_ATTS:
1009: if (!startElementHandler) {
1010: enum XML_Error result = storeAtts(parser, enc, 0, s);
1011: if (result)
1012: return result;
1013: }
1014: /* fall through */
1015: case XML_TOK_EMPTY_ELEMENT_NO_ATTS:
1016: if (startElementHandler || endElementHandler) {
1017: const char *rawName = s + enc->minBytesPerChar;
1018: const XML_Char *name = poolStoreString(&tempPool, enc, rawName,
1019: rawName
1020: + XmlNameLength(enc, rawName));
1021: if (!name)
1022: return XML_ERROR_NO_MEMORY;
1023: poolFinish(&tempPool);
1024: if (startElementHandler) {
1025: enum XML_Error result = storeAtts(parser, enc, name, s);
1026: if (result)
1027: return result;
1028: startElementHandler(handlerArg, name, (const XML_Char **)atts);
1029: }
1030: if (endElementHandler) {
1031: if (startElementHandler)
1032: *eventPP = *eventEndPP;
1033: endElementHandler(handlerArg, name);
1034: }
1035: poolClear(&tempPool);
1036: }
1037: else if (defaultHandler)
1038: reportDefault(parser, enc, s, next);
1039: if (tagLevel == 0)
1040: return epilogProcessor(parser, next, end, nextPtr);
1041: break;
1042: case XML_TOK_END_TAG:
1043: if (tagLevel == startTagLevel)
1044: return XML_ERROR_ASYNC_ENTITY;
1045: else {
1046: int len;
1047: const char *rawName;
1048: TAG *tag = tagStack;
1049: tagStack = tag->parent;
1050: tag->parent = freeTagList;
1051: freeTagList = tag;
1052: rawName = s + enc->minBytesPerChar*2;
1053: len = XmlNameLength(enc, rawName);
1054: if (len != tag->rawNameLength
1055: || memcmp(tag->rawName, rawName, len) != 0) {
1056: *eventPP = rawName;
1057: return XML_ERROR_TAG_MISMATCH;
1058: }
1059: --tagLevel;
1060: if (endElementHandler) {
1061: if (tag->name)
1062: endElementHandler(handlerArg, tag->name);
1063: else {
1064: const XML_Char *name = poolStoreString(&tempPool, enc, rawName,
1065: rawName + len);
1066: if (!name)
1067: return XML_ERROR_NO_MEMORY;
1068: endElementHandler(handlerArg, name);
1069: poolClear(&tempPool);
1070: }
1071: }
1072: else if (defaultHandler)
1073: reportDefault(parser, enc, s, next);
1074: if (tagLevel == 0)
1075: return epilogProcessor(parser, next, end, nextPtr);
1076: }
1077: break;
1078: case XML_TOK_CHAR_REF:
1079: {
1080: int n = XmlCharRefNumber(enc, s);
1081: if (n < 0)
1082: return XML_ERROR_BAD_CHAR_REF;
1083: if (characterDataHandler) {
1084: XML_Char buf[XML_ENCODE_MAX];
1085: characterDataHandler(handlerArg, buf, XmlEncode(n, (ICHAR *)buf));
1086: }
1087: else if (defaultHandler)
1088: reportDefault(parser, enc, s, next);
1089: }
1090: break;
1091: case XML_TOK_XML_DECL:
1092: return XML_ERROR_MISPLACED_XML_PI;
1093: case XML_TOK_DATA_NEWLINE:
1094: if (characterDataHandler) {
1095: XML_Char c = XML_T('\n');
1096: characterDataHandler(handlerArg, &c, 1);
1097: }
1098: else if (defaultHandler)
1099: reportDefault(parser, enc, s, next);
1100: break;
1101: case XML_TOK_CDATA_SECT_OPEN:
1102: {
1103: enum XML_Error result;
1104: if (characterDataHandler)
1105: characterDataHandler(handlerArg, dataBuf, 0);
1106: else if (defaultHandler)
1107: reportDefault(parser, enc, s, next);
1108: result = doCdataSection(parser, enc, &next, end, nextPtr);
1109: if (!next) {
1110: processor = cdataSectionProcessor;
1111: return result;
1112: }
1113: }
1114: break;
1115: case XML_TOK_TRAILING_RSQB:
1116: if (nextPtr) {
1117: *nextPtr = s;
1118: return XML_ERROR_NONE;
1119: }
1120: if (characterDataHandler) {
1121: if (MUST_CONVERT(enc, s)) {
1122: ICHAR *dataPtr = (ICHAR *)dataBuf;
1123: XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)dataBufEnd);
1124: characterDataHandler(handlerArg, dataBuf, dataPtr - (ICHAR *)dataBuf);
1125: }
1126: else
1127: characterDataHandler(handlerArg,
1128: (XML_Char *)s,
1129: (XML_Char *)end - (XML_Char *)s);
1130: }
1131: else if (defaultHandler)
1132: reportDefault(parser, enc, s, end);
1133: if (startTagLevel == 0) {
1134: *eventPP = end;
1135: return XML_ERROR_NO_ELEMENTS;
1136: }
1137: if (tagLevel != startTagLevel) {
1138: *eventPP = end;
1139: return XML_ERROR_ASYNC_ENTITY;
1140: }
1141: return XML_ERROR_NONE;
1142: case XML_TOK_DATA_CHARS:
1143: if (characterDataHandler) {
1144: if (MUST_CONVERT(enc, s)) {
1145: for (;;) {
1146: ICHAR *dataPtr = (ICHAR *)dataBuf;
1147: XmlConvert(enc, &s, next, &dataPtr, (ICHAR *)dataBufEnd);
1148: *eventEndPP = s;
1149: characterDataHandler(handlerArg, dataBuf, dataPtr - (ICHAR *)dataBuf);
1150: if (s == next)
1151: break;
1152: *eventPP = s;
1153: }
1154: }
1155: else
1156: characterDataHandler(handlerArg,
1157: (XML_Char *)s,
1158: (XML_Char *)next - (XML_Char *)s);
1159: }
1160: else if (defaultHandler)
1161: reportDefault(parser, enc, s, next);
1162: break;
1163: case XML_TOK_PI:
1164: if (!reportProcessingInstruction(parser, enc, s, next))
1165: return XML_ERROR_NO_MEMORY;
1166: break;
1167: default:
1168: if (defaultHandler)
1169: reportDefault(parser, enc, s, next);
1170: break;
1171: }
1172: *eventPP = s = next;
1173: }
1174: /* not reached */
1175: }
1176:
1177: /* If tagName is non-null, build a real list of attributes,
1178: otherwise just check the attributes for well-formedness. */
1179:
1180: static enum XML_Error storeAtts(XML_Parser parser, const ENCODING *enc,
1181: const XML_Char *tagName, const char *s)
1182: {
1183: ELEMENT_TYPE *elementType = 0;
1184: int nDefaultAtts = 0;
1185: const XML_Char **appAtts;
1186: int i;
1187: int n;
1188:
1189: if (tagName) {
1190: elementType = (ELEMENT_TYPE *)lookup(&dtd.elementTypes, tagName, 0);
1191: if (elementType)
1192: nDefaultAtts = elementType->nDefaultAtts;
1193: }
1194:
1195: n = XmlGetAttributes(enc, s, attsSize, atts);
1196: if (n + nDefaultAtts > attsSize) {
1197: int oldAttsSize = attsSize;
1198: attsSize = n + nDefaultAtts + INIT_ATTS_SIZE;
1199: atts = realloc((void *)atts, attsSize * sizeof(ATTRIBUTE));
1200: if (!atts)
1201: return XML_ERROR_NO_MEMORY;
1202: if (n > oldAttsSize)
1203: XmlGetAttributes(enc, s, n, atts);
1204: }
1205: appAtts = (const XML_Char **)atts;
1206: for (i = 0; i < n; i++) {
1207: ATTRIBUTE_ID *attId = getAttributeId(parser, enc, atts[i].name,
1208: atts[i].name
1209: + XmlNameLength(enc, atts[i].name));
1210: if (!attId)
1211: return XML_ERROR_NO_MEMORY;
1212: if ((attId->name)[-1]) {
1213: if (enc == encoding)
1214: eventPtr = atts[i].name;
1215: return XML_ERROR_DUPLICATE_ATTRIBUTE;
1216: }
1217: (attId->name)[-1] = 1;
1218: appAtts[i << 1] = attId->name;
1219: if (!atts[i].normalized) {
1220: enum XML_Error result;
1221: int isCdata = 1;
1222:
1223: if (attId->maybeTokenized) {
1224: int j;
1225: for (j = 0; j < nDefaultAtts; j++) {
1226: if (attId == elementType->defaultAtts[j].id) {
1227: isCdata = elementType->defaultAtts[j].isCdata;
1228: break;
1229: }
1230: }
1231: }
1232:
1233: result = storeAttributeValue(parser, enc, isCdata,
1234: atts[i].valuePtr, atts[i].valueEnd,
1235: &tempPool);
1236: if (result)
1237: return result;
1238: if (tagName) {
1239: appAtts[(i << 1) + 1] = poolStart(&tempPool);
1240: poolFinish(&tempPool);
1241: }
1242: else
1243: poolDiscard(&tempPool);
1244: }
1245: else if (tagName) {
1246: appAtts[(i << 1) + 1] = poolStoreString(&tempPool, enc, atts[i].valuePtr, atts[i].valueEnd);
1247: if (appAtts[(i << 1) + 1] == 0)
1248: return XML_ERROR_NO_MEMORY;
1249: poolFinish(&tempPool);
1250: }
1251: }
1252: if (tagName) {
1253: int j;
1254: for (j = 0; j < nDefaultAtts; j++) {
1255: const DEFAULT_ATTRIBUTE *da = elementType->defaultAtts + j;
1256: if (!(da->id->name)[-1] && da->value) {
1257: (da->id->name)[-1] = 1;
1258: appAtts[i << 1] = da->id->name;
1259: appAtts[(i << 1) + 1] = da->value;
1260: i++;
1261: }
1262: }
1263: appAtts[i << 1] = 0;
1264: }
1265: while (i-- > 0)
1266: ((XML_Char *)appAtts[i << 1])[-1] = 0;
1267: return XML_ERROR_NONE;
1268: }
1269:
1270: /* The idea here is to avoid using stack for each CDATA section when
1271: the whole file is parsed with one call. */
1272:
1273: static
1274: enum XML_Error cdataSectionProcessor(XML_Parser parser,
1275: const char *start,
1276: const char *end,
1277: const char **endPtr)
1278: {
1279: enum XML_Error result = doCdataSection(parser, encoding, &start, end, endPtr);
1280: if (start) {
1281: processor = contentProcessor;
1282: return contentProcessor(parser, start, end, endPtr);
1283: }
1284: return result;
1285: }
1286:
1287: /* startPtr gets set to non-null is the section is closed, and to null if
1288: the section is not yet closed. */
1289:
1290: static
1291: enum XML_Error doCdataSection(XML_Parser parser,
1292: const ENCODING *enc,
1293: const char **startPtr,
1294: const char *end,
1295: const char **nextPtr)
1296: {
1297: const char *s = *startPtr;
1298: const char *dummy;
1299: const char **eventPP;
1300: const char **eventEndPP;
1301: if (enc == encoding) {
1302: eventPP = &eventPtr;
1303: *eventPP = s;
1304: eventEndPP = &eventEndPtr;
1305: }
1306: else
1307: eventPP = eventEndPP = &dummy;
1308: *startPtr = 0;
1309: for (;;) {
1310: const char *next;
1311: int tok = XmlCdataSectionTok(enc, s, end, &next);
1312: *eventEndPP = next;
1313: switch (tok) {
1314: case XML_TOK_CDATA_SECT_CLOSE:
1315: if (characterDataHandler)
1316: characterDataHandler(handlerArg, dataBuf, 0);
1317: else if (defaultHandler)
1318: reportDefault(parser, enc, s, next);
1319: *startPtr = next;
1320: return XML_ERROR_NONE;
1321: case XML_TOK_DATA_NEWLINE:
1322: if (characterDataHandler) {
1323: XML_Char c = XML_T('\n');
1324: characterDataHandler(handlerArg, &c, 1);
1325: }
1326: else if (defaultHandler)
1327: reportDefault(parser, enc, s, next);
1328: break;
1329: case XML_TOK_DATA_CHARS:
1330: if (characterDataHandler) {
1331: if (MUST_CONVERT(enc, s)) {
1332: for (;;) {
1333: ICHAR *dataPtr = (ICHAR *)dataBuf;
1334: XmlConvert(enc, &s, next, &dataPtr, (ICHAR *)dataBufEnd);
1335: *eventEndPP = next;
1336: characterDataHandler(handlerArg, dataBuf, dataPtr - (ICHAR *)dataBuf);
1337: if (s == next)
1338: break;
1339: *eventPP = s;
1340: }
1341: }
1342: else
1343: characterDataHandler(handlerArg,
1344: (XML_Char *)s,
1345: (XML_Char *)next - (XML_Char *)s);
1346: }
1347: else if (defaultHandler)
1348: reportDefault(parser, enc, s, next);
1349: break;
1350: case XML_TOK_INVALID:
1351: *eventPP = next;
1352: return XML_ERROR_INVALID_TOKEN;
1353: case XML_TOK_PARTIAL_CHAR:
1354: if (nextPtr) {
1355: *nextPtr = s;
1356: return XML_ERROR_NONE;
1357: }
1358: return XML_ERROR_PARTIAL_CHAR;
1359: case XML_TOK_PARTIAL:
1360: case XML_TOK_NONE:
1361: if (nextPtr) {
1362: *nextPtr = s;
1363: return XML_ERROR_NONE;
1364: }
1365: return XML_ERROR_UNCLOSED_CDATA_SECTION;
1366: default:
1367: abort();
1368: }
1369: *eventPP = s = next;
1370: }
1371: /* not reached */
1372: }
1373:
1374: static enum XML_Error
1375: initializeEncoding(XML_Parser parser)
1376: {
1377: const char *s;
1378: #ifdef XML_UNICODE
1379: char encodingBuf[128];
1380: if (!protocolEncodingName)
1381: s = 0;
1382: else {
1383: int i;
1384: for (i = 0; protocolEncodingName[i]; i++) {
1385: if (i == sizeof(encodingBuf) - 1
1386: || protocolEncodingName[i] >= 0x80
1387: || protocolEncodingName[i] < 0) {
1388: encodingBuf[0] = '\0';
1389: break;
1390: }
1391: encodingBuf[i] = (char)protocolEncodingName[i];
1392: }
1393: encodingBuf[i] = '\0';
1394: s = encodingBuf;
1395: }
1396: #else
1397: s = protocolEncodingName;
1398: #endif
1399: if (XmlInitEncoding(&initEncoding, &encoding, s))
1400: return XML_ERROR_NONE;
1401: return handleUnknownEncoding(parser, protocolEncodingName);
1402: }
1403:
1404: static enum XML_Error
1405: processXmlDecl(XML_Parser parser, int isGeneralTextEntity,
1406: const char *s, const char *next)
1407: {
1408: const char *encodingName = 0;
1409: const ENCODING *newEncoding = 0;
1410: const char *version;
1411: int standalone = -1;
1412: if (!XmlParseXmlDecl(isGeneralTextEntity,
1413: encoding,
1414: s,
1415: next,
1416: &eventPtr,
1417: &version,
1418: &encodingName,
1419: &newEncoding,
1420: &standalone))
1421: return XML_ERROR_SYNTAX;
1422: if (!isGeneralTextEntity && standalone == 1)
1423: dtd.standalone = 1;
1424: if (defaultHandler)
1425: reportDefault(parser, encoding, s, next);
1426: if (!protocolEncodingName) {
1427: if (newEncoding) {
1428: if (newEncoding->minBytesPerChar != encoding->minBytesPerChar) {
1429: eventPtr = encodingName;
1430: return XML_ERROR_INCORRECT_ENCODING;
1431: }
1432: encoding = newEncoding;
1433: }
1434: else if (encodingName) {
1435: enum XML_Error result;
1436: const XML_Char *s = poolStoreString(&tempPool,
1437: encoding,
1438: encodingName,
1439: encodingName
1440: + XmlNameLength(encoding, encodingName));
1441: if (!s)
1442: return XML_ERROR_NO_MEMORY;
1443: result = handleUnknownEncoding(parser, s);
1444: poolDiscard(&tempPool);
1445: if (result == XML_ERROR_UNKNOWN_ENCODING)
1446: eventPtr = encodingName;
1447: return result;
1448: }
1449: }
1450: return XML_ERROR_NONE;
1451: }
1452:
1453: static enum XML_Error
1454: handleUnknownEncoding(XML_Parser parser, const XML_Char *encodingName)
1455: {
1456: if (unknownEncodingHandler) {
1457: XML_Encoding info;
1458: int i;
1459: for (i = 0; i < 256; i++)
1460: info.map[i] = -1;
1461: info.convert = 0;
1462: info.data = 0;
1463: info.release = 0;
1464: if (unknownEncodingHandler(unknownEncodingHandlerData, encodingName, &info)) {
1465: ENCODING *enc;
1466: unknownEncodingMem = malloc(XmlSizeOfUnknownEncoding());
1467: if (!unknownEncodingMem) {
1468: if (info.release)
1469: info.release(info.data);
1470: return XML_ERROR_NO_MEMORY;
1471: }
1472: enc = XmlInitUnknownEncoding(unknownEncodingMem,
1473: info.map,
1474: info.convert,
1475: info.data);
1476: if (enc) {
1477: unknownEncodingData = info.data;
1478: unknownEncodingRelease = info.release;
1479: encoding = enc;
1480: return XML_ERROR_NONE;
1481: }
1482: }
1483: if (info.release)
1484: info.release(info.data);
1485: }
1486: return XML_ERROR_UNKNOWN_ENCODING;
1487: }
1488:
1489: static enum XML_Error
1490: prologInitProcessor(XML_Parser parser,
1491: const char *s,
1492: const char *end,
1493: const char **nextPtr)
1494: {
1495: enum XML_Error result = initializeEncoding(parser);
1496: if (result != XML_ERROR_NONE)
1497: return result;
1498: processor = prologProcessor;
1499: return prologProcessor(parser, s, end, nextPtr);
1500: }
1501:
1502: static enum XML_Error
1503: prologProcessor(XML_Parser parser,
1504: const char *s,
1505: const char *end,
1506: const char **nextPtr)
1507: {
1508: for (;;) {
1509: const char *next;
1510: int tok = XmlPrologTok(encoding, s, end, &next);
1511: if (tok <= 0) {
1512: if (nextPtr != 0 && tok != XML_TOK_INVALID) {
1513: *nextPtr = s;
1514: return XML_ERROR_NONE;
1515: }
1516: switch (tok) {
1517: case XML_TOK_INVALID:
1518: eventPtr = next;
1519: return XML_ERROR_INVALID_TOKEN;
1520: case XML_TOK_NONE:
1521: return XML_ERROR_NO_ELEMENTS;
1522: case XML_TOK_PARTIAL:
1523: return XML_ERROR_UNCLOSED_TOKEN;
1524: case XML_TOK_PARTIAL_CHAR:
1525: return XML_ERROR_PARTIAL_CHAR;
1526: case XML_TOK_TRAILING_CR:
1527: eventPtr = s + encoding->minBytesPerChar;
1528: return XML_ERROR_NO_ELEMENTS;
1529: default:
1530: abort();
1531: }
1532: }
1533: switch (XmlTokenRole(&prologState, tok, s, next, encoding)) {
1534: case XML_ROLE_XML_DECL:
1535: {
1536: enum XML_Error result = processXmlDecl(parser, 0, s, next);
1537: if (result != XML_ERROR_NONE)
1538: return result;
1539: }
1540: break;
1541: case XML_ROLE_DOCTYPE_SYSTEM_ID:
1542: hadExternalDoctype = 1;
1543: break;
1544: case XML_ROLE_DOCTYPE_PUBLIC_ID:
1545: case XML_ROLE_ENTITY_PUBLIC_ID:
1546: if (!XmlIsPublicId(encoding, s, next, &eventPtr))
1547: return XML_ERROR_SYNTAX;
1548: if (declEntity) {
1549: XML_Char *tem = poolStoreString(&dtd.pool,
1550: encoding,
1551: s + encoding->minBytesPerChar,
1552: next - encoding->minBytesPerChar);
1553: if (!tem)
1554: return XML_ERROR_NO_MEMORY;
1555: normalizePublicId(tem);
1556: declEntity->publicId = tem;
1557: poolFinish(&dtd.pool);
1558: }
1559: break;
1560: case XML_ROLE_INSTANCE_START:
1561: processor = contentProcessor;
1562: if (hadExternalDoctype)
1563: dtd.complete = 0;
1564: return contentProcessor(parser, s, end, nextPtr);
1565: case XML_ROLE_ATTLIST_ELEMENT_NAME:
1566: {
1567: const XML_Char *name = poolStoreString(&dtd.pool, encoding, s, next);
1568: if (!name)
1569: return XML_ERROR_NO_MEMORY;
1570: declElementType = (ELEMENT_TYPE *)lookup(&dtd.elementTypes, name, sizeof(ELEMENT_TYPE));
1571: if (!declElementType)
1572: return XML_ERROR_NO_MEMORY;
1573: if (declElementType->name != name)
1574: poolDiscard(&dtd.pool);
1575: else
1576: poolFinish(&dtd.pool);
1577: break;
1578: }
1579: case XML_ROLE_ATTRIBUTE_NAME:
1580: declAttributeId = getAttributeId(parser, encoding, s, next);
1581: if (!declAttributeId)
1582: return XML_ERROR_NO_MEMORY;
1583: declAttributeIsCdata = 0;
1584: break;
1585: case XML_ROLE_ATTRIBUTE_TYPE_CDATA:
1586: declAttributeIsCdata = 1;
1587: break;
1588: case XML_ROLE_IMPLIED_ATTRIBUTE_VALUE:
1589: case XML_ROLE_REQUIRED_ATTRIBUTE_VALUE:
1590: if (dtd.complete
1591: && !defineAttribute(declElementType, declAttributeId, declAttributeIsCdata, 0))
1592: return XML_ERROR_NO_MEMORY;
1593: break;
1594: case XML_ROLE_DEFAULT_ATTRIBUTE_VALUE:
1595: case XML_ROLE_FIXED_ATTRIBUTE_VALUE:
1596: {
1597: const XML_Char *attVal;
1598: enum XML_Error result
1599: = storeAttributeValue(parser, encoding, declAttributeIsCdata,
1600: s + encoding->minBytesPerChar,
1601: next - encoding->minBytesPerChar,
1602: &dtd.pool);
1603: if (result)
1604: return result;
1605: attVal = poolStart(&dtd.pool);
1606: poolFinish(&dtd.pool);
1607: if (dtd.complete
1608: && !defineAttribute(declElementType, declAttributeId, declAttributeIsCdata, attVal))
1609: return XML_ERROR_NO_MEMORY;
1610: break;
1611: }
1612: case XML_ROLE_ENTITY_VALUE:
1613: {
1614: enum XML_Error result = storeEntityValue(parser, s, next);
1615: if (result != XML_ERROR_NONE)
1616: return result;
1617: }
1618: break;
1619: case XML_ROLE_ENTITY_SYSTEM_ID:
1620: if (declEntity) {
1621: declEntity->systemId = poolStoreString(&dtd.pool, encoding,
1622: s + encoding->minBytesPerChar,
1623: next - encoding->minBytesPerChar);
1624: if (!declEntity->systemId)
1625: return XML_ERROR_NO_MEMORY;
1626: declEntity->base = dtd.base;
1627: poolFinish(&dtd.pool);
1628: }
1629: break;
1630: case XML_ROLE_ENTITY_NOTATION_NAME:
1631: if (declEntity) {
1632: declEntity->notation = poolStoreString(&dtd.pool, encoding, s, next);
1633: if (!declEntity->notation)
1634: return XML_ERROR_NO_MEMORY;
1635: poolFinish(&dtd.pool);
1636: if (unparsedEntityDeclHandler) {
1637: eventPtr = eventEndPtr = s;
1638: unparsedEntityDeclHandler(handlerArg,
1639: declEntity->name,
1640: declEntity->base,
1641: declEntity->systemId,
1642: declEntity->publicId,
1643: declEntity->notation);
1644: }
1645:
1646: }
1647: break;
1648: case XML_ROLE_GENERAL_ENTITY_NAME:
1649: {
1650: const XML_Char *name;
1651: if (XmlPredefinedEntityName(encoding, s, next)) {
1652: declEntity = 0;
1653: break;
1654: }
1655: name = poolStoreString(&dtd.pool, encoding, s, next);
1656: if (!name)
1657: return XML_ERROR_NO_MEMORY;
1658: if (dtd.complete) {
1659: declEntity = (ENTITY *)lookup(&dtd.generalEntities, name, sizeof(ENTITY));
1660: if (!declEntity)
1661: return XML_ERROR_NO_MEMORY;
1662: if (declEntity->name != name) {
1663: poolDiscard(&dtd.pool);
1664: declEntity = 0;
1665: }
1666: else
1667: poolFinish(&dtd.pool);
1668: }
1669: else {
1670: poolDiscard(&dtd.pool);
1671: declEntity = 0;
1672: }
1673: }
1674: break;
1675: case XML_ROLE_PARAM_ENTITY_NAME:
1676: declEntity = 0;
1677: break;
1678: case XML_ROLE_NOTATION_NAME:
1679: declNotationPublicId = 0;
1680: declNotationName = 0;
1681: if (notationDeclHandler) {
1682: declNotationName = poolStoreString(&tempPool, encoding, s, next);
1683: if (!declNotationName)
1684: return XML_ERROR_NO_MEMORY;
1685: poolFinish(&tempPool);
1686: }
1687: break;
1688: case XML_ROLE_NOTATION_PUBLIC_ID:
1689: if (!XmlIsPublicId(encoding, s, next, &eventPtr))
1690: return XML_ERROR_SYNTAX;
1691: if (declNotationName) {
1692: XML_Char *tem = poolStoreString(&tempPool,
1693: encoding,
1694: s + encoding->minBytesPerChar,
1695: next - encoding->minBytesPerChar);
1696: if (!tem)
1697: return XML_ERROR_NO_MEMORY;
1698: normalizePublicId(tem);
1699: declNotationPublicId = tem;
1700: poolFinish(&tempPool);
1701: }
1702: break;
1703: case XML_ROLE_NOTATION_SYSTEM_ID:
1704: if (declNotationName && notationDeclHandler) {
1705: const XML_Char *systemId
1706: = poolStoreString(&tempPool, encoding,
1707: s + encoding->minBytesPerChar,
1708: next - encoding->minBytesPerChar);
1709: if (!systemId)
1710: return XML_ERROR_NO_MEMORY;
1711: eventPtr = eventEndPtr = s;
1712: notationDeclHandler(handlerArg,
1713: declNotationName,
1714: dtd.base,
1715: systemId,
1716: declNotationPublicId);
1717: }
1718: poolClear(&tempPool);
1719: break;
1720: case XML_ROLE_NOTATION_NO_SYSTEM_ID:
1721: if (declNotationPublicId && notationDeclHandler) {
1722: eventPtr = eventEndPtr = s;
1723: notationDeclHandler(handlerArg,
1724: declNotationName,
1725: dtd.base,
1726: 0,
1727: declNotationPublicId);
1728: }
1729: poolClear(&tempPool);
1730: break;
1731: case XML_ROLE_ERROR:
1732: eventPtr = s;
1733: switch (tok) {
1734: case XML_TOK_PARAM_ENTITY_REF:
1735: return XML_ERROR_PARAM_ENTITY_REF;
1736: case XML_TOK_XML_DECL:
1737: return XML_ERROR_MISPLACED_XML_PI;
1738: default:
1739: return XML_ERROR_SYNTAX;
1740: }
1741: case XML_ROLE_GROUP_OPEN:
1742: if (prologState.level >= groupSize) {
1743: if (groupSize)
1744: groupConnector = realloc(groupConnector, groupSize *= 2);
1745: else
1746: groupConnector = malloc(groupSize = 32);
1747: if (!groupConnector)
1748: return XML_ERROR_NO_MEMORY;
1749: }
1750: groupConnector[prologState.level] = 0;
1751: break;
1752: case XML_ROLE_GROUP_SEQUENCE:
1753: if (groupConnector[prologState.level] == '|') {
1754: eventPtr = s;
1755: return XML_ERROR_SYNTAX;
1756: }
1757: groupConnector[prologState.level] = ',';
1758: break;
1759: case XML_ROLE_GROUP_CHOICE:
1760: if (groupConnector[prologState.level] == ',') {
1761: eventPtr = s;
1762: return XML_ERROR_SYNTAX;
1763: }
1764: groupConnector[prologState.level] = '|';
1765: break;
1766: case XML_ROLE_PARAM_ENTITY_REF:
1767: dtd.complete = 0;
1768: break;
1769: case XML_ROLE_NONE:
1770: switch (tok) {
1771: case XML_TOK_PI:
1772: eventPtr = s;
1773: eventEndPtr = next;
1774: if (!reportProcessingInstruction(parser, encoding, s, next))
1775: return XML_ERROR_NO_MEMORY;
1776: break;
1777: }
1778: break;
1779: }
1780: if (defaultHandler) {
1781: switch (tok) {
1782: case XML_TOK_PI:
1783: case XML_TOK_BOM:
1784: case XML_TOK_XML_DECL:
1785: break;
1786: default:
1787: eventPtr = s;
1788: eventEndPtr = next;
1789: reportDefault(parser, encoding, s, next);
1790: }
1791: }
1792: s = next;
1793: }
1794: /* not reached */
1795: }
1796:
1797: static
1798: enum XML_Error epilogProcessor(XML_Parser parser,
1799: const char *s,
1800: const char *end,
1801: const char **nextPtr)
1802: {
1803: processor = epilogProcessor;
1804: eventPtr = s;
1805: for (;;) {
1806: const char *next;
1807: int tok = XmlPrologTok(encoding, s, end, &next);
1808: eventEndPtr = next;
1809: switch (tok) {
1810: case XML_TOK_TRAILING_CR:
1811: if (defaultHandler) {
1812: eventEndPtr = end;
1813: reportDefault(parser, encoding, s, end);
1814: }
1815: /* fall through */
1816: case XML_TOK_NONE:
1817: if (nextPtr)
1818: *nextPtr = end;
1819: return XML_ERROR_NONE;
1820: case XML_TOK_PROLOG_S:
1821: case XML_TOK_COMMENT:
1822: if (defaultHandler)
1823: reportDefault(parser, encoding, s, next);
1824: break;
1825: case XML_TOK_PI:
1826: if (!reportProcessingInstruction(parser, encoding, s, next))
1827: return XML_ERROR_NO_MEMORY;
1828: break;
1829: case XML_TOK_INVALID:
1830: eventPtr = next;
1831: return XML_ERROR_INVALID_TOKEN;
1832: case XML_TOK_PARTIAL:
1833: if (nextPtr) {
1834: *nextPtr = s;
1835: return XML_ERROR_NONE;
1836: }
1837: return XML_ERROR_UNCLOSED_TOKEN;
1838: case XML_TOK_PARTIAL_CHAR:
1839: if (nextPtr) {
1840: *nextPtr = s;
1841: return XML_ERROR_NONE;
1842: }
1843: return XML_ERROR_PARTIAL_CHAR;
1844: default:
1845: return XML_ERROR_JUNK_AFTER_DOC_ELEMENT;
1846: }
1847: eventPtr = s = next;
1848: }
1849: }
1850:
1851: static
1852: enum XML_Error errorProcessor(XML_Parser parser,
1853: const char *s,
1854: const char *end,
1855: const char **nextPtr)
1856: {
1857: return errorCode;
1858: }
1859:
1860: static enum XML_Error
1861: storeAttributeValue(XML_Parser parser, const ENCODING *enc, int isCdata,
1862: const char *ptr, const char *end,
1863: STRING_POOL *pool)
1864: {
1865: enum XML_Error result = appendAttributeValue(parser, enc, isCdata, ptr, end, pool);
1866: if (result)
1867: return result;
1868: if (!isCdata && poolLength(pool) && poolLastChar(pool) == XML_T(' '))
1869: poolChop(pool);
1870: if (!poolAppendChar(pool, XML_T('\0')))
1871: return XML_ERROR_NO_MEMORY;
1872: return XML_ERROR_NONE;
1873: }
1874:
1875: static enum XML_Error
1876: appendAttributeValue(XML_Parser parser, const ENCODING *enc, int isCdata,
1877: const char *ptr, const char *end,
1878: STRING_POOL *pool)
1879: {
1880: const ENCODING *internalEnc = XmlGetInternalEncoding();
1881: for (;;) {
1882: const char *next;
1883: int tok = XmlAttributeValueTok(enc, ptr, end, &next);
1884: switch (tok) {
1885: case XML_TOK_NONE:
1886: return XML_ERROR_NONE;
1887: case XML_TOK_INVALID:
1888: if (enc == encoding)
1889: eventPtr = next;
1890: return XML_ERROR_INVALID_TOKEN;
1891: case XML_TOK_PARTIAL:
1892: if (enc == encoding)
1893: eventPtr = ptr;
1894: return XML_ERROR_INVALID_TOKEN;
1895: case XML_TOK_CHAR_REF:
1896: {
1897: XML_Char buf[XML_ENCODE_MAX];
1898: int i;
1899: int n = XmlCharRefNumber(enc, ptr);
1900: if (n < 0) {
1901: if (enc == encoding)
1902: eventPtr = ptr;
1903: return XML_ERROR_BAD_CHAR_REF;
1904: }
1905: if (!isCdata
1906: && n == 0x20 /* space */
1907: && (poolLength(pool) == 0 || poolLastChar(pool) == XML_T(' ')))
1908: break;
1909: n = XmlEncode(n, (ICHAR *)buf);
1910: if (!n) {
1911: if (enc == encoding)
1912: eventPtr = ptr;
1913: return XML_ERROR_BAD_CHAR_REF;
1914: }
1915: for (i = 0; i < n; i++) {
1916: if (!poolAppendChar(pool, buf[i]))
1917: return XML_ERROR_NO_MEMORY;
1918: }
1919: }
1920: break;
1921: case XML_TOK_DATA_CHARS:
1922: if (!poolAppend(pool, enc, ptr, next))
1923: return XML_ERROR_NO_MEMORY;
1924: break;
1925: break;
1926: case XML_TOK_TRAILING_CR:
1927: next = ptr + enc->minBytesPerChar;
1928: /* fall through */
1929: case XML_TOK_ATTRIBUTE_VALUE_S:
1930: case XML_TOK_DATA_NEWLINE:
1931: if (!isCdata && (poolLength(pool) == 0 || poolLastChar(pool) == XML_T(' ')))
1932: break;
1933: if (!poolAppendChar(pool, XML_T(' ')))
1934: return XML_ERROR_NO_MEMORY;
1935: break;
1936: case XML_TOK_ENTITY_REF:
1937: {
1938: const XML_Char *name;
1939: ENTITY *entity;
1940: XML_Char ch = XmlPredefinedEntityName(enc,
1941: ptr + enc->minBytesPerChar,
1942: next - enc->minBytesPerChar);
1943: if (ch) {
1944: if (!poolAppendChar(pool, ch))
1945: return XML_ERROR_NO_MEMORY;
1946: break;
1947: }
1948: name = poolStoreString(&temp2Pool, enc,
1949: ptr + enc->minBytesPerChar,
1950: next - enc->minBytesPerChar);
1951: if (!name)
1952: return XML_ERROR_NO_MEMORY;
1953: entity = (ENTITY *)lookup(&dtd.generalEntities, name, 0);
1954: poolDiscard(&temp2Pool);
1955: if (!entity) {
1956: if (dtd.complete) {
1957: if (enc == encoding)
1958: eventPtr = ptr;
1959: return XML_ERROR_UNDEFINED_ENTITY;
1960: }
1961: }
1962: else if (entity->open) {
1963: if (enc == encoding)
1964: eventPtr = ptr;
1965: return XML_ERROR_RECURSIVE_ENTITY_REF;
1966: }
1967: else if (entity->notation) {
1968: if (enc == encoding)
1969: eventPtr = ptr;
1970: return XML_ERROR_BINARY_ENTITY_REF;
1971: }
1972: else if (!entity->textPtr) {
1973: if (enc == encoding)
1974: eventPtr = ptr;
1975: return XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF;
1976: }
1977: else {
1978: enum XML_Error result;
1979: const XML_Char *textEnd = entity->textPtr + entity->textLen;
1980: entity->open = 1;
1981: result = appendAttributeValue(parser, internalEnc, isCdata, (char *)entity->textPtr, (char *)textEnd, pool);
1982: entity->open = 0;
1983: if (result)
1984: return result;
1985: }
1986: }
1987: break;
1988: default:
1989: abort();
1990: }
1991: ptr = next;
1992: }
1993: /* not reached */
1994: }
1995:
1996: static
1997: enum XML_Error storeEntityValue(XML_Parser parser,
1998: const char *entityTextPtr,
1999: const char *entityTextEnd)
2000: {
2001: const ENCODING *internalEnc = XmlGetInternalEncoding();
2002: STRING_POOL *pool = &(dtd.pool);
2003: entityTextPtr += encoding->minBytesPerChar;
2004: entityTextEnd -= encoding->minBytesPerChar;
2005: for (;;) {
2006: const char *next;
2007: int tok = XmlEntityValueTok(encoding, entityTextPtr, entityTextEnd, &next);
2008: switch (tok) {
2009: case XML_TOK_PARAM_ENTITY_REF:
2010: eventPtr = entityTextPtr;
2011: return XML_ERROR_SYNTAX;
2012: case XML_TOK_NONE:
2013: if (declEntity) {
2014: declEntity->textPtr = pool->start;
2015: declEntity->textLen = pool->ptr - pool->start;
2016: poolFinish(pool);
2017: }
2018: else
2019: poolDiscard(pool);
2020: return XML_ERROR_NONE;
2021: case XML_TOK_ENTITY_REF:
2022: case XML_TOK_DATA_CHARS:
2023: if (!poolAppend(pool, encoding, entityTextPtr, next))
2024: return XML_ERROR_NO_MEMORY;
2025: break;
2026: case XML_TOK_TRAILING_CR:
2027: next = entityTextPtr + encoding->minBytesPerChar;
2028: /* fall through */
2029: case XML_TOK_DATA_NEWLINE:
2030: if (pool->end == pool->ptr && !poolGrow(pool))
2031: return XML_ERROR_NO_MEMORY;
2032: *(pool->ptr)++ = XML_T('\n');
2033: break;
2034: case XML_TOK_CHAR_REF:
2035: {
2036: XML_Char buf[XML_ENCODE_MAX];
2037: int i;
2038: int n = XmlCharRefNumber(encoding, entityTextPtr);
2039: if (n < 0) {
2040: eventPtr = entityTextPtr;
2041: return XML_ERROR_BAD_CHAR_REF;
2042: }
2043: n = XmlEncode(n, (ICHAR *)buf);
2044: if (!n) {
2045: eventPtr = entityTextPtr;
2046: return XML_ERROR_BAD_CHAR_REF;
2047: }
2048: for (i = 0; i < n; i++) {
2049: if (pool->end == pool->ptr && !poolGrow(pool))
2050: return XML_ERROR_NO_MEMORY;
2051: *(pool->ptr)++ = buf[i];
2052: }
2053: }
2054: break;
2055: case XML_TOK_PARTIAL:
2056: eventPtr = entityTextPtr;
2057: return XML_ERROR_INVALID_TOKEN;
2058: case XML_TOK_INVALID:
2059: eventPtr = next;
2060: return XML_ERROR_INVALID_TOKEN;
2061: default:
2062: abort();
2063: }
2064: entityTextPtr = next;
2065: }
2066: /* not reached */
2067: }
2068:
2069: static void
2070: normalizeLines(XML_Char *s)
2071: {
2072: XML_Char *p;
2073: for (;; s++) {
2074: if (*s == XML_T('\0'))
2075: return;
2076: if (*s == XML_T('\r'))
2077: break;
2078: }
2079: p = s;
2080: do {
2081: if (*s == XML_T('\r')) {
2082: *p++ = XML_T('\n');
2083: if (*++s == XML_T('\n'))
2084: s++;
2085: }
2086: else
2087: *p++ = *s++;
2088: } while (*s);
2089: *p = XML_T('\0');
2090: }
2091:
2092: static int
2093: reportProcessingInstruction(XML_Parser parser, const ENCODING *enc, const char *start, const char *end)
2094: {
2095: const XML_Char *target;
2096: XML_Char *data;
2097: const char *tem;
2098: if (!processingInstructionHandler) {
2099: if (defaultHandler)
2100: reportDefault(parser, enc, start, end);
2101: return 1;
2102: }
2103: start += enc->minBytesPerChar * 2;
2104: tem = start + XmlNameLength(enc, start);
2105: target = poolStoreString(&tempPool, enc, start, tem);
2106: if (!target)
2107: return 0;
2108: poolFinish(&tempPool);
2109: data = poolStoreString(&tempPool, enc,
2110: XmlSkipS(enc, tem),
2111: end - enc->minBytesPerChar*2);
2112: if (!data)
2113: return 0;
2114: normalizeLines(data);
2115: processingInstructionHandler(handlerArg, target, data);
2116: poolClear(&tempPool);
2117: return 1;
2118: }
2119:
2120: static void
2121: reportDefault(XML_Parser parser, const ENCODING *enc, const char *s, const char *end)
2122: {
2123: if (MUST_CONVERT(enc, s)) {
2124: for (;;) {
2125: ICHAR *dataPtr = (ICHAR *)dataBuf;
2126: XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)dataBufEnd);
2127: if (s == end) {
2128: defaultHandler(handlerArg, dataBuf, dataPtr - (ICHAR *)dataBuf);
2129: break;
2130: }
2131: if (enc == encoding) {
2132: eventEndPtr = s;
2133: defaultHandler(handlerArg, dataBuf, dataPtr - (ICHAR *)dataBuf);
2134: eventPtr = s;
2135: }
2136: else
2137: defaultHandler(handlerArg, dataBuf, dataPtr - (ICHAR *)dataBuf);
2138: }
2139: }
2140: else
2141: defaultHandler(handlerArg, (XML_Char *)s, (XML_Char *)end - (XML_Char *)s);
2142: }
2143:
2144:
2145: static int
2146: defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *attId, int isCdata, const XML_Char *value)
2147: {
2148: DEFAULT_ATTRIBUTE *att;
2149: if (type->nDefaultAtts == type->allocDefaultAtts) {
2150: if (type->allocDefaultAtts == 0) {
2151: type->allocDefaultAtts = 8;
2152: type->defaultAtts = malloc(type->allocDefaultAtts*sizeof(DEFAULT_ATTRIBUTE));
2153: }
2154: else {
2155: type->allocDefaultAtts *= 2;
2156: type->defaultAtts = realloc(type->defaultAtts,
2157: type->allocDefaultAtts*sizeof(DEFAULT_ATTRIBUTE));
2158: }
2159: if (!type->defaultAtts)
2160: return 0;
2161: }
2162: att = type->defaultAtts + type->nDefaultAtts;
2163: att->id = attId;
2164: att->value = value;
2165: att->isCdata = isCdata;
2166: if (!isCdata)
2167: attId->maybeTokenized = 1;
2168: type->nDefaultAtts += 1;
2169: return 1;
2170: }
2171:
2172: static ATTRIBUTE_ID *
2173: getAttributeId(XML_Parser parser, const ENCODING *enc, const char *start, const char *end)
2174: {
2175: ATTRIBUTE_ID *id;
2176: const XML_Char *name;
2177: if (!poolAppendChar(&dtd.pool, XML_T('\0')))
2178: return 0;
2179: name = poolStoreString(&dtd.pool, enc, start, end);
2180: if (!name)
2181: return 0;
2182: ++name;
2183: id = (ATTRIBUTE_ID *)lookup(&dtd.attributeIds, name, sizeof(ATTRIBUTE_ID));
2184: if (!id)
2185: return 0;
2186: if (id->name != name)
2187: poolDiscard(&dtd.pool);
2188: else
2189: poolFinish(&dtd.pool);
2190: return id;
2191: }
2192:
2193: static
2194: const XML_Char *getOpenEntityNames(XML_Parser parser)
2195: {
2196: HASH_TABLE_ITER iter;
2197:
2198: hashTableIterInit(&iter, &(dtd.generalEntities));
2199: for (;;) {
2200: const XML_Char *s;
2201: ENTITY *e = (ENTITY *)hashTableIterNext(&iter);
2202: if (!e)
2203: break;
2204: if (!e->open)
2205: continue;
2206: if (poolLength(&tempPool) > 0 && !poolAppendChar(&tempPool, XML_T(' ')))
2207: return 0;
2208: for (s = e->name; *s; s++)
2209: if (!poolAppendChar(&tempPool, *s))
2210: return 0;
2211: }
2212:
2213: if (!poolAppendChar(&tempPool, XML_T('\0')))
2214: return 0;
2215: return tempPool.start;
2216: }
2217:
2218: static
2219: int setOpenEntityNames(XML_Parser parser, const XML_Char *openEntityNames)
2220: {
2221: const XML_Char *s = openEntityNames;
2222: while (*openEntityNames != XML_T('\0')) {
2223: if (*s == XML_T(' ') || *s == XML_T('\0')) {
2224: ENTITY *e;
2225: if (!poolAppendChar(&tempPool, XML_T('\0')))
2226: return 0;
2227: e = (ENTITY *)lookup(&dtd.generalEntities, poolStart(&tempPool), 0);
2228: if (e)
2229: e->open = 1;
2230: if (*s == XML_T(' '))
2231: s++;
2232: openEntityNames = s;
2233: poolDiscard(&tempPool);
2234: }
2235: else {
2236: if (!poolAppendChar(&tempPool, *s))
2237: return 0;
2238: s++;
2239: }
2240: }
2241: return 1;
2242: }
2243:
2244:
2245: static
2246: void normalizePublicId(XML_Char *publicId)
2247: {
2248: XML_Char *p = publicId;
2249: XML_Char *s;
2250: for (s = publicId; *s; s++) {
2251: switch (*s) {
2252: case XML_T(' '):
2253: case XML_T('\r'):
2254: case XML_T('\n'):
2255: if (p != publicId && p[-1] != XML_T(' '))
2256: *p++ = XML_T(' ');
2257: break;
2258: default:
2259: *p++ = *s;
2260: }
2261: }
2262: if (p != publicId && p[-1] == XML_T(' '))
2263: --p;
2264: *p = XML_T('\0');
2265: }
2266:
2267: static int dtdInit(DTD *p)
2268: {
2269: poolInit(&(p->pool));
2270: hashTableInit(&(p->generalEntities));
2271: hashTableInit(&(p->elementTypes));
2272: hashTableInit(&(p->attributeIds));
2273: p->complete = 1;
2274: p->standalone = 0;
2275: p->base = 0;
2276: return 1;
2277: }
2278:
2279: static void dtdDestroy(DTD *p)
2280: {
2281: HASH_TABLE_ITER iter;
2282: hashTableIterInit(&iter, &(p->elementTypes));
2283: for (;;) {
2284: ELEMENT_TYPE *e = (ELEMENT_TYPE *)hashTableIterNext(&iter);
2285: if (!e)
2286: break;
2287: if (e->allocDefaultAtts != 0)
2288: free(e->defaultAtts);
2289: }
2290: hashTableDestroy(&(p->generalEntities));
2291: hashTableDestroy(&(p->elementTypes));
2292: hashTableDestroy(&(p->attributeIds));
2293: poolDestroy(&(p->pool));
2294: }
2295:
2296: /* Do a deep copy of the DTD. Return 0 for out of memory; non-zero otherwise.
2297: The new DTD has already been initialized. */
2298:
2299: static int dtdCopy(DTD *newDtd, const DTD *oldDtd)
2300: {
2301: HASH_TABLE_ITER iter;
2302:
2303: if (oldDtd->base) {
2304: const XML_Char *tem = poolCopyString(&(newDtd->pool), oldDtd->base);
2305: if (!tem)
2306: return 0;
2307: newDtd->base = tem;
2308: }
2309:
2310: hashTableIterInit(&iter, &(oldDtd->attributeIds));
2311:
2312: /* Copy the attribute id table. */
2313:
2314: for (;;) {
2315: ATTRIBUTE_ID *newA;
2316: const XML_Char *name;
2317: const ATTRIBUTE_ID *oldA = (ATTRIBUTE_ID *)hashTableIterNext(&iter);
2318:
2319: if (!oldA)
2320: break;
2321: /* Remember to allocate the scratch byte before the name. */
2322: if (!poolAppendChar(&(newDtd->pool), XML_T('\0')))
2323: return 0;
2324: name = poolCopyString(&(newDtd->pool), oldA->name);
2325: if (!name)
2326: return 0;
2327: ++name;
2328: newA = (ATTRIBUTE_ID *)lookup(&(newDtd->attributeIds), name, sizeof(ATTRIBUTE_ID));
2329: if (!newA)
2330: return 0;
2331: newA->maybeTokenized = oldA->maybeTokenized;
2332: }
2333:
2334: /* Copy the element type table. */
2335:
2336: hashTableIterInit(&iter, &(oldDtd->elementTypes));
2337:
2338: for (;;) {
2339: int i;
2340: ELEMENT_TYPE *newE;
2341: const XML_Char *name;
2342: const ELEMENT_TYPE *oldE = (ELEMENT_TYPE *)hashTableIterNext(&iter);
2343: if (!oldE)
2344: break;
2345: name = poolCopyString(&(newDtd->pool), oldE->name);
2346: if (!name)
2347: return 0;
2348: newE = (ELEMENT_TYPE *)lookup(&(newDtd->elementTypes), name, sizeof(ELEMENT_TYPE));
2349: if (!newE)
2350: return 0;
2351: newE->defaultAtts = (DEFAULT_ATTRIBUTE *)malloc(oldE->nDefaultAtts * sizeof(DEFAULT_ATTRIBUTE));
2352: if (!newE->defaultAtts)
2353: return 0;
2354: newE->allocDefaultAtts = newE->nDefaultAtts = oldE->nDefaultAtts;
2355: for (i = 0; i < newE->nDefaultAtts; i++) {
2356: newE->defaultAtts[i].id = (ATTRIBUTE_ID *)lookup(&(newDtd->attributeIds), oldE->defaultAtts[i].id->name, 0);
2357: newE->defaultAtts[i].isCdata = oldE->defaultAtts[i].isCdata;
2358: if (oldE->defaultAtts[i].value) {
2359: newE->defaultAtts[i].value = poolCopyString(&(newDtd->pool), oldE->defaultAtts[i].value);
2360: if (!newE->defaultAtts[i].value)
2361: return 0;
2362: }
2363: else
2364: newE->defaultAtts[i].value = 0;
2365: }
2366: }
2367:
2368: /* Copy the entity table. */
2369:
2370: hashTableIterInit(&iter, &(oldDtd->generalEntities));
2371:
2372: for (;;) {
2373: ENTITY *newE;
2374: const XML_Char *name;
2375: const ENTITY *oldE = (ENTITY *)hashTableIterNext(&iter);
2376: if (!oldE)
2377: break;
2378: name = poolCopyString(&(newDtd->pool), oldE->name);
2379: if (!name)
2380: return 0;
2381: newE = (ENTITY *)lookup(&(newDtd->generalEntities), name, sizeof(ENTITY));
2382: if (!newE)
2383: return 0;
2384: if (oldE->systemId) {
2385: const XML_Char *tem = poolCopyString(&(newDtd->pool), oldE->systemId);
2386: if (!tem)
2387: return 0;
2388: newE->systemId = tem;
2389: if (oldE->base) {
2390: if (oldE->base == oldDtd->base)
2391: newE->base = newDtd->base;
2392: tem = poolCopyString(&(newDtd->pool), oldE->base);
2393: if (!tem)
2394: return 0;
2395: newE->base = tem;
2396: }
2397: }
2398: else {
2399: const XML_Char *tem = poolCopyStringN(&(newDtd->pool), oldE->textPtr, oldE->textLen);
2400: if (!tem)
2401: return 0;
2402: newE->textPtr = tem;
2403: newE->textLen = oldE->textLen;
2404: }
2405: if (oldE->notation) {
2406: const XML_Char *tem = poolCopyString(&(newDtd->pool), oldE->notation);
2407: if (!tem)
2408: return 0;
2409: newE->notation = tem;
2410: }
2411: }
2412:
2413: newDtd->complete = oldDtd->complete;
2414: newDtd->standalone = oldDtd->standalone;
2415: return 1;
2416: }
2417:
2418: static
2419: void poolInit(STRING_POOL *pool)
2420: {
2421: pool->blocks = 0;
2422: pool->freeBlocks = 0;
2423: pool->start = 0;
2424: pool->ptr = 0;
2425: pool->end = 0;
2426: }
2427:
2428: static
2429: void poolClear(STRING_POOL *pool)
2430: {
2431: if (!pool->freeBlocks)
2432: pool->freeBlocks = pool->blocks;
2433: else {
2434: BLOCK *p = pool->blocks;
2435: while (p) {
2436: BLOCK *tem = p->next;
2437: p->next = pool->freeBlocks;
2438: pool->freeBlocks = p;
2439: p = tem;
2440: }
2441: }
2442: pool->blocks = 0;
2443: pool->start = 0;
2444: pool->ptr = 0;
2445: pool->end = 0;
2446: }
2447:
2448: static
2449: void poolDestroy(STRING_POOL *pool)
2450: {
2451: BLOCK *p = pool->blocks;
2452: while (p) {
2453: BLOCK *tem = p->next;
2454: free(p);
2455: p = tem;
2456: }
2457: pool->blocks = 0;
2458: p = pool->freeBlocks;
2459: while (p) {
2460: BLOCK *tem = p->next;
2461: free(p);
2462: p = tem;
2463: }
2464: pool->freeBlocks = 0;
2465: pool->ptr = 0;
2466: pool->start = 0;
2467: pool->end = 0;
2468: }
2469:
2470: static
2471: XML_Char *poolAppend(STRING_POOL *pool, const ENCODING *enc,
2472: const char *ptr, const char *end)
2473: {
2474: if (!pool->ptr && !poolGrow(pool))
2475: return 0;
2476: for (;;) {
2477: XmlConvert(enc, &ptr, end, (ICHAR **)&(pool->ptr), (ICHAR *)pool->end);
2478: if (ptr == end)
2479: break;
2480: if (!poolGrow(pool))
2481: return 0;
2482: }
2483: return pool->start;
2484: }
2485:
2486: static const XML_Char *poolCopyString(STRING_POOL *pool, const XML_Char *s)
2487: {
2488: do {
2489: if (!poolAppendChar(pool, *s))
2490: return 0;
2491: } while (*s++);
2492: s = pool->start;
2493: poolFinish(pool);
2494: return s;
2495: }
2496:
2497: static const XML_Char *poolCopyStringN(STRING_POOL *pool, const XML_Char *s, int n)
2498: {
2499: if (!pool->ptr && !poolGrow(pool))
2500: return 0;
2501: for (; n > 0; --n, s++) {
2502: if (!poolAppendChar(pool, *s))
2503: return 0;
2504:
2505: }
2506: s = pool->start;
2507: poolFinish(pool);
2508: return s;
2509: }
2510:
2511: static
2512: XML_Char *poolStoreString(STRING_POOL *pool, const ENCODING *enc,
2513: const char *ptr, const char *end)
2514: {
2515: if (!poolAppend(pool, enc, ptr, end))
2516: return 0;
2517: if (pool->ptr == pool->end && !poolGrow(pool))
2518: return 0;
2519: *(pool->ptr)++ = 0;
2520: return pool->start;
2521: }
2522:
2523: static
2524: int poolGrow(STRING_POOL *pool)
2525: {
2526: if (pool->freeBlocks) {
2527: if (pool->start == 0) {
2528: pool->blocks = pool->freeBlocks;
2529: pool->freeBlocks = pool->freeBlocks->next;
2530: pool->blocks->next = 0;
2531: pool->start = pool->blocks->s;
2532: pool->end = pool->start + pool->blocks->size;
2533: pool->ptr = pool->start;
2534: return 1;
2535: }
2536: if (pool->end - pool->start < pool->freeBlocks->size) {
2537: BLOCK *tem = pool->freeBlocks->next;
2538: pool->freeBlocks->next = pool->blocks;
2539: pool->blocks = pool->freeBlocks;
2540: pool->freeBlocks = tem;
2541: memcpy(pool->blocks->s, pool->start, (pool->end - pool->start) * sizeof(XML_Char));
2542: pool->ptr = pool->blocks->s + (pool->ptr - pool->start);
2543: pool->start = pool->blocks->s;
2544: pool->end = pool->start + pool->blocks->size;
2545: return 1;
2546: }
2547: }
2548: if (pool->blocks && pool->start == pool->blocks->s) {
2549: int blockSize = (pool->end - pool->start)*2;
2550: pool->blocks = realloc(pool->blocks, offsetof(BLOCK, s) + blockSize * sizeof(XML_Char));
2551: if (!pool->blocks)
2552: return 0;
2553: pool->blocks->size = blockSize;
2554: pool->ptr = pool->blocks->s + (pool->ptr - pool->start);
2555: pool->start = pool->blocks->s;
2556: pool->end = pool->start + blockSize;
2557: }
2558: else {
2559: BLOCK *tem;
2560: int blockSize = pool->end - pool->start;
2561: if (blockSize < INIT_BLOCK_SIZE)
2562: blockSize = INIT_BLOCK_SIZE;
2563: else
2564: blockSize *= 2;
2565: tem = malloc(offsetof(BLOCK, s) + blockSize * sizeof(XML_Char));
2566: if (!tem)
2567: return 0;
2568: tem->size = blockSize;
2569: tem->next = pool->blocks;
2570: pool->blocks = tem;
2571: memcpy(tem->s, pool->start, (pool->ptr - pool->start) * sizeof(XML_Char));
2572: pool->ptr = tem->s + (pool->ptr - pool->start);
2573: pool->start = tem->s;
2574: pool->end = tem->s + blockSize;
2575: }
2576: return 1;
2577: }
Webmaster