Annotation of XML/parser.c, revision 1.115
1.1 veillard 1: /*
1.3 veillard 2: * parser.c : an XML 1.0 non-verifying parser
1.15 veillard 3: *
4: * See Copyright for the status of this software.
5: *
1.60 daniel 6: * Daniel.Veillard@w3.org
1.1 veillard 7: */
8:
1.26 daniel 9: #ifdef WIN32
10: #define HAVE_FCNTL_H
11: #include <io.h>
12: #else
1.9 httpng 13: #include <config.h>
1.26 daniel 14: #endif
1.1 veillard 15: #include <stdio.h>
16: #include <ctype.h>
1.14 veillard 17: #include <string.h> /* for memset() only */
1.50 daniel 18: #include <stdlib.h>
1.9 httpng 19: #include <sys/stat.h>
20: #ifdef HAVE_FCNTL_H
21: #include <fcntl.h>
22: #endif
1.10 httpng 23: #ifdef HAVE_UNISTD_H
24: #include <unistd.h>
25: #endif
1.20 daniel 26: #ifdef HAVE_ZLIB_H
27: #include <zlib.h>
28: #endif
1.1 veillard 29:
1.14 veillard 30: #include "tree.h"
1.1 veillard 31: #include "parser.h"
1.14 veillard 32: #include "entities.h"
1.75 daniel 33: #include "encoding.h"
1.61 daniel 34: #include "valid.h"
1.69 daniel 35: #include "parserInternals.h"
1.91 daniel 36: #include "xmlIO.h"
1.1 veillard 37:
/* Version string of the parser, initialised from the LIBXML_VERSION macro. */
1.86 daniel 38: const char *xmlParserVersion = LIBXML_VERSION;
39:
1.91 daniel 40:
41: /************************************************************************
42: * *
43: * Input handling functions for progressive parsing *
44: * *
45: ************************************************************************/
46:
/*
 * Uncomment to trace the Read/Grow/Shrink calls on stderr and to enable the
 * CHECK_BUFFER() consistency checks.
 */
47: /* #define DEBUG_INPUT */
48:
/*
 * INPUT_CHUNK is the lookahead unit of the input routines below: it is the
 * amount requested by the GROW/NEXT macros, the amount of unread data above
 * which xmlParserInputGrow() does nothing, and the amount of consumed data
 * above which xmlParserInputShrink() starts discarding.
 */
1.110 daniel 49: #define INPUT_CHUNK 250
50: /*
 * we need to keep enough input to show errors in context:
 * xmlParserInputShrink() keeps LINE_LEN already-consumed CHARs in front of
 * the current position when it discards data.
 */
51: #define LINE_LEN 80
1.91 daniel 52:
#ifdef DEBUG_INPUT
#define CHECK_BUFFER(in) check_buffer(in)

/**
 * check_buffer:
 * @in:  an XML parser input
 *
 * Debug-only sanity check (compiled only when DEBUG_INPUT is defined).
 * Verifies that the cached base pointer still matches the underlying buffer
 * content and that the current pointer lies within [base, base + use], then
 * dumps the buffer state on stderr.
 *
 * Pointers are printed with %p and the pointer difference is narrowed
 * explicitly, so the trace is well defined on platforms where pointers
 * and ptrdiff_t are wider than int.
 */
void check_buffer(xmlParserInputPtr in) {
    if (in->base != in->buf->buffer->content) {
        fprintf(stderr, "xmlParserInput: base mismatch problem\n");
    }
    if (in->cur < in->base) {
        fprintf(stderr, "xmlParserInput: cur < base problem\n");
    }
    if (in->cur > in->base + in->buf->buffer->use) {
        fprintf(stderr, "xmlParserInput: cur > base + use problem\n");
    }
    fprintf(stderr,"buffer %p : content %p, cur %d, use %d, size %d\n",
            (void *) in, (void *) in->buf->buffer->content,
            (int) (in->cur - in->base),
            (int) in->buf->buffer->use, (int) in->buf->buffer->size);
}

#else
#define CHECK_BUFFER(in)
#endif
74:
1.91 daniel 75:
76: /**
77: * xmlParserInputRead:
78: * @in: an XML parser input
79: * @len: an indicative size for the lookahead
80: *
81: * This function refresh the input for the parser. It doesn't try to
82: * preserve pointers to the input buffer, and discard already read data
83: *
84: * Returns the number of CHARs read, or -1 in case of error, 0 indicate the
85: * end of this entity
86: */
87: int
88: xmlParserInputRead(xmlParserInputPtr in, int len) {
89: int ret;
90: int used;
91: int index;
92:
93: #ifdef DEBUG_INPUT
94: fprintf(stderr, "Read\n");
95: #endif
96: if (in->buf == NULL) return(-1);
97: if (in->base == NULL) return(-1);
98: if (in->cur == NULL) return(-1);
99: if (in->buf->buffer == NULL) return(-1);
100:
101: CHECK_BUFFER(in);
102:
103: used = in->cur - in->buf->buffer->content;
104: ret = xmlBufferShrink(in->buf->buffer, used);
105: if (ret > 0) {
106: in->cur -= ret;
107: in->consumed += ret;
108: }
109: ret = xmlParserInputBufferRead(in->buf, len);
110: if (in->base != in->buf->buffer->content) {
111: /*
112: * the buffer has been realloced
113: */
114: index = in->cur - in->base;
115: in->base = in->buf->buffer->content;
116: in->cur = &in->buf->buffer->content[index];
117: }
118:
119: CHECK_BUFFER(in);
120:
121: return(ret);
122: }
123:
124: /**
125: * xmlParserInputGrow:
126: * @in: an XML parser input
127: * @len: an indicative size for the lookahead
128: *
129: * This function increase the input for the parser. It tries to
130: * preserve pointers to the input buffer, and keep already read data
131: *
132: * Returns the number of CHARs read, or -1 in case of error, 0 indicate the
133: * end of this entity
134: */
135: int
136: xmlParserInputGrow(xmlParserInputPtr in, int len) {
137: int ret;
138: int index;
139:
140: #ifdef DEBUG_INPUT
141: fprintf(stderr, "Grow\n");
142: #endif
143: if (in->buf == NULL) return(-1);
144: if (in->base == NULL) return(-1);
145: if (in->cur == NULL) return(-1);
146: if (in->buf->buffer == NULL) return(-1);
147:
148: CHECK_BUFFER(in);
149:
150: index = in->cur - in->base;
151: if (in->buf->buffer->use > index + INPUT_CHUNK) {
152:
153: CHECK_BUFFER(in);
154:
155: return(0);
156: }
157: ret = xmlParserInputBufferGrow(in->buf, len);
158: if (in->base != in->buf->buffer->content) {
159: /*
160: * the buffer has been realloced
161: */
162: index = in->cur - in->base;
163: in->base = in->buf->buffer->content;
164: in->cur = &in->buf->buffer->content[index];
165: }
166:
167: CHECK_BUFFER(in);
168:
169: return(ret);
170: }
171:
172: /**
173: * xmlParserInputShrink:
174: * @in: an XML parser input
175: *
176: * This function removes used input for the parser.
177: */
178: void
179: xmlParserInputShrink(xmlParserInputPtr in) {
180: int used;
181: int ret;
182: int index;
183:
184: #ifdef DEBUG_INPUT
185: fprintf(stderr, "Shrink\n");
186: #endif
187: if (in->buf == NULL) return;
188: if (in->base == NULL) return;
189: if (in->cur == NULL) return;
190: if (in->buf->buffer == NULL) return;
191:
192: CHECK_BUFFER(in);
193:
194: used = in->cur - in->buf->buffer->content;
195: if (used > INPUT_CHUNK) {
1.110 daniel 196: ret = xmlBufferShrink(in->buf->buffer, used - LINE_LEN);
1.91 daniel 197: if (ret > 0) {
198: in->cur -= ret;
199: in->consumed += ret;
200: }
201: }
202:
203: CHECK_BUFFER(in);
204:
205: if (in->buf->buffer->use > INPUT_CHUNK) {
206: return;
207: }
208: xmlParserInputBufferRead(in->buf, 2 * INPUT_CHUNK);
209: if (in->base != in->buf->buffer->content) {
210: /*
211: * the buffer has been realloced
212: */
213: index = in->cur - in->base;
214: in->base = in->buf->buffer->content;
215: in->cur = &in->buf->buffer->content[index];
216: }
217:
218: CHECK_BUFFER(in);
219: }
220:
1.45 daniel 221: /************************************************************************
222: * *
223: * Parser stacks related functions and macros *
224: * *
225: ************************************************************************/
1.79 daniel 226:
/*
 * Global defaults copied into each parser context by xmlInitParserCtxt():
 * xmlSubstituteEntitiesDefaultValue initialises ctxt->replaceEntities and
 * xmlDoValidityCheckingDefaultValue initialises ctxt->validate.
 */
227: int xmlSubstituteEntitiesDefaultValue = 0;
1.100 daniel 228: int xmlDoValidityCheckingDefaultValue = 0;
1.79 daniel 229:
1.1 veillard 230: /*
1.40 daniel 231: * Generic function for accessing stacks in the Parser Context
1.1 veillard 232: */
233:
1.31 daniel 234: #define PUSH_AND_POP(type, name) \
1.72 daniel 235: extern int name##Push(xmlParserCtxtPtr ctxt, type value) { \
1.31 daniel 236: if (ctxt->name##Nr >= ctxt->name##Max) { \
237: ctxt->name##Max *= 2; \
1.40 daniel 238: ctxt->name##Tab = (void *) realloc(ctxt->name##Tab, \
239: ctxt->name##Max * sizeof(ctxt->name##Tab[0])); \
240: if (ctxt->name##Tab == NULL) { \
1.31 daniel 241: fprintf(stderr, "realloc failed !\n"); \
242: exit(1); \
243: } \
244: } \
1.40 daniel 245: ctxt->name##Tab[ctxt->name##Nr] = value; \
246: ctxt->name = value; \
247: return(ctxt->name##Nr++); \
1.31 daniel 248: } \
1.72 daniel 249: extern type name##Pop(xmlParserCtxtPtr ctxt) { \
1.69 daniel 250: type ret; \
1.40 daniel 251: if (ctxt->name##Nr <= 0) return(0); \
252: ctxt->name##Nr--; \
1.50 daniel 253: if (ctxt->name##Nr > 0) \
254: ctxt->name = ctxt->name##Tab[ctxt->name##Nr - 1]; \
255: else \
256: ctxt->name = NULL; \
1.69 daniel 257: ret = ctxt->name##Tab[ctxt->name##Nr]; \
258: ctxt->name##Tab[ctxt->name##Nr] = 0; \
259: return(ret); \
1.31 daniel 260: } \
261:
1.40 daniel 262: PUSH_AND_POP(xmlParserInputPtr, input)
1.41 daniel 263: PUSH_AND_POP(xmlNodePtr, node)
1.40 daniel 264:
1.55 daniel 265: /*
266: * Macros for accessing the content. Those should be used only by the parser,
267: * and not exported. They all expect a variable named ctxt (the parser
 * context) to be in scope.
268: *
269: * Dirty macros, i.e. one needs to make assumptions on the context to use them
270: *
271: * CUR_PTR return the current pointer to the CHAR to be parsed.
272: * CUR returns the current CHAR value, i.e. a 8 bit value if compiled
273: * in ISO-Latin or UTF-8, and the current 16 bit value if compiled
274: * in UNICODE mode. This should be used internally by the parser
275: * only to compare to ASCII values otherwise it would break when
276: * running with UTF-8 encoding.
 * When ctxt->token is non-zero, CUR yields that pending token
 * instead of the CHAR under the input pointer.
277: * NXT(n) returns the n'th next CHAR. Same as CUR, it should be used only
278: * to compare on ASCII based substring. It does not look at
 * ctxt->token.
279: * SKIP(n) Skip n CHAR, and must also be used only to skip ASCII defined
280: * strings within the parser. Line and column are not updated.
281: *
1.77 daniel 282: * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
1.55 daniel 283: *
284: * CURRENT Returns the current char value, with the full decoding of
285: * UTF-8 if we are using this mode. It returns an int.
 * NOTE(review): as defined below it simply reads the CHAR under
 * the input pointer, no decoding is visible here.
286: * NEXT Skip to the next character, this does the proper decoding
287: * in UTF-8 mode. It also pop-up unfinished entities on the fly.
 * It first clears a pending ctxt->token if there is one; otherwise
 * it advances the input pointer (keeping line/col up to date),
 * grows or pops the input when its end is reached, and calls the
 * '%' and '&' reference handlers on the new current CHAR.
1.77 daniel 288: * COPY(to) copy one char to *to, increment CUR_PTR and to accordingly
 * NOTE(review): no COPY definition is visible in this part of
 * the file.
 *
 * Buffer handling macros
 *
 * SHRINK calls xmlParserInputShrink() on the current input, then pops
 * the input if it is exhausted and cannot be grown.
 * GROW calls xmlParserInputGrow() on the current input, then pops
 * the input if it is exhausted and cannot be grown.
 * SKIP_BLANKS skips blank CHARs with NEXT and gives the '%' and '&' reference
 * handlers a chance to run, repeating while blanks are found.
 *
 * SHRINK and GROW expand to two statements and NEXT to a braced block, so
 * none of them is safe as the unbraced body of an if/else or a loop.
 * SKIP_BLANKS already ends with its own ';'.
1.55 daniel 289: */
1.45 daniel 290:
1.97 daniel 291: #define CUR (ctxt->token ? ctxt->token : (*ctxt->input->cur))
1.55 daniel 292: #define SKIP(val) ctxt->input->cur += (val)
293: #define NXT(val) ctxt->input->cur[(val)]
294: #define CUR_PTR ctxt->input->cur
1.97 daniel 295: #define SHRINK xmlParserInputShrink(ctxt->input); \
296: if ((*ctxt->input->cur == 0) && \
297: (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \
298: xmlPopInput(ctxt)
299:
300: #define GROW xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \
301: if ((*ctxt->input->cur == 0) && \
302: (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \
303: xmlPopInput(ctxt)
1.55 daniel 304:
305: #define SKIP_BLANKS \
1.101 daniel 306: do { \
307: while (IS_BLANK(CUR)) NEXT; \
308: if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
309: if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt); \
310: } while (IS_BLANK(CUR));
1.55 daniel 311:
312: #define CURRENT (*ctxt->input->cur)
1.91 daniel 313: #define NEXT { \
1.97 daniel 314: if (ctxt->token != 0) ctxt->token = 0; \
315: else { \
1.91 daniel 316: if ((*ctxt->input->cur == 0) && \
317: (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) { \
318: xmlPopInput(ctxt); \
319: } else { \
320: if (*(ctxt->input->cur) == '\n') { \
321: ctxt->input->line++; ctxt->input->col = 1; \
322: } else ctxt->input->col++; \
323: ctxt->input->cur++; \
324: if (*ctxt->input->cur == 0) \
325: xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \
1.96 daniel 326: } \
327: if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
328: if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt); \
1.97 daniel 329: }}
1.91 daniel 330:
1.42 daniel 331:
1.97 daniel 332: /************************************************************************
333: * *
334: * Commodity functions to handle entities processing *
335: * *
336: ************************************************************************/
1.40 daniel 337:
1.50 daniel 338: /**
339: * xmlPopInput:
340: * @ctxt: an XML parser context
341: *
1.40 daniel 342: * xmlPopInput: the current input pointed by ctxt->input came to an end
343: * pop it and return the next char.
1.45 daniel 344: *
1.68 daniel 345: * Returns the current CHAR in the parser context
1.40 daniel 346: */
1.55 daniel 347: CHAR
348: xmlPopInput(xmlParserCtxtPtr ctxt) {
1.40 daniel 349: if (ctxt->inputNr == 1) return(0); /* End of main Input */
1.69 daniel 350: xmlFreeInputStream(inputPop(ctxt));
1.97 daniel 351: if ((*ctxt->input->cur == 0) &&
352: (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
353: return(xmlPopInput(ctxt));
1.40 daniel 354: return(CUR);
355: }
356:
1.50 daniel 357: /**
358: * xmlPushInput:
359: * @ctxt: an XML parser context
360: * @input: an XML parser input fragment (entity, XML fragment ...).
361: *
1.40 daniel 362: * xmlPushInput: switch to a new input stream which is stacked on top
363: * of the previous one(s).
364: */
1.55 daniel 365: void
366: xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
1.40 daniel 367: if (input == NULL) return;
368: inputPush(ctxt, input);
369: }
370:
1.50 daniel 371: /**
1.69 daniel 372: * xmlFreeInputStream:
1.101 daniel 373: * @input: an xmlP arserInputPtr
1.69 daniel 374: *
375: * Free up an input stream.
376: */
377: void
378: xmlFreeInputStream(xmlParserInputPtr input) {
379: if (input == NULL) return;
380:
381: if (input->filename != NULL) free((char *) input->filename);
1.94 daniel 382: if (input->directory != NULL) free((char *) input->directory);
1.69 daniel 383: if ((input->free != NULL) && (input->base != NULL))
384: input->free((char *) input->base);
1.93 veillard 385: if (input->buf != NULL)
386: xmlFreeParserInputBuffer(input->buf);
1.69 daniel 387: memset(input, -1, sizeof(xmlParserInput));
388: free(input);
389: }
390:
391: /**
1.96 daniel 392: * xmlNewInputStream:
393: * @ctxt: an XML parser context
394: *
395: * Create a new input stream structure
396: * Returns the new input stream or NULL
397: */
398: xmlParserInputPtr
399: xmlNewInputStream(xmlParserCtxtPtr ctxt) {
400: xmlParserInputPtr input;
401:
402: input = (xmlParserInputPtr) malloc(sizeof(xmlParserInput));
403: if (input == NULL) {
404: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
405: ctxt->sax->error(ctxt->userData, "malloc: couldn't allocate a new input stream\n");
406: return(NULL);
407: }
408: input->filename = NULL;
409: input->directory = NULL;
410: input->base = NULL;
411: input->cur = NULL;
412: input->buf = NULL;
413: input->line = 1;
414: input->col = 1;
415: input->buf = NULL;
416: input->free = NULL;
417: input->consumed = 0;
418: return(input);
419: }
420:
421: /**
1.50 daniel 422: * xmlNewEntityInputStream:
423: * @ctxt: an XML parser context
424: * @entity: an Entity pointer
425: *
1.82 daniel 426: * Create a new input stream based on an xmlEntityPtr
1.113 daniel 427: *
428: * Returns the new input stream or NULL
1.45 daniel 429: */
1.50 daniel 430: xmlParserInputPtr
431: xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1.45 daniel 432: xmlParserInputPtr input;
433:
434: if (entity == NULL) {
1.55 daniel 435: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 436: ctxt->sax->error(ctxt->userData,
1.45 daniel 437: "internal: xmlNewEntityInputStream entity = NULL\n");
1.50 daniel 438: return(NULL);
1.45 daniel 439: }
440: if (entity->content == NULL) {
1.113 daniel 441: switch (entity->type) {
442: case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
443: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
444: ctxt->sax->error(ctxt->userData,
445: "xmlNewEntityInputStream unparsed entity !\n");
446: break;
447: case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
448: case XML_EXTERNAL_PARAMETER_ENTITY:
449: return(xmlLoadExternalEntity(entity->SystemID,
450: entity->ExternalID, ctxt->input));
451: case XML_INTERNAL_GENERAL_ENTITY:
452: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
453: ctxt->sax->error(ctxt->userData,
454: "Internal entity %s without content !\n", entity->name);
455: break;
456: case XML_INTERNAL_PARAMETER_ENTITY:
457: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
458: ctxt->sax->error(ctxt->userData,
459: "Internal parameter entity %s without content !\n", entity->name);
460: break;
461: case XML_INTERNAL_PREDEFINED_ENTITY:
462: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
463: ctxt->sax->error(ctxt->userData,
464: "Predefined entity %s without content !\n", entity->name);
465: break;
466: }
1.50 daniel 467: return(NULL);
1.45 daniel 468: }
1.96 daniel 469: input = xmlNewInputStream(ctxt);
1.45 daniel 470: if (input == NULL) {
1.50 daniel 471: return(NULL);
1.45 daniel 472: }
473: input->filename = entity->SystemID; /* TODO !!! char <- CHAR */
474: input->base = entity->content;
475: input->cur = entity->content;
1.50 daniel 476: return(input);
1.45 daniel 477: }
478:
1.59 daniel 479: /**
480: * xmlNewStringInputStream:
481: * @ctxt: an XML parser context
1.96 daniel 482: * @buffer: an memory buffer
1.59 daniel 483: *
484: * Create a new input stream based on a memory buffer.
1.68 daniel 485: * Returns the new input stream
1.59 daniel 486: */
487: xmlParserInputPtr
1.96 daniel 488: xmlNewStringInputStream(xmlParserCtxtPtr ctxt, const CHAR *buffer) {
1.59 daniel 489: xmlParserInputPtr input;
490:
1.96 daniel 491: if (buffer == NULL) {
1.59 daniel 492: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 493: ctxt->sax->error(ctxt->userData,
1.59 daniel 494: "internal: xmlNewStringInputStream string = NULL\n");
495: return(NULL);
496: }
1.96 daniel 497: input = xmlNewInputStream(ctxt);
1.59 daniel 498: if (input == NULL) {
499: return(NULL);
500: }
1.96 daniel 501: input->base = buffer;
502: input->cur = buffer;
1.59 daniel 503: return(input);
504: }
505:
1.76 daniel 506: /**
507: * xmlNewInputFromFile:
508: * @ctxt: an XML parser context
509: * @filename: the filename to use as entity
510: *
511: * Create a new input stream based on a file.
512: *
513: * Returns the new input stream or NULL in case of error
514: */
515: xmlParserInputPtr
1.79 daniel 516: xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) {
1.91 daniel 517: xmlParserInputBufferPtr buf;
1.76 daniel 518: xmlParserInputPtr inputStream;
1.111 daniel 519: char *directory = NULL;
1.76 daniel 520:
1.96 daniel 521: if (ctxt == NULL) return(NULL);
1.91 daniel 522: buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
1.94 daniel 523: if (buf == NULL) {
1.106 daniel 524: char name[1024];
525:
1.94 daniel 526: if ((ctxt->input != NULL) && (ctxt->input->directory != NULL)) {
527: #ifdef WIN32
528: sprintf(name, "%s\\%s", ctxt->input->directory, filename);
529: #else
530: sprintf(name, "%s/%s", ctxt->input->directory, filename);
531: #endif
532: buf = xmlParserInputBufferCreateFilename(name,
533: XML_CHAR_ENCODING_NONE);
1.106 daniel 534: if (buf != NULL)
535: directory = strdup(ctxt->input->directory);
536: }
537: if ((buf == NULL) && (ctxt->directory != NULL)) {
538: #ifdef WIN32
539: sprintf(name, "%s\\%s", ctxt->directory, filename);
540: #else
541: sprintf(name, "%s/%s", ctxt->directory, filename);
542: #endif
543: buf = xmlParserInputBufferCreateFilename(name,
544: XML_CHAR_ENCODING_NONE);
545: if (buf != NULL)
546: directory = strdup(ctxt->directory);
547: }
548: if (buf == NULL)
1.94 daniel 549: return(NULL);
550: }
551: if (directory == NULL)
552: directory = xmlParserGetDirectory(filename);
1.76 daniel 553:
1.96 daniel 554: inputStream = xmlNewInputStream(ctxt);
1.76 daniel 555: if (inputStream == NULL) {
1.96 daniel 556: if (directory != NULL) free((char *) directory);
1.76 daniel 557: return(NULL);
558: }
559:
560: inputStream->filename = strdup(filename);
1.94 daniel 561: inputStream->directory = directory;
1.91 daniel 562: inputStream->buf = buf;
1.76 daniel 563:
1.91 daniel 564: inputStream->base = inputStream->buf->buffer->content;
565: inputStream->cur = inputStream->buf->buffer->content;
1.106 daniel 566: if ((ctxt->directory == NULL) && (directory != NULL))
1.110 daniel 567: ctxt->directory = directory;
1.76 daniel 568: return(inputStream);
569: }
570:
1.77 daniel 571: /************************************************************************
572: * *
1.97 daniel 573: * Commodity functions to handle parser contexts *
574: * *
575: ************************************************************************/
576:
577: /**
578: * xmlInitParserCtxt:
579: * @ctxt: an XML parser context
580: *
581: * Initialize a parser context
582: */
583:
584: void
585: xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
586: {
587: xmlSAXHandler *sax;
588:
589: sax = (xmlSAXHandler *) malloc(sizeof(xmlSAXHandler));
590: if (sax == NULL) {
591: fprintf(stderr, "xmlInitParserCtxt: out of memory\n");
592: }
593:
594: /* Allocate the Input stack */
595: ctxt->inputTab = (xmlParserInputPtr *) malloc(5 * sizeof(xmlParserInputPtr));
596: ctxt->inputNr = 0;
597: ctxt->inputMax = 5;
598: ctxt->input = NULL;
599: ctxt->version = NULL;
600: ctxt->encoding = NULL;
601: ctxt->standalone = -1;
1.98 daniel 602: ctxt->hasExternalSubset = 0;
603: ctxt->hasPErefs = 0;
1.97 daniel 604: ctxt->html = 0;
1.98 daniel 605: ctxt->external = 0;
1.97 daniel 606: ctxt->instate = XML_PARSER_PROLOG;
607: ctxt->token = 0;
1.106 daniel 608: ctxt->directory = NULL;
1.97 daniel 609:
610: /* Allocate the Node stack */
611: ctxt->nodeTab = (xmlNodePtr *) malloc(10 * sizeof(xmlNodePtr));
612: ctxt->nodeNr = 0;
613: ctxt->nodeMax = 10;
614: ctxt->node = NULL;
615:
616: if (sax == NULL) ctxt->sax = &xmlDefaultSAXHandler;
617: else {
618: ctxt->sax = sax;
619: memcpy(sax, &xmlDefaultSAXHandler, sizeof(xmlSAXHandler));
620: }
621: ctxt->userData = ctxt;
622: ctxt->myDoc = NULL;
623: ctxt->wellFormed = 1;
1.99 daniel 624: ctxt->valid = 1;
1.100 daniel 625: ctxt->validate = xmlDoValidityCheckingDefaultValue;
626: ctxt->vctxt.userData = ctxt;
627: ctxt->vctxt.error = xmlParserValidityError;
628: ctxt->vctxt.warning = xmlParserValidityWarning;
1.97 daniel 629: ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue;
630: ctxt->record_info = 0;
631: xmlInitNodeInfoSeq(&ctxt->node_seq);
632: }
633:
634: /**
635: * xmlFreeParserCtxt:
636: * @ctxt: an XML parser context
637: *
638: * Free all the memory used by a parser context. However the parsed
639: * document in ctxt->myDoc is not freed.
640: */
641:
642: void
643: xmlFreeParserCtxt(xmlParserCtxtPtr ctxt)
644: {
645: xmlParserInputPtr input;
646:
647: if (ctxt == NULL) return;
648:
649: while ((input = inputPop(ctxt)) != NULL) {
650: xmlFreeInputStream(input);
651: }
652:
653: if (ctxt->nodeTab != NULL) free(ctxt->nodeTab);
654: if (ctxt->inputTab != NULL) free(ctxt->inputTab);
655: if (ctxt->version != NULL) free((char *) ctxt->version);
656: if (ctxt->encoding != NULL) free((char *) ctxt->encoding);
657: if ((ctxt->sax != NULL) && (ctxt->sax != &xmlDefaultSAXHandler))
658: free(ctxt->sax);
1.106 daniel 659: if (ctxt->directory != NULL) free((char *) ctxt->directory);
1.97 daniel 660: free(ctxt);
661: }
662:
663: /**
664: * xmlNewParserCtxt:
665: *
666: * Allocate and initialize a new parser context.
667: *
668: * Returns the xmlParserCtxtPtr or NULL
669: */
670:
671: xmlParserCtxtPtr
672: xmlNewParserCtxt()
673: {
674: xmlParserCtxtPtr ctxt;
675:
676: ctxt = (xmlParserCtxtPtr) malloc(sizeof(xmlParserCtxt));
677: if (ctxt == NULL) {
678: fprintf(stderr, "xmlNewParserCtxt : cannot allocate context\n");
679: perror("malloc");
680: return(NULL);
681: }
682: xmlInitParserCtxt(ctxt);
683: return(ctxt);
684: }
685:
686: /**
687: * xmlClearParserCtxt:
688: * @ctxt: an XML parser context
689: *
690: * Clear (release owned resources) and reinitialize a parser context
691: */
692:
693: void
694: xmlClearParserCtxt(xmlParserCtxtPtr ctxt)
695: {
696: xmlClearNodeInfoSeq(&ctxt->node_seq);
697: xmlInitParserCtxt(ctxt);
698: }
699:
700: /************************************************************************
701: * *
1.77 daniel 702: * Commodity functions to handle entities *
703: * *
704: ************************************************************************/
705:
/*
 * Forward declarations of the reference handlers: they are called from the
 * NEXT and SKIP_BLANKS macros, which are expanded in the functions below.
 */
1.97 daniel 706: void xmlParserHandleReference(xmlParserCtxtPtr ctxt);
707: void xmlParserHandlePEReference(xmlParserCtxtPtr ctxt);
708:
709: /**
710: * xmlParseCharRef:
711: * @ctxt: an XML parser context
712: *
713: * parse Reference declarations
714: *
715: * [66] CharRef ::= '&#' [0-9]+ ';' |
716: * '&#x' [0-9a-fA-F]+ ';'
717: *
1.98 daniel 718: * [ WFC: Legal Character ]
719: * Characters referred to using character references must match the
720: * production for Char.
721: *
1.97 daniel 722: * Returns the value parsed (as an int)
1.77 daniel 723: */
1.97 daniel 724: int
725: xmlParseCharRef(xmlParserCtxtPtr ctxt) {
726: int val = 0;
727:
1.111 daniel 728: if (ctxt->token != 0) {
729: val = ctxt->token;
730: ctxt->token = 0;
731: return(val);
732: }
1.97 daniel 733: if ((CUR == '&') && (NXT(1) == '#') &&
734: (NXT(2) == 'x')) {
735: SKIP(3);
736: while (CUR != ';') {
737: if ((CUR >= '0') && (CUR <= '9'))
738: val = val * 16 + (CUR - '0');
739: else if ((CUR >= 'a') && (CUR <= 'f'))
740: val = val * 16 + (CUR - 'a') + 10;
741: else if ((CUR >= 'A') && (CUR <= 'F'))
742: val = val * 16 + (CUR - 'A') + 10;
743: else {
744: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
745: ctxt->sax->error(ctxt->userData,
746: "xmlParseCharRef: invalid hexadecimal value\n");
747: ctxt->wellFormed = 0;
748: val = 0;
749: break;
750: }
751: NEXT;
752: }
753: if (CUR == ';')
754: NEXT;
755: } else if ((CUR == '&') && (NXT(1) == '#')) {
756: SKIP(2);
757: while (CUR != ';') {
758: if ((CUR >= '0') && (CUR <= '9'))
759: val = val * 10 + (CUR - '0');
760: else {
761: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
762: ctxt->sax->error(ctxt->userData,
763: "xmlParseCharRef: invalid decimal value\n");
764: ctxt->wellFormed = 0;
765: val = 0;
766: break;
767: }
768: NEXT;
769: }
770: if (CUR == ';')
771: NEXT;
772: } else {
773: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.98 daniel 774: ctxt->sax->error(ctxt->userData,
775: "xmlParseCharRef: invalid value\n");
1.97 daniel 776: ctxt->wellFormed = 0;
777: }
1.98 daniel 778:
1.97 daniel 779: /*
1.98 daniel 780: * [ WFC: Legal Character ]
781: * Characters referred to using character references must match the
782: * production for Char.
1.97 daniel 783: */
784: if (IS_CHAR(val)) {
785: return(val);
786: } else {
787: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.98 daniel 788: ctxt->sax->error(ctxt->userData, "CharRef: invalid CHAR value %d\n",
1.97 daniel 789: val);
790: ctxt->wellFormed = 0;
791: }
792: return(0);
1.77 daniel 793: }
794:
1.96 daniel 795: /**
796: * xmlParserHandleReference:
797: * @ctxt: the parser context
798: *
1.97 daniel 799: * [67] Reference ::= EntityRef | CharRef
800: *
1.96 daniel 801: * [68] EntityRef ::= '&' Name ';'
802: *
1.98 daniel 803: * [ WFC: Entity Declared ]
804: * the Name given in the entity reference must match that in an entity
805: * declaration, except that well-formed documents need not declare any
806: * of the following entities: amp, lt, gt, apos, quot.
807: *
808: * [ WFC: Parsed Entity ]
809: * An entity reference must not contain the name of an unparsed entity
810: *
1.97 daniel 811: * [66] CharRef ::= '&#' [0-9]+ ';' |
812: * '&#x' [0-9a-fA-F]+ ';'
813: *
1.96 daniel 814: * A Reference may have been detected in the current input stream,
815: * the handling is done according to
816: * http://www.w3.org/TR/REC-xml#entproc
 *
 * Called when the current CHAR may be '&'. Nothing is done if a token is
 * already pending in ctxt->token or if the current CHAR is not '&'.
 * What happens next depends on ctxt->instate:
 * - a character reference is decoded into ctxt->token only in content
 * and attribute values; it is an error at EOF, in the prolog, the epilog
 * and in DTDs, and is left untouched in the other listed states.
 * - an entity reference is an error at EOF, in the prolog, the epilog and
 * in DTDs, and is left untouched in every other state listed in the
 * second switch, content and attribute values included.
 * The substitution code which follows the second switch is therefore only
 * reached for a state which is not listed there.
817: */
818: void
819: xmlParserHandleReference(xmlParserCtxtPtr ctxt) {
1.97 daniel 820: xmlParserInputPtr input;
821: CHAR *name;
822: xmlEntityPtr ent = NULL;
823:
/* a pending token must be consumed before looking at the input again */
1.111 daniel 824: if (ctxt->token != 0) return;
1.97 daniel 825: if (CUR != '&') return;
826: GROW;
/* Character reference: "&#..." */
827: if ((CUR == '&') && (NXT(1) == '#')) {
828: switch(ctxt->instate) {
1.109 daniel 829: case XML_PARSER_CDATA_SECTION:
830: return;
1.97 daniel 831: case XML_PARSER_COMMENT:
832: return;
833: case XML_PARSER_EOF:
834: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
835: ctxt->sax->error(ctxt->userData, "CharRef at EOF\n");
836: ctxt->wellFormed = 0;
837: return;
838: case XML_PARSER_PROLOG:
839: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
840: ctxt->sax->error(ctxt->userData, "CharRef in prolog!\n");
841: ctxt->wellFormed = 0;
842: return;
843: case XML_PARSER_EPILOG:
844: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
845: ctxt->sax->error(ctxt->userData, "CharRef in epilog!\n");
846: ctxt->wellFormed = 0;
847: return;
848: case XML_PARSER_DTD:
849: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
850: ctxt->sax->error(ctxt->userData,
851: "CharRef are forbiden in DTDs!\n");
852: ctxt->wellFormed = 0;
853: return;
854: case XML_PARSER_ENTITY_DECL:
855: /* we just ignore it there */
856: return;
857: case XML_PARSER_ENTITY_VALUE:
858: /*
859: * NOTE: in the case of entity values, we don't do the
860: * substitution here since we need the literal
861: * entity value to be able to save the internal
862: * subset of the document.
863: * This will be handled by xmlDecodeEntities
864: */
865: return;
866: case XML_PARSER_CONTENT:
867: case XML_PARSER_ATTRIBUTE_VALUE:
868: /* TODO this may not be Ok for UTF-8, multibyte sequence */
/* the decoded character becomes the pending token returned by CUR */
869: ctxt->token = xmlParseCharRef(ctxt);
870: return;
871: }
872: return;
873: }
874:
/* Entity reference: '&' Name ';' */
875: switch(ctxt->instate) {
1.109 daniel 876: case XML_PARSER_CDATA_SECTION:
877: return;
1.97 daniel 878: case XML_PARSER_COMMENT:
879: return;
880: case XML_PARSER_EOF:
881: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
882: ctxt->sax->error(ctxt->userData, "Reference at EOF\n");
883: ctxt->wellFormed = 0;
884: return;
885: case XML_PARSER_PROLOG:
886: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
887: ctxt->sax->error(ctxt->userData, "Reference in prolog!\n");
888: ctxt->wellFormed = 0;
889: return;
890: case XML_PARSER_EPILOG:
891: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
892: ctxt->sax->error(ctxt->userData, "Reference in epilog!\n");
893: ctxt->wellFormed = 0;
894: return;
895: case XML_PARSER_ENTITY_VALUE:
896: /*
897: * NOTE: in the case of entity values, we don't do the
898: * substitution here since we need the literal
899: * entity value to be able to save the internal
900: * subset of the document.
901: * This will be handled by xmlDecodeEntities
902: */
903: return;
904: case XML_PARSER_ATTRIBUTE_VALUE:
905: /*
906: * NOTE: in the case of attributes values, we don't do the
907: * substitution here unless we are in a mode where
908: * the parser is explicitly asked to substitute
909: * entities. The SAX callback is called with values
910: * without entity substitution.
911: * This will then be handled by xmlDecodeEntities
912: */
1.113 daniel 913: return;
1.97 daniel 914: case XML_PARSER_ENTITY_DECL:
915: /*
916: * we just ignore it there
917: * the substitution will be done once the entity is referenced
918: */
919: return;
920: case XML_PARSER_DTD:
921: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
922: ctxt->sax->error(ctxt->userData,
923: "Entity references are forbiden in DTDs!\n");
924: ctxt->wellFormed = 0;
925: return;
926: case XML_PARSER_CONTENT:
1.113 daniel 927: return;
1.97 daniel 928: }
929:
/*
 * Only reached when ctxt->instate matched none of the cases above.
 * Skip the '&', then look at the name: it is only consumed below, by
 * SKIP(), once the closing ';' has been found right after it.
 * On a syntax error '&' is left as the pending token.
 */
930: NEXT;
931: name = xmlScanName(ctxt);
932: if (name == NULL) {
933: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
934: ctxt->sax->error(ctxt->userData, "Entity reference: no name\n");
935: ctxt->wellFormed = 0;
936: ctxt->token = '&';
937: return;
938: }
939: if (NXT(xmlStrlen(name)) != ';') {
940: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
941: ctxt->sax->error(ctxt->userData,
942: "Entity reference: ';' expected\n");
943: ctxt->wellFormed = 0;
944: ctxt->token = '&';
1.111 daniel 945: free(name);
1.97 daniel 946: return;
947: }
/* consume the name and the closing ';' */
948: SKIP(xmlStrlen(name) + 1);
949: if (ctxt->sax != NULL) {
950: if (ctxt->sax->getEntity != NULL)
951: ent = ctxt->sax->getEntity(ctxt->userData, name);
952: }
1.98 daniel 953:
954: /*
955: * [ WFC: Entity Declared ]
956: * the Name given in the entity reference must match that in an entity
957: * declaration, except that well-formed documents need not declare any
958: * of the following entities: amp, lt, gt, apos, quot.
959: */
1.97 daniel 960: if (ent == NULL)
961: ent = xmlGetPredefinedEntity(name);
962: if (ent == NULL) {
963: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
964: ctxt->sax->error(ctxt->userData,
1.98 daniel 965: "Entity reference: entity %s not declared\n",
966: name);
1.97 daniel 967: ctxt->wellFormed = 0;
1.111 daniel 968: free(name);
1.97 daniel 969: return;
970: }
1.98 daniel 971:
972: /*
973: * [ WFC: Parsed Entity ]
974: * An entity reference must not contain the name of an unparsed entity
 * (the error is reported but processing goes on below)
975: */
976: if (ent->type == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
977: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
978: ctxt->sax->error(ctxt->userData,
979: "Entity reference to unparsed entity %s\n", name);
980: ctxt->wellFormed = 0;
981: }
982:
/* predefined entities become a single pending token: the first CHAR of their content */
1.97 daniel 983: if (ent->type == XML_INTERNAL_PREDEFINED_ENTITY) {
984: ctxt->token = ent->content[0];
1.111 daniel 985: free(name);
1.97 daniel 986: return;
987: }
/* any other entity is substituted by stacking a new input on its content; xmlPushInput() ignores a NULL input */
988: input = xmlNewEntityInputStream(ctxt, ent);
989: xmlPushInput(ctxt, input);
1.111 daniel 990: free(name);
1.96 daniel 991: return;
992: }
993:
994: /**
995: * xmlParserHandlePEReference:
996: * @ctxt: the parser context
997: *
998: * [69] PEReference ::= '%' Name ';'
999: *
1.98 daniel 1000: * [ WFC: No Recursion ]
1001: * TODO A parsed entity must not contain a recursive
1002: * reference to itself, either directly or indirectly.
1003: *
1004: * [ WFC: Entity Declared ]
1005: * In a document without any DTD, a document with only an internal DTD
1006: * subset which contains no parameter entity references, or a document
1007: * with "standalone='yes'", ... ... The declaration of a parameter
1008: * entity must precede any reference to it...
1009: *
1010: * [ VC: Entity Declared ]
1011: * In a document with an external subset or external parameter entities
1012: * with "standalone='no'", ... ... The declaration of a parameter entity
1013: * must precede any reference to it...
1014: *
1015: * [ WFC: In DTD ]
1016: * Parameter-entity references may only appear in the DTD.
1017: * NOTE: misleading but this is handled.
1018: *
1019: * A PEReference may have been detected in the current input stream
1.96 daniel 1020: * the handling is done accordingly to
1021: * http://www.w3.org/TR/REC-xml#entproc
1022: * i.e.
1023: * - Included in literal in entity values
1024: * - Included as Paraemeter Entity reference within DTDs
1025: */
1026: void
1027: xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
1028: CHAR *name;
1029: xmlEntityPtr entity = NULL;
1030: xmlParserInputPtr input;
1031:
1.111 daniel 1032: if (ctxt->token != 0) return;
1033: if (CUR != '%') return;
1.96 daniel 1034: switch(ctxt->instate) {
1.109 daniel 1035: case XML_PARSER_CDATA_SECTION:
1036: return;
1.97 daniel 1037: case XML_PARSER_COMMENT:
1038: return;
1.96 daniel 1039: case XML_PARSER_EOF:
1040: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1041: ctxt->sax->error(ctxt->userData, "PEReference at EOF\n");
1042: ctxt->wellFormed = 0;
1043: return;
1044: case XML_PARSER_PROLOG:
1045: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1046: ctxt->sax->error(ctxt->userData, "PEReference in prolog!\n");
1047: ctxt->wellFormed = 0;
1048: return;
1.97 daniel 1049: case XML_PARSER_ENTITY_DECL:
1.96 daniel 1050: case XML_PARSER_CONTENT:
1051: case XML_PARSER_ATTRIBUTE_VALUE:
1052: /* we just ignore it there */
1053: return;
1054: case XML_PARSER_EPILOG:
1055: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.97 daniel 1056: ctxt->sax->error(ctxt->userData, "PEReference in epilog!\n");
1.96 daniel 1057: ctxt->wellFormed = 0;
1058: return;
1.97 daniel 1059: case XML_PARSER_ENTITY_VALUE:
1060: /*
1061: * NOTE: in the case of entity values, we don't do the
1062: * substitution here since we need the litteral
1063: * entity value to be able to save the internal
1064: * subset of the document.
1065: * This will be handled by xmlDecodeEntities
1066: */
1067: return;
1.96 daniel 1068: case XML_PARSER_DTD:
1.98 daniel 1069: /*
1070: * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
1071: * In the internal DTD subset, parameter-entity references
1072: * can occur only where markup declarations can occur, not
1073: * within markup declarations.
1074: * In that case this is handled in xmlParseMarkupDecl
1075: */
1076: if ((ctxt->external == 0) && (ctxt->inputNr == 1))
1077: return;
1.96 daniel 1078: }
1079:
1080: NEXT;
1081: name = xmlParseName(ctxt);
1082: if (name == NULL) {
1083: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1084: ctxt->sax->error(ctxt->userData, "xmlHandlePEReference: no name\n");
1085: ctxt->wellFormed = 0;
1086: } else {
1087: if (CUR == ';') {
1088: NEXT;
1.98 daniel 1089: if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
1090: entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
1.96 daniel 1091: if (entity == NULL) {
1.98 daniel 1092:
1093: /*
1094: * [ WFC: Entity Declared ]
1095: * In a document without any DTD, a document with only an
1096: * internal DTD subset which contains no parameter entity
1097: * references, or a document with "standalone='yes'", ...
1098: * ... The declaration of a parameter entity must precede
1099: * any reference to it...
1100: */
1101: if ((ctxt->standalone == 1) ||
1102: ((ctxt->hasExternalSubset == 0) &&
1103: (ctxt->hasPErefs == 0))) {
1104: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1105: ctxt->sax->error(ctxt->userData,
1106: "PEReference: %%%s; not found\n", name);
1107: ctxt->wellFormed = 0;
1108: } else {
1109: /*
1110: * [ VC: Entity Declared ]
1111: * In a document with an external subset or external
1112: * parameter entities with "standalone='no'", ...
1113: * ... The declaration of a parameter entity must precede
1114: * any reference to it...
1115: */
1116: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
1117: ctxt->sax->warning(ctxt->userData,
1118: "PEReference: %%%s; not found\n", name);
1119: ctxt->valid = 0;
1120: }
1.96 daniel 1121: } else {
1122: if ((entity->type == XML_INTERNAL_PARAMETER_ENTITY) ||
1123: (entity->type == XML_EXTERNAL_PARAMETER_ENTITY)) {
1124: /*
1125: * TODO !!!! handle the extra spaces added before and after
1126: * c.f. http://www.w3.org/TR/REC-xml#as-PE
1127: * TODO !!!! Avoid quote processing in parameters value
1128: * c.f. http://www.w3.org/TR/REC-xml#inliteral
1129: */
1130: input = xmlNewEntityInputStream(ctxt, entity);
1131: xmlPushInput(ctxt, input);
1132: } else {
1133: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1134: ctxt->sax->error(ctxt->userData,
1135: "xmlHandlePEReference: %s is not a parameter entity\n",
1136: name);
1137: ctxt->wellFormed = 0;
1138: }
1139: }
1140: } else {
1141: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1142: ctxt->sax->error(ctxt->userData,
1143: "xmlHandlePEReference: expecting ';'\n");
1144: ctxt->wellFormed = 0;
1145: }
1.97 daniel 1146: free(name);
1147: }
1148: }
1149:
/*
 * Macro used to grow the current buffer.
 *
 * Doubles the companion variable <buffer>_size and reallocates <buffer>
 * to that many CHARs.  On allocation failure the process is terminated
 * with exit(1) after reporting through perror().
 *
 * Wrapped in do { } while (0) so that "growBuffer(buf);" behaves as a
 * single statement, including in an unbraced if/else.
 */
#define growBuffer(buffer) do {						\
    buffer##_size *= 2;							\
    buffer = (CHAR *) realloc(buffer, buffer##_size * sizeof(CHAR));	\
    if (buffer == NULL) {						\
        perror("realloc failed");					\
        exit(1);							\
    }									\
} while (0)
1.77 daniel 1161:
1162: /**
1163: * xmlDecodeEntities:
1164: * @ctxt: the parser context
1165: * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
1166: * @len: the len to decode (in bytes !), -1 for no size limit
1167: * @end: an end marker CHAR, 0 if none
1168: * @end2: an end marker CHAR, 0 if none
1169: * @end3: an end marker CHAR, 0 if none
1170: *
1171: * [67] Reference ::= EntityRef | CharRef
1172: *
1173: * [69] PEReference ::= '%' Name ';'
1174: *
1175: * Returns A newly allocated string with the substitution done. The caller
1176: * must deallocate it !
1177: */
1178: CHAR *
1179: xmlDecodeEntities(xmlParserCtxtPtr ctxt, int len, int what,
1180: CHAR end, CHAR end2, CHAR end3) {
1181: CHAR *buffer = NULL;
1.78 daniel 1182: int buffer_size = 0;
1.77 daniel 1183: CHAR *out = NULL;
1.78 daniel 1184:
1.97 daniel 1185: CHAR *current = NULL;
1.77 daniel 1186: xmlEntityPtr ent;
1.91 daniel 1187: int nbchars = 0;
1.77 daniel 1188: unsigned int max = (unsigned int) len;
1.97 daniel 1189: CHAR cur;
1.77 daniel 1190:
1191: /*
1192: * allocate a translation buffer.
1193: */
1194: buffer_size = 1000;
1195: buffer = (CHAR *) malloc(buffer_size * sizeof(CHAR));
1196: if (buffer == NULL) {
1197: perror("xmlDecodeEntities: malloc failed");
1198: return(NULL);
1199: }
1200: out = buffer;
1201:
1.78 daniel 1202: /*
1203: * Ok loop until we reach one of the ending char or a size limit.
1204: */
1.97 daniel 1205: cur = CUR;
1206: while ((nbchars < max) && (cur != end) &&
1207: (cur != end2) && (cur != end3)) {
1.77 daniel 1208:
1.98 daniel 1209: if (cur == 0) break;
1210: if ((cur == '&') && (NXT(1) == '#')) {
1211: int val = xmlParseCharRef(ctxt);
1212: *out++ = val;
1213: nbchars += 3;
1214: } else if ((cur == '&') && (what & XML_SUBSTITUTE_REF)) {
1215: ent = xmlParseEntityRef(ctxt);
1216: if ((ent != NULL) &&
1217: (ctxt->replaceEntities != 0)) {
1218: current = ent->content;
1219: while (*current != 0) {
1220: *out++ = *current++;
1221: if (out - buffer > buffer_size - 100) {
1222: int index = out - buffer;
1223:
1224: growBuffer(buffer);
1225: out = &buffer[index];
1.77 daniel 1226: }
1227: }
1.98 daniel 1228: nbchars += 3 + xmlStrlen(ent->name);
1229: } else if (ent != NULL) {
1230: int i = xmlStrlen(ent->name);
1231: const CHAR *cur = ent->name;
1232:
1233: nbchars += i + 2;
1234: *out++ = '&';
1235: if (out - buffer > buffer_size - i - 100) {
1236: int index = out - buffer;
1237:
1238: growBuffer(buffer);
1239: out = &buffer[index];
1240: }
1241: for (;i > 0;i--)
1242: *out++ = *cur++;
1243: *out++ = ';';
1.77 daniel 1244: }
1.97 daniel 1245: } else if (cur == '%' && (what & XML_SUBSTITUTE_PEREF)) {
1246: /*
1.77 daniel 1247: * a PEReference induce to switch the entity flow,
1248: * we break here to flush the current set of chars
1249: * parsed if any. We will be called back later.
1.97 daniel 1250: */
1.91 daniel 1251: if (nbchars != 0) break;
1.77 daniel 1252:
1253: xmlParsePEReference(ctxt);
1.79 daniel 1254:
1.97 daniel 1255: /*
1.79 daniel 1256: * Pop-up of finished entities.
1.97 daniel 1257: */
1.79 daniel 1258: while ((CUR == 0) && (ctxt->inputNr > 1))
1259: xmlPopInput(ctxt);
1260:
1.98 daniel 1261: break;
1.77 daniel 1262: } else {
1263: /* TODO: invalid for UTF-8 , use COPY(out); */
1.97 daniel 1264: *out++ = cur;
1.91 daniel 1265: nbchars++;
1.86 daniel 1266: if (out - buffer > buffer_size - 100) {
1267: int index = out - buffer;
1268:
1269: growBuffer(buffer);
1270: out = &buffer[index];
1271: }
1.77 daniel 1272: NEXT;
1273: }
1.97 daniel 1274: cur = CUR;
1.77 daniel 1275: }
1276: *out++ = 0;
1277: return(buffer);
1278: }
1279:
1.1 veillard 1280:
1.28 daniel 1281: /************************************************************************
1282: * *
1.75 daniel 1283: * Commodity functions to handle encodings *
1284: * *
1285: ************************************************************************/
1286:
1287: /**
1288: * xmlSwitchEncoding:
1289: * @ctxt: the parser context
1290: * @len: the len of @cur
1291: *
1292: * change the input functions when discovering the character encoding
1293: * of a given entity.
1294: *
1295: */
1296: void
1297: xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
1298: {
1299: switch (enc) {
1300: case XML_CHAR_ENCODING_ERROR:
1301: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1302: ctxt->sax->error(ctxt->userData, "encoding unknown\n");
1303: ctxt->wellFormed = 0;
1304: break;
1305: case XML_CHAR_ENCODING_NONE:
1306: /* let's assume it's UTF-8 without the XML decl */
1307: return;
1308: case XML_CHAR_ENCODING_UTF8:
1309: /* default encoding, no conversion should be needed */
1310: return;
1311: case XML_CHAR_ENCODING_UTF16LE:
1312: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1313: ctxt->sax->error(ctxt->userData,
1314: "char encoding UTF16 little endian not supported\n");
1315: break;
1316: case XML_CHAR_ENCODING_UTF16BE:
1317: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1318: ctxt->sax->error(ctxt->userData,
1319: "char encoding UTF16 big endian not supported\n");
1320: break;
1321: case XML_CHAR_ENCODING_UCS4LE:
1322: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1323: ctxt->sax->error(ctxt->userData,
1324: "char encoding USC4 little endian not supported\n");
1325: break;
1326: case XML_CHAR_ENCODING_UCS4BE:
1327: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1328: ctxt->sax->error(ctxt->userData,
1329: "char encoding USC4 big endian not supported\n");
1330: break;
1331: case XML_CHAR_ENCODING_EBCDIC:
1332: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1333: ctxt->sax->error(ctxt->userData,
1334: "char encoding EBCDIC not supported\n");
1335: break;
1336: case XML_CHAR_ENCODING_UCS4_2143:
1337: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1338: ctxt->sax->error(ctxt->userData,
1339: "char encoding UCS4 2143 not supported\n");
1340: break;
1341: case XML_CHAR_ENCODING_UCS4_3412:
1342: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1343: ctxt->sax->error(ctxt->userData,
1344: "char encoding UCS4 3412 not supported\n");
1345: break;
1346: case XML_CHAR_ENCODING_UCS2:
1347: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1348: ctxt->sax->error(ctxt->userData,
1349: "char encoding UCS2 not supported\n");
1350: break;
1351: case XML_CHAR_ENCODING_8859_1:
1352: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1353: ctxt->sax->error(ctxt->userData,
1354: "char encoding ISO_8859_1 ISO Latin 1 not supported\n");
1355: break;
1356: case XML_CHAR_ENCODING_8859_2:
1357: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1358: ctxt->sax->error(ctxt->userData,
1359: "char encoding ISO_8859_2 ISO Latin 2 not supported\n");
1360: break;
1361: case XML_CHAR_ENCODING_8859_3:
1362: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1363: ctxt->sax->error(ctxt->userData,
1364: "char encoding ISO_8859_3 not supported\n");
1365: break;
1366: case XML_CHAR_ENCODING_8859_4:
1367: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1368: ctxt->sax->error(ctxt->userData,
1369: "char encoding ISO_8859_4 not supported\n");
1370: break;
1371: case XML_CHAR_ENCODING_8859_5:
1372: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1373: ctxt->sax->error(ctxt->userData,
1374: "char encoding ISO_8859_5 not supported\n");
1375: break;
1376: case XML_CHAR_ENCODING_8859_6:
1377: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1378: ctxt->sax->error(ctxt->userData,
1379: "char encoding ISO_8859_6 not supported\n");
1380: break;
1381: case XML_CHAR_ENCODING_8859_7:
1382: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1383: ctxt->sax->error(ctxt->userData,
1384: "char encoding ISO_8859_7 not supported\n");
1385: break;
1386: case XML_CHAR_ENCODING_8859_8:
1387: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1388: ctxt->sax->error(ctxt->userData,
1389: "char encoding ISO_8859_8 not supported\n");
1390: break;
1391: case XML_CHAR_ENCODING_8859_9:
1392: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1393: ctxt->sax->error(ctxt->userData,
1394: "char encoding ISO_8859_9 not supported\n");
1395: break;
1396: case XML_CHAR_ENCODING_2022_JP:
1397: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1398: ctxt->sax->error(ctxt->userData,
1399: "char encoding ISO-2022-JPnot supported\n");
1400: break;
1401: case XML_CHAR_ENCODING_SHIFT_JIS:
1402: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1403: ctxt->sax->error(ctxt->userData,
1404: "char encoding Shift_JISnot supported\n");
1405: break;
1406: case XML_CHAR_ENCODING_EUC_JP:
1407: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1408: ctxt->sax->error(ctxt->userData,
1409: "char encoding EUC-JPnot supported\n");
1410: break;
1411: }
1412: }
1413:
1414: /************************************************************************
1415: * *
1.28 daniel 1416: * Commodity functions to handle CHARs *
1417: * *
1418: ************************************************************************/
1419:
1.50 daniel 1420: /**
1421: * xmlStrndup:
1422: * @cur: the input CHAR *
1423: * @len: the len of @cur
1424: *
1425: * a strndup for array of CHAR's
1.68 daniel 1426: *
1427: * Returns a new CHAR * or NULL
1.1 veillard 1428: */
1.55 daniel 1429: CHAR *
1430: xmlStrndup(const CHAR *cur, int len) {
1.1 veillard 1431: CHAR *ret = malloc((len + 1) * sizeof(CHAR));
1432:
1433: if (ret == NULL) {
1.86 daniel 1434: fprintf(stderr, "malloc of %ld byte failed\n",
1435: (len + 1) * (long)sizeof(CHAR));
1.1 veillard 1436: return(NULL);
1437: }
1438: memcpy(ret, cur, len * sizeof(CHAR));
1439: ret[len] = 0;
1440: return(ret);
1441: }
1442:
1.50 daniel 1443: /**
1444: * xmlStrdup:
1445: * @cur: the input CHAR *
1446: *
1447: * a strdup for array of CHAR's
1.68 daniel 1448: *
1449: * Returns a new CHAR * or NULL
1.1 veillard 1450: */
1.55 daniel 1451: CHAR *
1452: xmlStrdup(const CHAR *cur) {
1.6 httpng 1453: const CHAR *p = cur;
1.1 veillard 1454:
1455: while (IS_CHAR(*p)) p++;
1456: return(xmlStrndup(cur, p - cur));
1457: }
1458:
1.50 daniel 1459: /**
1460: * xmlCharStrndup:
1461: * @cur: the input char *
1462: * @len: the len of @cur
1463: *
1464: * a strndup for char's to CHAR's
1.68 daniel 1465: *
1466: * Returns a new CHAR * or NULL
1.45 daniel 1467: */
1468:
1.55 daniel 1469: CHAR *
1470: xmlCharStrndup(const char *cur, int len) {
1.45 daniel 1471: int i;
1472: CHAR *ret = malloc((len + 1) * sizeof(CHAR));
1473:
1474: if (ret == NULL) {
1.86 daniel 1475: fprintf(stderr, "malloc of %ld byte failed\n",
1476: (len + 1) * (long)sizeof(CHAR));
1.45 daniel 1477: return(NULL);
1478: }
1479: for (i = 0;i < len;i++)
1480: ret[i] = (CHAR) cur[i];
1481: ret[len] = 0;
1482: return(ret);
1483: }
1484:
1.50 daniel 1485: /**
1486: * xmlCharStrdup:
1487: * @cur: the input char *
1488: * @len: the len of @cur
1489: *
1490: * a strdup for char's to CHAR's
1.68 daniel 1491: *
1492: * Returns a new CHAR * or NULL
1.45 daniel 1493: */
1494:
1.55 daniel 1495: CHAR *
1496: xmlCharStrdup(const char *cur) {
1.45 daniel 1497: const char *p = cur;
1498:
1499: while (*p != '\0') p++;
1500: return(xmlCharStrndup(cur, p - cur));
1501: }
1502:
1.50 daniel 1503: /**
1504: * xmlStrcmp:
1505: * @str1: the first CHAR *
1506: * @str2: the second CHAR *
1507: *
1508: * a strcmp for CHAR's
1.68 daniel 1509: *
1510: * Returns the integer result of the comparison
1.14 veillard 1511: */
1512:
1.55 daniel 1513: int
1514: xmlStrcmp(const CHAR *str1, const CHAR *str2) {
1.14 veillard 1515: register int tmp;
1516:
1517: do {
1518: tmp = *str1++ - *str2++;
1519: if (tmp != 0) return(tmp);
1520: } while ((*str1 != 0) && (*str2 != 0));
1521: return (*str1 - *str2);
1522: }
1523:
1.50 daniel 1524: /**
1525: * xmlStrncmp:
1526: * @str1: the first CHAR *
1527: * @str2: the second CHAR *
1528: * @len: the max comparison length
1529: *
1530: * a strncmp for CHAR's
1.68 daniel 1531: *
1532: * Returns the integer result of the comparison
1.14 veillard 1533: */
1534:
1.55 daniel 1535: int
1536: xmlStrncmp(const CHAR *str1, const CHAR *str2, int len) {
1.14 veillard 1537: register int tmp;
1538:
1539: if (len <= 0) return(0);
1540: do {
1541: tmp = *str1++ - *str2++;
1542: if (tmp != 0) return(tmp);
1543: len--;
1544: if (len <= 0) return(0);
1545: } while ((*str1 != 0) && (*str2 != 0));
1546: return (*str1 - *str2);
1547: }
1548:
1.50 daniel 1549: /**
1550: * xmlStrchr:
1551: * @str: the CHAR * array
1552: * @val: the CHAR to search
1553: *
1554: * a strchr for CHAR's
1.68 daniel 1555: *
1556: * Returns the CHAR * for the first occurence or NULL.
1.14 veillard 1557: */
1558:
1.89 daniel 1559: const CHAR *
1.55 daniel 1560: xmlStrchr(const CHAR *str, CHAR val) {
1.14 veillard 1561: while (*str != 0) {
1562: if (*str == val) return((CHAR *) str);
1563: str++;
1564: }
1565: return(NULL);
1.89 daniel 1566: }
1567:
1568: /**
1569: * xmlStrstr:
1570: * @str: the CHAR * array (haystack)
1571: * @val: the CHAR to search (needle)
1572: *
1573: * a strstr for CHAR's
1574: *
1575: * Returns the CHAR * for the first occurence or NULL.
1576: */
1577:
1578: const CHAR *
1579: xmlStrstr(const CHAR *str, CHAR *val) {
1580: int n;
1581:
1582: if (str == NULL) return(NULL);
1583: if (val == NULL) return(NULL);
1584: n = xmlStrlen(val);
1585:
1586: if (n == 0) return(str);
1587: while (*str != 0) {
1588: if (*str == *val) {
1589: if (!xmlStrncmp(str, val, n)) return((const CHAR *) str);
1590: }
1591: str++;
1592: }
1593: return(NULL);
1594: }
1595:
1596: /**
1597: * xmlStrsub:
1598: * @str: the CHAR * array (haystack)
1599: * @start: the index of the first char (zero based)
1600: * @len: the length of the substring
1601: *
1602: * Extract a substring of a given string
1603: *
1604: * Returns the CHAR * for the first occurence or NULL.
1605: */
1606:
1607: CHAR *
1608: xmlStrsub(const CHAR *str, int start, int len) {
1609: int i;
1610:
1611: if (str == NULL) return(NULL);
1612: if (start < 0) return(NULL);
1.90 daniel 1613: if (len < 0) return(NULL);
1.89 daniel 1614:
1615: for (i = 0;i < start;i++) {
1616: if (*str == 0) return(NULL);
1617: str++;
1618: }
1619: if (*str == 0) return(NULL);
1620: return(xmlStrndup(str, len));
1.14 veillard 1621: }
1.28 daniel 1622:
1.50 daniel 1623: /**
1624: * xmlStrlen:
1625: * @str: the CHAR * array
1626: *
1627: * lenght of a CHAR's string
1.68 daniel 1628: *
1629: * Returns the number of CHAR contained in the ARRAY.
1.45 daniel 1630: */
1631:
1.55 daniel 1632: int
1633: xmlStrlen(const CHAR *str) {
1.45 daniel 1634: int len = 0;
1635:
1636: if (str == NULL) return(0);
1637: while (*str != 0) {
1638: str++;
1639: len++;
1640: }
1641: return(len);
1642: }
1643:
1.50 daniel 1644: /**
1645: * xmlStrncat:
1.68 daniel 1646: * @cur: the original CHAR * array
1.50 daniel 1647: * @add: the CHAR * array added
1648: * @len: the length of @add
1649: *
1650: * a strncat for array of CHAR's
1.68 daniel 1651: *
1652: * Returns a new CHAR * containing the concatenated string.
1.45 daniel 1653: */
1654:
1.55 daniel 1655: CHAR *
1656: xmlStrncat(CHAR *cur, const CHAR *add, int len) {
1.45 daniel 1657: int size;
1658: CHAR *ret;
1659:
1660: if ((add == NULL) || (len == 0))
1661: return(cur);
1662: if (cur == NULL)
1663: return(xmlStrndup(add, len));
1664:
1665: size = xmlStrlen(cur);
1666: ret = realloc(cur, (size + len + 1) * sizeof(CHAR));
1667: if (ret == NULL) {
1.86 daniel 1668: fprintf(stderr, "xmlStrncat: realloc of %ld byte failed\n",
1669: (size + len + 1) * (long)sizeof(CHAR));
1.45 daniel 1670: return(cur);
1671: }
1672: memcpy(&ret[size], add, len * sizeof(CHAR));
1673: ret[size + len] = 0;
1674: return(ret);
1675: }
1676:
1.50 daniel 1677: /**
1678: * xmlStrcat:
1.68 daniel 1679: * @cur: the original CHAR * array
1.50 daniel 1680: * @add: the CHAR * array added
1681: *
1682: * a strcat for array of CHAR's
1.68 daniel 1683: *
1684: * Returns a new CHAR * containing the concatenated string.
1.45 daniel 1685: */
1.55 daniel 1686: CHAR *
1687: xmlStrcat(CHAR *cur, const CHAR *add) {
1.45 daniel 1688: const CHAR *p = add;
1689:
1690: if (add == NULL) return(cur);
1691: if (cur == NULL)
1692: return(xmlStrdup(add));
1693:
1694: while (IS_CHAR(*p)) p++;
1695: return(xmlStrncat(cur, add, p - add));
1696: }
1697:
1698: /************************************************************************
1699: * *
1700: * Commodity functions, cleanup needed ? *
1701: * *
1702: ************************************************************************/
1703:
1.50 daniel 1704: /**
1705: * areBlanks:
1706: * @ctxt: an XML parser context
1707: * @str: a CHAR *
1708: * @len: the size of @str
1709: *
1.45 daniel 1710: * Is this a sequence of blank chars that one can ignore ?
1.50 daniel 1711: *
1.99 daniel 1712: * TODO: Whether white space are significant has to be checked accordingly
1713: * to DTD informations if available
1.68 daniel 1714: *
1715: * Returns 1 if ignorable 0 otherwise.
1.45 daniel 1716: */
1717:
1718: static int areBlanks(xmlParserCtxtPtr ctxt, const CHAR *str, int len) {
1.104 daniel 1719: int i, ret;
1.45 daniel 1720: xmlNodePtr lastChild;
1721:
1722: for (i = 0;i < len;i++)
1723: if (!(IS_BLANK(str[i]))) return(0);
1724:
1725: if (CUR != '<') return(0);
1.72 daniel 1726: if (ctxt->node == NULL) return(0);
1.104 daniel 1727: if (ctxt->myDoc != NULL) {
1728: ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
1729: if (ret == 0) return(1);
1730: if (ret == 1) return(0);
1731: }
1732: /*
1733: * heuristic
1734: */
1.45 daniel 1735: lastChild = xmlGetLastChild(ctxt->node);
1736: if (lastChild == NULL) {
1737: if (ctxt->node->content != NULL) return(0);
1738: } else if (xmlNodeIsText(lastChild))
1739: return(0);
1.104 daniel 1740: else if ((ctxt->node->childs != NULL) &&
1741: (xmlNodeIsText(ctxt->node->childs)))
1742: return(0);
1.45 daniel 1743: return(1);
1744: }
1745:
1.50 daniel 1746: /**
1747: * xmlHandleEntity:
1748: * @ctxt: an XML parser context
1749: * @entity: an XML entity pointer.
1750: *
1751: * Default handling of defined entities, when should we define a new input
1.45 daniel 1752: * stream ? When do we just handle that as a set of chars ?
1.99 daniel 1753: *
1754: * OBSOLETE: to be removed at some point.
1.45 daniel 1755: */
1756:
1.55 daniel 1757: void
1758: xmlHandleEntity(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1.45 daniel 1759: int len;
1.50 daniel 1760: xmlParserInputPtr input;
1.45 daniel 1761:
1762: if (entity->content == NULL) {
1.55 daniel 1763: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 1764: ctxt->sax->error(ctxt->userData, "xmlHandleEntity %s: content == NULL\n",
1.45 daniel 1765: entity->name);
1.59 daniel 1766: ctxt->wellFormed = 0;
1.45 daniel 1767: return;
1768: }
1769: len = xmlStrlen(entity->content);
1770: if (len <= 2) goto handle_as_char;
1771:
1772: /*
1773: * Redefine its content as an input stream.
1774: */
1.50 daniel 1775: input = xmlNewEntityInputStream(ctxt, entity);
1776: xmlPushInput(ctxt, input);
1.45 daniel 1777: return;
1778:
1779: handle_as_char:
1780: /*
1781: * Just handle the content as a set of chars.
1782: */
1.72 daniel 1783: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
1.74 daniel 1784: ctxt->sax->characters(ctxt->userData, entity->content, len);
1.45 daniel 1785:
1786: }
1787:
/*
 * Forward declarations for recursive behaviour.
 */
1.77 daniel 1791: void xmlParsePEReference(xmlParserCtxtPtr ctxt);
1792: void xmlParseReference(xmlParserCtxtPtr ctxt);
1.45 daniel 1793:
1.28 daniel 1794: /************************************************************************
1795: * *
1796: * Extra stuff for namespace support *
1797: * Relates to http://www.w3.org/TR/WD-xml-names *
1798: * *
1799: ************************************************************************/
1800:
1.50 daniel 1801: /**
1802: * xmlNamespaceParseNCName:
1803: * @ctxt: an XML parser context
1804: *
1805: * parse an XML namespace name.
1.28 daniel 1806: *
1807: * [NS 3] NCName ::= (Letter | '_') (NCNameChar)*
1808: *
1809: * [NS 4] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
1810: * CombiningChar | Extender
1.68 daniel 1811: *
1812: * Returns the namespace name or NULL
1.28 daniel 1813: */
1814:
1.55 daniel 1815: CHAR *
1816: xmlNamespaceParseNCName(xmlParserCtxtPtr ctxt) {
1.91 daniel 1817: CHAR buf[XML_MAX_NAMELEN];
1818: int len = 0;
1.28 daniel 1819:
1.40 daniel 1820: if (!IS_LETTER(CUR) && (CUR != '_')) return(NULL);
1.28 daniel 1821:
1.40 daniel 1822: while ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)) ||
1823: (CUR == '.') || (CUR == '-') ||
1824: (CUR == '_') ||
1825: (IS_COMBINING(CUR)) ||
1.91 daniel 1826: (IS_EXTENDER(CUR))) {
1827: buf[len++] = CUR;
1.40 daniel 1828: NEXT;
1.91 daniel 1829: if (len >= XML_MAX_NAMELEN) {
1830: fprintf(stderr,
1831: "xmlNamespaceParseNCName: reached XML_MAX_NAMELEN limit\n");
1832: while ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)) ||
1833: (CUR == '.') || (CUR == '-') ||
1834: (CUR == '_') ||
1835: (IS_COMBINING(CUR)) ||
1836: (IS_EXTENDER(CUR)))
1837: NEXT;
1838: break;
1839: }
1840: }
1841: return(xmlStrndup(buf, len));
1.28 daniel 1842: }
1843:
1.50 daniel 1844: /**
1845: * xmlNamespaceParseQName:
1846: * @ctxt: an XML parser context
1847: * @prefix: a CHAR **
1848: *
1849: * parse an XML qualified name
1.28 daniel 1850: *
1851: * [NS 5] QName ::= (Prefix ':')? LocalPart
1852: *
1853: * [NS 6] Prefix ::= NCName
1854: *
1855: * [NS 7] LocalPart ::= NCName
1.68 daniel 1856: *
1857: * Returns the function returns the local part, and prefix is updated
1.50 daniel 1858: * to get the Prefix if any.
1.28 daniel 1859: */
1860:
1.55 daniel 1861: CHAR *
1862: xmlNamespaceParseQName(xmlParserCtxtPtr ctxt, CHAR **prefix) {
1.28 daniel 1863: CHAR *ret = NULL;
1864:
1865: *prefix = NULL;
1866: ret = xmlNamespaceParseNCName(ctxt);
1.40 daniel 1867: if (CUR == ':') {
1.28 daniel 1868: *prefix = ret;
1.40 daniel 1869: NEXT;
1.28 daniel 1870: ret = xmlNamespaceParseNCName(ctxt);
1871: }
1872:
1873: return(ret);
1874: }
1875:
1.50 daniel 1876: /**
1.72 daniel 1877: * xmlSplitQName:
1878: * @name: an XML parser context
1879: * @prefix: a CHAR **
1880: *
1881: * parse an XML qualified name string
1882: *
1883: * [NS 5] QName ::= (Prefix ':')? LocalPart
1884: *
1885: * [NS 6] Prefix ::= NCName
1886: *
1887: * [NS 7] LocalPart ::= NCName
1888: *
1889: * Returns the function returns the local part, and prefix is updated
1890: * to get the Prefix if any.
1891: */
1892:
1893: CHAR *
1894: xmlSplitQName(const CHAR *name, CHAR **prefix) {
1895: CHAR *ret = NULL;
1896: const CHAR *q;
1897: const CHAR *cur = name;
1898:
1899: *prefix = NULL;
1.113 daniel 1900:
1901: /* xml: prefix is not really a namespace */
1902: if ((cur[0] == 'x') && (cur[1] == 'm') &&
1903: (cur[2] == 'l') && (cur[3] == ':'))
1904: return(xmlStrdup(name));
1905:
1.72 daniel 1906: if (!IS_LETTER(*cur) && (*cur != '_')) return(NULL);
1907: q = cur++;
1908:
1909: while ((IS_LETTER(*cur)) || (IS_DIGIT(*cur)) ||
1910: (*cur == '.') || (*cur == '-') ||
1911: (*cur == '_') ||
1912: (IS_COMBINING(*cur)) ||
1913: (IS_EXTENDER(*cur)))
1914: cur++;
1915:
1916: ret = xmlStrndup(q, cur - q);
1917:
1918: if (*cur == ':') {
1919: cur++;
1920: if (!IS_LETTER(*cur) && (*cur != '_')) return(ret);
1921: *prefix = ret;
1922:
1923: q = cur++;
1924:
1925: while ((IS_LETTER(*cur)) || (IS_DIGIT(*cur)) ||
1926: (*cur == '.') || (*cur == '-') ||
1927: (*cur == '_') ||
1928: (IS_COMBINING(*cur)) ||
1929: (IS_EXTENDER(*cur)))
1930: cur++;
1931:
1932: ret = xmlStrndup(q, cur - q);
1933: }
1934:
1935: return(ret);
1936: }
1937: /**
1.50 daniel 1938: * xmlNamespaceParseNSDef:
1939: * @ctxt: an XML parser context
1940: *
1941: * parse a namespace prefix declaration
1.28 daniel 1942: *
1943: * [NS 1] NSDef ::= PrefixDef Eq SystemLiteral
1944: *
1945: * [NS 2] PrefixDef ::= 'xmlns' (':' NCName)?
1.68 daniel 1946: *
1947: * Returns the namespace name
1.28 daniel 1948: */
1949:
1.55 daniel 1950: CHAR *
1951: xmlNamespaceParseNSDef(xmlParserCtxtPtr ctxt) {
1.28 daniel 1952: CHAR *name = NULL;
1953:
1.40 daniel 1954: if ((CUR == 'x') && (NXT(1) == 'm') &&
1955: (NXT(2) == 'l') && (NXT(3) == 'n') &&
1956: (NXT(4) == 's')) {
1957: SKIP(5);
1958: if (CUR == ':') {
1959: NEXT;
1.28 daniel 1960: name = xmlNamespaceParseNCName(ctxt);
1961: }
1962: }
1.39 daniel 1963: return(name);
1.28 daniel 1964: }
1965:
1.50 daniel 1966: /**
1967: * xmlParseQuotedString:
1968: * @ctxt: an XML parser context
1969: *
1.45 daniel 1970: * [OLD] Parse and return a string between quotes or doublequotes
1.110 daniel 1971: * To be removed at next drop of binary compatibility
1.68 daniel 1972: *
1973: * Returns the string parser or NULL.
1.45 daniel 1974: */
1.55 daniel 1975: CHAR *
1976: xmlParseQuotedString(xmlParserCtxtPtr ctxt) {
1.45 daniel 1977: CHAR *ret = NULL;
1978: const CHAR *q;
1979:
1980: if (CUR == '"') {
1981: NEXT;
1982: q = CUR_PTR;
1983: while (IS_CHAR(CUR) && (CUR != '"')) NEXT;
1.55 daniel 1984: if (CUR != '"') {
1985: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 1986: ctxt->sax->error(ctxt->userData, "String not closed \"%.50s\"\n", q);
1.59 daniel 1987: ctxt->wellFormed = 0;
1.55 daniel 1988: } else {
1.45 daniel 1989: ret = xmlStrndup(q, CUR_PTR - q);
1990: NEXT;
1991: }
1992: } else if (CUR == '\''){
1993: NEXT;
1994: q = CUR_PTR;
1995: while (IS_CHAR(CUR) && (CUR != '\'')) NEXT;
1.55 daniel 1996: if (CUR != '\'') {
1997: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 1998: ctxt->sax->error(ctxt->userData, "String not closed \"%.50s\"\n", q);
1.59 daniel 1999: ctxt->wellFormed = 0;
1.55 daniel 2000: } else {
1.45 daniel 2001: ret = xmlStrndup(q, CUR_PTR - q);
2002: NEXT;
2003: }
2004: }
2005: return(ret);
2006: }
2007:
1.50 daniel 2008: /**
2009: * xmlParseNamespace:
2010: * @ctxt: an XML parser context
2011: *
1.45 daniel 2012: * [OLD] xmlParseNamespace: parse specific PI '<?namespace ...' constructs.
2013: *
2014: * This is what the older xml-name Working Draft specified, a bunch of
2015: * other stuff may still rely on it, so support is still here as
2016: * if ot was declared on the root of the Tree:-(
1.110 daniel 2017: *
2018: * To be removed at next drop of binary compatibility
1.45 daniel 2019: */
2020:
1.55 daniel 2021: void
2022: xmlParseNamespace(xmlParserCtxtPtr ctxt) {
1.45 daniel 2023: CHAR *href = NULL;
2024: CHAR *prefix = NULL;
2025: int garbage = 0;
2026:
2027: /*
2028: * We just skipped "namespace" or "xml:namespace"
2029: */
2030: SKIP_BLANKS;
2031:
2032: while (IS_CHAR(CUR) && (CUR != '>')) {
2033: /*
2034: * We can have "ns" or "prefix" attributes
2035: * Old encoding as 'href' or 'AS' attributes is still supported
2036: */
2037: if ((CUR == 'n') && (NXT(1) == 's')) {
2038: garbage = 0;
2039: SKIP(2);
2040: SKIP_BLANKS;
2041:
2042: if (CUR != '=') continue;
2043: NEXT;
2044: SKIP_BLANKS;
2045:
2046: href = xmlParseQuotedString(ctxt);
2047: SKIP_BLANKS;
2048: } else if ((CUR == 'h') && (NXT(1) == 'r') &&
2049: (NXT(2) == 'e') && (NXT(3) == 'f')) {
2050: garbage = 0;
2051: SKIP(4);
2052: SKIP_BLANKS;
2053:
2054: if (CUR != '=') continue;
2055: NEXT;
2056: SKIP_BLANKS;
2057:
2058: href = xmlParseQuotedString(ctxt);
2059: SKIP_BLANKS;
2060: } else if ((CUR == 'p') && (NXT(1) == 'r') &&
2061: (NXT(2) == 'e') && (NXT(3) == 'f') &&
2062: (NXT(4) == 'i') && (NXT(5) == 'x')) {
2063: garbage = 0;
2064: SKIP(6);
2065: SKIP_BLANKS;
2066:
2067: if (CUR != '=') continue;
2068: NEXT;
2069: SKIP_BLANKS;
2070:
2071: prefix = xmlParseQuotedString(ctxt);
2072: SKIP_BLANKS;
2073: } else if ((CUR == 'A') && (NXT(1) == 'S')) {
2074: garbage = 0;
2075: SKIP(2);
2076: SKIP_BLANKS;
2077:
2078: if (CUR != '=') continue;
2079: NEXT;
2080: SKIP_BLANKS;
2081:
2082: prefix = xmlParseQuotedString(ctxt);
2083: SKIP_BLANKS;
2084: } else if ((CUR == '?') && (NXT(1) == '>')) {
2085: garbage = 0;
1.91 daniel 2086: NEXT;
1.45 daniel 2087: } else {
2088: /*
2089: * Found garbage when parsing the namespace
2090: */
2091: if (!garbage)
1.55 daniel 2092: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2093: ctxt->sax->error(ctxt->userData, "xmlParseNamespace found garbage\n");
1.59 daniel 2094: ctxt->wellFormed = 0;
1.45 daniel 2095: NEXT;
2096: }
2097: }
2098:
2099: MOVETO_ENDTAG(CUR_PTR);
2100: NEXT;
2101:
2102: /*
2103: * Register the DTD.
1.72 daniel 2104: if (href != NULL)
2105: if ((ctxt->sax != NULL) && (ctxt->sax->globalNamespace != NULL))
1.74 daniel 2106: ctxt->sax->globalNamespace(ctxt->userData, href, prefix);
1.45 daniel 2107: */
2108:
2109: if (prefix != NULL) free(prefix);
2110: if (href != NULL) free(href);
2111: }
2112:
1.28 daniel 2113: /************************************************************************
2114: * *
2115: * The parser itself *
2116: * Relates to http://www.w3.org/TR/REC-xml *
2117: * *
2118: ************************************************************************/
1.14 veillard 2119:
1.50 daniel 2120: /**
1.97 daniel 2121: * xmlScanName:
2122: * @ctxt: an XML parser context
2123: *
2124: * Trickery: parse an XML name but without consuming the input flow
2125: * Needed for rollback cases.
2126: *
2127: * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2128: * CombiningChar | Extender
2129: *
2130: * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2131: *
2132: * [6] Names ::= Name (S Name)*
2133: *
2134: * Returns the Name parsed or NULL
2135: */
2136:
2137: CHAR *
2138: xmlScanName(xmlParserCtxtPtr ctxt) {
2139: CHAR buf[XML_MAX_NAMELEN];
2140: int len = 0;
2141:
2142: GROW;
2143: if (!IS_LETTER(CUR) && (CUR != '_') &&
2144: (CUR != ':')) {
2145: return(NULL);
2146: }
2147:
2148: while ((IS_LETTER(NXT(len))) || (IS_DIGIT(NXT(len))) ||
2149: (NXT(len) == '.') || (NXT(len) == '-') ||
2150: (NXT(len) == '_') || (NXT(len) == ':') ||
2151: (IS_COMBINING(NXT(len))) ||
2152: (IS_EXTENDER(NXT(len)))) {
2153: buf[len] = NXT(len);
2154: len++;
2155: if (len >= XML_MAX_NAMELEN) {
2156: fprintf(stderr,
2157: "xmlScanName: reached XML_MAX_NAMELEN limit\n");
2158: while ((IS_LETTER(NXT(len))) || (IS_DIGIT(NXT(len))) ||
2159: (NXT(len) == '.') || (NXT(len) == '-') ||
2160: (NXT(len) == '_') || (NXT(len) == ':') ||
2161: (IS_COMBINING(NXT(len))) ||
2162: (IS_EXTENDER(NXT(len))))
2163: len++;
2164: break;
2165: }
2166: }
2167: return(xmlStrndup(buf, len));
2168: }
2169:
2170: /**
1.50 daniel 2171: * xmlParseName:
2172: * @ctxt: an XML parser context
2173: *
2174: * parse an XML name.
1.22 daniel 2175: *
2176: * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
2177: * CombiningChar | Extender
2178: *
2179: * [5] Name ::= (Letter | '_' | ':') (NameChar)*
2180: *
2181: * [6] Names ::= Name (S Name)*
1.68 daniel 2182: *
2183: * Returns the Name parsed or NULL
1.1 veillard 2184: */
2185:
1.55 daniel 2186: CHAR *
2187: xmlParseName(xmlParserCtxtPtr ctxt) {
1.91 daniel 2188: CHAR buf[XML_MAX_NAMELEN];
2189: int len = 0;
1.97 daniel 2190: CHAR cur;
1.1 veillard 2191:
1.91 daniel 2192: GROW;
1.97 daniel 2193: cur = CUR;
2194: if (!IS_LETTER(cur) && (cur != '_') &&
2195: (cur != ':')) {
1.91 daniel 2196: return(NULL);
2197: }
1.40 daniel 2198:
1.97 daniel 2199: while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) ||
2200: (cur == '.') || (cur == '-') ||
2201: (cur == '_') || (cur == ':') ||
2202: (IS_COMBINING(cur)) ||
2203: (IS_EXTENDER(cur))) {
2204: buf[len++] = cur;
1.40 daniel 2205: NEXT;
1.97 daniel 2206: cur = CUR;
1.91 daniel 2207: if (len >= XML_MAX_NAMELEN) {
2208: fprintf(stderr,
2209: "xmlParseName: reached XML_MAX_NAMELEN limit\n");
1.97 daniel 2210: while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) ||
2211: (cur == '.') || (cur == '-') ||
2212: (cur == '_') || (cur == ':') ||
2213: (IS_COMBINING(cur)) ||
2214: (IS_EXTENDER(cur))) {
2215: NEXT;
2216: cur = CUR;
2217: }
1.91 daniel 2218: break;
2219: }
2220: }
2221: return(xmlStrndup(buf, len));
1.22 daniel 2222: }
2223:
1.50 daniel 2224: /**
2225: * xmlParseNmtoken:
2226: * @ctxt: an XML parser context
2227: *
2228: * parse an XML Nmtoken.
1.22 daniel 2229: *
2230: * [7] Nmtoken ::= (NameChar)+
2231: *
2232: * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
1.68 daniel 2233: *
2234: * Returns the Nmtoken parsed or NULL
1.22 daniel 2235: */
2236:
1.55 daniel 2237: CHAR *
2238: xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
1.91 daniel 2239: CHAR buf[XML_MAX_NAMELEN];
2240: int len = 0;
1.22 daniel 2241:
1.91 daniel 2242: GROW;
1.40 daniel 2243: while ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)) ||
2244: (CUR == '.') || (CUR == '-') ||
2245: (CUR == '_') || (CUR == ':') ||
2246: (IS_COMBINING(CUR)) ||
1.91 daniel 2247: (IS_EXTENDER(CUR))) {
2248: buf[len++] = CUR;
1.40 daniel 2249: NEXT;
1.91 daniel 2250: if (len >= XML_MAX_NAMELEN) {
2251: fprintf(stderr,
2252: "xmlParseNmtoken: reached XML_MAX_NAMELEN limit\n");
2253: while ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)) ||
2254: (CUR == '.') || (CUR == '-') ||
2255: (CUR == '_') || (CUR == ':') ||
2256: (IS_COMBINING(CUR)) ||
2257: (IS_EXTENDER(CUR)))
2258: NEXT;
2259: break;
2260: }
2261: }
2262: return(xmlStrndup(buf, len));
1.1 veillard 2263: }
2264:
1.50 daniel 2265: /**
2266: * xmlParseEntityValue:
2267: * @ctxt: an XML parser context
1.78 daniel 2268: * @orig: if non-NULL store a copy of the original entity value
1.50 daniel 2269: *
2270: * parse a value for ENTITY decl.
1.24 daniel 2271: *
2272: * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
2273: * "'" ([^%&'] | PEReference | Reference)* "'"
1.68 daniel 2274: *
1.78 daniel 2275: * Returns the EntityValue parsed with reference substitued or NULL
1.24 daniel 2276: */
2277:
1.55 daniel 2278: CHAR *
1.78 daniel 2279: xmlParseEntityValue(xmlParserCtxtPtr ctxt, CHAR **orig) {
1.77 daniel 2280: CHAR *ret = NULL;
1.78 daniel 2281: const CHAR *org = NULL;
1.79 daniel 2282: const CHAR *tst = NULL;
2283: const CHAR *temp = NULL;
1.98 daniel 2284: xmlParserInputPtr input;
1.24 daniel 2285:
1.91 daniel 2286: SHRINK;
1.40 daniel 2287: if (CUR == '"') {
1.96 daniel 2288: ctxt->instate = XML_PARSER_ENTITY_VALUE;
1.98 daniel 2289: input = ctxt->input;
1.40 daniel 2290: NEXT;
1.78 daniel 2291: org = CUR_PTR;
1.98 daniel 2292: /*
2293: * NOTE: 4.4.5 Included in Literal
2294: * When a parameter entity reference appears in a literal entity
2295: * value, ... a single or double quote character in the replacement
2296: * text is always treated as a normal data character and will not
2297: * terminate the literal.
2298: * In practice it means we stop the loop only when back at parsing
2299: * the initial entity and the quote is found
2300: */
2301: while ((CUR != '"') || (ctxt->input != input)) {
1.79 daniel 2302: tst = CUR_PTR;
1.98 daniel 2303: /*
2304: * NOTE: 4.4.7 Bypassed
2305: * When a general entity reference appears in the EntityValue in
2306: * an entity declaration, it is bypassed and left as is.
2307: * so XML_SUBSTITUTE_REF is not set.
2308: */
2309: if (ctxt->input != input)
2310: temp = xmlDecodeEntities(ctxt, -1, XML_SUBSTITUTE_PEREF,
2311: 0, 0, 0);
2312: else
2313: temp = xmlDecodeEntities(ctxt, -1, XML_SUBSTITUTE_PEREF,
2314: '"', 0, 0);
1.94 daniel 2315:
2316: /*
2317: * Pop-up of finished entities.
2318: */
2319: while ((CUR == 0) && (ctxt->inputNr > 1))
2320: xmlPopInput(ctxt);
2321:
2322: if ((temp == NULL) && (tst == CUR_PTR)) {
2323: ret = xmlStrndup("", 0);
2324: break;
2325: }
2326: if ((temp[0] == 0) && (tst == CUR_PTR)) {
2327: free((char *)temp);
2328: ret = xmlStrndup("", 0);
2329: break;
2330: }
1.79 daniel 2331: ret = xmlStrcat(ret, temp);
1.80 daniel 2332: if (temp != NULL) free((char *)temp);
1.94 daniel 2333: GROW;
1.79 daniel 2334: }
1.77 daniel 2335: if (CUR != '"') {
1.55 daniel 2336: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.79 daniel 2337: ctxt->sax->error(ctxt->userData, "EntityValue: \" expected\n");
1.59 daniel 2338: ctxt->wellFormed = 0;
1.78 daniel 2339: } else {
1.99 daniel 2340: if (orig != NULL) /* !!!!!!!!! */
1.78 daniel 2341: *orig = xmlStrndup(org, CUR_PTR - org);
1.94 daniel 2342: if (ret == NULL)
2343: ret = xmlStrndup("", 0);
1.40 daniel 2344: NEXT;
1.78 daniel 2345: }
1.40 daniel 2346: } else if (CUR == '\'') {
1.96 daniel 2347: ctxt->instate = XML_PARSER_ENTITY_VALUE;
1.98 daniel 2348: input = ctxt->input;
1.40 daniel 2349: NEXT;
1.78 daniel 2350: org = CUR_PTR;
1.98 daniel 2351: /*
2352: * NOTE: 4.4.5 Included in Literal
2353: * When a parameter entity reference appears in a literal entity
2354: * value, ... a single or double quote character in the replacement
2355: * text is always treated as a normal data character and will not
2356: * terminate the literal.
2357: * In practice it means we stop the loop only when back at parsing
2358: * the initial entity and the quote is found
2359: */
2360: while ((CUR != '\'') || (ctxt->input != input)) {
1.79 daniel 2361: tst = CUR_PTR;
1.98 daniel 2362: /*
2363: * NOTE: 4.4.7 Bypassed
2364: * When a general entity reference appears in the EntityValue in
2365: * an entity declaration, it is bypassed and left as is.
2366: * so XML_SUBSTITUTE_REF is not set.
2367: */
2368: if (ctxt->input != input)
2369: temp = xmlDecodeEntities(ctxt, -1, XML_SUBSTITUTE_PEREF,
2370: 0, 0, 0);
2371: else
2372: temp = xmlDecodeEntities(ctxt, -1, XML_SUBSTITUTE_PEREF,
2373: '\'', 0, 0);
1.94 daniel 2374:
2375: /*
2376: * Pop-up of finished entities.
2377: */
2378: while ((CUR == 0) && (ctxt->inputNr > 1))
2379: xmlPopInput(ctxt);
2380:
2381: if ((temp == NULL) && (tst == CUR_PTR)) {
2382: ret = xmlStrndup("", 0);
2383: break;
2384: }
2385: if ((temp[0] == 0) && (tst == CUR_PTR)) {
2386: free((char *)temp);
2387: ret = xmlStrndup("", 0);
2388: break;
2389: }
1.79 daniel 2390: ret = xmlStrcat(ret, temp);
1.80 daniel 2391: if (temp != NULL) free((char *)temp);
1.94 daniel 2392: GROW;
1.79 daniel 2393: }
1.77 daniel 2394: if (CUR != '\'') {
1.55 daniel 2395: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.77 daniel 2396: ctxt->sax->error(ctxt->userData, "EntityValue: ' expected\n");
1.59 daniel 2397: ctxt->wellFormed = 0;
1.78 daniel 2398: } else {
1.99 daniel 2399: if (orig != NULL) /* !!!!!!!!! */
1.78 daniel 2400: *orig = xmlStrndup(org, CUR_PTR - org);
1.94 daniel 2401: if (ret == NULL)
2402: ret = xmlStrndup("", 0);
1.40 daniel 2403: NEXT;
1.78 daniel 2404: }
1.24 daniel 2405: } else {
1.55 daniel 2406: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.77 daniel 2407: ctxt->sax->error(ctxt->userData, "EntityValue: \" or ' expected\n");
1.59 daniel 2408: ctxt->wellFormed = 0;
1.24 daniel 2409: }
2410:
2411: return(ret);
2412: }
2413:
1.50 daniel 2414: /**
2415: * xmlParseAttValue:
2416: * @ctxt: an XML parser context
2417: *
2418: * parse a value for an attribute
1.78 daniel 2419: * Note: the parser won't do substitution of entities here, this
1.113 daniel 2420: * will be handled later in xmlStringGetNodeList
1.29 daniel 2421: *
2422: * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
2423: * "'" ([^<&'] | Reference)* "'"
1.68 daniel 2424: *
2425: * Returns the AttValue parsed or NULL.
1.29 daniel 2426: */
2427:
1.55 daniel 2428: CHAR *
2429: xmlParseAttValue(xmlParserCtxtPtr ctxt) {
1.77 daniel 2430: CHAR *ret = NULL;
1.29 daniel 2431:
1.91 daniel 2432: SHRINK;
1.40 daniel 2433: if (CUR == '"') {
1.96 daniel 2434: ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
1.40 daniel 2435: NEXT;
1.98 daniel 2436: ret = xmlDecodeEntities(ctxt, -1, XML_SUBSTITUTE_REF, '"', '<', 0);
1.77 daniel 2437: if (CUR == '<') {
2438: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2439: ctxt->sax->error(ctxt->userData,
2440: "Unescaped '<' not allowed in attributes values\n");
2441: ctxt->wellFormed = 0;
1.29 daniel 2442: }
1.77 daniel 2443: if (CUR != '"') {
1.55 daniel 2444: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.77 daniel 2445: ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");
1.59 daniel 2446: ctxt->wellFormed = 0;
1.77 daniel 2447: } else
1.40 daniel 2448: NEXT;
2449: } else if (CUR == '\'') {
1.96 daniel 2450: ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
1.40 daniel 2451: NEXT;
1.98 daniel 2452: ret = xmlDecodeEntities(ctxt, -1, XML_SUBSTITUTE_REF, '\'', '<', 0);
1.77 daniel 2453: if (CUR == '<') {
2454: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2455: ctxt->sax->error(ctxt->userData,
2456: "Unescaped '<' not allowed in attributes values\n");
2457: ctxt->wellFormed = 0;
1.29 daniel 2458: }
1.77 daniel 2459: if (CUR != '\'') {
1.55 daniel 2460: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.77 daniel 2461: ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");
1.59 daniel 2462: ctxt->wellFormed = 0;
1.77 daniel 2463: } else
1.40 daniel 2464: NEXT;
1.29 daniel 2465: } else {
1.55 daniel 2466: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2467: ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n");
1.59 daniel 2468: ctxt->wellFormed = 0;
1.29 daniel 2469: }
2470:
2471: return(ret);
2472: }
2473:
1.50 daniel 2474: /**
2475: * xmlParseSystemLiteral:
2476: * @ctxt: an XML parser context
2477: *
2478: * parse an XML Literal
1.21 daniel 2479: *
1.22 daniel 2480: * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
1.68 daniel 2481: *
2482: * Returns the SystemLiteral parsed or NULL
1.21 daniel 2483: */
2484:
1.55 daniel 2485: CHAR *
2486: xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
1.21 daniel 2487: const CHAR *q;
2488: CHAR *ret = NULL;
2489:
1.91 daniel 2490: SHRINK;
1.40 daniel 2491: if (CUR == '"') {
2492: NEXT;
2493: q = CUR_PTR;
2494: while ((IS_CHAR(CUR)) && (CUR != '"'))
2495: NEXT;
2496: if (!IS_CHAR(CUR)) {
1.55 daniel 2497: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2498: ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n");
1.59 daniel 2499: ctxt->wellFormed = 0;
1.21 daniel 2500: } else {
1.40 daniel 2501: ret = xmlStrndup(q, CUR_PTR - q);
2502: NEXT;
1.21 daniel 2503: }
1.40 daniel 2504: } else if (CUR == '\'') {
2505: NEXT;
2506: q = CUR_PTR;
2507: while ((IS_CHAR(CUR)) && (CUR != '\''))
2508: NEXT;
2509: if (!IS_CHAR(CUR)) {
1.55 daniel 2510: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2511: ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n");
1.59 daniel 2512: ctxt->wellFormed = 0;
1.21 daniel 2513: } else {
1.40 daniel 2514: ret = xmlStrndup(q, CUR_PTR - q);
2515: NEXT;
1.21 daniel 2516: }
2517: } else {
1.55 daniel 2518: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2519: ctxt->sax->error(ctxt->userData, "SystemLiteral \" or ' expected\n");
1.59 daniel 2520: ctxt->wellFormed = 0;
1.21 daniel 2521: }
2522:
2523: return(ret);
2524: }
2525:
1.50 daniel 2526: /**
2527: * xmlParsePubidLiteral:
2528: * @ctxt: an XML parser context
1.21 daniel 2529: *
1.50 daniel 2530: * parse an XML public literal
1.68 daniel 2531: *
2532: * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
2533: *
2534: * Returns the PubidLiteral parsed or NULL.
1.21 daniel 2535: */
2536:
1.55 daniel 2537: CHAR *
2538: xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
1.21 daniel 2539: const CHAR *q;
2540: CHAR *ret = NULL;
2541: /*
2542: * Name ::= (Letter | '_') (NameChar)*
2543: */
1.91 daniel 2544: SHRINK;
1.40 daniel 2545: if (CUR == '"') {
2546: NEXT;
2547: q = CUR_PTR;
2548: while (IS_PUBIDCHAR(CUR)) NEXT;
2549: if (CUR != '"') {
1.55 daniel 2550: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2551: ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n");
1.59 daniel 2552: ctxt->wellFormed = 0;
1.21 daniel 2553: } else {
1.40 daniel 2554: ret = xmlStrndup(q, CUR_PTR - q);
2555: NEXT;
1.21 daniel 2556: }
1.40 daniel 2557: } else if (CUR == '\'') {
2558: NEXT;
2559: q = CUR_PTR;
2560: while ((IS_LETTER(CUR)) && (CUR != '\''))
2561: NEXT;
2562: if (!IS_LETTER(CUR)) {
1.55 daniel 2563: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2564: ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n");
1.59 daniel 2565: ctxt->wellFormed = 0;
1.21 daniel 2566: } else {
1.40 daniel 2567: ret = xmlStrndup(q, CUR_PTR - q);
2568: NEXT;
1.21 daniel 2569: }
2570: } else {
1.55 daniel 2571: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2572: ctxt->sax->error(ctxt->userData, "SystemLiteral \" or ' expected\n");
1.59 daniel 2573: ctxt->wellFormed = 0;
1.21 daniel 2574: }
2575:
2576: return(ret);
2577: }
2578:
1.50 daniel 2579: /**
2580: * xmlParseCharData:
2581: * @ctxt: an XML parser context
2582: * @cdata: int indicating whether we are within a CDATA section
2583: *
2584: * parse a CharData section.
2585: * if we are within a CDATA section ']]>' marks an end of section.
1.27 daniel 2586: *
2587: * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
2588: */
2589:
1.55 daniel 2590: void
2591: xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
1.91 daniel 2592: CHAR buf[1000];
2593: int nbchar = 0;
1.97 daniel 2594: CHAR cur;
1.27 daniel 2595:
1.91 daniel 2596: SHRINK;
1.97 daniel 2597: /*
2598: * !!!!!!!!!!!!
2599: * NOTE: NXT(0) is used here to avoid breaking on < or &
2600: * entities substitutions.
2601: */
2602: cur = CUR;
2603: while ((IS_CHAR(cur)) && (cur != '<') &&
2604: (cur != '&')) {
2605: if ((cur == ']') && (NXT(1) == ']') &&
1.59 daniel 2606: (NXT(2) == '>')) {
2607: if (cdata) break;
2608: else {
2609: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2610: ctxt->sax->error(ctxt->userData,
1.59 daniel 2611: "Sequence ']]>' not allowed in content\n");
2612: ctxt->wellFormed = 0;
2613: }
2614: }
1.91 daniel 2615: buf[nbchar++] = CUR;
2616: if (nbchar == 1000) {
2617: /*
2618: * Ok the segment is to be consumed as chars.
2619: */
2620: if (ctxt->sax != NULL) {
2621: if (areBlanks(ctxt, buf, nbchar)) {
2622: if (ctxt->sax->ignorableWhitespace != NULL)
2623: ctxt->sax->ignorableWhitespace(ctxt->userData,
2624: buf, nbchar);
2625: } else {
2626: if (ctxt->sax->characters != NULL)
2627: ctxt->sax->characters(ctxt->userData, buf, nbchar);
2628: }
2629: }
2630: nbchar = 0;
2631: }
1.40 daniel 2632: NEXT;
1.97 daniel 2633: cur = CUR;
1.27 daniel 2634: }
1.91 daniel 2635: if (nbchar != 0) {
2636: /*
2637: * Ok the segment is to be consumed as chars.
2638: */
2639: if (ctxt->sax != NULL) {
2640: if (areBlanks(ctxt, buf, nbchar)) {
2641: if (ctxt->sax->ignorableWhitespace != NULL)
2642: ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
2643: } else {
2644: if (ctxt->sax->characters != NULL)
2645: ctxt->sax->characters(ctxt->userData, buf, nbchar);
2646: }
2647: }
1.45 daniel 2648: }
1.27 daniel 2649: }
2650:
1.50 daniel 2651: /**
2652: * xmlParseExternalID:
2653: * @ctxt: an XML parser context
2654: * @publicID: a CHAR** receiving PubidLiteral
1.67 daniel 2655: * @strict: indicate whether we should restrict parsing to only
2656: * production [75], see NOTE below
1.50 daniel 2657: *
1.67 daniel 2658: * Parse an External ID or a Public ID
2659: *
2660: * NOTE: Productions [75] and [83] interract badly since [75] can generate
2661: * 'PUBLIC' S PubidLiteral S SystemLiteral
1.22 daniel 2662: *
2663: * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
2664: * | 'PUBLIC' S PubidLiteral S SystemLiteral
1.67 daniel 2665: *
2666: * [83] PublicID ::= 'PUBLIC' S PubidLiteral
2667: *
1.68 daniel 2668: * Returns the function returns SystemLiteral and in the second
1.67 daniel 2669: * case publicID receives PubidLiteral, is strict is off
2670: * it is possible to return NULL and have publicID set.
1.22 daniel 2671: */
2672:
1.55 daniel 2673: CHAR *
1.67 daniel 2674: xmlParseExternalID(xmlParserCtxtPtr ctxt, CHAR **publicID, int strict) {
1.39 daniel 2675: CHAR *URI = NULL;
1.22 daniel 2676:
1.91 daniel 2677: SHRINK;
1.40 daniel 2678: if ((CUR == 'S') && (NXT(1) == 'Y') &&
2679: (NXT(2) == 'S') && (NXT(3) == 'T') &&
2680: (NXT(4) == 'E') && (NXT(5) == 'M')) {
2681: SKIP(6);
1.59 daniel 2682: if (!IS_BLANK(CUR)) {
2683: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2684: ctxt->sax->error(ctxt->userData,
1.59 daniel 2685: "Space required after 'SYSTEM'\n");
2686: ctxt->wellFormed = 0;
2687: }
1.42 daniel 2688: SKIP_BLANKS;
1.39 daniel 2689: URI = xmlParseSystemLiteral(ctxt);
1.59 daniel 2690: if (URI == NULL) {
1.55 daniel 2691: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2692: ctxt->sax->error(ctxt->userData,
1.39 daniel 2693: "xmlParseExternalID: SYSTEM, no URI\n");
1.59 daniel 2694: ctxt->wellFormed = 0;
2695: }
1.40 daniel 2696: } else if ((CUR == 'P') && (NXT(1) == 'U') &&
2697: (NXT(2) == 'B') && (NXT(3) == 'L') &&
2698: (NXT(4) == 'I') && (NXT(5) == 'C')) {
2699: SKIP(6);
1.59 daniel 2700: if (!IS_BLANK(CUR)) {
2701: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2702: ctxt->sax->error(ctxt->userData,
1.59 daniel 2703: "Space required after 'PUBLIC'\n");
2704: ctxt->wellFormed = 0;
2705: }
1.42 daniel 2706: SKIP_BLANKS;
1.39 daniel 2707: *publicID = xmlParsePubidLiteral(ctxt);
1.59 daniel 2708: if (*publicID == NULL) {
1.55 daniel 2709: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2710: ctxt->sax->error(ctxt->userData,
1.39 daniel 2711: "xmlParseExternalID: PUBLIC, no Public Identifier\n");
1.59 daniel 2712: ctxt->wellFormed = 0;
2713: }
1.67 daniel 2714: if (strict) {
2715: /*
2716: * We don't handle [83] so "S SystemLiteral" is required.
2717: */
2718: if (!IS_BLANK(CUR)) {
2719: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2720: ctxt->sax->error(ctxt->userData,
1.67 daniel 2721: "Space required after the Public Identifier\n");
2722: ctxt->wellFormed = 0;
2723: }
2724: } else {
2725: /*
2726: * We handle [83] so we return immediately, if
2727: * "S SystemLiteral" is not detected. From a purely parsing
2728: * point of view that's a nice mess.
2729: */
2730: const CHAR *ptr = CUR_PTR;
2731: if (!IS_BLANK(*ptr)) return(NULL);
2732:
2733: while (IS_BLANK(*ptr)) ptr++;
2734: if ((*ptr != '\'') || (*ptr != '"')) return(NULL);
1.59 daniel 2735: }
1.42 daniel 2736: SKIP_BLANKS;
1.39 daniel 2737: URI = xmlParseSystemLiteral(ctxt);
1.59 daniel 2738: if (URI == NULL) {
1.55 daniel 2739: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2740: ctxt->sax->error(ctxt->userData,
1.39 daniel 2741: "xmlParseExternalID: PUBLIC, no URI\n");
1.59 daniel 2742: ctxt->wellFormed = 0;
2743: }
1.22 daniel 2744: }
1.39 daniel 2745: return(URI);
1.22 daniel 2746: }
2747:
1.50 daniel 2748: /**
2749: * xmlParseComment:
1.69 daniel 2750: * @ctxt: an XML parser context
1.50 daniel 2751: *
1.3 veillard 2752: * Skip an XML (SGML) comment <!-- .... -->
1.38 daniel 2753: * The spec says that "For compatibility, the string "--" (double-hyphen)
2754: * must not occur within comments. "
1.22 daniel 2755: *
2756: * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
1.3 veillard 2757: */
1.72 daniel 2758: void
1.114 daniel 2759: xmlParseComment(xmlParserCtxtPtr ctxt) {
1.17 daniel 2760: const CHAR *q, *start;
2761: const CHAR *r;
1.39 daniel 2762: CHAR *val;
1.3 veillard 2763:
2764: /*
1.22 daniel 2765: * Check that there is a comment right here.
1.3 veillard 2766: */
1.40 daniel 2767: if ((CUR != '<') || (NXT(1) != '!') ||
1.72 daniel 2768: (NXT(2) != '-') || (NXT(3) != '-')) return;
1.3 veillard 2769:
1.97 daniel 2770: ctxt->instate = XML_PARSER_COMMENT;
1.91 daniel 2771: SHRINK;
1.40 daniel 2772: SKIP(4);
2773: start = q = CUR_PTR;
2774: NEXT;
2775: r = CUR_PTR;
2776: NEXT;
2777: while (IS_CHAR(CUR) &&
2778: ((CUR == ':') || (CUR != '>') ||
1.16 daniel 2779: (*r != '-') || (*q != '-'))) {
1.59 daniel 2780: if ((*r == '-') && (*q == '-')) {
1.55 daniel 2781: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2782: ctxt->sax->error(ctxt->userData,
1.38 daniel 2783: "Comment must not contain '--' (double-hyphen)`\n");
1.59 daniel 2784: ctxt->wellFormed = 0;
2785: }
1.40 daniel 2786: NEXT;r++;q++;
1.3 veillard 2787: }
1.40 daniel 2788: if (!IS_CHAR(CUR)) {
1.55 daniel 2789: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2790: ctxt->sax->error(ctxt->userData, "Comment not terminated \n<!--%.50s\n", start);
1.59 daniel 2791: ctxt->wellFormed = 0;
1.3 veillard 2792: } else {
1.40 daniel 2793: NEXT;
1.114 daniel 2794: val = xmlStrndup(start, q - start);
2795: if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL))
2796: ctxt->sax->comment(ctxt->userData, val);
2797: free(val);
1.3 veillard 2798: }
2799: }
2800:
1.50 daniel 2801: /**
2802: * xmlParsePITarget:
2803: * @ctxt: an XML parser context
2804: *
2805: * parse the name of a PI
1.22 daniel 2806: *
2807: * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
1.68 daniel 2808: *
2809: * Returns the PITarget name or NULL
1.22 daniel 2810: */
2811:
1.55 daniel 2812: CHAR *
2813: xmlParsePITarget(xmlParserCtxtPtr ctxt) {
1.22 daniel 2814: CHAR *name;
2815:
2816: name = xmlParseName(ctxt);
2817: if ((name != NULL) && (name[3] == 0) &&
2818: ((name[0] == 'x') || (name[0] == 'X')) &&
1.31 daniel 2819: ((name[1] == 'm') || (name[1] == 'M')) &&
2820: ((name[2] == 'l') || (name[2] == 'L'))) {
1.55 daniel 2821: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2822: ctxt->sax->error(ctxt->userData, "xmlParsePItarget: invalid name prefix 'xml'\n");
1.22 daniel 2823: return(NULL);
2824: }
2825: return(name);
2826: }
2827:
1.50 daniel 2828: /**
2829: * xmlParsePI:
2830: * @ctxt: an XML parser context
2831: *
2832: * parse an XML Processing Instruction.
1.22 daniel 2833: *
2834: * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
1.68 daniel 2835: *
1.69 daniel 2836: * The processing is transfered to SAX once parsed.
1.3 veillard 2837: */
2838:
1.55 daniel 2839: void
2840: xmlParsePI(xmlParserCtxtPtr ctxt) {
1.22 daniel 2841: CHAR *target;
2842:
1.40 daniel 2843: if ((CUR == '<') && (NXT(1) == '?')) {
1.3 veillard 2844: /*
2845: * this is a Processing Instruction.
2846: */
1.40 daniel 2847: SKIP(2);
1.91 daniel 2848: SHRINK;
1.3 veillard 2849:
2850: /*
1.22 daniel 2851: * Parse the target name and check for special support like
2852: * namespace.
1.3 veillard 2853: */
1.22 daniel 2854: target = xmlParsePITarget(ctxt);
2855: if (target != NULL) {
1.114 daniel 2856: const CHAR *q;
1.72 daniel 2857:
1.114 daniel 2858: if (!IS_BLANK(CUR)) {
2859: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2860: ctxt->sax->error(ctxt->userData,
2861: "xmlParsePI: PI %s space expected\n", target);
2862: ctxt->wellFormed = 0;
2863: }
2864: SKIP_BLANKS;
2865: q = CUR_PTR;
1.72 daniel 2866: while (IS_CHAR(CUR) &&
2867: ((CUR != '?') || (NXT(1) != '>')))
2868: NEXT;
2869: if (!IS_CHAR(CUR)) {
2870: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2871: ctxt->sax->error(ctxt->userData,
1.72 daniel 2872: "xmlParsePI: PI %s never end ...\n", target);
2873: ctxt->wellFormed = 0;
1.22 daniel 2874: } else {
1.72 daniel 2875: CHAR *data;
1.44 daniel 2876:
1.72 daniel 2877: data = xmlStrndup(q, CUR_PTR - q);
2878: SKIP(2);
1.44 daniel 2879:
1.72 daniel 2880: /*
2881: * SAX: PI detected.
2882: */
2883: if ((ctxt->sax) &&
2884: (ctxt->sax->processingInstruction != NULL))
1.99 daniel 2885: ctxt->sax->processingInstruction(ctxt->userData,
2886: target, data);
1.72 daniel 2887: free(data);
1.22 daniel 2888: }
1.39 daniel 2889: free(target);
1.3 veillard 2890: } else {
1.55 daniel 2891: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.99 daniel 2892: ctxt->sax->error(ctxt->userData,
2893: "xmlParsePI : no target name\n");
1.59 daniel 2894: ctxt->wellFormed = 0;
1.22 daniel 2895: }
2896: }
2897: }
2898:
1.50 daniel 2899: /**
2900: * xmlParseNotationDecl:
2901: * @ctxt: an XML parser context
2902: *
2903: * parse a notation declaration
1.22 daniel 2904: *
2905: * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
2906: *
2907: * Hence there is actually 3 choices:
2908: * 'PUBLIC' S PubidLiteral
2909: * 'PUBLIC' S PubidLiteral S SystemLiteral
2910: * and 'SYSTEM' S SystemLiteral
1.50 daniel 2911: *
1.67 daniel 2912: * See the NOTE on xmlParseExternalID().
1.22 daniel 2913: */
2914:
1.55 daniel 2915: void
2916: xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
1.22 daniel 2917: CHAR *name;
1.67 daniel 2918: CHAR *Pubid;
2919: CHAR *Systemid;
1.22 daniel 2920:
1.40 daniel 2921: if ((CUR == '<') && (NXT(1) == '!') &&
2922: (NXT(2) == 'N') && (NXT(3) == 'O') &&
2923: (NXT(4) == 'T') && (NXT(5) == 'A') &&
2924: (NXT(6) == 'T') && (NXT(7) == 'I') &&
1.67 daniel 2925: (NXT(8) == 'O') && (NXT(9) == 'N')) {
1.91 daniel 2926: SHRINK;
1.40 daniel 2927: SKIP(10);
1.67 daniel 2928: if (!IS_BLANK(CUR)) {
2929: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2930: ctxt->sax->error(ctxt->userData, "Space required after '<!NOTATION'\n");
1.67 daniel 2931: ctxt->wellFormed = 0;
2932: return;
2933: }
2934: SKIP_BLANKS;
1.22 daniel 2935:
2936: name = xmlParseName(ctxt);
2937: if (name == NULL) {
1.55 daniel 2938: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2939: ctxt->sax->error(ctxt->userData, "NOTATION: Name expected here\n");
1.67 daniel 2940: ctxt->wellFormed = 0;
2941: return;
2942: }
2943: if (!IS_BLANK(CUR)) {
2944: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2945: ctxt->sax->error(ctxt->userData,
1.67 daniel 2946: "Space required after the NOTATION name'\n");
1.59 daniel 2947: ctxt->wellFormed = 0;
1.22 daniel 2948: return;
2949: }
1.42 daniel 2950: SKIP_BLANKS;
1.67 daniel 2951:
1.22 daniel 2952: /*
1.67 daniel 2953: * Parse the IDs.
1.22 daniel 2954: */
1.67 daniel 2955: Systemid = xmlParseExternalID(ctxt, &Pubid, 1);
2956: SKIP_BLANKS;
2957:
2958: if (CUR == '>') {
1.40 daniel 2959: NEXT;
1.72 daniel 2960: if ((ctxt->sax != NULL) && (ctxt->sax->notationDecl != NULL))
1.74 daniel 2961: ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
1.67 daniel 2962: } else {
2963: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2964: ctxt->sax->error(ctxt->userData,
1.67 daniel 2965: "'>' required to close NOTATION declaration\n");
2966: ctxt->wellFormed = 0;
2967: }
1.22 daniel 2968: free(name);
1.67 daniel 2969: if (Systemid != NULL) free(Systemid);
2970: if (Pubid != NULL) free(Pubid);
1.22 daniel 2971: }
2972: }
2973:
1.50 daniel 2974: /**
2975: * xmlParseEntityDecl:
2976: * @ctxt: an XML parser context
2977: *
2978: * parse <!ENTITY declarations
1.22 daniel 2979: *
2980: * [70] EntityDecl ::= GEDecl | PEDecl
2981: *
2982: * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
2983: *
2984: * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
2985: *
2986: * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
2987: *
2988: * [74] PEDef ::= EntityValue | ExternalID
1.24 daniel 2989: *
2990: * [76] NDataDecl ::= S 'NDATA' S Name
1.99 daniel 2991: *
2992: * [ VC: Notation Declared ]
2993: * TODO The Name must match the declared name of a notation.
1.22 daniel 2994: */
2995:
1.55 daniel 2996: void
2997: xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
1.39 daniel 2998: CHAR *name = NULL;
1.24 daniel 2999: CHAR *value = NULL;
1.39 daniel 3000: CHAR *URI = NULL, *literal = NULL;
1.24 daniel 3001: CHAR *ndata = NULL;
1.39 daniel 3002: int isParameter = 0;
1.78 daniel 3003: CHAR *orig = NULL;
1.22 daniel 3004:
1.94 daniel 3005: GROW;
1.40 daniel 3006: if ((CUR == '<') && (NXT(1) == '!') &&
3007: (NXT(2) == 'E') && (NXT(3) == 'N') &&
3008: (NXT(4) == 'T') && (NXT(5) == 'I') &&
1.59 daniel 3009: (NXT(6) == 'T') && (NXT(7) == 'Y')) {
1.96 daniel 3010: ctxt->instate = XML_PARSER_ENTITY_DECL;
1.91 daniel 3011: SHRINK;
1.40 daniel 3012: SKIP(8);
1.59 daniel 3013: if (!IS_BLANK(CUR)) {
3014: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3015: ctxt->sax->error(ctxt->userData, "Space required after '<!ENTITY'\n");
1.59 daniel 3016: ctxt->wellFormed = 0;
3017: }
3018: SKIP_BLANKS;
1.40 daniel 3019:
3020: if (CUR == '%') {
3021: NEXT;
1.59 daniel 3022: if (!IS_BLANK(CUR)) {
3023: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3024: ctxt->sax->error(ctxt->userData, "Space required after '%'\n");
1.59 daniel 3025: ctxt->wellFormed = 0;
3026: }
1.42 daniel 3027: SKIP_BLANKS;
1.39 daniel 3028: isParameter = 1;
1.22 daniel 3029: }
3030:
3031: name = xmlParseName(ctxt);
1.24 daniel 3032: if (name == NULL) {
1.55 daniel 3033: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3034: ctxt->sax->error(ctxt->userData, "xmlParseEntityDecl: no name\n");
1.59 daniel 3035: ctxt->wellFormed = 0;
1.24 daniel 3036: return;
3037: }
1.59 daniel 3038: if (!IS_BLANK(CUR)) {
3039: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3040: ctxt->sax->error(ctxt->userData,
1.59 daniel 3041: "Space required after the entity name\n");
3042: ctxt->wellFormed = 0;
3043: }
1.42 daniel 3044: SKIP_BLANKS;
1.24 daniel 3045:
1.22 daniel 3046: /*
1.68 daniel 3047: * handle the various case of definitions...
1.22 daniel 3048: */
1.39 daniel 3049: if (isParameter) {
1.40 daniel 3050: if ((CUR == '"') || (CUR == '\''))
1.78 daniel 3051: value = xmlParseEntityValue(ctxt, &orig);
1.39 daniel 3052: if (value) {
1.72 daniel 3053: if ((ctxt->sax != NULL) && (ctxt->sax->entityDecl != NULL))
1.74 daniel 3054: ctxt->sax->entityDecl(ctxt->userData, name,
1.39 daniel 3055: XML_INTERNAL_PARAMETER_ENTITY,
3056: NULL, NULL, value);
3057: }
1.24 daniel 3058: else {
1.67 daniel 3059: URI = xmlParseExternalID(ctxt, &literal, 1);
1.39 daniel 3060: if (URI) {
1.72 daniel 3061: if ((ctxt->sax != NULL) && (ctxt->sax->entityDecl != NULL))
1.74 daniel 3062: ctxt->sax->entityDecl(ctxt->userData, name,
1.39 daniel 3063: XML_EXTERNAL_PARAMETER_ENTITY,
3064: literal, URI, NULL);
3065: }
1.24 daniel 3066: }
3067: } else {
1.40 daniel 3068: if ((CUR == '"') || (CUR == '\'')) {
1.78 daniel 3069: value = xmlParseEntityValue(ctxt, &orig);
1.72 daniel 3070: if ((ctxt->sax != NULL) && (ctxt->sax->entityDecl != NULL))
1.74 daniel 3071: ctxt->sax->entityDecl(ctxt->userData, name,
1.39 daniel 3072: XML_INTERNAL_GENERAL_ENTITY,
3073: NULL, NULL, value);
3074: } else {
1.67 daniel 3075: URI = xmlParseExternalID(ctxt, &literal, 1);
1.59 daniel 3076: if ((CUR != '>') && (!IS_BLANK(CUR))) {
3077: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3078: ctxt->sax->error(ctxt->userData,
1.59 daniel 3079: "Space required before 'NDATA'\n");
3080: ctxt->wellFormed = 0;
3081: }
1.42 daniel 3082: SKIP_BLANKS;
1.40 daniel 3083: if ((CUR == 'N') && (NXT(1) == 'D') &&
3084: (NXT(2) == 'A') && (NXT(3) == 'T') &&
3085: (NXT(4) == 'A')) {
3086: SKIP(5);
1.59 daniel 3087: if (!IS_BLANK(CUR)) {
3088: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3089: ctxt->sax->error(ctxt->userData,
1.59 daniel 3090: "Space required after 'NDATA'\n");
3091: ctxt->wellFormed = 0;
3092: }
1.42 daniel 3093: SKIP_BLANKS;
1.24 daniel 3094: ndata = xmlParseName(ctxt);
1.72 daniel 3095: if ((ctxt->sax != NULL) && (ctxt->sax->entityDecl != NULL))
1.74 daniel 3096: ctxt->sax->entityDecl(ctxt->userData, name,
1.39 daniel 3097: XML_EXTERNAL_GENERAL_UNPARSED_ENTITY,
3098: literal, URI, ndata);
3099: } else {
1.72 daniel 3100: if ((ctxt->sax != NULL) && (ctxt->sax->entityDecl != NULL))
1.74 daniel 3101: ctxt->sax->entityDecl(ctxt->userData, name,
1.39 daniel 3102: XML_EXTERNAL_GENERAL_PARSED_ENTITY,
3103: literal, URI, NULL);
1.24 daniel 3104: }
3105: }
3106: }
1.42 daniel 3107: SKIP_BLANKS;
1.40 daniel 3108: if (CUR != '>') {
1.55 daniel 3109: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3110: ctxt->sax->error(ctxt->userData,
1.31 daniel 3111: "xmlParseEntityDecl: entity %s not terminated\n", name);
1.59 daniel 3112: ctxt->wellFormed = 0;
1.24 daniel 3113: } else
1.40 daniel 3114: NEXT;
1.78 daniel 3115: if (orig != NULL) {
3116: /*
1.98 daniel 3117: * Ugly mechanism to save the raw entity value.
1.78 daniel 3118: */
3119: xmlEntityPtr cur = NULL;
3120:
1.98 daniel 3121: if (isParameter) {
3122: if ((ctxt->sax != NULL) &&
3123: (ctxt->sax->getParameterEntity != NULL))
3124: cur = ctxt->sax->getParameterEntity(ctxt, name);
3125: } else {
3126: if ((ctxt->sax != NULL) &&
3127: (ctxt->sax->getEntity != NULL))
3128: cur = ctxt->sax->getEntity(ctxt, name);
3129: }
3130: if (cur != NULL) {
3131: if (cur->orig != NULL)
3132: free(orig);
3133: else
3134: cur->orig = orig;
3135: } else
1.78 daniel 3136: free(orig);
3137: }
1.39 daniel 3138: if (name != NULL) free(name);
3139: if (value != NULL) free(value);
3140: if (URI != NULL) free(URI);
3141: if (literal != NULL) free(literal);
3142: if (ndata != NULL) free(ndata);
1.22 daniel 3143: }
3144: }
3145:
1.50 daniel 3146: /**
1.59 daniel 3147: * xmlParseDefaultDecl:
3148: * @ctxt: an XML parser context
3149: * @value: Receive a possible fixed default value for the attribute
3150: *
3151: * Parse an attribute default declaration
3152: *
3153: * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
3154: *
1.99 daniel 3155: * [ VC: Required Attribute ]
3156: * TODO if the default declaration is the keyword #REQUIRED, then the
3157: * attribute must be specified for all elements of the type in the
3158: * attribute-list declaration.
3159: *
3160: * [ VC: Attribute Default Legal ]
1.102 daniel 3161: * The declared default value must meet the lexical constraints of
3162: * the declared attribute type c.f. xmlValidateAttributeDecl()
1.99 daniel 3163: *
3164: * [ VC: Fixed Attribute Default ]
3165: * TODO if an attribute has a default value declared with the #FIXED
3166: * keyword, instances of that attribute must match the default value.
3167: *
3168: * [ WFC: No < in Attribute Values ]
3169: * handled in xmlParseAttValue()
3170: *
1.59 daniel 3171: * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
3172: * or XML_ATTRIBUTE_FIXED.
3173: */
3174:
3175: int
3176: xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, CHAR **value) {
3177: int val;
3178: CHAR *ret;
3179:
3180: *value = NULL;
3181: if ((CUR == '#') && (NXT(1) == 'R') &&
3182: (NXT(2) == 'E') && (NXT(3) == 'Q') &&
3183: (NXT(4) == 'U') && (NXT(5) == 'I') &&
3184: (NXT(6) == 'R') && (NXT(7) == 'E') &&
3185: (NXT(8) == 'D')) {
3186: SKIP(9);
3187: return(XML_ATTRIBUTE_REQUIRED);
3188: }
3189: if ((CUR == '#') && (NXT(1) == 'I') &&
3190: (NXT(2) == 'M') && (NXT(3) == 'P') &&
3191: (NXT(4) == 'L') && (NXT(5) == 'I') &&
3192: (NXT(6) == 'E') && (NXT(7) == 'D')) {
3193: SKIP(8);
3194: return(XML_ATTRIBUTE_IMPLIED);
3195: }
3196: val = XML_ATTRIBUTE_NONE;
3197: if ((CUR == '#') && (NXT(1) == 'F') &&
3198: (NXT(2) == 'I') && (NXT(3) == 'X') &&
3199: (NXT(4) == 'E') && (NXT(5) == 'D')) {
3200: SKIP(6);
3201: val = XML_ATTRIBUTE_FIXED;
3202: if (!IS_BLANK(CUR)) {
3203: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3204: ctxt->sax->error(ctxt->userData, "Space required after '#FIXED'\n");
1.59 daniel 3205: ctxt->wellFormed = 0;
3206: }
3207: SKIP_BLANKS;
3208: }
3209: ret = xmlParseAttValue(ctxt);
1.96 daniel 3210: ctxt->instate = XML_PARSER_DTD;
1.59 daniel 3211: if (ret == NULL) {
3212: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3213: ctxt->sax->error(ctxt->userData,
1.59 daniel 3214: "Attribute default value declaration error\n");
3215: ctxt->wellFormed = 0;
3216: } else
3217: *value = ret;
3218: return(val);
3219: }
3220:
3221: /**
1.66 daniel 3222: * xmlParseNotationType:
3223: * @ctxt: an XML parser context
3224: *
3225: * parse an Notation attribute type.
3226: *
1.99 daniel 3227: * Note: the leading 'NOTATION' S part has already being parsed...
3228: *
1.66 daniel 3229: * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3230: *
1.99 daniel 3231: * [ VC: Notation Attributes ]
3232: * TODO Values of this type must match one of the notation names included
3233: * in the declaration; all notation names in the declaration must be declared.
1.66 daniel 3234: *
3235: * Returns: the notation attribute tree built while parsing
3236: */
3237:
3238: xmlEnumerationPtr
3239: xmlParseNotationType(xmlParserCtxtPtr ctxt) {
3240: CHAR *name;
3241: xmlEnumerationPtr ret = NULL, last = NULL, cur;
3242:
3243: if (CUR != '(') {
3244: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3245: ctxt->sax->error(ctxt->userData, "'(' required to start 'NOTATION'\n");
1.66 daniel 3246: ctxt->wellFormed = 0;
3247: return(NULL);
3248: }
1.91 daniel 3249: SHRINK;
1.66 daniel 3250: do {
3251: NEXT;
3252: SKIP_BLANKS;
3253: name = xmlParseName(ctxt);
3254: if (name == NULL) {
3255: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3256: ctxt->sax->error(ctxt->userData,
1.66 daniel 3257: "Name expected in NOTATION declaration\n");
3258: ctxt->wellFormed = 0;
3259: return(ret);
3260: }
3261: cur = xmlCreateEnumeration(name);
1.67 daniel 3262: free(name);
1.66 daniel 3263: if (cur == NULL) return(ret);
3264: if (last == NULL) ret = last = cur;
3265: else {
3266: last->next = cur;
3267: last = cur;
3268: }
3269: SKIP_BLANKS;
3270: } while (CUR == '|');
3271: if (CUR != ')') {
3272: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3273: ctxt->sax->error(ctxt->userData,
1.66 daniel 3274: "')' required to finish NOTATION declaration\n");
3275: ctxt->wellFormed = 0;
3276: return(ret);
3277: }
3278: NEXT;
3279: return(ret);
3280: }
3281:
3282: /**
3283: * xmlParseEnumerationType:
3284: * @ctxt: an XML parser context
3285: *
3286: * parse an Enumeration attribute type.
3287: *
3288: * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
3289: *
1.99 daniel 3290: * [ VC: Enumeration ]
3291: * TODO Values of this type must match one of the Nmtoken tokens in
3292: * the declaration
3293: *
1.66 daniel 3294: * Returns: the enumeration attribute tree built while parsing
3295: */
3296:
3297: xmlEnumerationPtr
3298: xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
3299: CHAR *name;
3300: xmlEnumerationPtr ret = NULL, last = NULL, cur;
3301:
3302: if (CUR != '(') {
3303: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3304: ctxt->sax->error(ctxt->userData,
1.66 daniel 3305: "'(' required to start ATTLIST enumeration\n");
3306: ctxt->wellFormed = 0;
3307: return(NULL);
3308: }
1.91 daniel 3309: SHRINK;
1.66 daniel 3310: do {
3311: NEXT;
3312: SKIP_BLANKS;
3313: name = xmlParseNmtoken(ctxt);
3314: if (name == NULL) {
3315: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3316: ctxt->sax->error(ctxt->userData,
1.66 daniel 3317: "NmToken expected in ATTLIST enumeration\n");
3318: ctxt->wellFormed = 0;
3319: return(ret);
3320: }
3321: cur = xmlCreateEnumeration(name);
1.67 daniel 3322: free(name);
1.66 daniel 3323: if (cur == NULL) return(ret);
3324: if (last == NULL) ret = last = cur;
3325: else {
3326: last->next = cur;
3327: last = cur;
3328: }
3329: SKIP_BLANKS;
3330: } while (CUR == '|');
3331: if (CUR != ')') {
3332: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3333: ctxt->sax->error(ctxt->userData,
1.66 daniel 3334: "')' required to finish ATTLIST enumeration\n");
3335: ctxt->wellFormed = 0;
3336: return(ret);
3337: }
3338: NEXT;
3339: return(ret);
3340: }
3341:
3342: /**
1.50 daniel 3343: * xmlParseEnumeratedType:
3344: * @ctxt: an XML parser context
1.66 daniel 3345: * @tree: the enumeration tree built while parsing
1.50 daniel 3346: *
1.66 daniel 3347: * parse an Enumerated attribute type.
1.22 daniel 3348: *
3349: * [57] EnumeratedType ::= NotationType | Enumeration
3350: *
3351: * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3352: *
1.50 daniel 3353: *
1.66 daniel 3354: * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
1.22 daniel 3355: */
3356:
1.66 daniel 3357: int
3358: xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
3359: if ((CUR == 'N') && (NXT(1) == 'O') &&
3360: (NXT(2) == 'T') && (NXT(3) == 'A') &&
3361: (NXT(4) == 'T') && (NXT(5) == 'I') &&
3362: (NXT(6) == 'O') && (NXT(7) == 'N')) {
3363: SKIP(8);
3364: if (!IS_BLANK(CUR)) {
3365: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3366: ctxt->sax->error(ctxt->userData, "Space required after 'NOTATION'\n");
1.66 daniel 3367: ctxt->wellFormed = 0;
3368: return(0);
3369: }
3370: SKIP_BLANKS;
3371: *tree = xmlParseNotationType(ctxt);
3372: if (*tree == NULL) return(0);
3373: return(XML_ATTRIBUTE_NOTATION);
3374: }
3375: *tree = xmlParseEnumerationType(ctxt);
3376: if (*tree == NULL) return(0);
3377: return(XML_ATTRIBUTE_ENUMERATION);
1.22 daniel 3378: }
3379:
1.50 daniel 3380: /**
3381: * xmlParseAttributeType:
3382: * @ctxt: an XML parser context
1.66 daniel 3383: * @tree: the enumeration tree built while parsing
1.50 daniel 3384: *
1.59 daniel 3385: * parse the Attribute list def for an element
1.22 daniel 3386: *
3387: * [54] AttType ::= StringType | TokenizedType | EnumeratedType
3388: *
3389: * [55] StringType ::= 'CDATA'
3390: *
3391: * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
3392: * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
1.50 daniel 3393: *
1.102 daniel 3394: * Validity constraints for attribute values syntax are checked in
3395: * xmlValidateAttributeValue()
3396: *
1.99 daniel 3397: * [ VC: ID ]
1.102 daniel 3398: * Values of type ID must match the Name production. TODO A name must not
1.99 daniel 3399: * appear more than once in an XML document as a value of this type;
3400: * i.e., ID values must uniquely identify the elements which bear them.
3401: *
3402: * [ VC: One ID per Element Type ]
3403: * TODO No element type may have more than one ID attribute specified.
3404: *
3405: * [ VC: ID Attribute Default ]
3406: * TODO An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
3407: *
3408: * [ VC: IDREF ]
1.102 daniel 3409: * Values of type IDREF must match the Name production, and values
3410: * of type IDREFS must match Names; TODO each Name must match the value of
1.99 daniel 3411: * an ID attribute on some element in the XML document; i.e. IDREF
3412: * values must match the value of some ID attribute.
3413: *
3414: * [ VC: Entity Name ]
1.102 daniel 3415: * Values of type ENTITY must match the Name production, values
3416: * of type ENTITIES must match Names; TODO each Name must match the name of
1.99 daniel 3417: * an unparsed entity declared in the DTD.
3418: *
3419: * [ VC: Name Token ]
1.102 daniel 3420: * Values of type NMTOKEN must match the Nmtoken production; values
1.99 daniel 3421: * of type NMTOKENS must match Nmtokens.
3422: *
1.69 daniel 3423: * Returns the attribute type
1.22 daniel 3424: */
1.59 daniel 3425: int
1.66 daniel 3426: xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
1.91 daniel 3427: SHRINK;
1.40 daniel 3428: if ((CUR == 'C') && (NXT(1) == 'D') &&
3429: (NXT(2) == 'A') && (NXT(3) == 'T') &&
3430: (NXT(4) == 'A')) {
3431: SKIP(5);
1.66 daniel 3432: return(XML_ATTRIBUTE_CDATA);
1.40 daniel 3433: } else if ((CUR == 'I') && (NXT(1) == 'D') &&
3434: (NXT(2) == 'R') && (NXT(3) == 'E') &&
1.97 daniel 3435: (NXT(4) == 'F') && (NXT(5) == 'S')) {
3436: SKIP(6);
3437: return(XML_ATTRIBUTE_IDREFS);
3438: } else if ((CUR == 'I') && (NXT(1) == 'D') &&
3439: (NXT(2) == 'R') && (NXT(3) == 'E') &&
1.40 daniel 3440: (NXT(4) == 'F')) {
3441: SKIP(5);
1.59 daniel 3442: return(XML_ATTRIBUTE_IDREF);
1.66 daniel 3443: } else if ((CUR == 'I') && (NXT(1) == 'D')) {
3444: SKIP(2);
3445: return(XML_ATTRIBUTE_ID);
1.40 daniel 3446: } else if ((CUR == 'E') && (NXT(1) == 'N') &&
3447: (NXT(2) == 'T') && (NXT(3) == 'I') &&
3448: (NXT(4) == 'T') && (NXT(5) == 'Y')) {
3449: SKIP(6);
1.59 daniel 3450: return(XML_ATTRIBUTE_ENTITY);
1.40 daniel 3451: } else if ((CUR == 'E') && (NXT(1) == 'N') &&
3452: (NXT(2) == 'T') && (NXT(3) == 'I') &&
3453: (NXT(4) == 'T') && (NXT(5) == 'I') &&
3454: (NXT(6) == 'E') && (NXT(7) == 'S')) {
3455: SKIP(8);
1.59 daniel 3456: return(XML_ATTRIBUTE_ENTITIES);
1.40 daniel 3457: } else if ((CUR == 'N') && (NXT(1) == 'M') &&
3458: (NXT(2) == 'T') && (NXT(3) == 'O') &&
3459: (NXT(4) == 'K') && (NXT(5) == 'E') &&
1.66 daniel 3460: (NXT(6) == 'N') && (NXT(7) == 'S')) {
3461: SKIP(8);
3462: return(XML_ATTRIBUTE_NMTOKENS);
3463: } else if ((CUR == 'N') && (NXT(1) == 'M') &&
3464: (NXT(2) == 'T') && (NXT(3) == 'O') &&
3465: (NXT(4) == 'K') && (NXT(5) == 'E') &&
1.40 daniel 3466: (NXT(6) == 'N')) {
3467: SKIP(7);
1.59 daniel 3468: return(XML_ATTRIBUTE_NMTOKEN);
1.22 daniel 3469: }
1.66 daniel 3470: return(xmlParseEnumeratedType(ctxt, tree));
1.22 daniel 3471: }
3472:
1.50 daniel 3473: /**
3474: * xmlParseAttributeListDecl:
3475: * @ctxt: an XML parser context
3476: *
3477: * : parse the Attribute list def for an element
1.22 daniel 3478: *
3479: * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
3480: *
3481: * [53] AttDef ::= S Name S AttType S DefaultDecl
1.50 daniel 3482: *
1.22 daniel 3483: */
1.55 daniel 3484: void
3485: xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
1.59 daniel 3486: CHAR *elemName;
3487: CHAR *attrName;
1.103 daniel 3488: xmlEnumerationPtr tree;
1.22 daniel 3489:
1.40 daniel 3490: if ((CUR == '<') && (NXT(1) == '!') &&
3491: (NXT(2) == 'A') && (NXT(3) == 'T') &&
3492: (NXT(4) == 'T') && (NXT(5) == 'L') &&
3493: (NXT(6) == 'I') && (NXT(7) == 'S') &&
1.59 daniel 3494: (NXT(8) == 'T')) {
1.40 daniel 3495: SKIP(9);
1.59 daniel 3496: if (!IS_BLANK(CUR)) {
3497: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3498: ctxt->sax->error(ctxt->userData, "Space required after '<!ATTLIST'\n");
1.59 daniel 3499: ctxt->wellFormed = 0;
3500: }
1.42 daniel 3501: SKIP_BLANKS;
1.59 daniel 3502: elemName = xmlParseName(ctxt);
3503: if (elemName == NULL) {
1.55 daniel 3504: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3505: ctxt->sax->error(ctxt->userData, "ATTLIST: no name for Element\n");
1.59 daniel 3506: ctxt->wellFormed = 0;
1.22 daniel 3507: return;
3508: }
1.42 daniel 3509: SKIP_BLANKS;
1.40 daniel 3510: while (CUR != '>') {
3511: const CHAR *check = CUR_PTR;
1.59 daniel 3512: int type;
3513: int def;
3514: CHAR *defaultValue = NULL;
3515:
1.103 daniel 3516: tree = NULL;
1.59 daniel 3517: attrName = xmlParseName(ctxt);
3518: if (attrName == NULL) {
3519: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3520: ctxt->sax->error(ctxt->userData, "ATTLIST: no name for Attribute\n");
1.59 daniel 3521: ctxt->wellFormed = 0;
3522: break;
3523: }
1.97 daniel 3524: GROW;
1.59 daniel 3525: if (!IS_BLANK(CUR)) {
3526: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3527: ctxt->sax->error(ctxt->userData,
1.59 daniel 3528: "Space required after the attribute name\n");
3529: ctxt->wellFormed = 0;
3530: break;
3531: }
3532: SKIP_BLANKS;
3533:
1.66 daniel 3534: type = xmlParseAttributeType(ctxt, &tree);
1.59 daniel 3535: if (type <= 0) break;
1.22 daniel 3536:
1.97 daniel 3537: GROW;
1.59 daniel 3538: if (!IS_BLANK(CUR)) {
3539: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3540: ctxt->sax->error(ctxt->userData,
1.59 daniel 3541: "Space required after the attribute type\n");
3542: ctxt->wellFormed = 0;
3543: break;
3544: }
1.42 daniel 3545: SKIP_BLANKS;
1.59 daniel 3546:
3547: def = xmlParseDefaultDecl(ctxt, &defaultValue);
3548: if (def <= 0) break;
3549:
1.97 daniel 3550: GROW;
1.59 daniel 3551: if (CUR != '>') {
3552: if (!IS_BLANK(CUR)) {
3553: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3554: ctxt->sax->error(ctxt->userData,
1.59 daniel 3555: "Space required after the attribute default value\n");
3556: ctxt->wellFormed = 0;
3557: break;
3558: }
3559: SKIP_BLANKS;
3560: }
1.40 daniel 3561: if (check == CUR_PTR) {
1.55 daniel 3562: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3563: ctxt->sax->error(ctxt->userData,
1.59 daniel 3564: "xmlParseAttributeListDecl: detected internal error\n");
1.22 daniel 3565: break;
3566: }
1.72 daniel 3567: if ((ctxt->sax != NULL) && (ctxt->sax->attributeDecl != NULL))
1.74 daniel 3568: ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
1.66 daniel 3569: type, def, defaultValue, tree);
1.59 daniel 3570: if (attrName != NULL)
3571: free(attrName);
3572: if (defaultValue != NULL)
3573: free(defaultValue);
1.97 daniel 3574: GROW;
1.22 daniel 3575: }
1.40 daniel 3576: if (CUR == '>')
3577: NEXT;
1.22 daniel 3578:
1.59 daniel 3579: free(elemName);
1.22 daniel 3580: }
3581: }
3582:
1.50 daniel 3583: /**
1.61 daniel 3584: * xmlParseElementMixedContentDecl:
3585: * @ctxt: an XML parser context
3586: *
3587: * parse the declaration for a Mixed Element content
3588: * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
3589: *
3590: * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
3591: * '(' S? '#PCDATA' S? ')'
3592: *
1.99 daniel 3593: * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
3594: *
3595: * [ VC: No Duplicate Types ]
3596: * TODO The same name must not appear more than once in a single
3597: * mixed-content declaration.
3598: *
1.61 daniel 3599: * returns: the list of the xmlElementContentPtr describing the element choices
3600: */
3601: xmlElementContentPtr
1.62 daniel 3602: xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt) {
1.64 daniel 3603: xmlElementContentPtr ret = NULL, cur = NULL, n;
1.61 daniel 3604: CHAR *elem = NULL;
3605:
1.97 daniel 3606: GROW;
1.61 daniel 3607: if ((CUR == '#') && (NXT(1) == 'P') &&
3608: (NXT(2) == 'C') && (NXT(3) == 'D') &&
3609: (NXT(4) == 'A') && (NXT(5) == 'T') &&
3610: (NXT(6) == 'A')) {
3611: SKIP(7);
3612: SKIP_BLANKS;
1.91 daniel 3613: SHRINK;
1.63 daniel 3614: if (CUR == ')') {
3615: NEXT;
3616: ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
3617: return(ret);
3618: }
1.61 daniel 3619: if ((CUR == '(') || (CUR == '|')) {
3620: ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
3621: if (ret == NULL) return(NULL);
1.99 daniel 3622: }
1.61 daniel 3623: while (CUR == '|') {
1.64 daniel 3624: NEXT;
1.61 daniel 3625: if (elem == NULL) {
3626: ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
3627: if (ret == NULL) return(NULL);
3628: ret->c1 = cur;
1.64 daniel 3629: cur = ret;
1.61 daniel 3630: } else {
1.64 daniel 3631: n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
3632: if (n == NULL) return(NULL);
3633: n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
3634: cur->c2 = n;
3635: cur = n;
1.66 daniel 3636: free(elem);
1.61 daniel 3637: }
3638: SKIP_BLANKS;
3639: elem = xmlParseName(ctxt);
3640: if (elem == NULL) {
3641: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3642: ctxt->sax->error(ctxt->userData,
1.61 daniel 3643: "xmlParseElementMixedContentDecl : Name expected\n");
3644: ctxt->wellFormed = 0;
3645: xmlFreeElementContent(cur);
3646: return(NULL);
3647: }
3648: SKIP_BLANKS;
1.97 daniel 3649: GROW;
1.61 daniel 3650: }
1.63 daniel 3651: if ((CUR == ')') && (NXT(1) == '*')) {
1.66 daniel 3652: if (elem != NULL) {
1.61 daniel 3653: cur->c2 = xmlNewElementContent(elem,
3654: XML_ELEMENT_CONTENT_ELEMENT);
1.66 daniel 3655: free(elem);
3656: }
1.65 daniel 3657: ret->ocur = XML_ELEMENT_CONTENT_MULT;
1.64 daniel 3658: SKIP(2);
1.61 daniel 3659: } else {
1.66 daniel 3660: if (elem != NULL) free(elem);
1.61 daniel 3661: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3662: ctxt->sax->error(ctxt->userData,
1.63 daniel 3663: "xmlParseElementMixedContentDecl : '|' or ')*' expected\n");
1.61 daniel 3664: ctxt->wellFormed = 0;
3665: xmlFreeElementContent(ret);
3666: return(NULL);
3667: }
3668:
3669: } else {
3670: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3671: ctxt->sax->error(ctxt->userData,
1.61 daniel 3672: "xmlParseElementMixedContentDecl : '#PCDATA' expected\n");
3673: ctxt->wellFormed = 0;
3674: }
3675: return(ret);
3676: }
3677:
3678: /**
3679: * xmlParseElementChildrenContentDecl:
1.50 daniel 3680: * @ctxt: an XML parser context
3681: *
1.61 daniel 3682: * parse the declaration for a Mixed Element content
3683: * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
1.22 daniel 3684: *
1.61 daniel 3685: *
1.22 daniel 3686: * [47] children ::= (choice | seq) ('?' | '*' | '+')?
3687: *
3688: * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
3689: *
3690: * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
3691: *
3692: * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
3693: *
1.99 daniel 3694: * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
3695: * TODO Parameter-entity replacement text must be properly nested
3696: * with parenthetized groups. That is to say, if either of the
3697: * opening or closing parentheses in a choice, seq, or Mixed
3698: * construct is contained in the replacement text for a parameter
3699: * entity, both must be contained in the same replacement text. For
3700: * interoperability, if a parameter-entity reference appears in a
3701: * choice, seq, or Mixed construct, its replacement text should not
3702: * be empty, and neither the first nor last non-blank character of
3703: * the replacement text should be a connector (| or ,).
3704: *
1.62 daniel 3705: * returns: the tree of xmlElementContentPtr describing the element
1.61 daniel 3706: * hierarchy.
3707: */
3708: xmlElementContentPtr
1.62 daniel 3709: xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt) {
1.63 daniel 3710: xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
1.62 daniel 3711: CHAR *elem;
3712: CHAR type = 0;
3713:
3714: SKIP_BLANKS;
1.94 daniel 3715: GROW;
1.62 daniel 3716: if (CUR == '(') {
1.63 daniel 3717: /* Recurse on first child */
1.62 daniel 3718: NEXT;
3719: SKIP_BLANKS;
3720: cur = ret = xmlParseElementChildrenContentDecl(ctxt);
3721: SKIP_BLANKS;
1.101 daniel 3722: GROW;
1.62 daniel 3723: } else {
3724: elem = xmlParseName(ctxt);
3725: if (elem == NULL) {
3726: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3727: ctxt->sax->error(ctxt->userData,
1.62 daniel 3728: "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
3729: ctxt->wellFormed = 0;
3730: return(NULL);
3731: }
3732: cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
1.101 daniel 3733: GROW;
1.62 daniel 3734: if (CUR == '?') {
1.104 daniel 3735: cur->ocur = XML_ELEMENT_CONTENT_OPT;
1.62 daniel 3736: NEXT;
3737: } else if (CUR == '*') {
1.104 daniel 3738: cur->ocur = XML_ELEMENT_CONTENT_MULT;
1.62 daniel 3739: NEXT;
3740: } else if (CUR == '+') {
1.104 daniel 3741: cur->ocur = XML_ELEMENT_CONTENT_PLUS;
1.62 daniel 3742: NEXT;
3743: } else {
1.104 daniel 3744: cur->ocur = XML_ELEMENT_CONTENT_ONCE;
1.62 daniel 3745: }
1.66 daniel 3746: free(elem);
1.101 daniel 3747: GROW;
1.62 daniel 3748: }
3749: SKIP_BLANKS;
1.91 daniel 3750: SHRINK;
1.62 daniel 3751: while (CUR != ')') {
1.63 daniel 3752: /*
3753: * Each loop we parse one separator and one element.
3754: */
1.62 daniel 3755: if (CUR == ',') {
3756: if (type == 0) type = CUR;
3757:
3758: /*
3759: * Detect "Name | Name , Name" error
3760: */
3761: else if (type != CUR) {
3762: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3763: ctxt->sax->error(ctxt->userData,
1.62 daniel 3764: "xmlParseElementChildrenContentDecl : '%c' expected\n",
3765: type);
3766: ctxt->wellFormed = 0;
3767: xmlFreeElementContent(ret);
3768: return(NULL);
3769: }
1.64 daniel 3770: NEXT;
1.62 daniel 3771:
1.63 daniel 3772: op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ);
3773: if (op == NULL) {
3774: xmlFreeElementContent(ret);
3775: return(NULL);
3776: }
3777: if (last == NULL) {
3778: op->c1 = ret;
1.65 daniel 3779: ret = cur = op;
1.63 daniel 3780: } else {
3781: cur->c2 = op;
3782: op->c1 = last;
3783: cur =op;
1.65 daniel 3784: last = NULL;
1.63 daniel 3785: }
1.62 daniel 3786: } else if (CUR == '|') {
3787: if (type == 0) type = CUR;
3788:
3789: /*
1.63 daniel 3790: * Detect "Name , Name | Name" error
1.62 daniel 3791: */
3792: else if (type != CUR) {
3793: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3794: ctxt->sax->error(ctxt->userData,
1.62 daniel 3795: "xmlParseElementChildrenContentDecl : '%c' expected\n",
3796: type);
3797: ctxt->wellFormed = 0;
3798: xmlFreeElementContent(ret);
3799: return(NULL);
3800: }
1.64 daniel 3801: NEXT;
1.62 daniel 3802:
1.63 daniel 3803: op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
3804: if (op == NULL) {
3805: xmlFreeElementContent(ret);
3806: return(NULL);
3807: }
3808: if (last == NULL) {
3809: op->c1 = ret;
1.65 daniel 3810: ret = cur = op;
1.63 daniel 3811: } else {
3812: cur->c2 = op;
3813: op->c1 = last;
3814: cur =op;
1.65 daniel 3815: last = NULL;
1.63 daniel 3816: }
1.62 daniel 3817: } else {
3818: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3819: ctxt->sax->error(ctxt->userData,
1.62 daniel 3820: "xmlParseElementChildrenContentDecl : ',' '|' or ')' expected\n");
3821: ctxt->wellFormed = 0;
3822: xmlFreeElementContent(ret);
3823: return(NULL);
3824: }
1.101 daniel 3825: GROW;
1.62 daniel 3826: SKIP_BLANKS;
1.101 daniel 3827: GROW;
1.62 daniel 3828: if (CUR == '(') {
1.63 daniel 3829: /* Recurse on second child */
1.62 daniel 3830: NEXT;
3831: SKIP_BLANKS;
1.65 daniel 3832: last = xmlParseElementChildrenContentDecl(ctxt);
1.62 daniel 3833: SKIP_BLANKS;
3834: } else {
3835: elem = xmlParseName(ctxt);
3836: if (elem == NULL) {
3837: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3838: ctxt->sax->error(ctxt->userData,
1.62 daniel 3839: "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
3840: ctxt->wellFormed = 0;
3841: return(NULL);
3842: }
1.65 daniel 3843: last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
1.66 daniel 3844: free(elem);
1.105 daniel 3845: if (CUR == '?') {
3846: last->ocur = XML_ELEMENT_CONTENT_OPT;
3847: NEXT;
3848: } else if (CUR == '*') {
3849: last->ocur = XML_ELEMENT_CONTENT_MULT;
3850: NEXT;
3851: } else if (CUR == '+') {
3852: last->ocur = XML_ELEMENT_CONTENT_PLUS;
3853: NEXT;
3854: } else {
3855: last->ocur = XML_ELEMENT_CONTENT_ONCE;
3856: }
1.63 daniel 3857: }
3858: SKIP_BLANKS;
1.97 daniel 3859: GROW;
1.64 daniel 3860: }
1.65 daniel 3861: if ((cur != NULL) && (last != NULL)) {
3862: cur->c2 = last;
1.62 daniel 3863: }
3864: NEXT;
3865: if (CUR == '?') {
3866: ret->ocur = XML_ELEMENT_CONTENT_OPT;
3867: NEXT;
3868: } else if (CUR == '*') {
3869: ret->ocur = XML_ELEMENT_CONTENT_MULT;
3870: NEXT;
3871: } else if (CUR == '+') {
3872: ret->ocur = XML_ELEMENT_CONTENT_PLUS;
3873: NEXT;
3874: }
3875: return(ret);
1.61 daniel 3876: }
3877:
3878: /**
3879: * xmlParseElementContentDecl:
3880: * @ctxt: an XML parser context
3881: * @name: the name of the element being defined.
3882: * @result: the Element Content pointer will be stored here if any
1.22 daniel 3883: *
1.61 daniel 3884: * parse the declaration for an Element content either Mixed or Children,
3885: * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
3886: *
3887: * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
1.50 daniel 3888: *
1.61 daniel 3889: * returns: the type of element content XML_ELEMENT_TYPE_xxx
1.22 daniel 3890: */
3891:
1.61 daniel 3892: int
3893: xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, CHAR *name,
3894: xmlElementContentPtr *result) {
3895:
3896: xmlElementContentPtr tree = NULL;
3897: int res;
3898:
3899: *result = NULL;
3900:
3901: if (CUR != '(') {
3902: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3903: ctxt->sax->error(ctxt->userData,
1.61 daniel 3904: "xmlParseElementContentDecl : '(' expected\n");
3905: ctxt->wellFormed = 0;
3906: return(-1);
3907: }
3908: NEXT;
1.97 daniel 3909: GROW;
1.61 daniel 3910: SKIP_BLANKS;
3911: if ((CUR == '#') && (NXT(1) == 'P') &&
3912: (NXT(2) == 'C') && (NXT(3) == 'D') &&
3913: (NXT(4) == 'A') && (NXT(5) == 'T') &&
3914: (NXT(6) == 'A')) {
1.62 daniel 3915: tree = xmlParseElementMixedContentDecl(ctxt);
1.61 daniel 3916: res = XML_ELEMENT_TYPE_MIXED;
3917: } else {
1.62 daniel 3918: tree = xmlParseElementChildrenContentDecl(ctxt);
1.61 daniel 3919: res = XML_ELEMENT_TYPE_ELEMENT;
3920: }
3921: SKIP_BLANKS;
1.63 daniel 3922: /****************************
1.61 daniel 3923: if (CUR != ')') {
3924: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3925: ctxt->sax->error(ctxt->userData,
1.61 daniel 3926: "xmlParseElementContentDecl : ')' expected\n");
3927: ctxt->wellFormed = 0;
3928: return(-1);
3929: }
1.63 daniel 3930: ****************************/
3931: *result = tree;
1.61 daniel 3932: return(res);
1.22 daniel 3933: }
3934:
1.50 daniel 3935: /**
3936: * xmlParseElementDecl:
3937: * @ctxt: an XML parser context
3938: *
3939: * parse an Element declaration.
1.22 daniel 3940: *
3941: * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
3942: *
1.99 daniel 3943: * [ VC: Unique Element Type Declaration ]
3944: * TODO No element type may be declared more than once
1.69 daniel 3945: *
3946: * Returns the type of the element, or -1 in case of error
1.22 daniel 3947: */
1.59 daniel 3948: int
1.55 daniel 3949: xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
1.22 daniel 3950: CHAR *name;
1.59 daniel 3951: int ret = -1;
1.61 daniel 3952: xmlElementContentPtr content = NULL;
1.22 daniel 3953:
1.97 daniel 3954: GROW;
1.40 daniel 3955: if ((CUR == '<') && (NXT(1) == '!') &&
3956: (NXT(2) == 'E') && (NXT(3) == 'L') &&
3957: (NXT(4) == 'E') && (NXT(5) == 'M') &&
3958: (NXT(6) == 'E') && (NXT(7) == 'N') &&
1.59 daniel 3959: (NXT(8) == 'T')) {
1.40 daniel 3960: SKIP(9);
1.59 daniel 3961: if (!IS_BLANK(CUR)) {
3962: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3963: ctxt->sax->error(ctxt->userData,
1.59 daniel 3964: "Space required after 'ELEMENT'\n");
3965: ctxt->wellFormed = 0;
3966: }
1.42 daniel 3967: SKIP_BLANKS;
1.22 daniel 3968: name = xmlParseName(ctxt);
3969: if (name == NULL) {
1.55 daniel 3970: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3971: ctxt->sax->error(ctxt->userData,
1.59 daniel 3972: "xmlParseElementDecl: no name for Element\n");
3973: ctxt->wellFormed = 0;
3974: return(-1);
3975: }
3976: if (!IS_BLANK(CUR)) {
3977: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3978: ctxt->sax->error(ctxt->userData,
1.59 daniel 3979: "Space required after the element name\n");
3980: ctxt->wellFormed = 0;
1.22 daniel 3981: }
1.42 daniel 3982: SKIP_BLANKS;
1.40 daniel 3983: if ((CUR == 'E') && (NXT(1) == 'M') &&
3984: (NXT(2) == 'P') && (NXT(3) == 'T') &&
3985: (NXT(4) == 'Y')) {
3986: SKIP(5);
1.22 daniel 3987: /*
3988: * Element must always be empty.
3989: */
1.59 daniel 3990: ret = XML_ELEMENT_TYPE_EMPTY;
1.40 daniel 3991: } else if ((CUR == 'A') && (NXT(1) == 'N') &&
3992: (NXT(2) == 'Y')) {
3993: SKIP(3);
1.22 daniel 3994: /*
3995: * Element is a generic container.
3996: */
1.59 daniel 3997: ret = XML_ELEMENT_TYPE_ANY;
1.61 daniel 3998: } else if (CUR == '(') {
3999: ret = xmlParseElementContentDecl(ctxt, name, &content);
1.22 daniel 4000: } else {
1.98 daniel 4001: /*
4002: * [ WFC: PEs in Internal Subset ] error handling.
4003: */
4004: if ((CUR == '%') && (ctxt->external == 0) &&
4005: (ctxt->inputNr == 1)) {
4006: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4007: ctxt->sax->error(ctxt->userData,
4008: "PEReference: forbidden within markup decl in internal subset\n");
4009: } else {
4010: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4011: ctxt->sax->error(ctxt->userData,
4012: "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
4013: }
1.61 daniel 4014: ctxt->wellFormed = 0;
4015: if (name != NULL) free(name);
4016: return(-1);
1.22 daniel 4017: }
1.42 daniel 4018: SKIP_BLANKS;
1.40 daniel 4019: if (CUR != '>') {
1.55 daniel 4020: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4021: ctxt->sax->error(ctxt->userData,
1.31 daniel 4022: "xmlParseElementDecl: expected '>' at the end\n");
1.59 daniel 4023: ctxt->wellFormed = 0;
1.61 daniel 4024: } else {
1.40 daniel 4025: NEXT;
1.72 daniel 4026: if ((ctxt->sax != NULL) && (ctxt->sax->elementDecl != NULL))
1.76 daniel 4027: ctxt->sax->elementDecl(ctxt->userData, name, ret,
4028: content);
1.61 daniel 4029: }
1.84 daniel 4030: if (content != NULL) {
4031: xmlFreeElementContent(content);
4032: }
1.61 daniel 4033: if (name != NULL) {
4034: free(name);
4035: }
1.22 daniel 4036: }
1.59 daniel 4037: return(ret);
1.22 daniel 4038: }
4039:
1.50 daniel 4040: /**
4041: * xmlParseMarkupDecl:
4042: * @ctxt: an XML parser context
4043: *
4044: * parse Markup declarations
1.22 daniel 4045: *
4046: * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
4047: * NotationDecl | PI | Comment
4048: *
1.98 daniel 4049: * [ VC: Proper Declaration/PE Nesting ]
4050: * TODO Parameter-entity replacement text must be properly nested with
4051: * markup declarations. That is to say, if either the first character
4052: * or the last character of a markup declaration (markupdecl above) is
4053: * contained in the replacement text for a parameter-entity reference,
4054: * both must be contained in the same replacement text.
4055: *
4056: * [ WFC: PEs in Internal Subset ]
4057: * In the internal DTD subset, parameter-entity references can occur
4058: * only where markup declarations can occur, not within markup declarations.
4059: * (This does not apply to references that occur in external parameter
4060: * entities or to the external subset.)
1.22 daniel 4061: */
1.55 daniel 4062: void
4063: xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
1.94 daniel 4064: GROW;
1.22 daniel 4065: xmlParseElementDecl(ctxt);
4066: xmlParseAttributeListDecl(ctxt);
4067: xmlParseEntityDecl(ctxt);
4068: xmlParseNotationDecl(ctxt);
4069: xmlParsePI(ctxt);
1.114 daniel 4070: xmlParseComment(ctxt);
1.98 daniel 4071: /*
4072: * This is only for internal subset. On external entities,
4073: * the replacement is done before parsing stage
4074: */
4075: if ((ctxt->external == 0) && (ctxt->inputNr == 1))
4076: xmlParsePEReference(ctxt);
1.97 daniel 4077: ctxt->instate = XML_PARSER_DTD;
1.22 daniel 4078: }
4079:
1.50 daniel 4080: /**
1.76 daniel 4081: * xmlParseTextDecl:
4082: * @ctxt: an XML parser context
4083: *
4084: * parse an XML declaration header for external entities
4085: *
4086: * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
4087: *
4088: * Returns the only valuable info for an external parsed entity, the encoding
4089: */
4090:
4091: CHAR *
4092: xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
4093: CHAR *version;
4094: CHAR *encoding = NULL;
4095:
4096: /*
4097: * We know that '<?xml' is here.
4098: */
4099: SKIP(5);
4100:
4101: if (!IS_BLANK(CUR)) {
4102: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4103: ctxt->sax->error(ctxt->userData, "Blank needed after '<?xml'\n");
4104: ctxt->wellFormed = 0;
4105: }
4106: SKIP_BLANKS;
4107:
4108: /*
4109: * We may have the VersionInfo here.
4110: */
4111: version = xmlParseVersionInfo(ctxt);
1.99 daniel 4112:
1.76 daniel 4113: /* TODO: we should actually inherit from the referencing doc if absent
4114: if (version == NULL)
4115: version = xmlCharStrdup(XML_DEFAULT_VERSION);
4116: ctxt->version = xmlStrdup(version);
4117: */
1.99 daniel 4118:
1.76 daniel 4119: if (version != NULL)
4120: free(version);
4121:
4122: /*
4123: * We must have the encoding declaration
4124: */
4125: if (!IS_BLANK(CUR)) {
4126: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4127: ctxt->sax->error(ctxt->userData, "Blank needed here\n");
4128: ctxt->wellFormed = 0;
4129: }
4130: encoding = xmlParseEncodingDecl(ctxt);
4131:
4132: SKIP_BLANKS;
4133: if ((CUR == '?') && (NXT(1) == '>')) {
4134: SKIP(2);
4135: } else if (CUR == '>') {
4136: /* Deprecated old WD ... */
4137: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4138: ctxt->sax->error(ctxt->userData, "XML declaration must end-up with '?>'\n");
4139: ctxt->wellFormed = 0;
4140: NEXT;
4141: } else {
4142: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4143: ctxt->sax->error(ctxt->userData, "parsing XML declaration: '?>' expected\n");
4144: ctxt->wellFormed = 0;
4145: MOVETO_ENDTAG(CUR_PTR);
4146: NEXT;
4147: }
4148: return(encoding);
4149: }
4150:
4151: /*
4152: * xmlParseConditionalSections
4153: * @ctxt: an XML parser context
4154: *
4155: * TODO : Conditionnal section are not yet supported !
4156: *
4157: * [61] conditionalSect ::= includeSect | ignoreSect
4158: * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
4159: * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
4160: * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
4161: * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
4162: */
4163:
4164: void
4165: xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
4166: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
4167: ctxt->sax->warning(ctxt->userData,
4168: "XML conditional section not supported\n");
4169: /*
4170: * Skip up to the end of the conditionnal section.
4171: */
4172: while ((CUR != 0) && ((CUR != ']') || (NXT(1) != ']') || (NXT(2) != '>')))
4173: NEXT;
4174: if (CUR == 0) {
4175: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4176: ctxt->sax->error(ctxt->userData,
4177: "XML conditional section not closed\n");
4178: ctxt->wellFormed = 0;
4179: }
4180: }
4181:
4182: /**
4183: * xmlParseExternalSubset
4184: * @ctxt: an XML parser context
4185: *
4186: * parse Markup declarations from an external subset
4187: *
4188: * [30] extSubset ::= textDecl? extSubsetDecl
4189: *
4190: * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
4191: *
4192: * TODO There is a check [ VC: Proper Declaration/PE Nesting ]
4193: */
4194: void
1.79 daniel 4195: xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const CHAR *ExternalID,
4196: const CHAR *SystemID) {
1.76 daniel 4197: if ((CUR == '<') && (NXT(1) == '?') &&
4198: (NXT(2) == 'x') && (NXT(3) == 'm') &&
4199: (NXT(4) == 'l')) {
4200: xmlParseTextDecl(ctxt);
4201: }
1.79 daniel 4202: if (ctxt->myDoc == NULL) {
4203: ctxt->myDoc = xmlNewDoc("1.0");
4204: }
4205: if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
4206: xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
4207:
1.96 daniel 4208: ctxt->instate = XML_PARSER_DTD;
1.101 daniel 4209: ctxt->external = 1;
1.76 daniel 4210: while (((CUR == '<') && (NXT(1) == '?')) ||
4211: ((CUR == '<') && (NXT(1) == '!')) ||
4212: IS_BLANK(CUR)) {
1.115 ! daniel 4213: const CHAR *check = CUR_PTR;
! 4214: int cons = ctxt->input->consumed;
! 4215:
1.76 daniel 4216: if ((CUR == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4217: xmlParseConditionalSections(ctxt);
4218: } else if (IS_BLANK(CUR)) {
4219: NEXT;
4220: } else if (CUR == '%') {
4221: xmlParsePEReference(ctxt);
4222: } else
4223: xmlParseMarkupDecl(ctxt);
1.77 daniel 4224:
4225: /*
4226: * Pop-up of finished entities.
4227: */
4228: while ((CUR == 0) && (ctxt->inputNr > 1))
4229: xmlPopInput(ctxt);
4230:
1.115 ! daniel 4231: if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
! 4232: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
! 4233: ctxt->sax->error(ctxt->userData,
! 4234: "Content error in the external subset\n");
! 4235: ctxt->wellFormed = 0;
! 4236: break;
! 4237: }
1.76 daniel 4238: }
4239:
4240: if (CUR != 0) {
4241: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4242: ctxt->sax->error(ctxt->userData,
4243: "Extra content at the end of the document\n");
4244: ctxt->wellFormed = 0;
4245: }
4246:
4247: }
4248:
4249: /**
1.77 daniel 4250: * xmlParseReference:
4251: * @ctxt: an XML parser context
4252: *
4253: * parse and handle entity references in content, depending on the SAX
4254: * interface, this may end-up in a call to character() if this is a
1.79 daniel 4255: * CharRef, a predefined entity, if there is no reference() callback.
4256: * or if the parser was asked to switch to that mode.
1.77 daniel 4257: *
4258: * [67] Reference ::= EntityRef | CharRef
4259: */
4260: void
4261: xmlParseReference(xmlParserCtxtPtr ctxt) {
4262: xmlEntityPtr ent;
4263: CHAR *val;
4264: if (CUR != '&') return;
4265:
1.113 daniel 4266: if (ctxt->inputNr > 1) {
4267: CHAR cur[2] = { '&' , 0 } ;
4268:
4269: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
4270: ctxt->sax->characters(ctxt->userData, cur, 1);
4271: if (ctxt->token == '&')
4272: ctxt->token = 0;
4273: else {
4274: SKIP(1);
4275: }
4276: return;
4277: }
1.77 daniel 4278: if (NXT(1) == '#') {
4279: CHAR out[2];
4280: int val = xmlParseCharRef(ctxt);
4281: /* TODO: invalid for UTF-8 variable encoding !!! */
4282: out[0] = val;
4283: out[1] = 0;
4284: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
4285: ctxt->sax->characters(ctxt->userData, out, 1);
4286: } else {
4287: ent = xmlParseEntityRef(ctxt);
4288: if (ent == NULL) return;
4289: if ((ent->name != NULL) &&
1.113 daniel 4290: (ent->type != XML_INTERNAL_PREDEFINED_ENTITY)) {
4291: if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
4292: (ctxt->replaceEntities == 0)) {
4293: /*
4294: * Create a node.
4295: */
4296: ctxt->sax->reference(ctxt->userData, ent->name);
4297: return;
4298: } else if (ctxt->replaceEntities) {
4299: xmlParserInputPtr input;
1.79 daniel 4300:
1.113 daniel 4301: input = xmlNewEntityInputStream(ctxt, ent);
4302: xmlPushInput(ctxt, input);
4303: return;
4304: }
1.77 daniel 4305: }
4306: val = ent->content;
4307: if (val == NULL) return;
4308: /*
4309: * inline the entity.
4310: */
4311: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
4312: ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
4313: }
1.24 daniel 4314: }
4315:
1.50 daniel 4316: /**
4317: * xmlParseEntityRef:
4318: * @ctxt: an XML parser context
4319: *
4320: * parse ENTITY references declarations
1.24 daniel 4321: *
4322: * [68] EntityRef ::= '&' Name ';'
1.68 daniel 4323: *
1.98 daniel 4324: * [ WFC: Entity Declared ]
4325: * In a document without any DTD, a document with only an internal DTD
4326: * subset which contains no parameter entity references, or a document
4327: * with "standalone='yes'", the Name given in the entity reference
4328: * must match that in an entity declaration, except that well-formed
4329: * documents need not declare any of the following entities: amp, lt,
4330: * gt, apos, quot. The declaration of a parameter entity must precede
4331: * any reference to it. Similarly, the declaration of a general entity
4332: * must precede any reference to it which appears in a default value in an
4333: * attribute-list declaration. Note that if entities are declared in the
4334: * external subset or in external parameter entities, a non-validating
4335: * processor is not obligated to read and process their declarations;
4336: * for such documents, the rule that an entity must be declared is a
4337: * well-formedness constraint only if standalone='yes'.
4338: *
4339: * [ WFC: Parsed Entity ]
4340: * An entity reference must not contain the name of an unparsed entity
4341: *
1.77 daniel 4342: * Returns the xmlEntityPtr if found, or NULL otherwise.
1.24 daniel 4343: */
1.77 daniel 4344: xmlEntityPtr
1.55 daniel 4345: xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
1.24 daniel 4346: CHAR *name;
1.72 daniel 4347: xmlEntityPtr ent = NULL;
1.24 daniel 4348:
1.91 daniel 4349: GROW;
1.111 daniel 4350:
1.40 daniel 4351: if (CUR == '&') {
4352: NEXT;
1.24 daniel 4353: name = xmlParseName(ctxt);
4354: if (name == NULL) {
1.55 daniel 4355: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.98 daniel 4356: ctxt->sax->error(ctxt->userData,
4357: "xmlParseEntityRef: no name\n");
1.59 daniel 4358: ctxt->wellFormed = 0;
1.24 daniel 4359: } else {
1.40 daniel 4360: if (CUR == ';') {
4361: NEXT;
1.24 daniel 4362: /*
1.77 daniel 4363: * Ask first SAX for entity resolution, otherwise try the
4364: * predefined set.
4365: */
4366: if (ctxt->sax != NULL) {
4367: if (ctxt->sax->getEntity != NULL)
4368: ent = ctxt->sax->getEntity(ctxt->userData, name);
4369: if (ent == NULL)
4370: ent = xmlGetPredefinedEntity(name);
4371: }
4372: /*
1.98 daniel 4373: * [ WFC: Entity Declared ]
4374: * In a document without any DTD, a document with only an
4375: * internal DTD subset which contains no parameter entity
4376: * references, or a document with "standalone='yes'", the
4377: * Name given in the entity reference must match that in an
4378: * entity declaration, except that well-formed documents
4379: * need not declare any of the following entities: amp, lt,
4380: * gt, apos, quot.
4381: * The declaration of a parameter entity must precede any
4382: * reference to it.
4383: * Similarly, the declaration of a general entity must
4384: * precede any reference to it which appears in a default
4385: * value in an attribute-list declaration. Note that if
4386: * entities are declared in the external subset or in
4387: * external parameter entities, a non-validating processor
4388: * is not obligated to read and process their declarations;
4389: * for such documents, the rule that an entity must be
4390: * declared is a well-formedness constraint only if
4391: * standalone='yes'.
1.59 daniel 4392: */
1.77 daniel 4393: if (ent == NULL) {
1.98 daniel 4394: if ((ctxt->standalone == 1) ||
4395: ((ctxt->hasExternalSubset == 0) &&
4396: (ctxt->hasPErefs == 0))) {
4397: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.77 daniel 4398: ctxt->sax->error(ctxt->userData,
4399: "Entity '%s' not defined\n", name);
4400: ctxt->wellFormed = 0;
4401: } else {
1.98 daniel 4402: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
4403: ctxt->sax->warning(ctxt->userData,
4404: "Entity '%s' not defined\n", name);
1.59 daniel 4405: }
1.77 daniel 4406: }
1.59 daniel 4407:
4408: /*
1.98 daniel 4409: * [ WFC: Parsed Entity ]
4410: * An entity reference must not contain the name of an
4411: * unparsed entity
4412: */
4413: else if (ent->type == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
4414: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4415: ctxt->sax->error(ctxt->userData,
4416: "Entity reference to unparsed entity %s\n", name);
4417: ctxt->wellFormed = 0;
4418: }
4419:
4420: /*
4421: * [ WFC: No External Entity References ]
4422: * Attribute values cannot contain direct or indirect
4423: * entity references to external entities.
4424: */
4425: else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
4426: (ent->type == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
4427: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4428: ctxt->sax->error(ctxt->userData,
4429: "Attribute references external entity '%s'\n", name);
4430: ctxt->wellFormed = 0;
4431: }
4432: /*
4433: * [ WFC: No < in Attribute Values ]
4434: * The replacement text of any entity referred to directly or
4435: * indirectly in an attribute value (other than "<") must
4436: * not contain a <.
1.59 daniel 4437: */
1.98 daniel 4438: else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
4439: (ent != NULL) && (xmlStrcmp(ent->name, "lt")) &&
4440: (ent->content != NULL) &&
4441: (xmlStrchr(ent->content, '<'))) {
4442: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4443: ctxt->sax->error(ctxt->userData,
4444: "'<' in entity '%s' is not allowed in attributes values\n", name);
4445: ctxt->wellFormed = 0;
4446: }
4447:
4448: /*
4449: * Internal check, no parameter entities here ...
4450: */
4451: else {
1.59 daniel 4452: switch (ent->type) {
4453: case XML_INTERNAL_PARAMETER_ENTITY:
4454: case XML_EXTERNAL_PARAMETER_ENTITY:
4455: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4456: ctxt->sax->error(ctxt->userData,
1.59 daniel 4457: "Attempt to reference the parameter entity '%s'\n", name);
4458: ctxt->wellFormed = 0;
4459: break;
4460: }
4461: }
4462:
4463: /*
1.98 daniel 4464: * [ WFC: No Recursion ]
4465: * TODO A parsed entity must not contain a recursive
4466: * reference to itself, either directly or indirectly.
1.59 daniel 4467: */
1.77 daniel 4468:
1.24 daniel 4469: } else {
1.55 daniel 4470: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4471: ctxt->sax->error(ctxt->userData,
1.59 daniel 4472: "xmlParseEntityRef: expecting ';'\n");
4473: ctxt->wellFormed = 0;
1.24 daniel 4474: }
1.45 daniel 4475: free(name);
1.24 daniel 4476: }
4477: }
1.77 daniel 4478: return(ent);
1.24 daniel 4479: }
4480:
1.50 daniel 4481: /**
4482: * xmlParsePEReference:
4483: * @ctxt: an XML parser context
4484: *
4485: * parse PEReference declarations
1.77 daniel 4486: * The entity content is handled directly by pushing it's content as
4487: * a new input stream.
1.22 daniel 4488: *
4489: * [69] PEReference ::= '%' Name ';'
1.68 daniel 4490: *
1.98 daniel 4491: * [ WFC: No Recursion ]
4492: * TODO A parsed entity must not contain a recursive
4493: * reference to itself, either directly or indirectly.
4494: *
4495: * [ WFC: Entity Declared ]
4496: * In a document without any DTD, a document with only an internal DTD
4497: * subset which contains no parameter entity references, or a document
4498: * with "standalone='yes'", ... ... The declaration of a parameter
4499: * entity must precede any reference to it...
4500: *
4501: * [ VC: Entity Declared ]
4502: * In a document with an external subset or external parameter entities
4503: * with "standalone='no'", ... ... The declaration of a parameter entity
4504: * must precede any reference to it...
4505: *
4506: * [ WFC: In DTD ]
4507: * Parameter-entity references may only appear in the DTD.
4508: * NOTE: misleading but this is handled.
1.22 daniel 4509: */
1.77 daniel 4510: void
1.55 daniel 4511: xmlParsePEReference(xmlParserCtxtPtr ctxt) {
1.22 daniel 4512: CHAR *name;
1.72 daniel 4513: xmlEntityPtr entity = NULL;
1.50 daniel 4514: xmlParserInputPtr input;
1.22 daniel 4515:
1.40 daniel 4516: if (CUR == '%') {
4517: NEXT;
1.22 daniel 4518: name = xmlParseName(ctxt);
4519: if (name == NULL) {
1.55 daniel 4520: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4521: ctxt->sax->error(ctxt->userData, "xmlParsePEReference: no name\n");
1.59 daniel 4522: ctxt->wellFormed = 0;
1.22 daniel 4523: } else {
1.40 daniel 4524: if (CUR == ';') {
4525: NEXT;
1.98 daniel 4526: if ((ctxt->sax != NULL) &&
4527: (ctxt->sax->getParameterEntity != NULL))
4528: entity = ctxt->sax->getParameterEntity(ctxt->userData,
4529: name);
1.45 daniel 4530: if (entity == NULL) {
1.98 daniel 4531: /*
4532: * [ WFC: Entity Declared ]
4533: * In a document without any DTD, a document with only an
4534: * internal DTD subset which contains no parameter entity
4535: * references, or a document with "standalone='yes'", ...
4536: * ... The declaration of a parameter entity must precede
4537: * any reference to it...
4538: */
4539: if ((ctxt->standalone == 1) ||
4540: ((ctxt->hasExternalSubset == 0) &&
4541: (ctxt->hasPErefs == 0))) {
4542: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4543: ctxt->sax->error(ctxt->userData,
4544: "PEReference: %%%s; not found\n", name);
4545: ctxt->wellFormed = 0;
4546: } else {
4547: /*
4548: * [ VC: Entity Declared ]
4549: * In a document with an external subset or external
4550: * parameter entities with "standalone='no'", ...
4551: * ... The declaration of a parameter entity must precede
4552: * any reference to it...
4553: */
4554: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
4555: ctxt->sax->warning(ctxt->userData,
4556: "PEReference: %%%s; not found\n", name);
4557: ctxt->valid = 0;
4558: }
1.50 daniel 4559: } else {
1.98 daniel 4560: /*
4561: * Internal checking in case the entity quest barfed
4562: */
4563: if ((entity->type != XML_INTERNAL_PARAMETER_ENTITY) &&
4564: (entity->type != XML_EXTERNAL_PARAMETER_ENTITY)) {
4565: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
4566: ctxt->sax->warning(ctxt->userData,
4567: "Internal: %%%s; is not a parameter entity\n", name);
4568: } else {
4569: input = xmlNewEntityInputStream(ctxt, entity);
4570: xmlPushInput(ctxt, input);
4571: }
1.45 daniel 4572: }
1.98 daniel 4573: ctxt->hasPErefs = 1;
1.22 daniel 4574: } else {
1.55 daniel 4575: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4576: ctxt->sax->error(ctxt->userData,
1.59 daniel 4577: "xmlParsePEReference: expecting ';'\n");
4578: ctxt->wellFormed = 0;
1.22 daniel 4579: }
1.45 daniel 4580: free(name);
1.3 veillard 4581: }
4582: }
4583: }
4584:
1.50 daniel 4585: /**
4586: * xmlParseDocTypeDecl :
4587: * @ctxt: an XML parser context
4588: *
4589: * parse a DOCTYPE declaration
1.21 daniel 4590: *
1.22 daniel 4591: * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
4592: * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
1.98 daniel 4593: *
4594: * [ VC: Root Element Type ]
1.99 daniel 4595: * The Name in the document type declaration must match the element
1.98 daniel 4596: * type of the root element.
1.21 daniel 4597: */
4598:
1.55 daniel 4599: void
4600: xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
1.21 daniel 4601: CHAR *name;
4602: CHAR *ExternalID = NULL;
1.39 daniel 4603: CHAR *URI = NULL;
1.21 daniel 4604:
4605: /*
4606: * We know that '<!DOCTYPE' has been detected.
4607: */
1.40 daniel 4608: SKIP(9);
1.21 daniel 4609:
1.42 daniel 4610: SKIP_BLANKS;
1.21 daniel 4611:
4612: /*
4613: * Parse the DOCTYPE name.
4614: */
4615: name = xmlParseName(ctxt);
4616: if (name == NULL) {
1.55 daniel 4617: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4618: ctxt->sax->error(ctxt->userData, "xmlParseDocTypeDecl : no DOCTYPE name !\n");
1.59 daniel 4619: ctxt->wellFormed = 0;
1.21 daniel 4620: }
4621:
1.42 daniel 4622: SKIP_BLANKS;
1.21 daniel 4623:
4624: /*
1.22 daniel 4625: * Check for SystemID and ExternalID
4626: */
1.67 daniel 4627: URI = xmlParseExternalID(ctxt, &ExternalID, 1);
1.98 daniel 4628:
4629: if ((URI != NULL) || (ExternalID != NULL)) {
4630: ctxt->hasExternalSubset = 1;
4631: }
4632:
1.42 daniel 4633: SKIP_BLANKS;
1.36 daniel 4634:
1.76 daniel 4635: /*
4636: * NOTE: the SAX callback may try to fetch the external subset
4637: * entity and fill it up !
4638: */
1.72 daniel 4639: if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL))
1.74 daniel 4640: ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
1.22 daniel 4641:
4642: /*
4643: * Is there any DTD definition ?
4644: */
1.40 daniel 4645: if (CUR == '[') {
1.96 daniel 4646: ctxt->instate = XML_PARSER_DTD;
1.40 daniel 4647: NEXT;
1.22 daniel 4648: /*
4649: * Parse the succession of Markup declarations and
4650: * PEReferences.
4651: * Subsequence (markupdecl | PEReference | S)*
4652: */
1.40 daniel 4653: while (CUR != ']') {
4654: const CHAR *check = CUR_PTR;
1.115 ! daniel 4655: int cons = ctxt->input->consumed;
1.22 daniel 4656:
1.42 daniel 4657: SKIP_BLANKS;
1.22 daniel 4658: xmlParseMarkupDecl(ctxt);
1.50 daniel 4659: xmlParsePEReference(ctxt);
1.22 daniel 4660:
1.115 ! daniel 4661: /*
! 4662: * Pop-up of finished entities.
! 4663: */
! 4664: while ((CUR == 0) && (ctxt->inputNr > 1))
! 4665: xmlPopInput(ctxt);
! 4666:
! 4667: if ((CUR_PTR == check) && (cons = ctxt->input->consumed)) {
1.55 daniel 4668: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4669: ctxt->sax->error(ctxt->userData,
1.31 daniel 4670: "xmlParseDocTypeDecl: error detected in Markup declaration\n");
1.59 daniel 4671: ctxt->wellFormed = 0;
1.22 daniel 4672: break;
4673: }
4674: }
1.40 daniel 4675: if (CUR == ']') NEXT;
1.22 daniel 4676: }
4677:
4678: /*
4679: * We should be at the end of the DOCTYPE declaration.
1.21 daniel 4680: */
1.40 daniel 4681: if (CUR != '>') {
1.55 daniel 4682: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4683: ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n");
1.59 daniel 4684: ctxt->wellFormed = 0;
1.22 daniel 4685: /* We shouldn't try to resynchronize ... */
1.21 daniel 4686: }
1.40 daniel 4687: NEXT;
1.22 daniel 4688:
4689: /*
1.99 daniel 4690: * Cleanup
1.22 daniel 4691: */
1.39 daniel 4692: if (URI != NULL) free(URI);
1.22 daniel 4693: if (ExternalID != NULL) free(ExternalID);
4694: if (name != NULL) free(name);
1.21 daniel 4695: }
4696:
1.50 daniel 4697: /**
4698: * xmlParseAttribute:
4699: * @ctxt: an XML parser context
1.72 daniel 4700: * @value: a CHAR ** used to store the value of the attribute
1.50 daniel 4701: *
4702: * parse an attribute
1.3 veillard 4703: *
1.22 daniel 4704: * [41] Attribute ::= Name Eq AttValue
4705: *
1.98 daniel 4706: * [ WFC: No External Entity References ]
4707: * Attribute values cannot contain direct or indirect entity references
4708: * to external entities.
4709: *
4710: * [ WFC: No < in Attribute Values ]
4711: * The replacement text of any entity referred to directly or indirectly in
4712: * an attribute value (other than "<") must not contain a <.
4713: *
4714: * [ VC: Attribute Value Type ]
4715: * TODO The attribute must have been declared; the value must be of the type
1.99 daniel 4716: * declared for it.
1.98 daniel 4717: *
1.22 daniel 4718: * [25] Eq ::= S? '=' S?
4719: *
1.29 daniel 4720: * With namespace:
4721: *
4722: * [NS 11] Attribute ::= QName Eq AttValue
1.43 daniel 4723: *
4724: * Also the case QName == xmlns:??? is handled independently as a namespace
4725: * definition.
1.69 daniel 4726: *
1.72 daniel 4727: * Returns the attribute name, and the value in *value.
1.3 veillard 4728: */
4729:
1.72 daniel 4730: CHAR *
4731: xmlParseAttribute(xmlParserCtxtPtr ctxt, CHAR **value) {
1.59 daniel 4732: CHAR *name, *val;
1.3 veillard 4733:
1.72 daniel 4734: *value = NULL;
4735: name = xmlParseName(ctxt);
1.22 daniel 4736: if (name == NULL) {
1.55 daniel 4737: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4738: ctxt->sax->error(ctxt->userData, "error parsing attribute name\n");
1.59 daniel 4739: ctxt->wellFormed = 0;
1.52 daniel 4740: return(NULL);
1.3 veillard 4741: }
4742:
4743: /*
1.29 daniel 4744: * read the value
1.3 veillard 4745: */
1.42 daniel 4746: SKIP_BLANKS;
1.40 daniel 4747: if (CUR == '=') {
4748: NEXT;
1.42 daniel 4749: SKIP_BLANKS;
1.72 daniel 4750: val = xmlParseAttValue(ctxt);
1.96 daniel 4751: ctxt->instate = XML_PARSER_CONTENT;
1.29 daniel 4752: } else {
1.55 daniel 4753: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4754: ctxt->sax->error(ctxt->userData,
1.59 daniel 4755: "Specification mandate value for attribute %s\n", name);
4756: ctxt->wellFormed = 0;
1.52 daniel 4757: return(NULL);
1.43 daniel 4758: }
4759:
1.72 daniel 4760: *value = val;
4761: return(name);
1.3 veillard 4762: }
4763:
1.50 daniel 4764: /**
4765: * xmlParseStartTag:
4766: * @ctxt: an XML parser context
4767: *
4768: * parse a start of tag either for rule element or
4769: * EmptyElement. In both case we don't parse the tag closing chars.
1.27 daniel 4770: *
4771: * [40] STag ::= '<' Name (S Attribute)* S? '>'
4772: *
1.98 daniel 4773: * [ WFC: Unique Att Spec ]
4774: * No attribute name may appear more than once in the same start-tag or
4775: * empty-element tag.
4776: *
1.29 daniel 4777: * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
4778: *
1.98 daniel 4779: * [ WFC: Unique Att Spec ]
4780: * No attribute name may appear more than once in the same start-tag or
4781: * empty-element tag.
4782: *
1.29 daniel 4783: * With namespace:
4784: *
4785: * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
4786: *
4787: * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
1.83 daniel 4788: *
4789: * Returns the element name parsed
1.2 veillard 4790: */
4791:
1.83 daniel 4792: CHAR *
1.69 daniel 4793: xmlParseStartTag(xmlParserCtxtPtr ctxt) {
1.72 daniel 4794: CHAR *name;
4795: CHAR *attname;
4796: CHAR *attvalue;
4797: const CHAR **atts = NULL;
4798: int nbatts = 0;
4799: int maxatts = 0;
4800: int i;
1.2 veillard 4801:
1.83 daniel 4802: if (CUR != '<') return(NULL);
1.40 daniel 4803: NEXT;
1.3 veillard 4804:
1.72 daniel 4805: name = xmlParseName(ctxt);
1.59 daniel 4806: if (name == NULL) {
4807: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4808: ctxt->sax->error(ctxt->userData,
1.59 daniel 4809: "xmlParseStartTag: invalid element name\n");
4810: ctxt->wellFormed = 0;
1.83 daniel 4811: return(NULL);
1.50 daniel 4812: }
4813:
4814: /*
1.3 veillard 4815: * Now parse the attributes, it ends up with the ending
4816: *
4817: * (S Attribute)* S?
4818: */
1.42 daniel 4819: SKIP_BLANKS;
1.91 daniel 4820: GROW;
1.40 daniel 4821: while ((IS_CHAR(CUR)) &&
4822: (CUR != '>') &&
4823: ((CUR != '/') || (NXT(1) != '>'))) {
4824: const CHAR *q = CUR_PTR;
1.91 daniel 4825: int cons = ctxt->input->consumed;
1.29 daniel 4826:
1.72 daniel 4827: attname = xmlParseAttribute(ctxt, &attvalue);
4828: if ((attname != NULL) && (attvalue != NULL)) {
4829: /*
1.98 daniel 4830: * [ WFC: Unique Att Spec ]
4831: * No attribute name may appear more than once in the same
4832: * start-tag or empty-element tag.
1.72 daniel 4833: */
4834: for (i = 0; i < nbatts;i += 2) {
4835: if (!xmlStrcmp(atts[i], attname)) {
4836: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.98 daniel 4837: ctxt->sax->error(ctxt->userData,
4838: "Attribute %s redefined\n",
4839: attname);
1.72 daniel 4840: ctxt->wellFormed = 0;
4841: free(attname);
4842: free(attvalue);
1.98 daniel 4843: goto failed;
1.72 daniel 4844: }
4845: }
4846:
4847: /*
4848: * Add the pair to atts
4849: */
4850: if (atts == NULL) {
4851: maxatts = 10;
4852: atts = (const CHAR **) malloc(maxatts * sizeof(CHAR *));
4853: if (atts == NULL) {
1.86 daniel 4854: fprintf(stderr, "malloc of %ld byte failed\n",
4855: maxatts * (long)sizeof(CHAR *));
1.83 daniel 4856: return(NULL);
1.72 daniel 4857: }
4858: } else if (nbatts + 2 < maxatts) {
4859: maxatts *= 2;
4860: atts = (const CHAR **) realloc(atts, maxatts * sizeof(CHAR *));
4861: if (atts == NULL) {
1.86 daniel 4862: fprintf(stderr, "realloc of %ld byte failed\n",
4863: maxatts * (long)sizeof(CHAR *));
1.83 daniel 4864: return(NULL);
1.72 daniel 4865: }
4866: }
4867: atts[nbatts++] = attname;
4868: atts[nbatts++] = attvalue;
4869: atts[nbatts] = NULL;
4870: atts[nbatts + 1] = NULL;
1.98 daniel 4871: failed:
1.72 daniel 4872: }
4873:
1.42 daniel 4874: SKIP_BLANKS;
1.91 daniel 4875: if ((cons == ctxt->input->consumed) && (q == CUR_PTR)) {
1.55 daniel 4876: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4877: ctxt->sax->error(ctxt->userData,
1.31 daniel 4878: "xmlParseStartTag: problem parsing attributes\n");
1.59 daniel 4879: ctxt->wellFormed = 0;
1.29 daniel 4880: break;
1.3 veillard 4881: }
1.91 daniel 4882: GROW;
1.3 veillard 4883: }
4884:
1.43 daniel 4885: /*
1.72 daniel 4886: * SAX: Start of Element !
1.43 daniel 4887: */
1.72 daniel 4888: if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL))
1.74 daniel 4889: ctxt->sax->startElement(ctxt->userData, name, atts);
1.43 daniel 4890:
1.72 daniel 4891: if (atts != NULL) {
4892: for (i = 0;i < nbatts;i++) free((CHAR *) atts[i]);
4893: free(atts);
4894: }
1.83 daniel 4895: return(name);
1.3 veillard 4896: }
4897:
1.50 daniel 4898: /**
4899: * xmlParseEndTag:
4900: * @ctxt: an XML parser context
1.83 daniel 4901: * @tagname: the tag name as parsed in the opening tag.
1.50 daniel 4902: *
4903: * parse an end of tag
1.27 daniel 4904: *
4905: * [42] ETag ::= '</' Name S? '>'
1.29 daniel 4906: *
4907: * With namespace
4908: *
1.72 daniel 4909: * [NS 9] ETag ::= '</' QName S? '>'
1.7 veillard 4910: */
4911:
1.55 daniel 4912: void
1.83 daniel 4913: xmlParseEndTag(xmlParserCtxtPtr ctxt, CHAR *tagname) {
1.72 daniel 4914: CHAR *name;
1.7 veillard 4915:
1.91 daniel 4916: GROW;
1.40 daniel 4917: if ((CUR != '<') || (NXT(1) != '/')) {
1.55 daniel 4918: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4919: ctxt->sax->error(ctxt->userData, "xmlParseEndTag: '</' not found\n");
1.59 daniel 4920: ctxt->wellFormed = 0;
1.27 daniel 4921: return;
4922: }
1.40 daniel 4923: SKIP(2);
1.7 veillard 4924:
1.72 daniel 4925: name = xmlParseName(ctxt);
1.7 veillard 4926:
4927: /*
4928: * We should definitely be at the ending "S? '>'" part
4929: */
1.91 daniel 4930: GROW;
1.42 daniel 4931: SKIP_BLANKS;
1.40 daniel 4932: if ((!IS_CHAR(CUR)) || (CUR != '>')) {
1.55 daniel 4933: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4934: ctxt->sax->error(ctxt->userData, "End tag : expected '>'\n");
1.59 daniel 4935: ctxt->wellFormed = 0;
1.7 veillard 4936: } else
1.40 daniel 4937: NEXT;
1.7 veillard 4938:
1.72 daniel 4939: /*
1.98 daniel 4940: * [ WFC: Element Type Match ]
4941: * The Name in an element's end-tag must match the element type in the
4942: * start-tag.
4943: *
1.83 daniel 4944: */
4945: if (xmlStrcmp(name, tagname)) {
4946: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4947: ctxt->sax->error(ctxt->userData,
4948: "Opening and ending tag mismatch: %s and %s\n", tagname, name);
4949: ctxt->wellFormed = 0;
4950: }
4951:
4952: /*
1.72 daniel 4953: * SAX: End of Tag
4954: */
4955: if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
1.74 daniel 4956: ctxt->sax->endElement(ctxt->userData, name);
1.72 daniel 4957:
4958: if (name != NULL)
4959: free(name);
4960:
1.7 veillard 4961: return;
4962: }
4963:
1.50 daniel 4964: /**
4965: * xmlParseCDSect:
4966: * @ctxt: an XML parser context
4967: *
4968: * Parse escaped pure raw content.
1.29 daniel 4969: *
4970: * [18] CDSect ::= CDStart CData CDEnd
4971: *
4972: * [19] CDStart ::= '<![CDATA['
4973: *
4974: * [20] Data ::= (Char* - (Char* ']]>' Char*))
4975: *
4976: * [21] CDEnd ::= ']]>'
1.3 veillard 4977: */
1.55 daniel 4978: void
4979: xmlParseCDSect(xmlParserCtxtPtr ctxt) {
1.110 daniel 4980: const CHAR *base;
4981: CHAR r, s;
1.108 veillard 4982: CHAR cur;
1.3 veillard 4983:
1.106 daniel 4984: if ((NXT(0) == '<') && (NXT(1) == '!') &&
1.40 daniel 4985: (NXT(2) == '[') && (NXT(3) == 'C') &&
4986: (NXT(4) == 'D') && (NXT(5) == 'A') &&
4987: (NXT(6) == 'T') && (NXT(7) == 'A') &&
4988: (NXT(8) == '[')) {
4989: SKIP(9);
1.29 daniel 4990: } else
1.45 daniel 4991: return;
1.109 daniel 4992:
4993: ctxt->instate = XML_PARSER_CDATA_SECTION;
1.40 daniel 4994: base = CUR_PTR;
4995: if (!IS_CHAR(CUR)) {
1.55 daniel 4996: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4997: ctxt->sax->error(ctxt->userData, "CData section not finished\n%.50s\n", base);
1.59 daniel 4998: ctxt->wellFormed = 0;
1.109 daniel 4999: ctxt->instate = XML_PARSER_CONTENT;
1.45 daniel 5000: return;
1.3 veillard 5001: }
1.110 daniel 5002: r = CUR;
1.91 daniel 5003: NEXT;
1.40 daniel 5004: if (!IS_CHAR(CUR)) {
1.55 daniel 5005: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5006: ctxt->sax->error(ctxt->userData, "CData section not finished\n%.50s\n", base);
1.59 daniel 5007: ctxt->wellFormed = 0;
1.109 daniel 5008: ctxt->instate = XML_PARSER_CONTENT;
1.45 daniel 5009: return;
1.3 veillard 5010: }
1.110 daniel 5011: s = CUR;
1.91 daniel 5012: NEXT;
1.108 veillard 5013: cur = CUR;
5014: while (IS_CHAR(cur) &&
1.110 daniel 5015: ((r != ']') || (s != ']') || (cur != '>'))) {
5016: r = s;
5017: s = cur;
5018: NEXT;
1.108 veillard 5019: cur = CUR;
1.3 veillard 5020: }
1.109 daniel 5021: ctxt->instate = XML_PARSER_CONTENT;
1.40 daniel 5022: if (!IS_CHAR(CUR)) {
1.55 daniel 5023: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5024: ctxt->sax->error(ctxt->userData, "CData section not finished\n%.50s\n", base);
1.59 daniel 5025: ctxt->wellFormed = 0;
1.45 daniel 5026: return;
1.3 veillard 5027: }
1.107 daniel 5028: NEXT;
1.16 daniel 5029:
1.45 daniel 5030: /*
5031: * Ok the segment [base CUR_PTR] is to be consumed as chars.
5032: */
5033: if (ctxt->sax != NULL) {
1.107 daniel 5034: if (ctxt->sax->cdataBlock != NULL)
1.110 daniel 5035: ctxt->sax->cdataBlock(ctxt->userData, base, (CUR_PTR - base) - 3);
1.45 daniel 5036: }
1.2 veillard 5037: }
5038:
1.50 daniel 5039: /**
5040: * xmlParseContent:
5041: * @ctxt: an XML parser context
5042: *
5043: * Parse a content:
1.2 veillard 5044: *
1.27 daniel 5045: * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
1.2 veillard 5046: */
5047:
1.55 daniel 5048: void
5049: xmlParseContent(xmlParserCtxtPtr ctxt) {
1.97 daniel 5050: GROW;
1.40 daniel 5051: while ((CUR != '<') || (NXT(1) != '/')) {
5052: const CHAR *test = CUR_PTR;
1.91 daniel 5053: int cons = ctxt->input->consumed;
1.113 daniel 5054: CHAR tok = ctxt->token;
1.27 daniel 5055:
5056: /*
5057: * First case : a Processing Instruction.
5058: */
1.40 daniel 5059: if ((CUR == '<') && (NXT(1) == '?')) {
1.27 daniel 5060: xmlParsePI(ctxt);
5061: }
1.72 daniel 5062:
1.27 daniel 5063: /*
5064: * Second case : a CDSection
5065: */
1.40 daniel 5066: else if ((CUR == '<') && (NXT(1) == '!') &&
5067: (NXT(2) == '[') && (NXT(3) == 'C') &&
5068: (NXT(4) == 'D') && (NXT(5) == 'A') &&
5069: (NXT(6) == 'T') && (NXT(7) == 'A') &&
5070: (NXT(8) == '[')) {
1.45 daniel 5071: xmlParseCDSect(ctxt);
1.27 daniel 5072: }
1.72 daniel 5073:
1.27 daniel 5074: /*
5075: * Third case : a comment
5076: */
1.40 daniel 5077: else if ((CUR == '<') && (NXT(1) == '!') &&
5078: (NXT(2) == '-') && (NXT(3) == '-')) {
1.114 daniel 5079: xmlParseComment(ctxt);
1.97 daniel 5080: ctxt->instate = XML_PARSER_CONTENT;
1.27 daniel 5081: }
1.72 daniel 5082:
1.27 daniel 5083: /*
5084: * Fourth case : a sub-element.
5085: */
1.40 daniel 5086: else if (CUR == '<') {
1.72 daniel 5087: xmlParseElement(ctxt);
1.45 daniel 5088: }
1.72 daniel 5089:
1.45 daniel 5090: /*
1.50 daniel 5091: * Fifth case : a reference. If if has not been resolved,
5092: * parsing returns it's Name, create the node
1.45 daniel 5093: */
1.97 daniel 5094:
1.45 daniel 5095: else if (CUR == '&') {
1.77 daniel 5096: xmlParseReference(ctxt);
1.27 daniel 5097: }
1.72 daniel 5098:
1.27 daniel 5099: /*
5100: * Last case, text. Note that References are handled directly.
5101: */
5102: else {
1.45 daniel 5103: xmlParseCharData(ctxt, 0);
1.3 veillard 5104: }
1.14 veillard 5105:
1.91 daniel 5106: GROW;
1.14 veillard 5107: /*
1.45 daniel 5108: * Pop-up of finished entities.
1.14 veillard 5109: */
1.69 daniel 5110: while ((CUR == 0) && (ctxt->inputNr > 1))
5111: xmlPopInput(ctxt);
1.45 daniel 5112:
1.113 daniel 5113: if ((cons == ctxt->input->consumed) && (test == CUR_PTR) &&
5114: (tok == ctxt->token)) {
1.55 daniel 5115: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5116: ctxt->sax->error(ctxt->userData,
1.59 daniel 5117: "detected an error in element content\n");
5118: ctxt->wellFormed = 0;
1.29 daniel 5119: break;
5120: }
1.3 veillard 5121: }
1.2 veillard 5122: }
5123:
1.50 daniel 5124: /**
5125: * xmlParseElement:
5126: * @ctxt: an XML parser context
5127: *
5128: * parse an XML element, this is highly recursive
1.26 daniel 5129: *
5130: * [39] element ::= EmptyElemTag | STag content ETag
5131: *
1.98 daniel 5132: * [ WFC: Element Type Match ]
5133: * The Name in an element's end-tag must match the element type in the
5134: * start-tag.
5135: *
5136: * [ VC: Element Valid ]
5137: * TODO An element is valid if there is a declaration matching elementdecl
1.99 daniel 5138: * where the Name matches the element type and one of the following holds:
5139: * - The declaration matches EMPTY and the element has no content.
5140: * - The declaration matches children and the sequence of child elements
5141: * belongs to the language generated by the regular expression in the
5142: * content model, with optional white space (characters matching the
5143: * nonterminal S) between each pair of child elements.
5144: * - The declaration matches Mixed and the content consists of character
5145: * data and child elements whose types match names in the content model.
5146: * - The declaration matches ANY, and the types of any child elements have
5147: * been declared.
1.2 veillard 5148: */
1.26 daniel 5149:
1.72 daniel 5150: void
1.69 daniel 5151: xmlParseElement(xmlParserCtxtPtr ctxt) {
1.40 daniel 5152: const CHAR *openTag = CUR_PTR;
1.83 daniel 5153: CHAR *name;
1.32 daniel 5154: xmlParserNodeInfo node_info;
1.2 veillard 5155:
1.32 daniel 5156: /* Capture start position */
1.40 daniel 5157: node_info.begin_pos = CUR_PTR - ctxt->input->base;
5158: node_info.begin_line = ctxt->input->line;
1.32 daniel 5159:
1.83 daniel 5160: name = xmlParseStartTag(ctxt);
5161: if (name == NULL) {
5162: return;
5163: }
1.2 veillard 5164:
5165: /*
1.99 daniel 5166: * [ VC: Root Element Type ]
5167: * The Name in the document type declaration must match the element
5168: * type of the root element.
5169: */
1.105 daniel 5170: if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
5171: ctxt->node && (ctxt->node == ctxt->myDoc->root))
1.102 daniel 5172: ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
1.99 daniel 5173:
5174: /*
1.2 veillard 5175: * Check for an Empty Element.
5176: */
1.40 daniel 5177: if ((CUR == '/') && (NXT(1) == '>')) {
5178: SKIP(2);
1.72 daniel 5179: if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
1.83 daniel 5180: ctxt->sax->endElement(ctxt->userData, name);
5181: free(name);
1.72 daniel 5182: return;
1.2 veillard 5183: }
1.91 daniel 5184: if (CUR == '>') {
5185: NEXT;
5186: } else {
1.55 daniel 5187: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5188: ctxt->sax->error(ctxt->userData, "Couldn't find end of Start Tag\n%.30s\n",
1.57 daniel 5189: openTag);
1.59 daniel 5190: ctxt->wellFormed = 0;
1.45 daniel 5191:
5192: /*
5193: * end of parsing of this node.
5194: */
5195: nodePop(ctxt);
1.83 daniel 5196: free(name);
1.72 daniel 5197: return;
1.2 veillard 5198: }
5199:
5200: /*
5201: * Parse the content of the element:
5202: */
1.45 daniel 5203: xmlParseContent(ctxt);
1.40 daniel 5204: if (!IS_CHAR(CUR)) {
1.55 daniel 5205: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5206: ctxt->sax->error(ctxt->userData,
1.57 daniel 5207: "Premature end of data in tag %.30s\n", openTag);
1.59 daniel 5208: ctxt->wellFormed = 0;
1.45 daniel 5209:
5210: /*
5211: * end of parsing of this node.
5212: */
5213: nodePop(ctxt);
1.83 daniel 5214: free(name);
1.72 daniel 5215: return;
1.2 veillard 5216: }
5217:
5218: /*
1.27 daniel 5219: * parse the end of tag: '</' should be here.
1.2 veillard 5220: */
1.83 daniel 5221: xmlParseEndTag(ctxt, name);
5222: free(name);
1.2 veillard 5223: }
5224:
1.50 daniel 5225: /**
5226: * xmlParseVersionNum:
5227: * @ctxt: an XML parser context
5228: *
5229: * parse the XML version value.
1.29 daniel 5230: *
5231: * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
1.68 daniel 5232: *
5233: * Returns the string giving the XML version number, or NULL
1.29 daniel 5234: */
1.55 daniel 5235: CHAR *
5236: xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
1.40 daniel 5237: const CHAR *q = CUR_PTR;
1.29 daniel 5238: CHAR *ret;
5239:
1.40 daniel 5240: while (IS_CHAR(CUR) &&
5241: (((CUR >= 'a') && (CUR <= 'z')) ||
5242: ((CUR >= 'A') && (CUR <= 'Z')) ||
5243: ((CUR >= '0') && (CUR <= '9')) ||
5244: (CUR == '_') || (CUR == '.') ||
5245: (CUR == ':') || (CUR == '-'))) NEXT;
5246: ret = xmlStrndup(q, CUR_PTR - q);
1.29 daniel 5247: return(ret);
5248: }
5249:
1.50 daniel 5250: /**
5251: * xmlParseVersionInfo:
5252: * @ctxt: an XML parser context
5253: *
5254: * parse the XML version.
1.29 daniel 5255: *
5256: * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
5257: *
5258: * [25] Eq ::= S? '=' S?
1.50 daniel 5259: *
1.68 daniel 5260: * Returns the version string, e.g. "1.0"
1.29 daniel 5261: */
5262:
1.55 daniel 5263: CHAR *
5264: xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
1.29 daniel 5265: CHAR *version = NULL;
5266: const CHAR *q;
5267:
1.40 daniel 5268: if ((CUR == 'v') && (NXT(1) == 'e') &&
5269: (NXT(2) == 'r') && (NXT(3) == 's') &&
5270: (NXT(4) == 'i') && (NXT(5) == 'o') &&
5271: (NXT(6) == 'n')) {
5272: SKIP(7);
1.42 daniel 5273: SKIP_BLANKS;
1.40 daniel 5274: if (CUR != '=') {
1.55 daniel 5275: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5276: ctxt->sax->error(ctxt->userData, "xmlParseVersionInfo : expected '='\n");
1.59 daniel 5277: ctxt->wellFormed = 0;
1.31 daniel 5278: return(NULL);
5279: }
1.40 daniel 5280: NEXT;
1.42 daniel 5281: SKIP_BLANKS;
1.40 daniel 5282: if (CUR == '"') {
5283: NEXT;
5284: q = CUR_PTR;
1.29 daniel 5285: version = xmlParseVersionNum(ctxt);
1.55 daniel 5286: if (CUR != '"') {
5287: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5288: ctxt->sax->error(ctxt->userData, "String not closed\n%.50s\n", q);
1.59 daniel 5289: ctxt->wellFormed = 0;
1.55 daniel 5290: } else
1.40 daniel 5291: NEXT;
5292: } else if (CUR == '\''){
5293: NEXT;
5294: q = CUR_PTR;
1.29 daniel 5295: version = xmlParseVersionNum(ctxt);
1.55 daniel 5296: if (CUR != '\'') {
5297: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5298: ctxt->sax->error(ctxt->userData, "String not closed\n%.50s\n", q);
1.59 daniel 5299: ctxt->wellFormed = 0;
1.55 daniel 5300: } else
1.40 daniel 5301: NEXT;
1.31 daniel 5302: } else {
1.55 daniel 5303: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5304: ctxt->sax->error(ctxt->userData,
1.59 daniel 5305: "xmlParseVersionInfo : expected ' or \"\n");
5306: ctxt->wellFormed = 0;
1.29 daniel 5307: }
5308: }
5309: return(version);
5310: }
5311:
1.50 daniel 5312: /**
5313: * xmlParseEncName:
5314: * @ctxt: an XML parser context
5315: *
5316: * parse the XML encoding name
1.29 daniel 5317: *
5318: * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
1.50 daniel 5319: *
1.68 daniel 5320: * Returns the encoding name value or NULL
1.29 daniel 5321: */
1.55 daniel 5322: CHAR *
5323: xmlParseEncName(xmlParserCtxtPtr ctxt) {
1.40 daniel 5324: const CHAR *q = CUR_PTR;
1.29 daniel 5325: CHAR *ret = NULL;
5326:
1.40 daniel 5327: if (((CUR >= 'a') && (CUR <= 'z')) ||
5328: ((CUR >= 'A') && (CUR <= 'Z'))) {
5329: NEXT;
5330: while (IS_CHAR(CUR) &&
5331: (((CUR >= 'a') && (CUR <= 'z')) ||
5332: ((CUR >= 'A') && (CUR <= 'Z')) ||
5333: ((CUR >= '0') && (CUR <= '9')) ||
5334: (CUR == '-'))) NEXT;
5335: ret = xmlStrndup(q, CUR_PTR - q);
1.29 daniel 5336: } else {
1.55 daniel 5337: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5338: ctxt->sax->error(ctxt->userData, "Invalid XML encoding name\n");
1.59 daniel 5339: ctxt->wellFormed = 0;
1.29 daniel 5340: }
5341: return(ret);
5342: }
5343:
1.50 daniel 5344: /**
5345: * xmlParseEncodingDecl:
5346: * @ctxt: an XML parser context
5347: *
5348: * parse the XML encoding declaration
1.29 daniel 5349: *
5350: * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
1.50 daniel 5351: *
5352: * TODO: this should setup the conversion filters.
5353: *
1.68 daniel 5354: * Returns the encoding value or NULL
1.29 daniel 5355: */
5356:
1.55 daniel 5357: CHAR *
5358: xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
1.29 daniel 5359: CHAR *encoding = NULL;
5360: const CHAR *q;
5361:
1.42 daniel 5362: SKIP_BLANKS;
1.40 daniel 5363: if ((CUR == 'e') && (NXT(1) == 'n') &&
5364: (NXT(2) == 'c') && (NXT(3) == 'o') &&
5365: (NXT(4) == 'd') && (NXT(5) == 'i') &&
5366: (NXT(6) == 'n') && (NXT(7) == 'g')) {
5367: SKIP(8);
1.42 daniel 5368: SKIP_BLANKS;
1.40 daniel 5369: if (CUR != '=') {
1.55 daniel 5370: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5371: ctxt->sax->error(ctxt->userData, "xmlParseEncodingDecl : expected '='\n");
1.59 daniel 5372: ctxt->wellFormed = 0;
1.31 daniel 5373: return(NULL);
5374: }
1.40 daniel 5375: NEXT;
1.42 daniel 5376: SKIP_BLANKS;
1.40 daniel 5377: if (CUR == '"') {
5378: NEXT;
5379: q = CUR_PTR;
1.29 daniel 5380: encoding = xmlParseEncName(ctxt);
1.55 daniel 5381: if (CUR != '"') {
5382: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5383: ctxt->sax->error(ctxt->userData, "String not closed\n%.50s\n", q);
1.59 daniel 5384: ctxt->wellFormed = 0;
1.55 daniel 5385: } else
1.40 daniel 5386: NEXT;
5387: } else if (CUR == '\''){
5388: NEXT;
5389: q = CUR_PTR;
1.29 daniel 5390: encoding = xmlParseEncName(ctxt);
1.55 daniel 5391: if (CUR != '\'') {
5392: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5393: ctxt->sax->error(ctxt->userData, "String not closed\n%.50s\n", q);
1.59 daniel 5394: ctxt->wellFormed = 0;
1.55 daniel 5395: } else
1.40 daniel 5396: NEXT;
5397: } else if (CUR == '"'){
1.55 daniel 5398: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5399: ctxt->sax->error(ctxt->userData,
1.59 daniel 5400: "xmlParseEncodingDecl : expected ' or \"\n");
5401: ctxt->wellFormed = 0;
1.29 daniel 5402: }
5403: }
5404: return(encoding);
5405: }
5406:
1.50 daniel 5407: /**
5408: * xmlParseSDDecl:
5409: * @ctxt: an XML parser context
5410: *
5411: * parse the XML standalone declaration
1.29 daniel 5412: *
5413: * [32] SDDecl ::= S 'standalone' Eq
5414: * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
1.99 daniel 5415: *
5416: * [ VC: Standalone Document Declaration ]
5417: * TODO The standalone document declaration must have the value "no"
5418: * if any external markup declarations contain declarations of:
5419: * - attributes with default values, if elements to which these
5420: * attributes apply appear in the document without specifications
5421: * of values for these attributes, or
5422: * - entities (other than amp, lt, gt, apos, quot), if references
5423: * to those entities appear in the document, or
5424: * - attributes with values subject to normalization, where the
5425: * attribute appears in the document with a value which will change
5426: * as a result of normalization, or
5427: * - element types with element content, if white space occurs directly
5428: * within any instance of those types.
1.68 daniel 5429: *
5430: * Returns 1 if standalone, 0 otherwise
1.29 daniel 5431: */
5432:
1.55 daniel 5433: int
5434: xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
1.29 daniel 5435: int standalone = -1;
5436:
1.42 daniel 5437: SKIP_BLANKS;
1.40 daniel 5438: if ((CUR == 's') && (NXT(1) == 't') &&
5439: (NXT(2) == 'a') && (NXT(3) == 'n') &&
5440: (NXT(4) == 'd') && (NXT(5) == 'a') &&
5441: (NXT(6) == 'l') && (NXT(7) == 'o') &&
5442: (NXT(8) == 'n') && (NXT(9) == 'e')) {
5443: SKIP(10);
1.81 daniel 5444: SKIP_BLANKS;
1.40 daniel 5445: if (CUR != '=') {
1.55 daniel 5446: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5447: ctxt->sax->error(ctxt->userData,
1.59 daniel 5448: "XML standalone declaration : expected '='\n");
5449: ctxt->wellFormed = 0;
1.32 daniel 5450: return(standalone);
5451: }
1.40 daniel 5452: NEXT;
1.42 daniel 5453: SKIP_BLANKS;
1.40 daniel 5454: if (CUR == '\''){
5455: NEXT;
5456: if ((CUR == 'n') && (NXT(1) == 'o')) {
1.29 daniel 5457: standalone = 0;
1.40 daniel 5458: SKIP(2);
5459: } else if ((CUR == 'y') && (NXT(1) == 'e') &&
5460: (NXT(2) == 's')) {
1.29 daniel 5461: standalone = 1;
1.40 daniel 5462: SKIP(3);
1.29 daniel 5463: } else {
1.55 daniel 5464: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5465: ctxt->sax->error(ctxt->userData, "standalone accepts only 'yes' or 'no'\n");
1.59 daniel 5466: ctxt->wellFormed = 0;
1.29 daniel 5467: }
1.55 daniel 5468: if (CUR != '\'') {
5469: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5470: ctxt->sax->error(ctxt->userData, "String not closed\n");
1.59 daniel 5471: ctxt->wellFormed = 0;
1.55 daniel 5472: } else
1.40 daniel 5473: NEXT;
5474: } else if (CUR == '"'){
5475: NEXT;
5476: if ((CUR == 'n') && (NXT(1) == 'o')) {
1.29 daniel 5477: standalone = 0;
1.40 daniel 5478: SKIP(2);
5479: } else if ((CUR == 'y') && (NXT(1) == 'e') &&
5480: (NXT(2) == 's')) {
1.29 daniel 5481: standalone = 1;
1.40 daniel 5482: SKIP(3);
1.29 daniel 5483: } else {
1.55 daniel 5484: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5485: ctxt->sax->error(ctxt->userData,
1.59 daniel 5486: "standalone accepts only 'yes' or 'no'\n");
5487: ctxt->wellFormed = 0;
1.29 daniel 5488: }
1.55 daniel 5489: if (CUR != '"') {
5490: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5491: ctxt->sax->error(ctxt->userData, "String not closed\n");
1.59 daniel 5492: ctxt->wellFormed = 0;
1.55 daniel 5493: } else
1.40 daniel 5494: NEXT;
1.37 daniel 5495: } else {
1.55 daniel 5496: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5497: ctxt->sax->error(ctxt->userData, "Standalone value not found\n");
1.59 daniel 5498: ctxt->wellFormed = 0;
1.37 daniel 5499: }
1.29 daniel 5500: }
5501: return(standalone);
5502: }
5503:
1.50 daniel 5504: /**
5505: * xmlParseXMLDecl:
5506: * @ctxt: an XML parser context
5507: *
5508: * parse an XML declaration header
1.29 daniel 5509: *
5510: * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
1.1 veillard 5511: */
5512:
1.55 daniel 5513: void
5514: xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
1.1 veillard 5515: CHAR *version;
5516:
5517: /*
1.19 daniel 5518: * We know that '<?xml' is here.
1.1 veillard 5519: */
1.40 daniel 5520: SKIP(5);
1.1 veillard 5521:
1.59 daniel 5522: if (!IS_BLANK(CUR)) {
5523: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5524: ctxt->sax->error(ctxt->userData, "Blank needed after '<?xml'\n");
1.59 daniel 5525: ctxt->wellFormed = 0;
5526: }
1.42 daniel 5527: SKIP_BLANKS;
1.1 veillard 5528:
5529: /*
1.29 daniel 5530: * We should have the VersionInfo here.
1.1 veillard 5531: */
1.29 daniel 5532: version = xmlParseVersionInfo(ctxt);
5533: if (version == NULL)
1.45 daniel 5534: version = xmlCharStrdup(XML_DEFAULT_VERSION);
1.72 daniel 5535: ctxt->version = xmlStrdup(version);
1.45 daniel 5536: free(version);
1.29 daniel 5537:
5538: /*
5539: * We may have the encoding declaration
5540: */
1.59 daniel 5541: if (!IS_BLANK(CUR)) {
5542: if ((CUR == '?') && (NXT(1) == '>')) {
5543: SKIP(2);
5544: return;
5545: }
5546: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5547: ctxt->sax->error(ctxt->userData, "Blank needed here\n");
1.59 daniel 5548: ctxt->wellFormed = 0;
5549: }
1.72 daniel 5550: ctxt->encoding = xmlParseEncodingDecl(ctxt);
1.1 veillard 5551:
5552: /*
1.29 daniel 5553: * We may have the standalone status.
1.1 veillard 5554: */
1.72 daniel 5555: if ((ctxt->encoding != NULL) && (!IS_BLANK(CUR))) {
1.59 daniel 5556: if ((CUR == '?') && (NXT(1) == '>')) {
5557: SKIP(2);
5558: return;
5559: }
5560: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5561: ctxt->sax->error(ctxt->userData, "Blank needed here\n");
1.59 daniel 5562: ctxt->wellFormed = 0;
5563: }
5564: SKIP_BLANKS;
1.72 daniel 5565: ctxt->standalone = xmlParseSDDecl(ctxt);
1.1 veillard 5566:
1.42 daniel 5567: SKIP_BLANKS;
1.40 daniel 5568: if ((CUR == '?') && (NXT(1) == '>')) {
5569: SKIP(2);
5570: } else if (CUR == '>') {
1.31 daniel 5571: /* Deprecated old WD ... */
1.55 daniel 5572: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5573: ctxt->sax->error(ctxt->userData, "XML declaration must end-up with '?>'\n");
1.59 daniel 5574: ctxt->wellFormed = 0;
1.40 daniel 5575: NEXT;
1.29 daniel 5576: } else {
1.55 daniel 5577: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5578: ctxt->sax->error(ctxt->userData, "parsing XML declaration: '?>' expected\n");
1.59 daniel 5579: ctxt->wellFormed = 0;
1.40 daniel 5580: MOVETO_ENDTAG(CUR_PTR);
5581: NEXT;
1.29 daniel 5582: }
1.1 veillard 5583: }
5584:
1.50 daniel 5585: /**
5586: * xmlParseMisc:
5587: * @ctxt: an XML parser context
5588: *
5589: * parse an XML Misc* optionnal field.
1.21 daniel 5590: *
1.22 daniel 5591: * [27] Misc ::= Comment | PI | S
1.1 veillard 5592: */
5593:
1.55 daniel 5594: void
5595: xmlParseMisc(xmlParserCtxtPtr ctxt) {
1.40 daniel 5596: while (((CUR == '<') && (NXT(1) == '?')) ||
5597: ((CUR == '<') && (NXT(1) == '!') &&
5598: (NXT(2) == '-') && (NXT(3) == '-')) ||
5599: IS_BLANK(CUR)) {
5600: if ((CUR == '<') && (NXT(1) == '?')) {
1.16 daniel 5601: xmlParsePI(ctxt);
1.40 daniel 5602: } else if (IS_BLANK(CUR)) {
5603: NEXT;
1.1 veillard 5604: } else
1.114 daniel 5605: xmlParseComment(ctxt);
1.1 veillard 5606: }
5607: }
5608:
1.50 daniel 5609: /**
5610: * xmlParseDocument :
5611: * @ctxt: an XML parser context
5612: *
5613: * parse an XML document (and build a tree if using the standard SAX
5614: * interface).
1.21 daniel 5615: *
1.22 daniel 5616: * [1] document ::= prolog element Misc*
1.29 daniel 5617: *
5618: * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
1.50 daniel 5619: *
1.68 daniel 5620: * Returns 0, -1 in case of error. the parser context is augmented
1.50 daniel 5621: * as a result of the parsing.
1.1 veillard 5622: */
5623:
1.55 daniel 5624: int
5625: xmlParseDocument(xmlParserCtxtPtr ctxt) {
1.45 daniel 5626: xmlDefaultSAXHandlerInit();
5627:
1.91 daniel 5628: GROW;
5629:
1.14 veillard 5630: /*
1.44 daniel 5631: * SAX: beginning of the document processing.
5632: */
1.72 daniel 5633: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
1.74 daniel 5634: ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
1.44 daniel 5635:
5636: /*
1.14 veillard 5637: * We should check for encoding here and plug-in some
5638: * conversion code TODO !!!!
5639: */
1.1 veillard 5640:
5641: /*
5642: * Wipe out everything which is before the first '<'
5643: */
1.59 daniel 5644: if (IS_BLANK(CUR)) {
5645: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5646: ctxt->sax->error(ctxt->userData,
1.59 daniel 5647: "Extra spaces at the beginning of the document are not allowed\n");
5648: ctxt->wellFormed = 0;
5649: SKIP_BLANKS;
5650: }
5651:
5652: if (CUR == 0) {
5653: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5654: ctxt->sax->error(ctxt->userData, "Document is empty\n");
1.59 daniel 5655: ctxt->wellFormed = 0;
5656: }
1.1 veillard 5657:
5658: /*
5659: * Check for the XMLDecl in the Prolog.
5660: */
1.91 daniel 5661: GROW;
1.40 daniel 5662: if ((CUR == '<') && (NXT(1) == '?') &&
5663: (NXT(2) == 'x') && (NXT(3) == 'm') &&
5664: (NXT(4) == 'l')) {
1.19 daniel 5665: xmlParseXMLDecl(ctxt);
5666: /* SKIP_EOL(cur); */
1.42 daniel 5667: SKIP_BLANKS;
1.40 daniel 5668: } else if ((CUR == '<') && (NXT(1) == '?') &&
5669: (NXT(2) == 'X') && (NXT(3) == 'M') &&
5670: (NXT(4) == 'L')) {
1.19 daniel 5671: /*
5672: * The first drafts were using <?XML and the final W3C REC
5673: * now use <?xml ...
5674: */
1.16 daniel 5675: xmlParseXMLDecl(ctxt);
1.1 veillard 5676: /* SKIP_EOL(cur); */
1.42 daniel 5677: SKIP_BLANKS;
1.1 veillard 5678: } else {
1.72 daniel 5679: ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
1.1 veillard 5680: }
1.72 daniel 5681: if ((ctxt->sax) && (ctxt->sax->startDocument))
1.74 daniel 5682: ctxt->sax->startDocument(ctxt->userData);
1.1 veillard 5683:
5684: /*
5685: * The Misc part of the Prolog
5686: */
1.91 daniel 5687: GROW;
1.16 daniel 5688: xmlParseMisc(ctxt);
1.1 veillard 5689:
5690: /*
1.29 daniel 5691: * Then possibly doc type declaration(s) and more Misc
1.21 daniel 5692: * (doctypedecl Misc*)?
5693: */
1.91 daniel 5694: GROW;
1.40 daniel 5695: if ((CUR == '<') && (NXT(1) == '!') &&
5696: (NXT(2) == 'D') && (NXT(3) == 'O') &&
5697: (NXT(4) == 'C') && (NXT(5) == 'T') &&
5698: (NXT(6) == 'Y') && (NXT(7) == 'P') &&
5699: (NXT(8) == 'E')) {
1.22 daniel 5700: xmlParseDocTypeDecl(ctxt);
1.96 daniel 5701: ctxt->instate = XML_PARSER_PROLOG;
1.22 daniel 5702: xmlParseMisc(ctxt);
1.21 daniel 5703: }
5704:
5705: /*
5706: * Time to start parsing the tree itself
1.1 veillard 5707: */
1.91 daniel 5708: GROW;
1.96 daniel 5709: ctxt->instate = XML_PARSER_CONTENT;
1.72 daniel 5710: xmlParseElement(ctxt);
1.96 daniel 5711: ctxt->instate = XML_PARSER_EPILOG;
1.33 daniel 5712:
5713: /*
5714: * The Misc part at the end
5715: */
5716: xmlParseMisc(ctxt);
1.16 daniel 5717:
1.59 daniel 5718: if (CUR != 0) {
5719: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5720: ctxt->sax->error(ctxt->userData,
1.59 daniel 5721: "Extra content at the end of the document\n");
5722: ctxt->wellFormed = 0;
5723: }
1.96 daniel 5724: ctxt->instate = XML_PARSER_EOF;
1.59 daniel 5725:
1.44 daniel 5726: /*
5727: * SAX: end of the document processing.
5728: */
1.72 daniel 5729: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
1.74 daniel 5730: ctxt->sax->endDocument(ctxt->userData);
1.59 daniel 5731: if (! ctxt->wellFormed) return(-1);
1.16 daniel 5732: return(0);
5733: }
5734:
1.98 daniel 5735: /************************************************************************
5736: * *
5737: * I/O front end functions to the parser *
5738: * *
5739: ************************************************************************/
5740:
1.50 daniel 5741: /**
1.86 daniel 5742: * xmlCreateDocParserCtxt :
1.50 daniel 5743: * @cur: a pointer to an array of CHAR
5744: *
1.69 daniel 5745: * Create a parser context for an XML in-memory document.
5746: *
5747: * Returns the new parser context or NULL
1.16 daniel 5748: */
1.69 daniel 5749: xmlParserCtxtPtr
5750: xmlCreateDocParserCtxt(CHAR *cur) {
1.16 daniel 5751: xmlParserCtxtPtr ctxt;
1.40 daniel 5752: xmlParserInputPtr input;
1.75 daniel 5753: xmlCharEncoding enc;
1.16 daniel 5754:
1.97 daniel 5755: ctxt = xmlNewParserCtxt();
1.16 daniel 5756: if (ctxt == NULL) {
5757: return(NULL);
5758: }
1.96 daniel 5759: input = xmlNewInputStream(ctxt);
1.40 daniel 5760: if (input == NULL) {
1.97 daniel 5761: xmlFreeParserCtxt(ctxt);
1.40 daniel 5762: return(NULL);
5763: }
5764:
1.75 daniel 5765: /*
5766: * plug some encoding conversion routines here. !!!
5767: */
5768: enc = xmlDetectCharEncoding(cur);
5769: xmlSwitchEncoding(ctxt, enc);
5770:
1.40 daniel 5771: input->base = cur;
5772: input->cur = cur;
5773:
5774: inputPush(ctxt, input);
1.69 daniel 5775: return(ctxt);
5776: }
5777:
5778: /**
5779: * xmlSAXParseDoc :
5780: * @sax: the SAX handler block
5781: * @cur: a pointer to an array of CHAR
5782: * @recovery: work in recovery mode, i.e. tries to read no Well Formed
5783: * documents
5784: *
5785: * parse an XML in-memory document and build a tree.
5786: * It use the given SAX function block to handle the parsing callback.
5787: * If sax is NULL, fallback to the default DOM tree building routines.
5788: *
5789: * Returns the resulting document tree
5790: */
5791:
5792: xmlDocPtr
5793: xmlSAXParseDoc(xmlSAXHandlerPtr sax, CHAR *cur, int recovery) {
5794: xmlDocPtr ret;
5795: xmlParserCtxtPtr ctxt;
5796:
5797: if (cur == NULL) return(NULL);
1.16 daniel 5798:
5799:
1.69 daniel 5800: ctxt = xmlCreateDocParserCtxt(cur);
5801: if (ctxt == NULL) return(NULL);
1.74 daniel 5802: if (sax != NULL) {
5803: ctxt->sax = sax;
5804: ctxt->userData = NULL;
5805: }
1.69 daniel 5806:
1.16 daniel 5807: xmlParseDocument(ctxt);
1.72 daniel 5808: if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
1.59 daniel 5809: else {
5810: ret = NULL;
1.72 daniel 5811: xmlFreeDoc(ctxt->myDoc);
5812: ctxt->myDoc = NULL;
1.59 daniel 5813: }
1.86 daniel 5814: if (sax != NULL)
5815: ctxt->sax = NULL;
1.69 daniel 5816: xmlFreeParserCtxt(ctxt);
1.16 daniel 5817:
1.1 veillard 5818: return(ret);
5819: }
5820:
1.50 daniel 5821: /**
1.55 daniel 5822: * xmlParseDoc :
5823: * @cur: a pointer to an array of CHAR
5824: *
5825: * parse an XML in-memory document and build a tree.
5826: *
1.68 daniel 5827: * Returns the resulting document tree
1.55 daniel 5828: */
5829:
1.69 daniel 5830: xmlDocPtr
5831: xmlParseDoc(CHAR *cur) {
1.59 daniel 5832: return(xmlSAXParseDoc(NULL, cur, 0));
1.76 daniel 5833: }
5834:
5835: /**
5836: * xmlSAXParseDTD :
5837: * @sax: the SAX handler block
5838: * @ExternalID: a NAME* containing the External ID of the DTD
5839: * @SystemID: a NAME* containing the URL to the DTD
5840: *
5841: * Load and parse an external subset.
5842: *
5843: * Returns the resulting xmlDtdPtr or NULL in case of error.
5844: */
5845:
5846: xmlDtdPtr
5847: xmlSAXParseDTD(xmlSAXHandlerPtr sax, const CHAR *ExternalID,
5848: const CHAR *SystemID) {
5849: xmlDtdPtr ret = NULL;
5850: xmlParserCtxtPtr ctxt;
1.83 daniel 5851: xmlParserInputPtr input = NULL;
1.76 daniel 5852: xmlCharEncoding enc;
5853:
5854: if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
5855:
1.97 daniel 5856: ctxt = xmlNewParserCtxt();
1.76 daniel 5857: if (ctxt == NULL) {
5858: return(NULL);
5859: }
5860:
5861: /*
5862: * Set-up the SAX context
5863: */
5864: if (ctxt == NULL) return(NULL);
5865: if (sax != NULL) {
1.93 veillard 5866: if (ctxt->sax != NULL)
5867: free(ctxt->sax);
1.76 daniel 5868: ctxt->sax = sax;
5869: ctxt->userData = NULL;
5870: }
5871:
5872: /*
5873: * Ask the Entity resolver to load the damn thing
5874: */
5875:
5876: if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
5877: input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID, SystemID);
5878: if (input == NULL) {
1.86 daniel 5879: if (sax != NULL) ctxt->sax = NULL;
1.76 daniel 5880: xmlFreeParserCtxt(ctxt);
5881: return(NULL);
5882: }
5883:
5884: /*
5885: * plug some encoding conversion routines here. !!!
5886: */
5887: xmlPushInput(ctxt, input);
5888: enc = xmlDetectCharEncoding(ctxt->input->cur);
5889: xmlSwitchEncoding(ctxt, enc);
5890:
1.95 veillard 5891: if (input->filename == NULL)
5892: input->filename = xmlStrdup(SystemID);
1.76 daniel 5893: input->line = 1;
5894: input->col = 1;
5895: input->base = ctxt->input->cur;
5896: input->cur = ctxt->input->cur;
5897: input->free = NULL;
5898:
5899: /*
5900: * let's parse that entity knowing it's an external subset.
5901: */
1.79 daniel 5902: xmlParseExternalSubset(ctxt, ExternalID, SystemID);
1.76 daniel 5903:
5904: if (ctxt->myDoc != NULL) {
5905: if (ctxt->wellFormed) {
5906: ret = ctxt->myDoc->intSubset;
5907: ctxt->myDoc->intSubset = NULL;
5908: } else {
5909: ret = NULL;
5910: }
5911: xmlFreeDoc(ctxt->myDoc);
5912: ctxt->myDoc = NULL;
5913: }
1.86 daniel 5914: if (sax != NULL) ctxt->sax = NULL;
1.76 daniel 5915: xmlFreeParserCtxt(ctxt);
5916:
5917: return(ret);
5918: }
5919:
5920: /**
5921: * xmlParseDTD :
5922: * @ExternalID: a NAME* containing the External ID of the DTD
5923: * @SystemID: a NAME* containing the URL to the DTD
5924: *
5925: * Load and parse an external subset.
5926: *
5927: * Returns the resulting xmlDtdPtr or NULL in case of error.
5928: */
5929:
5930: xmlDtdPtr
5931: xmlParseDTD(const CHAR *ExternalID, const CHAR *SystemID) {
5932: return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
1.59 daniel 5933: }
5934:
5935: /**
5936: * xmlRecoverDoc :
5937: * @cur: a pointer to an array of CHAR
5938: *
5939: * parse an XML in-memory document and build a tree.
5940: * In the case the document is not Well Formed, a tree is built anyway
5941: *
1.68 daniel 5942: * Returns the resulting document tree
1.59 daniel 5943: */
5944:
1.69 daniel 5945: xmlDocPtr
5946: xmlRecoverDoc(CHAR *cur) {
1.59 daniel 5947: return(xmlSAXParseDoc(NULL, cur, 1));
1.55 daniel 5948: }
5949:
5950: /**
1.69 daniel 5951: * xmlCreateFileParserCtxt :
1.50 daniel 5952: * @filename: the filename
5953: *
1.69 daniel 5954: * Create a parser context for a file content.
5955: * Automatic support for ZLIB/Compress compressed document is provided
5956: * by default if found at compile-time.
1.50 daniel 5957: *
1.69 daniel 5958: * Returns the new parser context or NULL
1.9 httpng 5959: */
1.69 daniel 5960: xmlParserCtxtPtr
5961: xmlCreateFileParserCtxt(const char *filename)
5962: {
5963: xmlParserCtxtPtr ctxt;
1.40 daniel 5964: xmlParserInputPtr inputStream;
1.91 daniel 5965: xmlParserInputBufferPtr buf;
1.111 daniel 5966: char *directory = NULL;
1.9 httpng 5967:
1.91 daniel 5968: buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
5969: if (buf == NULL) return(NULL);
1.9 httpng 5970:
1.97 daniel 5971: ctxt = xmlNewParserCtxt();
1.16 daniel 5972: if (ctxt == NULL) {
5973: return(NULL);
5974: }
1.97 daniel 5975:
1.96 daniel 5976: inputStream = xmlNewInputStream(ctxt);
1.40 daniel 5977: if (inputStream == NULL) {
1.97 daniel 5978: xmlFreeParserCtxt(ctxt);
1.40 daniel 5979: return(NULL);
5980: }
5981:
5982: inputStream->filename = strdup(filename);
1.91 daniel 5983: inputStream->buf = buf;
5984: inputStream->base = inputStream->buf->buffer->content;
5985: inputStream->cur = inputStream->buf->buffer->content;
1.16 daniel 5986:
1.40 daniel 5987: inputPush(ctxt, inputStream);
1.110 daniel 5988: if ((ctxt->directory == NULL) && (directory == NULL))
1.106 daniel 5989: directory = xmlParserGetDirectory(filename);
5990: if ((ctxt->directory == NULL) && (directory != NULL))
1.110 daniel 5991: ctxt->directory = directory;
1.106 daniel 5992:
1.69 daniel 5993: return(ctxt);
5994: }
5995:
5996: /**
5997: * xmlSAXParseFile :
5998: * @sax: the SAX handler block
5999: * @filename: the filename
6000: * @recovery: work in recovery mode, i.e. tries to read no Well Formed
6001: * documents
6002: *
6003: * parse an XML file and build a tree. Automatic support for ZLIB/Compress
6004: * compressed document is provided by default if found at compile-time.
6005: * It use the given SAX function block to handle the parsing callback.
6006: * If sax is NULL, fallback to the default DOM tree building routines.
6007: *
6008: * Returns the resulting document tree
6009: */
6010:
1.79 daniel 6011: xmlDocPtr
6012: xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
1.69 daniel 6013: int recovery) {
6014: xmlDocPtr ret;
6015: xmlParserCtxtPtr ctxt;
1.111 daniel 6016: char *directory = NULL;
1.69 daniel 6017:
6018: ctxt = xmlCreateFileParserCtxt(filename);
6019: if (ctxt == NULL) return(NULL);
1.74 daniel 6020: if (sax != NULL) {
1.93 veillard 6021: if (ctxt->sax != NULL)
6022: free(ctxt->sax);
1.74 daniel 6023: ctxt->sax = sax;
6024: ctxt->userData = NULL;
6025: }
1.106 daniel 6026:
1.110 daniel 6027: if ((ctxt->directory == NULL) && (directory == NULL))
1.106 daniel 6028: directory = xmlParserGetDirectory(filename);
6029: if ((ctxt->directory == NULL) && (directory != NULL))
6030: ctxt->directory = xmlStrdup(directory);
1.16 daniel 6031:
6032: xmlParseDocument(ctxt);
1.40 daniel 6033:
1.72 daniel 6034: if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
1.59 daniel 6035: else {
6036: ret = NULL;
1.72 daniel 6037: xmlFreeDoc(ctxt->myDoc);
6038: ctxt->myDoc = NULL;
1.59 daniel 6039: }
1.86 daniel 6040: if (sax != NULL)
6041: ctxt->sax = NULL;
1.69 daniel 6042: xmlFreeParserCtxt(ctxt);
1.20 daniel 6043:
6044: return(ret);
6045: }
6046:
1.55 daniel 6047: /**
6048: * xmlParseFile :
6049: * @filename: the filename
6050: *
6051: * parse an XML file and build a tree. Automatic support for ZLIB/Compress
6052: * compressed document is provided by default if found at compile-time.
6053: *
1.68 daniel 6054: * Returns the resulting document tree
1.55 daniel 6055: */
6056:
1.79 daniel 6057: xmlDocPtr
6058: xmlParseFile(const char *filename) {
1.59 daniel 6059: return(xmlSAXParseFile(NULL, filename, 0));
6060: }
6061:
6062: /**
6063: * xmlRecoverFile :
6064: * @filename: the filename
6065: *
6066: * parse an XML file and build a tree. Automatic support for ZLIB/Compress
6067: * compressed document is provided by default if found at compile-time.
6068: * In the case the document is not Well Formed, a tree is built anyway
6069: *
1.68 daniel 6070: * Returns the resulting document tree
1.59 daniel 6071: */
6072:
1.79 daniel 6073: xmlDocPtr
6074: xmlRecoverFile(const char *filename) {
1.59 daniel 6075: return(xmlSAXParseFile(NULL, filename, 1));
1.55 daniel 6076: }
1.32 daniel 6077:
1.50 daniel 6078: /**
1.69 daniel 6079: * xmlCreateMemoryParserCtxt :
1.68 daniel 6080: * @buffer: an pointer to a char array
1.50 daniel 6081: * @size: the siwe of the array
6082: *
1.69 daniel 6083: * Create a parser context for an XML in-memory document.
1.50 daniel 6084: *
1.69 daniel 6085: * Returns the new parser context or NULL
1.20 daniel 6086: */
1.69 daniel 6087: xmlParserCtxtPtr
6088: xmlCreateMemoryParserCtxt(char *buffer, int size) {
1.20 daniel 6089: xmlParserCtxtPtr ctxt;
1.40 daniel 6090: xmlParserInputPtr input;
1.75 daniel 6091: xmlCharEncoding enc;
1.40 daniel 6092:
6093: buffer[size - 1] = '\0';
6094:
1.97 daniel 6095: ctxt = xmlNewParserCtxt();
1.20 daniel 6096: if (ctxt == NULL) {
6097: return(NULL);
6098: }
1.97 daniel 6099:
1.96 daniel 6100: input = xmlNewInputStream(ctxt);
1.40 daniel 6101: if (input == NULL) {
1.97 daniel 6102: xmlFreeParserCtxt(ctxt);
1.40 daniel 6103: return(NULL);
6104: }
1.20 daniel 6105:
1.40 daniel 6106: input->filename = NULL;
6107: input->line = 1;
6108: input->col = 1;
1.96 daniel 6109: input->buf = NULL;
1.91 daniel 6110: input->consumed = 0;
1.45 daniel 6111:
6112: /*
1.75 daniel 6113: * plug some encoding conversion routines here. !!!
1.45 daniel 6114: */
1.75 daniel 6115: enc = xmlDetectCharEncoding(buffer);
6116: xmlSwitchEncoding(ctxt, enc);
6117:
1.40 daniel 6118: input->base = buffer;
6119: input->cur = buffer;
1.69 daniel 6120: input->free = NULL;
1.20 daniel 6121:
1.40 daniel 6122: inputPush(ctxt, input);
1.69 daniel 6123: return(ctxt);
6124: }
6125:
6126: /**
6127: * xmlSAXParseMemory :
6128: * @sax: the SAX handler block
6129: * @buffer: an pointer to a char array
6130: * @size: the siwe of the array
6131: * @recovery: work in recovery mode, i.e. tries to read no Well Formed
6132: * documents
6133: *
6134: * parse an XML in-memory block and use the given SAX function block
6135: * to handle the parsing callback. If sax is NULL, fallback to the default
6136: * DOM tree building routines.
6137: *
6138: * Returns the resulting document tree
6139: */
6140: xmlDocPtr
6141: xmlSAXParseMemory(xmlSAXHandlerPtr sax, char *buffer, int size, int recovery) {
6142: xmlDocPtr ret;
6143: xmlParserCtxtPtr ctxt;
6144:
6145: ctxt = xmlCreateMemoryParserCtxt(buffer, size);
6146: if (ctxt == NULL) return(NULL);
1.74 daniel 6147: if (sax != NULL) {
6148: ctxt->sax = sax;
6149: ctxt->userData = NULL;
6150: }
1.20 daniel 6151:
6152: xmlParseDocument(ctxt);
1.40 daniel 6153:
1.72 daniel 6154: if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
1.59 daniel 6155: else {
6156: ret = NULL;
1.72 daniel 6157: xmlFreeDoc(ctxt->myDoc);
6158: ctxt->myDoc = NULL;
1.59 daniel 6159: }
1.86 daniel 6160: if (sax != NULL)
6161: ctxt->sax = NULL;
1.69 daniel 6162: xmlFreeParserCtxt(ctxt);
1.16 daniel 6163:
1.9 httpng 6164: return(ret);
1.17 daniel 6165: }
6166:
1.55 daniel 6167: /**
6168: * xmlParseMemory :
1.68 daniel 6169: * @buffer: an pointer to a char array
1.55 daniel 6170: * @size: the size of the array
6171: *
6172: * parse an XML in-memory block and build a tree.
6173: *
1.68 daniel 6174: * Returns the resulting document tree
1.55 daniel 6175: */
6176:
6177: xmlDocPtr xmlParseMemory(char *buffer, int size) {
1.59 daniel 6178: return(xmlSAXParseMemory(NULL, buffer, size, 0));
6179: }
6180:
6181: /**
6182: * xmlRecoverMemory :
1.68 daniel 6183: * @buffer: an pointer to a char array
1.59 daniel 6184: * @size: the size of the array
6185: *
6186: * parse an XML in-memory block and build a tree.
6187: * In the case the document is not Well Formed, a tree is built anyway
6188: *
1.68 daniel 6189: * Returns the resulting document tree
1.59 daniel 6190: */
6191:
6192: xmlDocPtr xmlRecoverMemory(char *buffer, int size) {
6193: return(xmlSAXParseMemory(NULL, buffer, size, 1));
1.17 daniel 6194: }
6195:
6196:
1.50 daniel 6197: /**
6198: * xmlSetupParserForBuffer:
6199: * @ctxt: an XML parser context
6200: * @buffer: a CHAR * buffer
6201: * @filename: a file name
6202: *
1.19 daniel 6203: * Setup the parser context to parse a new buffer; Clears any prior
6204: * contents from the parser context. The buffer parameter must not be
6205: * NULL, but the filename parameter can be
6206: */
1.55 daniel 6207: void
6208: xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const CHAR* buffer,
1.17 daniel 6209: const char* filename)
6210: {
1.96 daniel 6211: xmlParserInputPtr input;
1.40 daniel 6212:
1.96 daniel 6213: input = xmlNewInputStream(ctxt);
6214: if (input == NULL) {
6215: perror("malloc");
6216: free(ctxt);
6217: exit(1);
6218: }
6219:
6220: xmlClearParserCtxt(ctxt);
6221: if (filename != NULL)
6222: input->filename = strdup(filename);
6223: input->base = buffer;
6224: input->cur = buffer;
6225: inputPush(ctxt, input);
1.17 daniel 6226: }
6227:
1.32 daniel 6228:
/************************************************************************
 *									*
 *				Miscellaneous				*
 *									*
 ************************************************************************/
6234:
6235:
1.50 daniel 6236: /**
6237: * xmlParserFindNodeInfo:
6238: * @ctxt: an XML parser context
6239: * @node: an XML node within the tree
6240: *
6241: * Find the parser node info struct for a given node
6242: *
1.68 daniel 6243: * Returns an xmlParserNodeInfo block pointer or NULL
1.32 daniel 6244: */
6245: const xmlParserNodeInfo* xmlParserFindNodeInfo(const xmlParserCtxt* ctx,
6246: const xmlNode* node)
6247: {
6248: unsigned long pos;
6249:
6250: /* Find position where node should be at */
6251: pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node);
6252: if ( ctx->node_seq.buffer[pos].node == node )
6253: return &ctx->node_seq.buffer[pos];
6254: else
6255: return NULL;
6256: }
6257:
6258:
1.50 daniel 6259: /**
6260: * xmlInitNodeInfoSeq :
6261: * @seq: a node info sequence pointer
6262: *
6263: * -- Initialize (set to initial state) node info sequence
1.32 daniel 6264: */
1.55 daniel 6265: void
6266: xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
1.32 daniel 6267: {
6268: seq->length = 0;
6269: seq->maximum = 0;
6270: seq->buffer = NULL;
6271: }
6272:
1.50 daniel 6273: /**
6274: * xmlClearNodeInfoSeq :
6275: * @seq: a node info sequence pointer
6276: *
6277: * -- Clear (release memory and reinitialize) node
1.32 daniel 6278: * info sequence
6279: */
1.55 daniel 6280: void
6281: xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
1.32 daniel 6282: {
6283: if ( seq->buffer != NULL )
6284: free(seq->buffer);
6285: xmlInitNodeInfoSeq(seq);
6286: }
6287:
6288:
1.50 daniel 6289: /**
6290: * xmlParserFindNodeInfoIndex:
6291: * @seq: a node info sequence pointer
6292: * @node: an XML node pointer
6293: *
6294: *
1.32 daniel 6295: * xmlParserFindNodeInfoIndex : Find the index that the info record for
6296: * the given node is or should be at in a sorted sequence
1.68 daniel 6297: *
6298: * Returns a long indicating the position of the record
1.32 daniel 6299: */
6300: unsigned long xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeq* seq,
6301: const xmlNode* node)
6302: {
6303: unsigned long upper, lower, middle;
6304: int found = 0;
6305:
6306: /* Do a binary search for the key */
6307: lower = 1;
6308: upper = seq->length;
6309: middle = 0;
6310: while ( lower <= upper && !found) {
6311: middle = lower + (upper - lower) / 2;
6312: if ( node == seq->buffer[middle - 1].node )
6313: found = 1;
6314: else if ( node < seq->buffer[middle - 1].node )
6315: upper = middle - 1;
6316: else
6317: lower = middle + 1;
6318: }
6319:
6320: /* Return position */
6321: if ( middle == 0 || seq->buffer[middle - 1].node < node )
6322: return middle;
6323: else
6324: return middle - 1;
6325: }
6326:
6327:
1.50 daniel 6328: /**
6329: * xmlParserAddNodeInfo:
6330: * @ctxt: an XML parser context
1.68 daniel 6331: * @info: a node info sequence pointer
1.50 daniel 6332: *
6333: * Insert node info record into the sorted sequence
1.32 daniel 6334: */
1.55 daniel 6335: void
6336: xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt,
1.68 daniel 6337: const xmlParserNodeInfo* info)
1.32 daniel 6338: {
6339: unsigned long pos;
6340: static unsigned int block_size = 5;
6341:
6342: /* Find pos and check to see if node is already in the sequence */
1.55 daniel 6343: pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, info->node);
6344: if ( pos < ctxt->node_seq.length
6345: && ctxt->node_seq.buffer[pos].node == info->node ) {
6346: ctxt->node_seq.buffer[pos] = *info;
1.32 daniel 6347: }
6348:
6349: /* Otherwise, we need to add new node to buffer */
6350: else {
6351: /* Expand buffer by 5 if needed */
1.55 daniel 6352: if ( ctxt->node_seq.length + 1 > ctxt->node_seq.maximum ) {
1.32 daniel 6353: xmlParserNodeInfo* tmp_buffer;
1.55 daniel 6354: unsigned int byte_size = (sizeof(*ctxt->node_seq.buffer)
6355: *(ctxt->node_seq.maximum + block_size));
1.32 daniel 6356:
1.55 daniel 6357: if ( ctxt->node_seq.buffer == NULL )
1.32 daniel 6358: tmp_buffer = (xmlParserNodeInfo*)malloc(byte_size);
6359: else
1.55 daniel 6360: tmp_buffer = (xmlParserNodeInfo*)realloc(ctxt->node_seq.buffer, byte_size);
1.32 daniel 6361:
6362: if ( tmp_buffer == NULL ) {
1.55 daniel 6363: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 6364: ctxt->sax->error(ctxt->userData, "Out of memory\n");
1.32 daniel 6365: return;
6366: }
1.55 daniel 6367: ctxt->node_seq.buffer = tmp_buffer;
6368: ctxt->node_seq.maximum += block_size;
1.32 daniel 6369: }
6370:
6371: /* If position is not at end, move elements out of the way */
1.55 daniel 6372: if ( pos != ctxt->node_seq.length ) {
1.32 daniel 6373: unsigned long i;
6374:
1.55 daniel 6375: for ( i = ctxt->node_seq.length; i > pos; i-- )
6376: ctxt->node_seq.buffer[i] = ctxt->node_seq.buffer[i - 1];
1.32 daniel 6377: }
6378:
6379: /* Copy element and increase length */
1.55 daniel 6380: ctxt->node_seq.buffer[pos] = *info;
6381: ctxt->node_seq.length++;
1.32 daniel 6382: }
6383: }
1.77 daniel 6384:
1.98 daniel 6385:
6386: /**
6387: * xmlSubstituteEntitiesDefault :
6388: * @val: int 0 or 1
6389: *
6390: * Set and return the previous value for default entity support.
6391: * Initially the parser always keep entity references instead of substituting
6392: * entity values in the output. This function has to be used to change the
6393: * default parser behaviour
6394: * SAX::subtituteEntities() has to be used for changing that on a file by
6395: * file basis.
6396: *
6397: * Returns the last value for 0 for no substitution, 1 for substitution.
6398: */
6399:
6400: int
6401: xmlSubstituteEntitiesDefault(int val) {
6402: int old = xmlSubstituteEntitiesDefaultValue;
6403:
6404: xmlSubstituteEntitiesDefaultValue = val;
6405: return(old);
6406: }
1.77 daniel 6407:
Webmaster