Annotation of XML/parser.c, revision 1.89
1.1 veillard 1: /*
1.3 veillard 2: * parser.c : an XML 1.0 non-verifying parser
1.15 veillard 3: *
4: * See Copyright for the status of this software.
5: *
1.60 daniel 6: * Daniel.Veillard@w3.org
1.1 veillard 7: */
8:
1.26 daniel 9: #ifdef WIN32
10: #define HAVE_FCNTL_H
11: #include <io.h>
12: #else
1.9 httpng 13: #include <config.h>
1.26 daniel 14: #endif
1.1 veillard 15: #include <stdio.h>
16: #include <ctype.h>
1.14 veillard 17: #include <string.h> /* for memset() only */
1.50 daniel 18: #include <stdlib.h>
1.9 httpng 19: #include <sys/stat.h>
20: #ifdef HAVE_FCNTL_H
21: #include <fcntl.h>
22: #endif
1.10 httpng 23: #ifdef HAVE_UNISTD_H
24: #include <unistd.h>
25: #endif
1.20 daniel 26: #ifdef HAVE_ZLIB_H
27: #include <zlib.h>
28: #endif
1.1 veillard 29:
1.14 veillard 30: #include "tree.h"
1.1 veillard 31: #include "parser.h"
1.14 veillard 32: #include "entities.h"
1.75 daniel 33: #include "encoding.h"
1.61 daniel 34: #include "valid.h"
1.69 daniel 35: #include "parserInternals.h"
1.1 veillard 36:
1.86 daniel 37: const char *xmlParserVersion = LIBXML_VERSION;
38:
1.45 daniel 39: /************************************************************************
40: * *
41: * Parser stacks related functions and macros *
42: * *
43: ************************************************************************/
1.79 daniel 44:
45: int xmlSubstituteEntitiesDefaultValue = 0;
46:
1.1 veillard 47: /*
1.40 daniel 48: * Generic function for accessing stacks in the Parser Context
1.1 veillard 49: */
50:
1.31 daniel 51: #define PUSH_AND_POP(type, name) \
1.72 daniel 52: extern int name##Push(xmlParserCtxtPtr ctxt, type value) { \
1.31 daniel 53: if (ctxt->name##Nr >= ctxt->name##Max) { \
54: ctxt->name##Max *= 2; \
1.40 daniel 55: ctxt->name##Tab = (void *) realloc(ctxt->name##Tab, \
56: ctxt->name##Max * sizeof(ctxt->name##Tab[0])); \
57: if (ctxt->name##Tab == NULL) { \
1.31 daniel 58: fprintf(stderr, "realloc failed !\n"); \
59: exit(1); \
60: } \
61: } \
1.40 daniel 62: ctxt->name##Tab[ctxt->name##Nr] = value; \
63: ctxt->name = value; \
64: return(ctxt->name##Nr++); \
1.31 daniel 65: } \
1.72 daniel 66: extern type name##Pop(xmlParserCtxtPtr ctxt) { \
1.69 daniel 67: type ret; \
1.40 daniel 68: if (ctxt->name##Nr <= 0) return(0); \
69: ctxt->name##Nr--; \
1.50 daniel 70: if (ctxt->name##Nr > 0) \
71: ctxt->name = ctxt->name##Tab[ctxt->name##Nr - 1]; \
72: else \
73: ctxt->name = NULL; \
1.69 daniel 74: ret = ctxt->name##Tab[ctxt->name##Nr]; \
75: ctxt->name##Tab[ctxt->name##Nr] = 0; \
76: return(ret); \
1.31 daniel 77: } \
78:
1.40 daniel 79: PUSH_AND_POP(xmlParserInputPtr, input)
1.41 daniel 80: PUSH_AND_POP(xmlNodePtr, node)
1.40 daniel 81:
1.55 daniel 82: /*
83: * Macros for accessing the content. Those should be used only by the parser,
84: * and not exported.
85: *
86: * Dirty macros, i.e. one need to make assumption on the context to use them
87: *
88: * CUR_PTR return the current pointer to the CHAR to be parsed.
89: * CUR returns the current CHAR value, i.e. a 8 bit value if compiled
90: * in ISO-Latin or UTF-8, and the current 16 bit value if compiled
91: * in UNICODE mode. This should be used internally by the parser
92: * only to compare to ASCII values otherwise it would break when
93: * running with UTF-8 encoding.
94: * NXT(n) returns the n'th next CHAR. As with CUR, it should be used only
95: * to compare against ASCII based substrings.
96: * SKIP(n) Skip n CHAR, and must also be used only to skip ASCII defined
97: * strings within the parser.
98: *
1.77 daniel 99: * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
1.55 daniel 100: *
101: * CURRENT Returns the current char value, with the full decoding of
102: * UTF-8 if we are using this mode. It returns an int.
103: * NEXT Skip to the next character, this does the proper decoding
104: * in UTF-8 mode. It also pop-up unfinished entities on the fly.
105: * It returns the pointer to the current CHAR.
1.77 daniel 106: * COPY(to) copy one char to *to, increment CUR_PTR and to accordingly
1.55 daniel 107: */
1.45 daniel 108:
/*
 * "Dirty" accessors: they all expect a variable named ctxt in scope.
 * The expansions are fully parenthesised so they can be used safely
 * inside larger expressions; CUR_PTR remains usable as an lvalue.
 */
#define CUR (*ctxt->input->cur)
#define SKIP(val) (ctxt->input->cur += (val))
#define NXT(val) (ctxt->input->cur[(val)])
#define CUR_PTR (ctxt->input->cur)

/* Skips blanks; meant to be used as a statement followed by ';' */
#define SKIP_BLANKS \
    while (IS_BLANK(*(ctxt->input->cur))) NEXT

#ifndef USE_UTF_8
#define CURRENT (*ctxt->input->cur)
/*
 * NEXT is an expression. On a non-zero char it updates line/col, advances
 * the cursor and yields the pointer before the advance. On a 0 char it
 * calls xmlPopInput() and yields the current pointer of the resulting input.
 */
#define NEXT ((*ctxt->input->cur) ? \
    (((*(ctxt->input->cur) == '\n') ? \
      (ctxt->input->line++, ctxt->input->col = 1) : \
      (ctxt->input->col++)), ctxt->input->cur++) : \
    (xmlPopInput(ctxt), ctxt->input->cur))
#else
/* Fail early with a clear message instead of at the first use of NEXT. */
#error "USE_UTF_8 is defined but the UTF-8 versions of CURRENT and NEXT are not implemented"
#endif
1.42 daniel 126:
1.40 daniel 127:
1.50 daniel 128: /**
129: * xmlPopInput:
130: * @ctxt: an XML parser context
131: *
1.40 daniel 132: * xmlPopInput: the current input pointed by ctxt->input came to an end
133: * pop it and return the next char.
1.45 daniel 134: *
135: * TODO A deallocation of the popped Input structure is needed
1.68 daniel 136: *
137: * Returns the current CHAR in the parser context
1.40 daniel 138: */
1.55 daniel 139: CHAR
140: xmlPopInput(xmlParserCtxtPtr ctxt) {
1.40 daniel 141: if (ctxt->inputNr == 1) return(0); /* End of main Input */
1.69 daniel 142: xmlFreeInputStream(inputPop(ctxt));
1.40 daniel 143: return(CUR);
144: }
145:
1.50 daniel 146: /**
147: * xmlPushInput:
148: * @ctxt: an XML parser context
149: * @input: an XML parser input fragment (entity, XML fragment ...).
150: *
1.40 daniel 151: * xmlPushInput: switch to a new input stream which is stacked on top
152: * of the previous one(s).
153: */
1.55 daniel 154: void
155: xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
1.40 daniel 156: if (input == NULL) return;
157: inputPush(ctxt, input);
158: }
159:
1.50 daniel 160: /**
1.69 daniel 161: * xmlFreeInputStream:
162: * @input: an xmlParserInputPtr
163: *
164: * Free up an input stream.
165: */
166: void
167: xmlFreeInputStream(xmlParserInputPtr input) {
168: if (input == NULL) return;
169:
170: if (input->filename != NULL) free((char *) input->filename);
171: if ((input->free != NULL) && (input->base != NULL))
172: input->free((char *) input->base);
173: memset(input, -1, sizeof(xmlParserInput));
174: free(input);
175: }
176:
177: /**
1.50 daniel 178: * xmlNewEntityInputStream:
179: * @ctxt: an XML parser context
180: * @entity: an Entity pointer
181: *
1.82 daniel 182: * Create a new input stream based on an xmlEntityPtr
1.68 daniel 183: * Returns the new input stream
1.45 daniel 184: */
1.50 daniel 185: xmlParserInputPtr
186: xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1.45 daniel 187: xmlParserInputPtr input;
188:
189: if (entity == NULL) {
1.55 daniel 190: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 191: ctxt->sax->error(ctxt->userData,
1.45 daniel 192: "internal: xmlNewEntityInputStream entity = NULL\n");
1.50 daniel 193: return(NULL);
1.45 daniel 194: }
195: if (entity->content == NULL) {
1.55 daniel 196: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 197: ctxt->sax->error(ctxt->userData,
1.45 daniel 198: "internal: xmlNewEntityInputStream entity->input = NULL\n");
1.50 daniel 199: return(NULL);
1.45 daniel 200: }
201: input = (xmlParserInputPtr) malloc(sizeof(xmlParserInput));
202: if (input == NULL) {
1.55 daniel 203: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 204: ctxt->sax->error(ctxt->userData, "malloc: couldn't allocate a new input stream\n");
1.50 daniel 205: return(NULL);
1.45 daniel 206: }
207: input->filename = entity->SystemID; /* TODO !!! char <- CHAR */
208: input->base = entity->content;
209: input->cur = entity->content;
210: input->line = 1;
211: input->col = 1;
1.69 daniel 212: input->free = NULL;
1.50 daniel 213: return(input);
1.45 daniel 214: }
215:
1.59 daniel 216: /**
217: * xmlNewStringInputStream:
218: * @ctxt: an XML parser context
1.82 daniel 219: * @entity: an Entity memory buffer
1.59 daniel 220: *
221: * Create a new input stream based on a memory buffer.
1.68 daniel 222: * Returns the new input stream
1.59 daniel 223: */
224: xmlParserInputPtr
1.82 daniel 225: xmlNewStringInputStream(xmlParserCtxtPtr ctxt, CHAR *entity) {
1.59 daniel 226: xmlParserInputPtr input;
227:
1.82 daniel 228: if (entity == NULL) {
1.59 daniel 229: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 230: ctxt->sax->error(ctxt->userData,
1.59 daniel 231: "internal: xmlNewStringInputStream string = NULL\n");
232: return(NULL);
233: }
234: input = (xmlParserInputPtr) malloc(sizeof(xmlParserInput));
235: if (input == NULL) {
236: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 237: ctxt->sax->error(ctxt->userData, "malloc: couldn't allocate a new input stream\n");
1.59 daniel 238: return(NULL);
239: }
240: input->filename = NULL;
1.82 daniel 241: input->base = entity;
242: input->cur = entity;
1.59 daniel 243: input->line = 1;
244: input->col = 1;
1.69 daniel 245: input->free = NULL;
1.59 daniel 246: return(input);
247: }
248:
1.76 daniel 249: /**
250: * xmlNewInputFromFile:
251: * @ctxt: an XML parser context
252: * @filename: the filename to use as entity
253: *
254: * Create a new input stream based on a file.
255: *
256: * Returns the new input stream or NULL in case of error
257: */
258: xmlParserInputPtr
1.79 daniel 259: xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) {
1.76 daniel 260: #ifdef HAVE_ZLIB_H
261: gzFile input;
262: #else
263: int input;
264: #endif
265: int res;
266: int len;
1.86 daniel 267: int cnt;
1.76 daniel 268: struct stat buf;
1.86 daniel 269: char *buffer, *nbuf;
1.76 daniel 270: xmlParserInputPtr inputStream;
1.77 daniel 271: /* xmlCharEncoding enc; */
1.76 daniel 272:
1.86 daniel 273: #define MINLEN 40000
1.76 daniel 274:
1.86 daniel 275: if (strcmp(filename,"-") == 0) {
1.76 daniel 276: #ifdef HAVE_ZLIB_H
1.86 daniel 277: input = gzdopen (fileno(stdin), "r");
278: if (input == NULL) {
279: fprintf (stderr, "Cannot read from stdin\n");
280: perror ("gzdopen failed");
281: return(NULL);
282: }
1.76 daniel 283: #else
1.86 daniel 284: #ifdef WIN32
285: input = -1;
286: #else
287: input = fileno(stdin);
1.76 daniel 288: #endif
1.86 daniel 289: if (input < 0) {
290: fprintf (stderr, "Cannot read from stdin\n");
291: perror ("open failed");
1.76 daniel 292: return(NULL);
293: }
1.86 daniel 294: #endif
295: len = MINLEN;
296: } else {
1.76 daniel 297: #ifdef HAVE_ZLIB_H
1.86 daniel 298: input = gzopen (filename, "r");
299: if (input == NULL) {
300: fprintf (stderr, "Cannot read file %s :\n", filename);
301: perror ("gzopen failed");
302: return(NULL);
303: }
1.76 daniel 304: #else
305: #ifdef WIN32
1.86 daniel 306: input = _open (filename, O_RDONLY | _O_BINARY);
1.76 daniel 307: #else
1.86 daniel 308: input = open (filename, O_RDONLY);
309: #endif
310: if (input < 0) {
311: fprintf (stderr, "Cannot read file %s :\n", filename);
312: perror ("open failed");
313: return(NULL);
314: }
1.76 daniel 315: #endif
1.86 daniel 316: res = stat(filename, &buf);
317: if (res < 0)
318: return(NULL);
1.87 daniel 319: len = buf.st_size;
1.86 daniel 320: if (len < MINLEN)
321: len = MINLEN;
322: }
1.87 daniel 323: buffer = (char *)malloc((len+1)*sizeof(char));
1.86 daniel 324: if (buffer == NULL) {
325: fprintf (stderr, "Cannot malloc\n");
326: perror ("malloc failed");
327: return(NULL);
1.76 daniel 328: }
1.86 daniel 329:
330: cnt = 0;
331: while(1) {
332: if (cnt >= len) {
333: len *= 2;
1.87 daniel 334: nbuf = (char *)realloc(buffer,(len+1)*sizeof(char));
1.86 daniel 335: if (nbuf == NULL) {
336: fprintf(stderr,"Cannot realloc\n");
337: free(buffer);
338: perror ("realloc failed");
339: return(NULL);
340: }
341: buffer = nbuf;
342: }
1.76 daniel 343: #ifdef HAVE_ZLIB_H
1.86 daniel 344: res = gzread(input, &buffer[cnt], len-cnt);
1.76 daniel 345: #else
1.86 daniel 346: res = read(input, &buffer[cnt], len-cnt);
1.76 daniel 347: #endif
1.86 daniel 348: if (res < 0) {
349: fprintf (stderr, "Cannot read file %s :\n", filename);
1.76 daniel 350: #ifdef HAVE_ZLIB_H
1.86 daniel 351: perror ("gzread failed");
1.76 daniel 352: #else
1.86 daniel 353: perror ("read failed");
1.76 daniel 354: #endif
1.86 daniel 355: return(NULL);
356: }
357: if (res == 0)
358: break;
359: cnt += res;
1.76 daniel 360: }
361: #ifdef HAVE_ZLIB_H
362: gzclose(input);
363: #else
364: close(input);
365: #endif
366:
1.86 daniel 367: buffer[cnt] = '\0';
1.76 daniel 368:
369: inputStream = (xmlParserInputPtr) malloc(sizeof(xmlParserInput));
370: if (inputStream == NULL) {
371: perror("malloc");
372: free(ctxt);
373: return(NULL);
374: }
375:
376: inputStream->filename = strdup(filename);
377: inputStream->line = 1;
378: inputStream->col = 1;
379:
380: /*
381: * plug some encoding conversion routines here. !!!
382: enc = xmlDetectCharEncoding(buffer);
383: xmlSwitchEncoding(ctxt, enc);
384: */
385:
386: inputStream->base = buffer;
387: inputStream->cur = buffer;
388: inputStream->free = (xmlParserInputDeallocate) free;
389:
390: return(inputStream);
391: }
392:
1.77 daniel 393: /************************************************************************
394: * *
395: * Commodity functions to handle entities *
396: * *
397: ************************************************************************/
398:
/*
 * Macro used to grow the current buffer: buffer##_size is doubled and
 * buffer reallocated to hold that many CHARs. The process exits when the
 * reallocation fails.
 */
#define growBuffer(buffer) { \
    CHAR *buffer##_grown; \
    buffer##_size *= 2; \
    buffer##_grown = (CHAR *) realloc(buffer, buffer##_size * sizeof(CHAR)); \
    if (buffer##_grown == NULL) { \
        perror("realloc failed"); \
        exit(1); \
    } \
    buffer = buffer##_grown; \
}
410:
411:
412: /**
413: * xmlDecodeEntities:
414: * @ctxt: the parser context
415: * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
416: * @len: the len to decode (in bytes !), -1 for no size limit
417: * @end: an end marker CHAR, 0 if none
418: * @end2: an end marker CHAR, 0 if none
419: * @end3: an end marker CHAR, 0 if none
420: *
421: * [67] Reference ::= EntityRef | CharRef
422: *
423: * [69] PEReference ::= '%' Name ';'
424: *
425: * Returns A newly allocated string with the substitution done. The caller
426: * must deallocate it !
427: */
428: CHAR *
429: xmlDecodeEntities(xmlParserCtxtPtr ctxt, int len, int what,
430: CHAR end, CHAR end2, CHAR end3) {
431: CHAR *buffer = NULL;
1.78 daniel 432: int buffer_size = 0;
1.77 daniel 433: CHAR *out = NULL;
1.78 daniel 434:
1.77 daniel 435: CHAR *cur = NULL;
436: xmlEntityPtr ent;
437: const CHAR *start = CUR_PTR;
438: unsigned int max = (unsigned int) len;
439:
440: /*
441: * allocate a translation buffer.
442: */
443: buffer_size = 1000;
444: buffer = (CHAR *) malloc(buffer_size * sizeof(CHAR));
445: if (buffer == NULL) {
446: perror("xmlDecodeEntities: malloc failed");
447: return(NULL);
448: }
449: out = buffer;
450:
1.78 daniel 451: /*
452: * Ok loop until we reach one of the ending char or a size limit.
453: */
1.77 daniel 454: while ((CUR_PTR - start < max) && (CUR != end) &&
455: (CUR != end2) && (CUR != end3)) {
456:
457: if (CUR == '&' && (what & XML_SUBSTITUTE_REF)) {
458: if (NXT(1) == '#') {
459: int val = xmlParseCharRef(ctxt);
460: /* TODO: invalid for UTF-8 variable encoding !!! */
461: *out++ = val;
462: } else {
463: ent = xmlParseEntityRef(ctxt);
464: if (ent != NULL) {
465: cur = ent->content;
466: while (*cur != 0) {
467: *out++ = *cur++;
468: if (out - buffer > buffer_size - 100) {
469: int index = out - buffer;
470:
1.78 daniel 471: growBuffer(buffer);
1.77 daniel 472: out = &buffer[index];
473: }
474: }
475: }
476: }
477: } else if (CUR == '%' && (what & XML_SUBSTITUTE_PEREF)) {
478: /*
479: * a PEReference induce to switch the entity flow,
480: * we break here to flush the current set of chars
481: * parsed if any. We will be called back later.
482: */
483: if (CUR_PTR != start) break;
484:
485: xmlParsePEReference(ctxt);
1.79 daniel 486:
487: /*
488: * Pop-up of finished entities.
489: */
490: while ((CUR == 0) && (ctxt->inputNr > 1))
491: xmlPopInput(ctxt);
492:
1.78 daniel 493: break;
1.77 daniel 494: } else {
495: /* TODO: invalid for UTF-8 , use COPY(out); */
496: *out++ = CUR;
1.86 daniel 497: if (out - buffer > buffer_size - 100) {
498: int index = out - buffer;
499:
500: growBuffer(buffer);
501: out = &buffer[index];
502: }
1.77 daniel 503: NEXT;
504: }
505: }
506: *out++ = 0;
507: return(buffer);
508: }
509:
1.1 veillard 510:
1.28 daniel 511: /************************************************************************
512: * *
1.75 daniel 513: * Commodity functions to handle encodings *
514: * *
515: ************************************************************************/
516:
517: /**
518: * xmlSwitchEncoding:
519: * @ctxt: the parser context
520: * @len: the len of @cur
521: *
522: * change the input functions when discovering the character encoding
523: * of a given entity.
524: *
525: */
526: void
527: xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
528: {
529: switch (enc) {
530: case XML_CHAR_ENCODING_ERROR:
531: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
532: ctxt->sax->error(ctxt->userData, "encoding unknown\n");
533: ctxt->wellFormed = 0;
534: break;
535: case XML_CHAR_ENCODING_NONE:
536: /* let's assume it's UTF-8 without the XML decl */
537: return;
538: case XML_CHAR_ENCODING_UTF8:
539: /* default encoding, no conversion should be needed */
540: return;
541: case XML_CHAR_ENCODING_UTF16LE:
542: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
543: ctxt->sax->error(ctxt->userData,
544: "char encoding UTF16 little endian not supported\n");
545: break;
546: case XML_CHAR_ENCODING_UTF16BE:
547: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
548: ctxt->sax->error(ctxt->userData,
549: "char encoding UTF16 big endian not supported\n");
550: break;
551: case XML_CHAR_ENCODING_UCS4LE:
552: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
553: ctxt->sax->error(ctxt->userData,
554: "char encoding USC4 little endian not supported\n");
555: break;
556: case XML_CHAR_ENCODING_UCS4BE:
557: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
558: ctxt->sax->error(ctxt->userData,
559: "char encoding USC4 big endian not supported\n");
560: break;
561: case XML_CHAR_ENCODING_EBCDIC:
562: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
563: ctxt->sax->error(ctxt->userData,
564: "char encoding EBCDIC not supported\n");
565: break;
566: case XML_CHAR_ENCODING_UCS4_2143:
567: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
568: ctxt->sax->error(ctxt->userData,
569: "char encoding UCS4 2143 not supported\n");
570: break;
571: case XML_CHAR_ENCODING_UCS4_3412:
572: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
573: ctxt->sax->error(ctxt->userData,
574: "char encoding UCS4 3412 not supported\n");
575: break;
576: case XML_CHAR_ENCODING_UCS2:
577: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
578: ctxt->sax->error(ctxt->userData,
579: "char encoding UCS2 not supported\n");
580: break;
581: case XML_CHAR_ENCODING_8859_1:
582: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
583: ctxt->sax->error(ctxt->userData,
584: "char encoding ISO_8859_1 ISO Latin 1 not supported\n");
585: break;
586: case XML_CHAR_ENCODING_8859_2:
587: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
588: ctxt->sax->error(ctxt->userData,
589: "char encoding ISO_8859_2 ISO Latin 2 not supported\n");
590: break;
591: case XML_CHAR_ENCODING_8859_3:
592: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
593: ctxt->sax->error(ctxt->userData,
594: "char encoding ISO_8859_3 not supported\n");
595: break;
596: case XML_CHAR_ENCODING_8859_4:
597: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
598: ctxt->sax->error(ctxt->userData,
599: "char encoding ISO_8859_4 not supported\n");
600: break;
601: case XML_CHAR_ENCODING_8859_5:
602: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
603: ctxt->sax->error(ctxt->userData,
604: "char encoding ISO_8859_5 not supported\n");
605: break;
606: case XML_CHAR_ENCODING_8859_6:
607: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
608: ctxt->sax->error(ctxt->userData,
609: "char encoding ISO_8859_6 not supported\n");
610: break;
611: case XML_CHAR_ENCODING_8859_7:
612: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
613: ctxt->sax->error(ctxt->userData,
614: "char encoding ISO_8859_7 not supported\n");
615: break;
616: case XML_CHAR_ENCODING_8859_8:
617: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
618: ctxt->sax->error(ctxt->userData,
619: "char encoding ISO_8859_8 not supported\n");
620: break;
621: case XML_CHAR_ENCODING_8859_9:
622: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
623: ctxt->sax->error(ctxt->userData,
624: "char encoding ISO_8859_9 not supported\n");
625: break;
626: case XML_CHAR_ENCODING_2022_JP:
627: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
628: ctxt->sax->error(ctxt->userData,
629: "char encoding ISO-2022-JPnot supported\n");
630: break;
631: case XML_CHAR_ENCODING_SHIFT_JIS:
632: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
633: ctxt->sax->error(ctxt->userData,
634: "char encoding Shift_JISnot supported\n");
635: break;
636: case XML_CHAR_ENCODING_EUC_JP:
637: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
638: ctxt->sax->error(ctxt->userData,
639: "char encoding EUC-JPnot supported\n");
640: break;
641: }
642: }
643:
644: /************************************************************************
645: * *
1.28 daniel 646: * Commodity functions to handle CHARs *
647: * *
648: ************************************************************************/
649:
1.50 daniel 650: /**
651: * xmlStrndup:
652: * @cur: the input CHAR *
653: * @len: the len of @cur
654: *
655: * a strndup for array of CHAR's
1.68 daniel 656: *
657: * Returns a new CHAR * or NULL
1.1 veillard 658: */
1.55 daniel 659: CHAR *
660: xmlStrndup(const CHAR *cur, int len) {
1.1 veillard 661: CHAR *ret = malloc((len + 1) * sizeof(CHAR));
662:
663: if (ret == NULL) {
1.86 daniel 664: fprintf(stderr, "malloc of %ld byte failed\n",
665: (len + 1) * (long)sizeof(CHAR));
1.1 veillard 666: return(NULL);
667: }
668: memcpy(ret, cur, len * sizeof(CHAR));
669: ret[len] = 0;
670: return(ret);
671: }
672:
1.50 daniel 673: /**
674: * xmlStrdup:
675: * @cur: the input CHAR *
676: *
677: * a strdup for array of CHAR's
1.68 daniel 678: *
679: * Returns a new CHAR * or NULL
1.1 veillard 680: */
1.55 daniel 681: CHAR *
682: xmlStrdup(const CHAR *cur) {
1.6 httpng 683: const CHAR *p = cur;
1.1 veillard 684:
685: while (IS_CHAR(*p)) p++;
686: return(xmlStrndup(cur, p - cur));
687: }
688:
1.50 daniel 689: /**
690: * xmlCharStrndup:
691: * @cur: the input char *
692: * @len: the len of @cur
693: *
694: * a strndup for char's to CHAR's
1.68 daniel 695: *
696: * Returns a new CHAR * or NULL
1.45 daniel 697: */
698:
1.55 daniel 699: CHAR *
700: xmlCharStrndup(const char *cur, int len) {
1.45 daniel 701: int i;
702: CHAR *ret = malloc((len + 1) * sizeof(CHAR));
703:
704: if (ret == NULL) {
1.86 daniel 705: fprintf(stderr, "malloc of %ld byte failed\n",
706: (len + 1) * (long)sizeof(CHAR));
1.45 daniel 707: return(NULL);
708: }
709: for (i = 0;i < len;i++)
710: ret[i] = (CHAR) cur[i];
711: ret[len] = 0;
712: return(ret);
713: }
714:
1.50 daniel 715: /**
716: * xmlCharStrdup:
717: * @cur: the input char *
718: * @len: the len of @cur
719: *
720: * a strdup for char's to CHAR's
1.68 daniel 721: *
722: * Returns a new CHAR * or NULL
1.45 daniel 723: */
724:
1.55 daniel 725: CHAR *
726: xmlCharStrdup(const char *cur) {
1.45 daniel 727: const char *p = cur;
728:
729: while (*p != '\0') p++;
730: return(xmlCharStrndup(cur, p - cur));
731: }
732:
1.50 daniel 733: /**
734: * xmlStrcmp:
735: * @str1: the first CHAR *
736: * @str2: the second CHAR *
737: *
738: * a strcmp for CHAR's
1.68 daniel 739: *
740: * Returns the integer result of the comparison
1.14 veillard 741: */
742:
1.55 daniel 743: int
744: xmlStrcmp(const CHAR *str1, const CHAR *str2) {
1.14 veillard 745: register int tmp;
746:
747: do {
748: tmp = *str1++ - *str2++;
749: if (tmp != 0) return(tmp);
750: } while ((*str1 != 0) && (*str2 != 0));
751: return (*str1 - *str2);
752: }
753:
1.50 daniel 754: /**
755: * xmlStrncmp:
756: * @str1: the first CHAR *
757: * @str2: the second CHAR *
758: * @len: the max comparison length
759: *
760: * a strncmp for CHAR's
1.68 daniel 761: *
762: * Returns the integer result of the comparison
1.14 veillard 763: */
764:
1.55 daniel 765: int
766: xmlStrncmp(const CHAR *str1, const CHAR *str2, int len) {
1.14 veillard 767: register int tmp;
768:
769: if (len <= 0) return(0);
770: do {
771: tmp = *str1++ - *str2++;
772: if (tmp != 0) return(tmp);
773: len--;
774: if (len <= 0) return(0);
775: } while ((*str1 != 0) && (*str2 != 0));
776: return (*str1 - *str2);
777: }
778:
1.50 daniel 779: /**
780: * xmlStrchr:
781: * @str: the CHAR * array
782: * @val: the CHAR to search
783: *
784: * a strchr for CHAR's
1.68 daniel 785: *
786: * Returns the CHAR * for the first occurence or NULL.
1.14 veillard 787: */
788:
1.89 ! daniel 789: const CHAR *
1.55 daniel 790: xmlStrchr(const CHAR *str, CHAR val) {
1.14 veillard 791: while (*str != 0) {
792: if (*str == val) return((CHAR *) str);
793: str++;
794: }
795: return(NULL);
1.89 ! daniel 796: }
! 797:
! 798: /**
! 799: * xmlStrstr:
! 800: * @str: the CHAR * array (haystack)
! 801: * @val: the CHAR to search (needle)
! 802: *
! 803: * a strstr for CHAR's
! 804: *
! 805: * Returns the CHAR * for the first occurence or NULL.
! 806: */
! 807:
! 808: const CHAR *
! 809: xmlStrstr(const CHAR *str, CHAR *val) {
! 810: int n;
! 811:
! 812: if (str == NULL) return(NULL);
! 813: if (val == NULL) return(NULL);
! 814: n = xmlStrlen(val);
! 815:
! 816: if (n == 0) return(str);
! 817: while (*str != 0) {
! 818: if (*str == *val) {
! 819: if (!xmlStrncmp(str, val, n)) return((const CHAR *) str);
! 820: }
! 821: str++;
! 822: }
! 823: return(NULL);
! 824: }
! 825:
! 826: /**
! 827: * xmlStrsub:
! 828: * @str: the CHAR * array (haystack)
! 829: * @start: the index of the first char (zero based)
! 830: * @len: the length of the substring
! 831: *
! 832: * Extract a substring of a given string
! 833: *
! 834: * Returns the CHAR * for the first occurence or NULL.
! 835: */
! 836:
! 837: CHAR *
! 838: xmlStrsub(const CHAR *str, int start, int len) {
! 839: int i;
! 840:
! 841: if (str == NULL) return(NULL);
! 842: if (start < 0) return(NULL);
! 843:
! 844: for (i = 0;i < start;i++) {
! 845: if (*str == 0) return(NULL);
! 846: str++;
! 847: }
! 848: if (*str == 0) return(NULL);
! 849: return(xmlStrndup(str, len));
1.14 veillard 850: }
1.28 daniel 851:
1.50 daniel 852: /**
853: * xmlStrlen:
854: * @str: the CHAR * array
855: *
856: * lenght of a CHAR's string
1.68 daniel 857: *
858: * Returns the number of CHAR contained in the ARRAY.
1.45 daniel 859: */
860:
1.55 daniel 861: int
862: xmlStrlen(const CHAR *str) {
1.45 daniel 863: int len = 0;
864:
865: if (str == NULL) return(0);
866: while (*str != 0) {
867: str++;
868: len++;
869: }
870: return(len);
871: }
872:
1.50 daniel 873: /**
874: * xmlStrncat:
1.68 daniel 875: * @cur: the original CHAR * array
1.50 daniel 876: * @add: the CHAR * array added
877: * @len: the length of @add
878: *
879: * a strncat for array of CHAR's
1.68 daniel 880: *
881: * Returns a new CHAR * containing the concatenated string.
1.45 daniel 882: */
883:
1.55 daniel 884: CHAR *
885: xmlStrncat(CHAR *cur, const CHAR *add, int len) {
1.45 daniel 886: int size;
887: CHAR *ret;
888:
889: if ((add == NULL) || (len == 0))
890: return(cur);
891: if (cur == NULL)
892: return(xmlStrndup(add, len));
893:
894: size = xmlStrlen(cur);
895: ret = realloc(cur, (size + len + 1) * sizeof(CHAR));
896: if (ret == NULL) {
1.86 daniel 897: fprintf(stderr, "xmlStrncat: realloc of %ld byte failed\n",
898: (size + len + 1) * (long)sizeof(CHAR));
1.45 daniel 899: return(cur);
900: }
901: memcpy(&ret[size], add, len * sizeof(CHAR));
902: ret[size + len] = 0;
903: return(ret);
904: }
905:
1.50 daniel 906: /**
907: * xmlStrcat:
1.68 daniel 908: * @cur: the original CHAR * array
1.50 daniel 909: * @add: the CHAR * array added
910: *
911: * a strcat for array of CHAR's
1.68 daniel 912: *
913: * Returns a new CHAR * containing the concatenated string.
1.45 daniel 914: */
1.55 daniel 915: CHAR *
916: xmlStrcat(CHAR *cur, const CHAR *add) {
1.45 daniel 917: const CHAR *p = add;
918:
919: if (add == NULL) return(cur);
920: if (cur == NULL)
921: return(xmlStrdup(add));
922:
923: while (IS_CHAR(*p)) p++;
924: return(xmlStrncat(cur, add, p - add));
925: }
926:
927: /************************************************************************
928: * *
929: * Commodity functions, cleanup needed ? *
930: * *
931: ************************************************************************/
932:
1.50 daniel 933: /**
934: * areBlanks:
935: * @ctxt: an XML parser context
936: * @str: a CHAR *
937: * @len: the size of @str
938: *
1.45 daniel 939: * Is this a sequence of blank chars that one can ignore ?
1.50 daniel 940: *
941: * TODO: to be corrected accodingly to DTD information if available
1.68 daniel 942: *
943: * Returns 1 if ignorable 0 otherwise.
1.45 daniel 944: */
945:
946: static int areBlanks(xmlParserCtxtPtr ctxt, const CHAR *str, int len) {
947: int i;
948: xmlNodePtr lastChild;
949:
950: for (i = 0;i < len;i++)
951: if (!(IS_BLANK(str[i]))) return(0);
952:
953: if (CUR != '<') return(0);
1.72 daniel 954: if (ctxt->node == NULL) return(0);
1.45 daniel 955: lastChild = xmlGetLastChild(ctxt->node);
956: if (lastChild == NULL) {
957: if (ctxt->node->content != NULL) return(0);
958: } else if (xmlNodeIsText(lastChild))
959: return(0);
960: return(1);
961: }
962:
1.50 daniel 963: /**
964: * xmlHandleEntity:
965: * @ctxt: an XML parser context
966: * @entity: an XML entity pointer.
967: *
968: * Default handling of defined entities, when should we define a new input
1.45 daniel 969: * stream ? When do we just handle that as a set of chars ?
1.50 daniel 970: * TODO: we should call the SAX handler here and have it resolve the issue
1.45 daniel 971: */
972:
1.55 daniel 973: void
974: xmlHandleEntity(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1.45 daniel 975: int len;
1.50 daniel 976: xmlParserInputPtr input;
1.45 daniel 977:
978: if (entity->content == NULL) {
1.55 daniel 979: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 980: ctxt->sax->error(ctxt->userData, "xmlHandleEntity %s: content == NULL\n",
1.45 daniel 981: entity->name);
1.59 daniel 982: ctxt->wellFormed = 0;
1.45 daniel 983: return;
984: }
985: len = xmlStrlen(entity->content);
986: if (len <= 2) goto handle_as_char;
987:
988: /*
989: * Redefine its content as an input stream.
990: */
1.50 daniel 991: input = xmlNewEntityInputStream(ctxt, entity);
992: xmlPushInput(ctxt, input);
1.45 daniel 993: return;
994:
995: handle_as_char:
996: /*
997: * Just handle the content as a set of chars.
998: */
1.72 daniel 999: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
1.74 daniel 1000: ctxt->sax->characters(ctxt->userData, entity->content, len);
1.45 daniel 1001:
1002: }
1003:
1004: /*
1005: * Forward definition for recusive behaviour.
1006: */
1.77 daniel 1007: void xmlParsePEReference(xmlParserCtxtPtr ctxt);
1008: void xmlParseReference(xmlParserCtxtPtr ctxt);
1.45 daniel 1009:
1.28 daniel 1010: /************************************************************************
1011: * *
1012: * Extra stuff for namespace support *
1013: * Relates to http://www.w3.org/TR/WD-xml-names *
1014: * *
1015: ************************************************************************/
1016:
1.50 daniel 1017: /**
1018: * xmlNamespaceParseNCName:
1019: * @ctxt: an XML parser context
1020: *
1021: * parse an XML namespace name.
1.28 daniel 1022: *
1023: * [NS 3] NCName ::= (Letter | '_') (NCNameChar)*
1024: *
1025: * [NS 4] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
1026: * CombiningChar | Extender
1.68 daniel 1027: *
1028: * Returns the namespace name or NULL
1.28 daniel 1029: */
1030:
1.55 daniel 1031: CHAR *
1032: xmlNamespaceParseNCName(xmlParserCtxtPtr ctxt) {
1.28 daniel 1033: const CHAR *q;
1034: CHAR *ret = NULL;
1035:
1.40 daniel 1036: if (!IS_LETTER(CUR) && (CUR != '_')) return(NULL);
1037: q = NEXT;
1.28 daniel 1038:
1.40 daniel 1039: while ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)) ||
1040: (CUR == '.') || (CUR == '-') ||
1041: (CUR == '_') ||
1042: (IS_COMBINING(CUR)) ||
1043: (IS_EXTENDER(CUR)))
1044: NEXT;
1.28 daniel 1045:
1.40 daniel 1046: ret = xmlStrndup(q, CUR_PTR - q);
1.28 daniel 1047:
1048: return(ret);
1049: }
1050:
1.50 daniel 1051: /**
1052: * xmlNamespaceParseQName:
1053: * @ctxt: an XML parser context
1054: * @prefix: a CHAR **
1055: *
1056: * parse an XML qualified name
1.28 daniel 1057: *
1058: * [NS 5] QName ::= (Prefix ':')? LocalPart
1059: *
1060: * [NS 6] Prefix ::= NCName
1061: *
1062: * [NS 7] LocalPart ::= NCName
1.68 daniel 1063: *
1064: * Returns the function returns the local part, and prefix is updated
1.50 daniel 1065: * to get the Prefix if any.
1.28 daniel 1066: */
1067:
1.55 daniel 1068: CHAR *
1069: xmlNamespaceParseQName(xmlParserCtxtPtr ctxt, CHAR **prefix) {
1.28 daniel 1070: CHAR *ret = NULL;
1071:
1072: *prefix = NULL;
1073: ret = xmlNamespaceParseNCName(ctxt);
1.40 daniel 1074: if (CUR == ':') {
1.28 daniel 1075: *prefix = ret;
1.40 daniel 1076: NEXT;
1.28 daniel 1077: ret = xmlNamespaceParseNCName(ctxt);
1078: }
1079:
1080: return(ret);
1081: }
1082:
1.50 daniel 1083: /**
1.72 daniel 1084: * xmlSplitQName:
1085: * @name: an XML parser context
1086: * @prefix: a CHAR **
1087: *
1088: * parse an XML qualified name string
1089: *
1090: * [NS 5] QName ::= (Prefix ':')? LocalPart
1091: *
1092: * [NS 6] Prefix ::= NCName
1093: *
1094: * [NS 7] LocalPart ::= NCName
1095: *
1096: * Returns the function returns the local part, and prefix is updated
1097: * to get the Prefix if any.
1098: */
1099:
1100: CHAR *
1101: xmlSplitQName(const CHAR *name, CHAR **prefix) {
1102: CHAR *ret = NULL;
1103: const CHAR *q;
1104: const CHAR *cur = name;
1105:
1106: *prefix = NULL;
1107: if (!IS_LETTER(*cur) && (*cur != '_')) return(NULL);
1108: q = cur++;
1109:
1110: while ((IS_LETTER(*cur)) || (IS_DIGIT(*cur)) ||
1111: (*cur == '.') || (*cur == '-') ||
1112: (*cur == '_') ||
1113: (IS_COMBINING(*cur)) ||
1114: (IS_EXTENDER(*cur)))
1115: cur++;
1116:
1117: ret = xmlStrndup(q, cur - q);
1118:
1119: if (*cur == ':') {
1120: cur++;
1121: if (!IS_LETTER(*cur) && (*cur != '_')) return(ret);
1122: *prefix = ret;
1123:
1124: q = cur++;
1125:
1126: while ((IS_LETTER(*cur)) || (IS_DIGIT(*cur)) ||
1127: (*cur == '.') || (*cur == '-') ||
1128: (*cur == '_') ||
1129: (IS_COMBINING(*cur)) ||
1130: (IS_EXTENDER(*cur)))
1131: cur++;
1132:
1133: ret = xmlStrndup(q, cur - q);
1134: }
1135:
1136: return(ret);
1137: }
1138: /**
1.50 daniel 1139: * xmlNamespaceParseNSDef:
1140: * @ctxt: an XML parser context
1141: *
1142: * parse a namespace prefix declaration
1.28 daniel 1143: *
1144: * [NS 1] NSDef ::= PrefixDef Eq SystemLiteral
1145: *
1146: * [NS 2] PrefixDef ::= 'xmlns' (':' NCName)?
1.68 daniel 1147: *
1148: * Returns the namespace name
1.28 daniel 1149: */
1150:
1.55 daniel 1151: CHAR *
1152: xmlNamespaceParseNSDef(xmlParserCtxtPtr ctxt) {
1.28 daniel 1153: CHAR *name = NULL;
1154:
1.40 daniel 1155: if ((CUR == 'x') && (NXT(1) == 'm') &&
1156: (NXT(2) == 'l') && (NXT(3) == 'n') &&
1157: (NXT(4) == 's')) {
1158: SKIP(5);
1159: if (CUR == ':') {
1160: NEXT;
1.28 daniel 1161: name = xmlNamespaceParseNCName(ctxt);
1162: }
1163: }
1.39 daniel 1164: return(name);
1.28 daniel 1165: }
1166:
1.50 daniel 1167: /**
1168: * xmlParseQuotedString:
1169: * @ctxt: an XML parser context
1170: *
1.45 daniel 1171: * [OLD] Parse and return a string between quotes or doublequotes
1.68 daniel 1172: *
1173: * Returns the string parser or NULL.
1.45 daniel 1174: */
1.55 daniel 1175: CHAR *
1176: xmlParseQuotedString(xmlParserCtxtPtr ctxt) {
1.45 daniel 1177: CHAR *ret = NULL;
1178: const CHAR *q;
1179:
1180: if (CUR == '"') {
1181: NEXT;
1182: q = CUR_PTR;
1183: while (IS_CHAR(CUR) && (CUR != '"')) NEXT;
1.55 daniel 1184: if (CUR != '"') {
1185: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 1186: ctxt->sax->error(ctxt->userData, "String not closed \"%.50s\"\n", q);
1.59 daniel 1187: ctxt->wellFormed = 0;
1.55 daniel 1188: } else {
1.45 daniel 1189: ret = xmlStrndup(q, CUR_PTR - q);
1190: NEXT;
1191: }
1192: } else if (CUR == '\''){
1193: NEXT;
1194: q = CUR_PTR;
1195: while (IS_CHAR(CUR) && (CUR != '\'')) NEXT;
1.55 daniel 1196: if (CUR != '\'') {
1197: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 1198: ctxt->sax->error(ctxt->userData, "String not closed \"%.50s\"\n", q);
1.59 daniel 1199: ctxt->wellFormed = 0;
1.55 daniel 1200: } else {
1.45 daniel 1201: ret = xmlStrndup(q, CUR_PTR - q);
1202: NEXT;
1203: }
1204: }
1205: return(ret);
1206: }
1207:
1.50 daniel 1208: /**
1209: * xmlParseNamespace:
1210: * @ctxt: an XML parser context
1211: *
1.45 daniel 1212: * [OLD] xmlParseNamespace: parse specific PI '<?namespace ...' constructs.
1213: *
1214: * This is what the older xml-name Working Draft specified, a bunch of
1215: * other stuff may still rely on it, so support is still here as
1216: * if ot was declared on the root of the Tree:-(
1217: */
1218:
1.55 daniel 1219: void
1220: xmlParseNamespace(xmlParserCtxtPtr ctxt) {
1.45 daniel 1221: CHAR *href = NULL;
1222: CHAR *prefix = NULL;
1223: int garbage = 0;
1224:
1225: /*
1226: * We just skipped "namespace" or "xml:namespace"
1227: */
1228: SKIP_BLANKS;
1229:
1230: while (IS_CHAR(CUR) && (CUR != '>')) {
1231: /*
1232: * We can have "ns" or "prefix" attributes
1233: * Old encoding as 'href' or 'AS' attributes is still supported
1234: */
1235: if ((CUR == 'n') && (NXT(1) == 's')) {
1236: garbage = 0;
1237: SKIP(2);
1238: SKIP_BLANKS;
1239:
1240: if (CUR != '=') continue;
1241: NEXT;
1242: SKIP_BLANKS;
1243:
1244: href = xmlParseQuotedString(ctxt);
1245: SKIP_BLANKS;
1246: } else if ((CUR == 'h') && (NXT(1) == 'r') &&
1247: (NXT(2) == 'e') && (NXT(3) == 'f')) {
1248: garbage = 0;
1249: SKIP(4);
1250: SKIP_BLANKS;
1251:
1252: if (CUR != '=') continue;
1253: NEXT;
1254: SKIP_BLANKS;
1255:
1256: href = xmlParseQuotedString(ctxt);
1257: SKIP_BLANKS;
1258: } else if ((CUR == 'p') && (NXT(1) == 'r') &&
1259: (NXT(2) == 'e') && (NXT(3) == 'f') &&
1260: (NXT(4) == 'i') && (NXT(5) == 'x')) {
1261: garbage = 0;
1262: SKIP(6);
1263: SKIP_BLANKS;
1264:
1265: if (CUR != '=') continue;
1266: NEXT;
1267: SKIP_BLANKS;
1268:
1269: prefix = xmlParseQuotedString(ctxt);
1270: SKIP_BLANKS;
1271: } else if ((CUR == 'A') && (NXT(1) == 'S')) {
1272: garbage = 0;
1273: SKIP(2);
1274: SKIP_BLANKS;
1275:
1276: if (CUR != '=') continue;
1277: NEXT;
1278: SKIP_BLANKS;
1279:
1280: prefix = xmlParseQuotedString(ctxt);
1281: SKIP_BLANKS;
1282: } else if ((CUR == '?') && (NXT(1) == '>')) {
1283: garbage = 0;
1284: CUR_PTR ++;
1285: } else {
1286: /*
1287: * Found garbage when parsing the namespace
1288: */
1289: if (!garbage)
1.55 daniel 1290: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 1291: ctxt->sax->error(ctxt->userData, "xmlParseNamespace found garbage\n");
1.59 daniel 1292: ctxt->wellFormed = 0;
1.45 daniel 1293: NEXT;
1294: }
1295: }
1296:
1297: MOVETO_ENDTAG(CUR_PTR);
1298: NEXT;
1299:
1300: /*
1301: * Register the DTD.
1.72 daniel 1302: if (href != NULL)
1303: if ((ctxt->sax != NULL) && (ctxt->sax->globalNamespace != NULL))
1.74 daniel 1304: ctxt->sax->globalNamespace(ctxt->userData, href, prefix);
1.45 daniel 1305: */
1306:
1307: if (prefix != NULL) free(prefix);
1308: if (href != NULL) free(href);
1309: }
1310:
1.28 daniel 1311: /************************************************************************
1312: * *
1313: * The parser itself *
1314: * Relates to http://www.w3.org/TR/REC-xml *
1315: * *
1316: ************************************************************************/
1.14 veillard 1317:
1.50 daniel 1318: /**
1319: * xmlParseName:
1320: * @ctxt: an XML parser context
1321: *
1322: * parse an XML name.
1.22 daniel 1323: *
1324: * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1325: * CombiningChar | Extender
1326: *
1327: * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1328: *
1329: * [6] Names ::= Name (S Name)*
1.68 daniel 1330: *
1331: * Returns the Name parsed or NULL
1.1 veillard 1332: */
1333:
1.55 daniel 1334: CHAR *
1335: xmlParseName(xmlParserCtxtPtr ctxt) {
1.17 daniel 1336: const CHAR *q;
1337: CHAR *ret = NULL;
1.1 veillard 1338:
1.40 daniel 1339: if (!IS_LETTER(CUR) && (CUR != '_') &&
1340: (CUR != ':')) return(NULL);
1341: q = NEXT;
1342:
1343: while ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)) ||
1344: (CUR == '.') || (CUR == '-') ||
1345: (CUR == '_') || (CUR == ':') ||
1346: (IS_COMBINING(CUR)) ||
1347: (IS_EXTENDER(CUR)))
1348: NEXT;
1.22 daniel 1349:
1.40 daniel 1350: ret = xmlStrndup(q, CUR_PTR - q);
1.22 daniel 1351:
1352: return(ret);
1353: }
1354:
1.50 daniel 1355: /**
1356: * xmlParseNmtoken:
1357: * @ctxt: an XML parser context
1358: *
1359: * parse an XML Nmtoken.
1.22 daniel 1360: *
1361: * [7] Nmtoken ::= (NameChar)+
1362: *
1363: * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
1.68 daniel 1364: *
1365: * Returns the Nmtoken parsed or NULL
1.22 daniel 1366: */
1367:
1.55 daniel 1368: CHAR *
1369: xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
1.22 daniel 1370: const CHAR *q;
1371: CHAR *ret = NULL;
1372:
1.40 daniel 1373: q = NEXT;
1.22 daniel 1374:
1.40 daniel 1375: while ((IS_LETTER(CUR)) || (IS_DIGIT(CUR)) ||
1376: (CUR == '.') || (CUR == '-') ||
1377: (CUR == '_') || (CUR == ':') ||
1378: (IS_COMBINING(CUR)) ||
1379: (IS_EXTENDER(CUR)))
1380: NEXT;
1.3 veillard 1381:
1.40 daniel 1382: ret = xmlStrndup(q, CUR_PTR - q);
1.1 veillard 1383:
1.3 veillard 1384: return(ret);
1.1 veillard 1385: }
1386:
1.50 daniel 1387: /**
1388: * xmlParseEntityValue:
1389: * @ctxt: an XML parser context
1.78 daniel 1390: * @orig: if non-NULL store a copy of the original entity value
1.50 daniel 1391: *
1392: * parse a value for ENTITY decl.
1.24 daniel 1393: *
1394: * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
1395: * "'" ([^%&'] | PEReference | Reference)* "'"
1.68 daniel 1396: *
1.78 daniel 1397: * Returns the EntityValue parsed with reference substitued or NULL
1.24 daniel 1398: */
1399:
1.55 daniel 1400: CHAR *
1.78 daniel 1401: xmlParseEntityValue(xmlParserCtxtPtr ctxt, CHAR **orig) {
1.77 daniel 1402: CHAR *ret = NULL;
1.78 daniel 1403: const CHAR *org = NULL;
1.79 daniel 1404: const CHAR *tst = NULL;
1405: const CHAR *temp = NULL;
1.24 daniel 1406:
1.40 daniel 1407: if (CUR == '"') {
1408: NEXT;
1.78 daniel 1409: org = CUR_PTR;
1.79 daniel 1410: while (CUR != '"') {
1411: tst = CUR_PTR;
1412: temp = xmlDecodeEntities(ctxt, -1, XML_SUBSTITUTE_BOTH, '"', 0, 0);
1413: if ((temp == NULL) && (tst == CUR_PTR)) break;
1414: ret = xmlStrcat(ret, temp);
1.80 daniel 1415: if (temp != NULL) free((char *)temp);
1.79 daniel 1416: }
1.77 daniel 1417: if (CUR != '"') {
1.55 daniel 1418: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.79 daniel 1419: ctxt->sax->error(ctxt->userData, "EntityValue: \" expected\n");
1.59 daniel 1420: ctxt->wellFormed = 0;
1.78 daniel 1421: } else {
1422: if (orig != NULL)
1423: *orig = xmlStrndup(org, CUR_PTR - org);
1.40 daniel 1424: NEXT;
1.78 daniel 1425: }
1.40 daniel 1426: } else if (CUR == '\'') {
1427: NEXT;
1.78 daniel 1428: org = CUR_PTR;
1.80 daniel 1429: while (CUR != '\'') {
1.79 daniel 1430: tst = CUR_PTR;
1431: temp = xmlDecodeEntities(ctxt, -1, XML_SUBSTITUTE_BOTH, '\'', 0, 0);
1432: if ((temp == NULL) && (tst == CUR_PTR)) break;
1433: ret = xmlStrcat(ret, temp);
1.80 daniel 1434: if (temp != NULL) free((char *)temp);
1.79 daniel 1435: }
1.77 daniel 1436: if (CUR != '\'') {
1.55 daniel 1437: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.77 daniel 1438: ctxt->sax->error(ctxt->userData, "EntityValue: ' expected\n");
1.59 daniel 1439: ctxt->wellFormed = 0;
1.78 daniel 1440: } else {
1441: if (orig != NULL)
1442: *orig = xmlStrndup(org, CUR_PTR - org);
1.40 daniel 1443: NEXT;
1.78 daniel 1444: }
1.24 daniel 1445: } else {
1.55 daniel 1446: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.77 daniel 1447: ctxt->sax->error(ctxt->userData, "EntityValue: \" or ' expected\n");
1.59 daniel 1448: ctxt->wellFormed = 0;
1.24 daniel 1449: }
1450:
1451: return(ret);
1452: }
1453:
1.50 daniel 1454: /**
1455: * xmlParseAttValue:
1456: * @ctxt: an XML parser context
1457: *
1458: * parse a value for an attribute
1.78 daniel 1459: * Note: the parser won't do substitution of entities here, this
1.79 daniel 1460: * will be handled later in xmlStringGetNodeList, unless it was
1461: * asked for ctxt->replaceEntities != 0
1.29 daniel 1462: *
1463: * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
1464: * "'" ([^<&'] | Reference)* "'"
1.68 daniel 1465: *
1466: * Returns the AttValue parsed or NULL.
1.29 daniel 1467: */
1468:
1.55 daniel 1469: CHAR *
1470: xmlParseAttValue(xmlParserCtxtPtr ctxt) {
1.77 daniel 1471: CHAR *ret = NULL;
1.29 daniel 1472:
1.40 daniel 1473: if (CUR == '"') {
1474: NEXT;
1.79 daniel 1475: if (ctxt->replaceEntities != 0)
1476: ret = xmlDecodeEntities(ctxt, -1, XML_SUBSTITUTE_REF, '"', '<', 0);
1477: else
1478: ret = xmlDecodeEntities(ctxt, -1, XML_SUBSTITUTE_NONE, '"', '<', 0);
1.77 daniel 1479: if (CUR == '<') {
1480: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1481: ctxt->sax->error(ctxt->userData,
1482: "Unescaped '<' not allowed in attributes values\n");
1483: ctxt->wellFormed = 0;
1.29 daniel 1484: }
1.77 daniel 1485: if (CUR != '"') {
1.55 daniel 1486: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.77 daniel 1487: ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");
1.59 daniel 1488: ctxt->wellFormed = 0;
1.77 daniel 1489: } else
1.40 daniel 1490: NEXT;
1491: } else if (CUR == '\'') {
1492: NEXT;
1.79 daniel 1493: if (ctxt->replaceEntities != 0)
1494: ret = xmlDecodeEntities(ctxt, -1, XML_SUBSTITUTE_REF, '\'', '<', 0);
1495: else
1496: ret = xmlDecodeEntities(ctxt, -1, XML_SUBSTITUTE_NONE, '\'', '<', 0);
1.77 daniel 1497: if (CUR == '<') {
1498: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1499: ctxt->sax->error(ctxt->userData,
1500: "Unescaped '<' not allowed in attributes values\n");
1501: ctxt->wellFormed = 0;
1.29 daniel 1502: }
1.77 daniel 1503: if (CUR != '\'') {
1.55 daniel 1504: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.77 daniel 1505: ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");
1.59 daniel 1506: ctxt->wellFormed = 0;
1.77 daniel 1507: } else
1.40 daniel 1508: NEXT;
1.29 daniel 1509: } else {
1.55 daniel 1510: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 1511: ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n");
1.59 daniel 1512: ctxt->wellFormed = 0;
1.29 daniel 1513: }
1514:
1515: return(ret);
1516: }
1517:
1.50 daniel 1518: /**
1519: * xmlParseSystemLiteral:
1520: * @ctxt: an XML parser context
1521: *
1522: * parse an XML Literal
1.21 daniel 1523: *
1.22 daniel 1524: * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
1.68 daniel 1525: *
1526: * Returns the SystemLiteral parsed or NULL
1.21 daniel 1527: */
1528:
1.55 daniel 1529: CHAR *
1530: xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
1.21 daniel 1531: const CHAR *q;
1532: CHAR *ret = NULL;
1533:
1.40 daniel 1534: if (CUR == '"') {
1535: NEXT;
1536: q = CUR_PTR;
1537: while ((IS_CHAR(CUR)) && (CUR != '"'))
1538: NEXT;
1539: if (!IS_CHAR(CUR)) {
1.55 daniel 1540: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 1541: ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n");
1.59 daniel 1542: ctxt->wellFormed = 0;
1.21 daniel 1543: } else {
1.40 daniel 1544: ret = xmlStrndup(q, CUR_PTR - q);
1545: NEXT;
1.21 daniel 1546: }
1.40 daniel 1547: } else if (CUR == '\'') {
1548: NEXT;
1549: q = CUR_PTR;
1550: while ((IS_CHAR(CUR)) && (CUR != '\''))
1551: NEXT;
1552: if (!IS_CHAR(CUR)) {
1.55 daniel 1553: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 1554: ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n");
1.59 daniel 1555: ctxt->wellFormed = 0;
1.21 daniel 1556: } else {
1.40 daniel 1557: ret = xmlStrndup(q, CUR_PTR - q);
1558: NEXT;
1.21 daniel 1559: }
1560: } else {
1.55 daniel 1561: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 1562: ctxt->sax->error(ctxt->userData, "SystemLiteral \" or ' expected\n");
1.59 daniel 1563: ctxt->wellFormed = 0;
1.21 daniel 1564: }
1565:
1566: return(ret);
1567: }
1568:
1.50 daniel 1569: /**
1570: * xmlParsePubidLiteral:
1571: * @ctxt: an XML parser context
1.21 daniel 1572: *
1.50 daniel 1573: * parse an XML public literal
1.68 daniel 1574: *
1575: * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
1576: *
1577: * Returns the PubidLiteral parsed or NULL.
1.21 daniel 1578: */
1579:
1.55 daniel 1580: CHAR *
1581: xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
1.21 daniel 1582: const CHAR *q;
1583: CHAR *ret = NULL;
1584: /*
1585: * Name ::= (Letter | '_') (NameChar)*
1586: */
1.40 daniel 1587: if (CUR == '"') {
1588: NEXT;
1589: q = CUR_PTR;
1590: while (IS_PUBIDCHAR(CUR)) NEXT;
1591: if (CUR != '"') {
1.55 daniel 1592: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 1593: ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n");
1.59 daniel 1594: ctxt->wellFormed = 0;
1.21 daniel 1595: } else {
1.40 daniel 1596: ret = xmlStrndup(q, CUR_PTR - q);
1597: NEXT;
1.21 daniel 1598: }
1.40 daniel 1599: } else if (CUR == '\'') {
1600: NEXT;
1601: q = CUR_PTR;
1602: while ((IS_LETTER(CUR)) && (CUR != '\''))
1603: NEXT;
1604: if (!IS_LETTER(CUR)) {
1.55 daniel 1605: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 1606: ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n");
1.59 daniel 1607: ctxt->wellFormed = 0;
1.21 daniel 1608: } else {
1.40 daniel 1609: ret = xmlStrndup(q, CUR_PTR - q);
1610: NEXT;
1.21 daniel 1611: }
1612: } else {
1.55 daniel 1613: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 1614: ctxt->sax->error(ctxt->userData, "SystemLiteral \" or ' expected\n");
1.59 daniel 1615: ctxt->wellFormed = 0;
1.21 daniel 1616: }
1617:
1618: return(ret);
1619: }
1620:
1.50 daniel 1621: /**
1622: * xmlParseCharData:
1623: * @ctxt: an XML parser context
1624: * @cdata: int indicating whether we are within a CDATA section
1625: *
1626: * parse a CharData section.
1627: * if we are within a CDATA section ']]>' marks an end of section.
1.27 daniel 1628: *
1629: * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
1630: */
1631:
1.55 daniel 1632: void
1633: xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
1.27 daniel 1634: const CHAR *q;
1635:
1.40 daniel 1636: q = CUR_PTR;
1637: while ((IS_CHAR(CUR)) && (CUR != '<') &&
1638: (CUR != '&')) {
1.59 daniel 1639: if ((CUR == ']') && (NXT(1) == ']') &&
1640: (NXT(2) == '>')) {
1641: if (cdata) break;
1642: else {
1643: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 1644: ctxt->sax->error(ctxt->userData,
1.59 daniel 1645: "Sequence ']]>' not allowed in content\n");
1646: ctxt->wellFormed = 0;
1647: }
1648: }
1.40 daniel 1649: NEXT;
1.27 daniel 1650: }
1.45 daniel 1651: if (q == CUR_PTR) return;
1652:
1653: /*
1654: * Ok the segment [q CUR_PTR] is to be consumed as chars.
1655: */
1656: if (ctxt->sax != NULL) {
1.72 daniel 1657: if (areBlanks(ctxt, q, CUR_PTR - q)) {
1658: if (ctxt->sax->ignorableWhitespace != NULL)
1.74 daniel 1659: ctxt->sax->ignorableWhitespace(ctxt->userData, q, CUR_PTR - q);
1.72 daniel 1660: } else {
1661: if (ctxt->sax->characters != NULL)
1.74 daniel 1662: ctxt->sax->characters(ctxt->userData, q, CUR_PTR - q);
1.72 daniel 1663: }
1.45 daniel 1664: }
1.27 daniel 1665: }
1666:
1.50 daniel 1667: /**
1668: * xmlParseExternalID:
1669: * @ctxt: an XML parser context
1670: * @publicID: a CHAR** receiving PubidLiteral
1.67 daniel 1671: * @strict: indicate whether we should restrict parsing to only
1672: * production [75], see NOTE below
1.50 daniel 1673: *
1.67 daniel 1674: * Parse an External ID or a Public ID
1675: *
1676: * NOTE: Productions [75] and [83] interract badly since [75] can generate
1677: * 'PUBLIC' S PubidLiteral S SystemLiteral
1.22 daniel 1678: *
1679: * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
1680: * | 'PUBLIC' S PubidLiteral S SystemLiteral
1.67 daniel 1681: *
1682: * [83] PublicID ::= 'PUBLIC' S PubidLiteral
1683: *
1.68 daniel 1684: * Returns the function returns SystemLiteral and in the second
1.67 daniel 1685: * case publicID receives PubidLiteral, is strict is off
1686: * it is possible to return NULL and have publicID set.
1.22 daniel 1687: */
1688:
1.55 daniel 1689: CHAR *
1.67 daniel 1690: xmlParseExternalID(xmlParserCtxtPtr ctxt, CHAR **publicID, int strict) {
1.39 daniel 1691: CHAR *URI = NULL;
1.22 daniel 1692:
1.40 daniel 1693: if ((CUR == 'S') && (NXT(1) == 'Y') &&
1694: (NXT(2) == 'S') && (NXT(3) == 'T') &&
1695: (NXT(4) == 'E') && (NXT(5) == 'M')) {
1696: SKIP(6);
1.59 daniel 1697: if (!IS_BLANK(CUR)) {
1698: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 1699: ctxt->sax->error(ctxt->userData,
1.59 daniel 1700: "Space required after 'SYSTEM'\n");
1701: ctxt->wellFormed = 0;
1702: }
1.42 daniel 1703: SKIP_BLANKS;
1.39 daniel 1704: URI = xmlParseSystemLiteral(ctxt);
1.59 daniel 1705: if (URI == NULL) {
1.55 daniel 1706: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 1707: ctxt->sax->error(ctxt->userData,
1.39 daniel 1708: "xmlParseExternalID: SYSTEM, no URI\n");
1.59 daniel 1709: ctxt->wellFormed = 0;
1710: }
1.40 daniel 1711: } else if ((CUR == 'P') && (NXT(1) == 'U') &&
1712: (NXT(2) == 'B') && (NXT(3) == 'L') &&
1713: (NXT(4) == 'I') && (NXT(5) == 'C')) {
1714: SKIP(6);
1.59 daniel 1715: if (!IS_BLANK(CUR)) {
1716: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 1717: ctxt->sax->error(ctxt->userData,
1.59 daniel 1718: "Space required after 'PUBLIC'\n");
1719: ctxt->wellFormed = 0;
1720: }
1.42 daniel 1721: SKIP_BLANKS;
1.39 daniel 1722: *publicID = xmlParsePubidLiteral(ctxt);
1.59 daniel 1723: if (*publicID == NULL) {
1.55 daniel 1724: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 1725: ctxt->sax->error(ctxt->userData,
1.39 daniel 1726: "xmlParseExternalID: PUBLIC, no Public Identifier\n");
1.59 daniel 1727: ctxt->wellFormed = 0;
1728: }
1.67 daniel 1729: if (strict) {
1730: /*
1731: * We don't handle [83] so "S SystemLiteral" is required.
1732: */
1733: if (!IS_BLANK(CUR)) {
1734: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 1735: ctxt->sax->error(ctxt->userData,
1.67 daniel 1736: "Space required after the Public Identifier\n");
1737: ctxt->wellFormed = 0;
1738: }
1739: } else {
1740: /*
1741: * We handle [83] so we return immediately, if
1742: * "S SystemLiteral" is not detected. From a purely parsing
1743: * point of view that's a nice mess.
1744: */
1745: const CHAR *ptr = CUR_PTR;
1746: if (!IS_BLANK(*ptr)) return(NULL);
1747:
1748: while (IS_BLANK(*ptr)) ptr++;
1749: if ((*ptr != '\'') || (*ptr != '"')) return(NULL);
1.59 daniel 1750: }
1.42 daniel 1751: SKIP_BLANKS;
1.39 daniel 1752: URI = xmlParseSystemLiteral(ctxt);
1.59 daniel 1753: if (URI == NULL) {
1.55 daniel 1754: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 1755: ctxt->sax->error(ctxt->userData,
1.39 daniel 1756: "xmlParseExternalID: PUBLIC, no URI\n");
1.59 daniel 1757: ctxt->wellFormed = 0;
1758: }
1.22 daniel 1759: }
1.39 daniel 1760: return(URI);
1.22 daniel 1761: }
1762:
1.50 daniel 1763: /**
1764: * xmlParseComment:
1.69 daniel 1765: * @ctxt: an XML parser context
1766: * @create: should we create a node, or just skip the content
1.50 daniel 1767: *
1.3 veillard 1768: * Skip an XML (SGML) comment <!-- .... -->
1.31 daniel 1769: * This may or may not create a node (depending on the context)
1.38 daniel 1770: * The spec says that "For compatibility, the string "--" (double-hyphen)
1771: * must not occur within comments. "
1.22 daniel 1772: *
1773: * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
1.3 veillard 1774: */
1.72 daniel 1775: void
1.69 daniel 1776: xmlParseComment(xmlParserCtxtPtr ctxt, int create) {
1.17 daniel 1777: const CHAR *q, *start;
1778: const CHAR *r;
1.39 daniel 1779: CHAR *val;
1.3 veillard 1780:
1781: /*
1.22 daniel 1782: * Check that there is a comment right here.
1.3 veillard 1783: */
1.40 daniel 1784: if ((CUR != '<') || (NXT(1) != '!') ||
1.72 daniel 1785: (NXT(2) != '-') || (NXT(3) != '-')) return;
1.3 veillard 1786:
1.40 daniel 1787: SKIP(4);
1788: start = q = CUR_PTR;
1789: NEXT;
1790: r = CUR_PTR;
1791: NEXT;
1792: while (IS_CHAR(CUR) &&
1793: ((CUR == ':') || (CUR != '>') ||
1.16 daniel 1794: (*r != '-') || (*q != '-'))) {
1.59 daniel 1795: if ((*r == '-') && (*q == '-')) {
1.55 daniel 1796: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 1797: ctxt->sax->error(ctxt->userData,
1.38 daniel 1798: "Comment must not contain '--' (double-hyphen)`\n");
1.59 daniel 1799: ctxt->wellFormed = 0;
1800: }
1.40 daniel 1801: NEXT;r++;q++;
1.3 veillard 1802: }
1.40 daniel 1803: if (!IS_CHAR(CUR)) {
1.55 daniel 1804: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 1805: ctxt->sax->error(ctxt->userData, "Comment not terminated \n<!--%.50s\n", start);
1.59 daniel 1806: ctxt->wellFormed = 0;
1.3 veillard 1807: } else {
1.40 daniel 1808: NEXT;
1.31 daniel 1809: if (create) {
1.39 daniel 1810: val = xmlStrndup(start, q - start);
1.72 daniel 1811: if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL))
1.74 daniel 1812: ctxt->sax->comment(ctxt->userData, val);
1.39 daniel 1813: free(val);
1.31 daniel 1814: }
1.3 veillard 1815: }
1816: }
1817:
1.50 daniel 1818: /**
1819: * xmlParsePITarget:
1820: * @ctxt: an XML parser context
1821: *
1822: * parse the name of a PI
1.22 daniel 1823: *
1824: * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
1.68 daniel 1825: *
1826: * Returns the PITarget name or NULL
1.22 daniel 1827: */
1828:
1.55 daniel 1829: CHAR *
1830: xmlParsePITarget(xmlParserCtxtPtr ctxt) {
1.22 daniel 1831: CHAR *name;
1832:
1833: name = xmlParseName(ctxt);
1834: if ((name != NULL) && (name[3] == 0) &&
1835: ((name[0] == 'x') || (name[0] == 'X')) &&
1.31 daniel 1836: ((name[1] == 'm') || (name[1] == 'M')) &&
1837: ((name[2] == 'l') || (name[2] == 'L'))) {
1.55 daniel 1838: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 1839: ctxt->sax->error(ctxt->userData, "xmlParsePItarget: invalid name prefix 'xml'\n");
1.22 daniel 1840: return(NULL);
1841: }
1842: return(name);
1843: }
1844:
1.50 daniel 1845: /**
1846: * xmlParsePI:
1847: * @ctxt: an XML parser context
1848: *
1849: * parse an XML Processing Instruction.
1.22 daniel 1850: *
1851: * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
1.68 daniel 1852: *
1.69 daniel 1853: * The processing is transfered to SAX once parsed.
1.3 veillard 1854: */
1855:
1.55 daniel 1856: void
1857: xmlParsePI(xmlParserCtxtPtr ctxt) {
1.22 daniel 1858: CHAR *target;
1859:
1.40 daniel 1860: if ((CUR == '<') && (NXT(1) == '?')) {
1.3 veillard 1861: /*
1862: * this is a Processing Instruction.
1863: */
1.40 daniel 1864: SKIP(2);
1.3 veillard 1865:
1866: /*
1.22 daniel 1867: * Parse the target name and check for special support like
1868: * namespace.
1869: *
1870: * TODO : PI handling should be dynamically redefinable using an
1871: * API. Only namespace should be in the code IMHO ...
1.3 veillard 1872: */
1.22 daniel 1873: target = xmlParsePITarget(ctxt);
1874: if (target != NULL) {
1.72 daniel 1875: const CHAR *q = CUR_PTR;
1876:
1877: while (IS_CHAR(CUR) &&
1878: ((CUR != '?') || (NXT(1) != '>')))
1879: NEXT;
1880: if (!IS_CHAR(CUR)) {
1881: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 1882: ctxt->sax->error(ctxt->userData,
1.72 daniel 1883: "xmlParsePI: PI %s never end ...\n", target);
1884: ctxt->wellFormed = 0;
1.22 daniel 1885: } else {
1.72 daniel 1886: CHAR *data;
1.44 daniel 1887:
1.72 daniel 1888: data = xmlStrndup(q, CUR_PTR - q);
1889: SKIP(2);
1.44 daniel 1890:
1.72 daniel 1891: /*
1892: * SAX: PI detected.
1893: */
1894: if ((ctxt->sax) &&
1895: (ctxt->sax->processingInstruction != NULL))
1.74 daniel 1896: ctxt->sax->processingInstruction(ctxt->userData, target, data);
1.72 daniel 1897: free(data);
1.22 daniel 1898: }
1.39 daniel 1899: free(target);
1.3 veillard 1900: } else {
1.55 daniel 1901: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 1902: ctxt->sax->error(ctxt->userData, "xmlParsePI : no target name\n");
1.59 daniel 1903: ctxt->wellFormed = 0;
1904:
1.22 daniel 1905: /********* Should we try to complete parsing the PI ???
1.40 daniel 1906: while (IS_CHAR(CUR) &&
1907: (CUR != '?') && (CUR != '>'))
1908: NEXT;
1909: if (!IS_CHAR(CUR)) {
1.22 daniel 1910: fprintf(stderr, "xmlParsePI: PI %s never end ...\n",
1911: target);
1912: }
1913: ********************************************************/
1914: }
1915: }
1916: }
1917:
1.50 daniel 1918: /**
1919: * xmlParseNotationDecl:
1920: * @ctxt: an XML parser context
1921: *
1922: * parse a notation declaration
1.22 daniel 1923: *
1924: * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
1925: *
1926: * Hence there is actually 3 choices:
1927: * 'PUBLIC' S PubidLiteral
1928: * 'PUBLIC' S PubidLiteral S SystemLiteral
1929: * and 'SYSTEM' S SystemLiteral
1.50 daniel 1930: *
1.67 daniel 1931: * See the NOTE on xmlParseExternalID().
1.22 daniel 1932: */
1933:
1.55 daniel 1934: void
1935: xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
1.22 daniel 1936: CHAR *name;
1.67 daniel 1937: CHAR *Pubid;
1938: CHAR *Systemid;
1.22 daniel 1939:
1.40 daniel 1940: if ((CUR == '<') && (NXT(1) == '!') &&
1941: (NXT(2) == 'N') && (NXT(3) == 'O') &&
1942: (NXT(4) == 'T') && (NXT(5) == 'A') &&
1943: (NXT(6) == 'T') && (NXT(7) == 'I') &&
1.67 daniel 1944: (NXT(8) == 'O') && (NXT(9) == 'N')) {
1.40 daniel 1945: SKIP(10);
1.67 daniel 1946: if (!IS_BLANK(CUR)) {
1947: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 1948: ctxt->sax->error(ctxt->userData, "Space required after '<!NOTATION'\n");
1.67 daniel 1949: ctxt->wellFormed = 0;
1950: return;
1951: }
1952: SKIP_BLANKS;
1.22 daniel 1953:
1954: name = xmlParseName(ctxt);
1955: if (name == NULL) {
1.55 daniel 1956: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 1957: ctxt->sax->error(ctxt->userData, "NOTATION: Name expected here\n");
1.67 daniel 1958: ctxt->wellFormed = 0;
1959: return;
1960: }
1961: if (!IS_BLANK(CUR)) {
1962: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 1963: ctxt->sax->error(ctxt->userData,
1.67 daniel 1964: "Space required after the NOTATION name'\n");
1.59 daniel 1965: ctxt->wellFormed = 0;
1.22 daniel 1966: return;
1967: }
1.42 daniel 1968: SKIP_BLANKS;
1.67 daniel 1969:
1.22 daniel 1970: /*
1.67 daniel 1971: * Parse the IDs.
1.22 daniel 1972: */
1.67 daniel 1973: Systemid = xmlParseExternalID(ctxt, &Pubid, 1);
1974: SKIP_BLANKS;
1975:
1976: if (CUR == '>') {
1.40 daniel 1977: NEXT;
1.72 daniel 1978: if ((ctxt->sax != NULL) && (ctxt->sax->notationDecl != NULL))
1.74 daniel 1979: ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
1.67 daniel 1980: } else {
1981: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 1982: ctxt->sax->error(ctxt->userData,
1.67 daniel 1983: "'>' required to close NOTATION declaration\n");
1984: ctxt->wellFormed = 0;
1985: }
1.22 daniel 1986: free(name);
1.67 daniel 1987: if (Systemid != NULL) free(Systemid);
1988: if (Pubid != NULL) free(Pubid);
1.22 daniel 1989: }
1990: }
1991:
1.50 daniel 1992: /**
1993: * xmlParseEntityDecl:
1994: * @ctxt: an XML parser context
1995: *
1996: * parse <!ENTITY declarations
1.22 daniel 1997: *
1998: * [70] EntityDecl ::= GEDecl | PEDecl
1999: *
2000: * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
2001: *
2002: * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
2003: *
2004: * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
2005: *
2006: * [74] PEDef ::= EntityValue | ExternalID
1.24 daniel 2007: *
2008: * [76] NDataDecl ::= S 'NDATA' S Name
1.22 daniel 2009: */
2010:
1.55 daniel 2011: void
2012: xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
1.39 daniel 2013: CHAR *name = NULL;
1.24 daniel 2014: CHAR *value = NULL;
1.39 daniel 2015: CHAR *URI = NULL, *literal = NULL;
1.24 daniel 2016: CHAR *ndata = NULL;
1.39 daniel 2017: int isParameter = 0;
1.78 daniel 2018: CHAR *orig = NULL;
1.22 daniel 2019:
1.40 daniel 2020: if ((CUR == '<') && (NXT(1) == '!') &&
2021: (NXT(2) == 'E') && (NXT(3) == 'N') &&
2022: (NXT(4) == 'T') && (NXT(5) == 'I') &&
1.59 daniel 2023: (NXT(6) == 'T') && (NXT(7) == 'Y')) {
1.40 daniel 2024: SKIP(8);
1.59 daniel 2025: if (!IS_BLANK(CUR)) {
2026: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2027: ctxt->sax->error(ctxt->userData, "Space required after '<!ENTITY'\n");
1.59 daniel 2028: ctxt->wellFormed = 0;
2029: }
2030: SKIP_BLANKS;
1.40 daniel 2031:
2032: if (CUR == '%') {
2033: NEXT;
1.59 daniel 2034: if (!IS_BLANK(CUR)) {
2035: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2036: ctxt->sax->error(ctxt->userData, "Space required after '%'\n");
1.59 daniel 2037: ctxt->wellFormed = 0;
2038: }
1.42 daniel 2039: SKIP_BLANKS;
1.39 daniel 2040: isParameter = 1;
1.22 daniel 2041: }
2042:
2043: name = xmlParseName(ctxt);
1.24 daniel 2044: if (name == NULL) {
1.55 daniel 2045: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2046: ctxt->sax->error(ctxt->userData, "xmlParseEntityDecl: no name\n");
1.59 daniel 2047: ctxt->wellFormed = 0;
1.24 daniel 2048: return;
2049: }
1.59 daniel 2050: if (!IS_BLANK(CUR)) {
2051: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2052: ctxt->sax->error(ctxt->userData,
1.59 daniel 2053: "Space required after the entity name\n");
2054: ctxt->wellFormed = 0;
2055: }
1.42 daniel 2056: SKIP_BLANKS;
1.24 daniel 2057:
1.22 daniel 2058: /*
1.68 daniel 2059: * handle the various case of definitions...
1.22 daniel 2060: */
1.39 daniel 2061: if (isParameter) {
1.40 daniel 2062: if ((CUR == '"') || (CUR == '\''))
1.78 daniel 2063: value = xmlParseEntityValue(ctxt, &orig);
1.39 daniel 2064: if (value) {
1.72 daniel 2065: if ((ctxt->sax != NULL) && (ctxt->sax->entityDecl != NULL))
1.74 daniel 2066: ctxt->sax->entityDecl(ctxt->userData, name,
1.39 daniel 2067: XML_INTERNAL_PARAMETER_ENTITY,
2068: NULL, NULL, value);
2069: }
1.24 daniel 2070: else {
1.67 daniel 2071: URI = xmlParseExternalID(ctxt, &literal, 1);
1.39 daniel 2072: if (URI) {
1.72 daniel 2073: if ((ctxt->sax != NULL) && (ctxt->sax->entityDecl != NULL))
1.74 daniel 2074: ctxt->sax->entityDecl(ctxt->userData, name,
1.39 daniel 2075: XML_EXTERNAL_PARAMETER_ENTITY,
2076: literal, URI, NULL);
2077: }
1.24 daniel 2078: }
2079: } else {
1.40 daniel 2080: if ((CUR == '"') || (CUR == '\'')) {
1.78 daniel 2081: value = xmlParseEntityValue(ctxt, &orig);
1.72 daniel 2082: if ((ctxt->sax != NULL) && (ctxt->sax->entityDecl != NULL))
1.74 daniel 2083: ctxt->sax->entityDecl(ctxt->userData, name,
1.39 daniel 2084: XML_INTERNAL_GENERAL_ENTITY,
2085: NULL, NULL, value);
2086: } else {
1.67 daniel 2087: URI = xmlParseExternalID(ctxt, &literal, 1);
1.59 daniel 2088: if ((CUR != '>') && (!IS_BLANK(CUR))) {
2089: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2090: ctxt->sax->error(ctxt->userData,
1.59 daniel 2091: "Space required before 'NDATA'\n");
2092: ctxt->wellFormed = 0;
2093: }
1.42 daniel 2094: SKIP_BLANKS;
1.40 daniel 2095: if ((CUR == 'N') && (NXT(1) == 'D') &&
2096: (NXT(2) == 'A') && (NXT(3) == 'T') &&
2097: (NXT(4) == 'A')) {
2098: SKIP(5);
1.59 daniel 2099: if (!IS_BLANK(CUR)) {
2100: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2101: ctxt->sax->error(ctxt->userData,
1.59 daniel 2102: "Space required after 'NDATA'\n");
2103: ctxt->wellFormed = 0;
2104: }
1.42 daniel 2105: SKIP_BLANKS;
1.24 daniel 2106: ndata = xmlParseName(ctxt);
1.72 daniel 2107: if ((ctxt->sax != NULL) && (ctxt->sax->entityDecl != NULL))
1.74 daniel 2108: ctxt->sax->entityDecl(ctxt->userData, name,
1.39 daniel 2109: XML_EXTERNAL_GENERAL_UNPARSED_ENTITY,
2110: literal, URI, ndata);
2111: } else {
1.72 daniel 2112: if ((ctxt->sax != NULL) && (ctxt->sax->entityDecl != NULL))
1.74 daniel 2113: ctxt->sax->entityDecl(ctxt->userData, name,
1.39 daniel 2114: XML_EXTERNAL_GENERAL_PARSED_ENTITY,
2115: literal, URI, NULL);
1.24 daniel 2116: }
2117: }
2118: }
1.42 daniel 2119: SKIP_BLANKS;
1.40 daniel 2120: if (CUR != '>') {
1.55 daniel 2121: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2122: ctxt->sax->error(ctxt->userData,
1.31 daniel 2123: "xmlParseEntityDecl: entity %s not terminated\n", name);
1.59 daniel 2124: ctxt->wellFormed = 0;
1.24 daniel 2125: } else
1.40 daniel 2126: NEXT;
1.78 daniel 2127: if (orig != NULL) {
2128: /*
2129: * TODO: somwhat unclean, extending the SAx API would be better !
2130: */
2131: xmlEntityPtr cur = NULL;
2132:
2133: if ((ctxt->sax != NULL) && (ctxt->sax->getEntity != NULL))
2134: cur = ctxt->sax->getEntity(ctxt, name);
2135: if (cur != NULL)
2136: cur->orig = orig;
2137: else
2138: free(orig);
2139: }
1.39 daniel 2140: if (name != NULL) free(name);
2141: if (value != NULL) free(value);
2142: if (URI != NULL) free(URI);
2143: if (literal != NULL) free(literal);
2144: if (ndata != NULL) free(ndata);
1.22 daniel 2145: }
2146: }
2147:
1.50 daniel 2148: /**
1.59 daniel 2149: * xmlParseDefaultDecl:
2150: * @ctxt: an XML parser context
2151: * @value: Receive a possible fixed default value for the attribute
2152: *
2153: * Parse an attribute default declaration
2154: *
2155: * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
2156: *
2157: * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
2158: * or XML_ATTRIBUTE_FIXED.
2159: */
2160:
2161: int
2162: xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, CHAR **value) {
2163: int val;
2164: CHAR *ret;
2165:
2166: *value = NULL;
2167: if ((CUR == '#') && (NXT(1) == 'R') &&
2168: (NXT(2) == 'E') && (NXT(3) == 'Q') &&
2169: (NXT(4) == 'U') && (NXT(5) == 'I') &&
2170: (NXT(6) == 'R') && (NXT(7) == 'E') &&
2171: (NXT(8) == 'D')) {
2172: SKIP(9);
2173: return(XML_ATTRIBUTE_REQUIRED);
2174: }
2175: if ((CUR == '#') && (NXT(1) == 'I') &&
2176: (NXT(2) == 'M') && (NXT(3) == 'P') &&
2177: (NXT(4) == 'L') && (NXT(5) == 'I') &&
2178: (NXT(6) == 'E') && (NXT(7) == 'D')) {
2179: SKIP(8);
2180: return(XML_ATTRIBUTE_IMPLIED);
2181: }
2182: val = XML_ATTRIBUTE_NONE;
2183: if ((CUR == '#') && (NXT(1) == 'F') &&
2184: (NXT(2) == 'I') && (NXT(3) == 'X') &&
2185: (NXT(4) == 'E') && (NXT(5) == 'D')) {
2186: SKIP(6);
2187: val = XML_ATTRIBUTE_FIXED;
2188: if (!IS_BLANK(CUR)) {
2189: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2190: ctxt->sax->error(ctxt->userData, "Space required after '#FIXED'\n");
1.59 daniel 2191: ctxt->wellFormed = 0;
2192: }
2193: SKIP_BLANKS;
2194: }
2195: ret = xmlParseAttValue(ctxt);
2196: if (ret == NULL) {
2197: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2198: ctxt->sax->error(ctxt->userData,
1.59 daniel 2199: "Attribute default value declaration error\n");
2200: ctxt->wellFormed = 0;
2201: } else
2202: *value = ret;
2203: return(val);
2204: }
2205:
2206: /**
1.66 daniel 2207: * xmlParseNotationType:
2208: * @ctxt: an XML parser context
2209: *
2210: * parse an Notation attribute type.
2211: *
2212: * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
2213: *
2214: * Note: the leading 'NOTATION' S part has already being parsed...
2215: *
2216: * Returns: the notation attribute tree built while parsing
2217: */
2218:
2219: xmlEnumerationPtr
2220: xmlParseNotationType(xmlParserCtxtPtr ctxt) {
2221: CHAR *name;
2222: xmlEnumerationPtr ret = NULL, last = NULL, cur;
2223:
2224: if (CUR != '(') {
2225: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2226: ctxt->sax->error(ctxt->userData, "'(' required to start 'NOTATION'\n");
1.66 daniel 2227: ctxt->wellFormed = 0;
2228: return(NULL);
2229: }
2230: do {
2231: NEXT;
2232: SKIP_BLANKS;
2233: name = xmlParseName(ctxt);
2234: if (name == NULL) {
2235: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2236: ctxt->sax->error(ctxt->userData,
1.66 daniel 2237: "Name expected in NOTATION declaration\n");
2238: ctxt->wellFormed = 0;
2239: return(ret);
2240: }
2241: cur = xmlCreateEnumeration(name);
1.67 daniel 2242: free(name);
1.66 daniel 2243: if (cur == NULL) return(ret);
2244: if (last == NULL) ret = last = cur;
2245: else {
2246: last->next = cur;
2247: last = cur;
2248: }
2249: SKIP_BLANKS;
2250: } while (CUR == '|');
2251: if (CUR != ')') {
2252: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2253: ctxt->sax->error(ctxt->userData,
1.66 daniel 2254: "')' required to finish NOTATION declaration\n");
2255: ctxt->wellFormed = 0;
2256: return(ret);
2257: }
2258: NEXT;
2259: return(ret);
2260: }
2261:
2262: /**
2263: * xmlParseEnumerationType:
2264: * @ctxt: an XML parser context
2265: *
2266: * parse an Enumeration attribute type.
2267: *
2268: * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
2269: *
2270: * Returns: the enumeration attribute tree built while parsing
2271: */
2272:
2273: xmlEnumerationPtr
2274: xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
2275: CHAR *name;
2276: xmlEnumerationPtr ret = NULL, last = NULL, cur;
2277:
2278: if (CUR != '(') {
2279: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2280: ctxt->sax->error(ctxt->userData,
1.66 daniel 2281: "'(' required to start ATTLIST enumeration\n");
2282: ctxt->wellFormed = 0;
2283: return(NULL);
2284: }
2285: do {
2286: NEXT;
2287: SKIP_BLANKS;
2288: name = xmlParseNmtoken(ctxt);
2289: if (name == NULL) {
2290: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2291: ctxt->sax->error(ctxt->userData,
1.66 daniel 2292: "NmToken expected in ATTLIST enumeration\n");
2293: ctxt->wellFormed = 0;
2294: return(ret);
2295: }
2296: cur = xmlCreateEnumeration(name);
1.67 daniel 2297: free(name);
1.66 daniel 2298: if (cur == NULL) return(ret);
2299: if (last == NULL) ret = last = cur;
2300: else {
2301: last->next = cur;
2302: last = cur;
2303: }
2304: SKIP_BLANKS;
2305: } while (CUR == '|');
2306: if (CUR != ')') {
2307: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2308: ctxt->sax->error(ctxt->userData,
1.66 daniel 2309: "')' required to finish ATTLIST enumeration\n");
2310: ctxt->wellFormed = 0;
2311: return(ret);
2312: }
2313: NEXT;
2314: return(ret);
2315: }
2316:
2317: /**
1.50 daniel 2318: * xmlParseEnumeratedType:
2319: * @ctxt: an XML parser context
1.66 daniel 2320: * @tree: the enumeration tree built while parsing
1.50 daniel 2321: *
1.66 daniel 2322: * parse an Enumerated attribute type.
1.22 daniel 2323: *
2324: * [57] EnumeratedType ::= NotationType | Enumeration
2325: *
2326: * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
2327: *
1.50 daniel 2328: *
1.66 daniel 2329: * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
1.22 daniel 2330: */
2331:
1.66 daniel 2332: int
2333: xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
2334: if ((CUR == 'N') && (NXT(1) == 'O') &&
2335: (NXT(2) == 'T') && (NXT(3) == 'A') &&
2336: (NXT(4) == 'T') && (NXT(5) == 'I') &&
2337: (NXT(6) == 'O') && (NXT(7) == 'N')) {
2338: SKIP(8);
2339: if (!IS_BLANK(CUR)) {
2340: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2341: ctxt->sax->error(ctxt->userData, "Space required after 'NOTATION'\n");
1.66 daniel 2342: ctxt->wellFormed = 0;
2343: return(0);
2344: }
2345: SKIP_BLANKS;
2346: *tree = xmlParseNotationType(ctxt);
2347: if (*tree == NULL) return(0);
2348: return(XML_ATTRIBUTE_NOTATION);
2349: }
2350: *tree = xmlParseEnumerationType(ctxt);
2351: if (*tree == NULL) return(0);
2352: return(XML_ATTRIBUTE_ENUMERATION);
1.22 daniel 2353: }
2354:
1.50 daniel 2355: /**
2356: * xmlParseAttributeType:
2357: * @ctxt: an XML parser context
1.66 daniel 2358: * @tree: the enumeration tree built while parsing
1.50 daniel 2359: *
1.59 daniel 2360: * parse the Attribute list def for an element
1.22 daniel 2361: *
2362: * [54] AttType ::= StringType | TokenizedType | EnumeratedType
2363: *
2364: * [55] StringType ::= 'CDATA'
2365: *
2366: * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
2367: * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
1.50 daniel 2368: *
1.69 daniel 2369: * Returns the attribute type
1.22 daniel 2370: */
1.59 daniel 2371: int
1.66 daniel 2372: xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
1.40 daniel 2373: if ((CUR == 'C') && (NXT(1) == 'D') &&
2374: (NXT(2) == 'A') && (NXT(3) == 'T') &&
2375: (NXT(4) == 'A')) {
2376: SKIP(5);
1.66 daniel 2377: return(XML_ATTRIBUTE_CDATA);
1.40 daniel 2378: } else if ((CUR == 'I') && (NXT(1) == 'D') &&
2379: (NXT(2) == 'R') && (NXT(3) == 'E') &&
2380: (NXT(4) == 'F')) {
2381: SKIP(5);
1.59 daniel 2382: return(XML_ATTRIBUTE_IDREF);
1.66 daniel 2383: } else if ((CUR == 'I') && (NXT(1) == 'D')) {
2384: SKIP(2);
2385: return(XML_ATTRIBUTE_ID);
1.40 daniel 2386: } else if ((CUR == 'I') && (NXT(1) == 'D') &&
2387: (NXT(2) == 'R') && (NXT(3) == 'E') &&
2388: (NXT(4) == 'F') && (NXT(5) == 'S')) {
2389: SKIP(6);
1.59 daniel 2390: return(XML_ATTRIBUTE_IDREFS);
1.40 daniel 2391: } else if ((CUR == 'E') && (NXT(1) == 'N') &&
2392: (NXT(2) == 'T') && (NXT(3) == 'I') &&
2393: (NXT(4) == 'T') && (NXT(5) == 'Y')) {
2394: SKIP(6);
1.59 daniel 2395: return(XML_ATTRIBUTE_ENTITY);
1.40 daniel 2396: } else if ((CUR == 'E') && (NXT(1) == 'N') &&
2397: (NXT(2) == 'T') && (NXT(3) == 'I') &&
2398: (NXT(4) == 'T') && (NXT(5) == 'I') &&
2399: (NXT(6) == 'E') && (NXT(7) == 'S')) {
2400: SKIP(8);
1.59 daniel 2401: return(XML_ATTRIBUTE_ENTITIES);
1.40 daniel 2402: } else if ((CUR == 'N') && (NXT(1) == 'M') &&
2403: (NXT(2) == 'T') && (NXT(3) == 'O') &&
2404: (NXT(4) == 'K') && (NXT(5) == 'E') &&
1.66 daniel 2405: (NXT(6) == 'N') && (NXT(7) == 'S')) {
2406: SKIP(8);
2407: return(XML_ATTRIBUTE_NMTOKENS);
2408: } else if ((CUR == 'N') && (NXT(1) == 'M') &&
2409: (NXT(2) == 'T') && (NXT(3) == 'O') &&
2410: (NXT(4) == 'K') && (NXT(5) == 'E') &&
1.40 daniel 2411: (NXT(6) == 'N')) {
2412: SKIP(7);
1.59 daniel 2413: return(XML_ATTRIBUTE_NMTOKEN);
1.22 daniel 2414: }
1.66 daniel 2415: return(xmlParseEnumeratedType(ctxt, tree));
1.22 daniel 2416: }
2417:
1.50 daniel 2418: /**
2419: * xmlParseAttributeListDecl:
2420: * @ctxt: an XML parser context
2421: *
2422: * : parse the Attribute list def for an element
1.22 daniel 2423: *
2424: * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
2425: *
2426: * [53] AttDef ::= S Name S AttType S DefaultDecl
1.50 daniel 2427: *
1.22 daniel 2428: */
1.55 daniel 2429: void
2430: xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
1.59 daniel 2431: CHAR *elemName;
2432: CHAR *attrName;
1.66 daniel 2433: xmlEnumerationPtr tree = NULL;
1.22 daniel 2434:
1.40 daniel 2435: if ((CUR == '<') && (NXT(1) == '!') &&
2436: (NXT(2) == 'A') && (NXT(3) == 'T') &&
2437: (NXT(4) == 'T') && (NXT(5) == 'L') &&
2438: (NXT(6) == 'I') && (NXT(7) == 'S') &&
1.59 daniel 2439: (NXT(8) == 'T')) {
1.40 daniel 2440: SKIP(9);
1.59 daniel 2441: if (!IS_BLANK(CUR)) {
2442: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2443: ctxt->sax->error(ctxt->userData, "Space required after '<!ATTLIST'\n");
1.59 daniel 2444: ctxt->wellFormed = 0;
2445: }
1.42 daniel 2446: SKIP_BLANKS;
1.59 daniel 2447: elemName = xmlParseName(ctxt);
2448: if (elemName == NULL) {
1.55 daniel 2449: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2450: ctxt->sax->error(ctxt->userData, "ATTLIST: no name for Element\n");
1.59 daniel 2451: ctxt->wellFormed = 0;
1.22 daniel 2452: return;
2453: }
1.42 daniel 2454: SKIP_BLANKS;
1.40 daniel 2455: while (CUR != '>') {
2456: const CHAR *check = CUR_PTR;
1.59 daniel 2457: int type;
2458: int def;
2459: CHAR *defaultValue = NULL;
2460:
2461: attrName = xmlParseName(ctxt);
2462: if (attrName == NULL) {
2463: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2464: ctxt->sax->error(ctxt->userData, "ATTLIST: no name for Attribute\n");
1.59 daniel 2465: ctxt->wellFormed = 0;
2466: break;
2467: }
2468: if (!IS_BLANK(CUR)) {
2469: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2470: ctxt->sax->error(ctxt->userData,
1.59 daniel 2471: "Space required after the attribute name\n");
2472: ctxt->wellFormed = 0;
2473: break;
2474: }
2475: SKIP_BLANKS;
2476:
1.66 daniel 2477: type = xmlParseAttributeType(ctxt, &tree);
1.59 daniel 2478: if (type <= 0) break;
1.22 daniel 2479:
1.59 daniel 2480: if (!IS_BLANK(CUR)) {
2481: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2482: ctxt->sax->error(ctxt->userData,
1.59 daniel 2483: "Space required after the attribute type\n");
2484: ctxt->wellFormed = 0;
2485: break;
2486: }
1.42 daniel 2487: SKIP_BLANKS;
1.59 daniel 2488:
2489: def = xmlParseDefaultDecl(ctxt, &defaultValue);
2490: if (def <= 0) break;
2491:
2492: if (CUR != '>') {
2493: if (!IS_BLANK(CUR)) {
2494: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2495: ctxt->sax->error(ctxt->userData,
1.59 daniel 2496: "Space required after the attribute default value\n");
2497: ctxt->wellFormed = 0;
2498: break;
2499: }
2500: SKIP_BLANKS;
2501: }
1.40 daniel 2502: if (check == CUR_PTR) {
1.55 daniel 2503: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2504: ctxt->sax->error(ctxt->userData,
1.59 daniel 2505: "xmlParseAttributeListDecl: detected internal error\n");
1.22 daniel 2506: break;
2507: }
1.72 daniel 2508: if ((ctxt->sax != NULL) && (ctxt->sax->attributeDecl != NULL))
1.74 daniel 2509: ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
1.66 daniel 2510: type, def, defaultValue, tree);
1.59 daniel 2511: if (attrName != NULL)
2512: free(attrName);
2513: if (defaultValue != NULL)
2514: free(defaultValue);
1.22 daniel 2515: }
1.40 daniel 2516: if (CUR == '>')
2517: NEXT;
1.22 daniel 2518:
1.59 daniel 2519: free(elemName);
1.22 daniel 2520: }
2521: }
2522:
1.50 daniel 2523: /**
1.61 daniel 2524: * xmlParseElementMixedContentDecl:
2525: * @ctxt: an XML parser context
2526: *
2527: * parse the declaration for a Mixed Element content
2528: * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
2529: *
2530: * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
2531: * '(' S? '#PCDATA' S? ')'
2532: *
2533: * returns: the list of the xmlElementContentPtr describing the element choices
2534: */
2535: xmlElementContentPtr
1.62 daniel 2536: xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt) {
1.64 daniel 2537: xmlElementContentPtr ret = NULL, cur = NULL, n;
1.61 daniel 2538: CHAR *elem = NULL;
2539:
2540: if ((CUR == '#') && (NXT(1) == 'P') &&
2541: (NXT(2) == 'C') && (NXT(3) == 'D') &&
2542: (NXT(4) == 'A') && (NXT(5) == 'T') &&
2543: (NXT(6) == 'A')) {
2544: SKIP(7);
2545: SKIP_BLANKS;
1.63 daniel 2546: if (CUR == ')') {
2547: NEXT;
2548: ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
2549: return(ret);
2550: }
1.61 daniel 2551: if ((CUR == '(') || (CUR == '|')) {
2552: ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
2553: if (ret == NULL) return(NULL);
1.63 daniel 2554: } /********** else {
1.61 daniel 2555: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2556: ctxt->sax->error(ctxt->userData,
1.61 daniel 2557: "xmlParseElementMixedContentDecl : '|' or ')' expected\n");
2558: ctxt->wellFormed = 0;
2559: return(NULL);
1.63 daniel 2560: } **********/
1.61 daniel 2561: while (CUR == '|') {
1.64 daniel 2562: NEXT;
1.61 daniel 2563: if (elem == NULL) {
2564: ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
2565: if (ret == NULL) return(NULL);
2566: ret->c1 = cur;
1.64 daniel 2567: cur = ret;
1.61 daniel 2568: } else {
1.64 daniel 2569: n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
2570: if (n == NULL) return(NULL);
2571: n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
2572: cur->c2 = n;
2573: cur = n;
1.66 daniel 2574: free(elem);
1.61 daniel 2575: }
2576: SKIP_BLANKS;
2577: elem = xmlParseName(ctxt);
2578: if (elem == NULL) {
2579: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2580: ctxt->sax->error(ctxt->userData,
1.61 daniel 2581: "xmlParseElementMixedContentDecl : Name expected\n");
2582: ctxt->wellFormed = 0;
2583: xmlFreeElementContent(cur);
2584: return(NULL);
2585: }
2586: SKIP_BLANKS;
2587: }
1.63 daniel 2588: if ((CUR == ')') && (NXT(1) == '*')) {
1.66 daniel 2589: if (elem != NULL) {
1.61 daniel 2590: cur->c2 = xmlNewElementContent(elem,
2591: XML_ELEMENT_CONTENT_ELEMENT);
1.66 daniel 2592: free(elem);
2593: }
1.65 daniel 2594: ret->ocur = XML_ELEMENT_CONTENT_MULT;
1.64 daniel 2595: SKIP(2);
1.61 daniel 2596: } else {
1.66 daniel 2597: if (elem != NULL) free(elem);
1.61 daniel 2598: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2599: ctxt->sax->error(ctxt->userData,
1.63 daniel 2600: "xmlParseElementMixedContentDecl : '|' or ')*' expected\n");
1.61 daniel 2601: ctxt->wellFormed = 0;
2602: xmlFreeElementContent(ret);
2603: return(NULL);
2604: }
2605:
2606: } else {
2607: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2608: ctxt->sax->error(ctxt->userData,
1.61 daniel 2609: "xmlParseElementMixedContentDecl : '#PCDATA' expected\n");
2610: ctxt->wellFormed = 0;
2611: }
2612: return(ret);
2613: }
2614:
2615: /**
2616: * xmlParseElementChildrenContentDecl:
1.50 daniel 2617: * @ctxt: an XML parser context
2618: *
1.61 daniel 2619: * parse the declaration for a Mixed Element content
2620: * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
1.22 daniel 2621: *
1.61 daniel 2622: *
1.22 daniel 2623: * [47] children ::= (choice | seq) ('?' | '*' | '+')?
2624: *
2625: * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
2626: *
2627: * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
2628: *
2629: * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
2630: *
1.62 daniel 2631: * returns: the tree of xmlElementContentPtr describing the element
1.61 daniel 2632: * hierarchy.
2633: */
2634: xmlElementContentPtr
1.62 daniel 2635: xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt) {
1.63 daniel 2636: xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
1.62 daniel 2637: CHAR *elem;
2638: CHAR type = 0;
2639:
2640: SKIP_BLANKS;
2641: if (CUR == '(') {
1.63 daniel 2642: /* Recurse on first child */
1.62 daniel 2643: NEXT;
2644: SKIP_BLANKS;
2645: cur = ret = xmlParseElementChildrenContentDecl(ctxt);
2646: SKIP_BLANKS;
2647: } else {
2648: elem = xmlParseName(ctxt);
2649: if (elem == NULL) {
2650: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2651: ctxt->sax->error(ctxt->userData,
1.62 daniel 2652: "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
2653: ctxt->wellFormed = 0;
2654: return(NULL);
2655: }
2656: cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
2657: if (CUR == '?') {
2658: ret->ocur = XML_ELEMENT_CONTENT_OPT;
2659: NEXT;
2660: } else if (CUR == '*') {
2661: ret->ocur = XML_ELEMENT_CONTENT_MULT;
2662: NEXT;
2663: } else if (CUR == '+') {
2664: ret->ocur = XML_ELEMENT_CONTENT_PLUS;
2665: NEXT;
2666: } else {
2667: ret->ocur = XML_ELEMENT_CONTENT_ONCE;
2668: }
1.66 daniel 2669: free(elem);
1.62 daniel 2670: }
2671: SKIP_BLANKS;
2672: while (CUR != ')') {
1.63 daniel 2673: /*
2674: * Each loop we parse one separator and one element.
2675: */
1.62 daniel 2676: if (CUR == ',') {
2677: if (type == 0) type = CUR;
2678:
2679: /*
2680: * Detect "Name | Name , Name" error
2681: */
2682: else if (type != CUR) {
2683: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2684: ctxt->sax->error(ctxt->userData,
1.62 daniel 2685: "xmlParseElementChildrenContentDecl : '%c' expected\n",
2686: type);
2687: ctxt->wellFormed = 0;
2688: xmlFreeElementContent(ret);
2689: return(NULL);
2690: }
1.64 daniel 2691: NEXT;
1.62 daniel 2692:
1.63 daniel 2693: op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ);
2694: if (op == NULL) {
2695: xmlFreeElementContent(ret);
2696: return(NULL);
2697: }
2698: if (last == NULL) {
2699: op->c1 = ret;
1.65 daniel 2700: ret = cur = op;
1.63 daniel 2701: } else {
2702: cur->c2 = op;
2703: op->c1 = last;
2704: cur =op;
1.65 daniel 2705: last = NULL;
1.63 daniel 2706: }
1.62 daniel 2707: } else if (CUR == '|') {
2708: if (type == 0) type = CUR;
2709:
2710: /*
1.63 daniel 2711: * Detect "Name , Name | Name" error
1.62 daniel 2712: */
2713: else if (type != CUR) {
2714: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2715: ctxt->sax->error(ctxt->userData,
1.62 daniel 2716: "xmlParseElementChildrenContentDecl : '%c' expected\n",
2717: type);
2718: ctxt->wellFormed = 0;
2719: xmlFreeElementContent(ret);
2720: return(NULL);
2721: }
1.64 daniel 2722: NEXT;
1.62 daniel 2723:
1.63 daniel 2724: op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
2725: if (op == NULL) {
2726: xmlFreeElementContent(ret);
2727: return(NULL);
2728: }
2729: if (last == NULL) {
2730: op->c1 = ret;
1.65 daniel 2731: ret = cur = op;
1.63 daniel 2732: } else {
2733: cur->c2 = op;
2734: op->c1 = last;
2735: cur =op;
1.65 daniel 2736: last = NULL;
1.63 daniel 2737: }
1.62 daniel 2738: } else {
2739: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2740: ctxt->sax->error(ctxt->userData,
1.62 daniel 2741: "xmlParseElementChildrenContentDecl : ',' '|' or ')' expected\n");
2742: ctxt->wellFormed = 0;
2743: xmlFreeElementContent(ret);
2744: return(NULL);
2745: }
2746: SKIP_BLANKS;
2747: if (CUR == '(') {
1.63 daniel 2748: /* Recurse on second child */
1.62 daniel 2749: NEXT;
2750: SKIP_BLANKS;
1.65 daniel 2751: last = xmlParseElementChildrenContentDecl(ctxt);
1.62 daniel 2752: SKIP_BLANKS;
2753: } else {
2754: elem = xmlParseName(ctxt);
2755: if (elem == NULL) {
2756: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2757: ctxt->sax->error(ctxt->userData,
1.62 daniel 2758: "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
2759: ctxt->wellFormed = 0;
2760: return(NULL);
2761: }
1.65 daniel 2762: last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
1.66 daniel 2763: free(elem);
1.62 daniel 2764: }
1.63 daniel 2765: if (CUR == '?') {
2766: ret->ocur = XML_ELEMENT_CONTENT_OPT;
2767: NEXT;
2768: } else if (CUR == '*') {
2769: ret->ocur = XML_ELEMENT_CONTENT_MULT;
2770: NEXT;
2771: } else if (CUR == '+') {
2772: ret->ocur = XML_ELEMENT_CONTENT_PLUS;
2773: NEXT;
2774: } else {
2775: ret->ocur = XML_ELEMENT_CONTENT_ONCE;
2776: }
2777: SKIP_BLANKS;
1.64 daniel 2778: }
1.65 daniel 2779: if ((cur != NULL) && (last != NULL)) {
2780: cur->c2 = last;
1.62 daniel 2781: }
2782: NEXT;
2783: if (CUR == '?') {
2784: ret->ocur = XML_ELEMENT_CONTENT_OPT;
2785: NEXT;
2786: } else if (CUR == '*') {
2787: ret->ocur = XML_ELEMENT_CONTENT_MULT;
2788: NEXT;
2789: } else if (CUR == '+') {
2790: ret->ocur = XML_ELEMENT_CONTENT_PLUS;
2791: NEXT;
2792: } else {
2793: ret->ocur = XML_ELEMENT_CONTENT_ONCE;
2794: }
2795: return(ret);
1.61 daniel 2796: }
2797:
2798: /**
2799: * xmlParseElementContentDecl:
2800: * @ctxt: an XML parser context
2801: * @name: the name of the element being defined.
2802: * @result: the Element Content pointer will be stored here if any
1.22 daniel 2803: *
1.61 daniel 2804: * parse the declaration for an Element content either Mixed or Children,
2805: * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
2806: *
2807: * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
1.50 daniel 2808: *
1.61 daniel 2809: * returns: the type of element content XML_ELEMENT_TYPE_xxx
1.22 daniel 2810: */
2811:
1.61 daniel 2812: int
2813: xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, CHAR *name,
2814: xmlElementContentPtr *result) {
2815:
2816: xmlElementContentPtr tree = NULL;
2817: int res;
2818:
2819: *result = NULL;
2820:
2821: if (CUR != '(') {
2822: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2823: ctxt->sax->error(ctxt->userData,
1.61 daniel 2824: "xmlParseElementContentDecl : '(' expected\n");
2825: ctxt->wellFormed = 0;
2826: return(-1);
2827: }
2828: NEXT;
2829: SKIP_BLANKS;
2830: if ((CUR == '#') && (NXT(1) == 'P') &&
2831: (NXT(2) == 'C') && (NXT(3) == 'D') &&
2832: (NXT(4) == 'A') && (NXT(5) == 'T') &&
2833: (NXT(6) == 'A')) {
1.62 daniel 2834: tree = xmlParseElementMixedContentDecl(ctxt);
1.61 daniel 2835: res = XML_ELEMENT_TYPE_MIXED;
2836: } else {
1.62 daniel 2837: tree = xmlParseElementChildrenContentDecl(ctxt);
1.61 daniel 2838: res = XML_ELEMENT_TYPE_ELEMENT;
2839: }
2840: SKIP_BLANKS;
1.63 daniel 2841: /****************************
1.61 daniel 2842: if (CUR != ')') {
2843: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2844: ctxt->sax->error(ctxt->userData,
1.61 daniel 2845: "xmlParseElementContentDecl : ')' expected\n");
2846: ctxt->wellFormed = 0;
2847: return(-1);
2848: }
1.63 daniel 2849: ****************************/
2850: *result = tree;
1.61 daniel 2851: return(res);
1.22 daniel 2852: }
2853:
1.50 daniel 2854: /**
2855: * xmlParseElementDecl:
2856: * @ctxt: an XML parser context
2857: *
2858: * parse an Element declaration.
1.22 daniel 2859: *
2860: * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
2861: *
2862: * TODO There is a check [ VC: Unique Element Type Declaration ]
1.69 daniel 2863: *
2864: * Returns the type of the element, or -1 in case of error
1.22 daniel 2865: */
1.59 daniel 2866: int
1.55 daniel 2867: xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
1.22 daniel 2868: CHAR *name;
1.59 daniel 2869: int ret = -1;
1.61 daniel 2870: xmlElementContentPtr content = NULL;
1.22 daniel 2871:
1.40 daniel 2872: if ((CUR == '<') && (NXT(1) == '!') &&
2873: (NXT(2) == 'E') && (NXT(3) == 'L') &&
2874: (NXT(4) == 'E') && (NXT(5) == 'M') &&
2875: (NXT(6) == 'E') && (NXT(7) == 'N') &&
1.59 daniel 2876: (NXT(8) == 'T')) {
1.40 daniel 2877: SKIP(9);
1.59 daniel 2878: if (!IS_BLANK(CUR)) {
2879: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2880: ctxt->sax->error(ctxt->userData,
1.59 daniel 2881: "Space required after 'ELEMENT'\n");
2882: ctxt->wellFormed = 0;
2883: }
1.42 daniel 2884: SKIP_BLANKS;
1.22 daniel 2885: name = xmlParseName(ctxt);
2886: if (name == NULL) {
1.55 daniel 2887: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2888: ctxt->sax->error(ctxt->userData,
1.59 daniel 2889: "xmlParseElementDecl: no name for Element\n");
2890: ctxt->wellFormed = 0;
2891: return(-1);
2892: }
2893: if (!IS_BLANK(CUR)) {
2894: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2895: ctxt->sax->error(ctxt->userData,
1.59 daniel 2896: "Space required after the element name\n");
2897: ctxt->wellFormed = 0;
1.22 daniel 2898: }
1.42 daniel 2899: SKIP_BLANKS;
1.40 daniel 2900: if ((CUR == 'E') && (NXT(1) == 'M') &&
2901: (NXT(2) == 'P') && (NXT(3) == 'T') &&
2902: (NXT(4) == 'Y')) {
2903: SKIP(5);
1.22 daniel 2904: /*
2905: * Element must always be empty.
2906: */
1.59 daniel 2907: ret = XML_ELEMENT_TYPE_EMPTY;
1.40 daniel 2908: } else if ((CUR == 'A') && (NXT(1) == 'N') &&
2909: (NXT(2) == 'Y')) {
2910: SKIP(3);
1.22 daniel 2911: /*
2912: * Element is a generic container.
2913: */
1.59 daniel 2914: ret = XML_ELEMENT_TYPE_ANY;
1.61 daniel 2915: } else if (CUR == '(') {
2916: ret = xmlParseElementContentDecl(ctxt, name, &content);
1.22 daniel 2917: } else {
1.61 daniel 2918: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2919: ctxt->sax->error(ctxt->userData,
1.61 daniel 2920: "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
2921: ctxt->wellFormed = 0;
2922: if (name != NULL) free(name);
2923: return(-1);
1.22 daniel 2924: }
1.42 daniel 2925: SKIP_BLANKS;
1.40 daniel 2926: if (CUR != '>') {
1.55 daniel 2927: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 2928: ctxt->sax->error(ctxt->userData,
1.31 daniel 2929: "xmlParseElementDecl: expected '>' at the end\n");
1.59 daniel 2930: ctxt->wellFormed = 0;
1.61 daniel 2931: } else {
1.40 daniel 2932: NEXT;
1.72 daniel 2933: if ((ctxt->sax != NULL) && (ctxt->sax->elementDecl != NULL))
1.76 daniel 2934: ctxt->sax->elementDecl(ctxt->userData, name, ret,
2935: content);
1.61 daniel 2936: }
1.84 daniel 2937: if (content != NULL) {
2938: xmlFreeElementContent(content);
2939: }
1.61 daniel 2940: if (name != NULL) {
2941: free(name);
2942: }
1.22 daniel 2943: }
1.59 daniel 2944: return(ret);
1.22 daniel 2945: }
2946:
1.50 daniel 2947: /**
2948: * xmlParseMarkupDecl:
2949: * @ctxt: an XML parser context
2950: *
2951: * parse Markup declarations
1.22 daniel 2952: *
2953: * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
2954: * NotationDecl | PI | Comment
2955: *
2956: * TODO There is a check [ VC: Proper Declaration/PE Nesting ]
2957: */
1.55 daniel 2958: void
2959: xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
1.22 daniel 2960: xmlParseElementDecl(ctxt);
2961: xmlParseAttributeListDecl(ctxt);
2962: xmlParseEntityDecl(ctxt);
2963: xmlParseNotationDecl(ctxt);
2964: xmlParsePI(ctxt);
1.31 daniel 2965: xmlParseComment(ctxt, 0);
1.22 daniel 2966: }
2967:
1.50 daniel 2968: /**
1.76 daniel 2969: * xmlParseTextDecl:
2970: * @ctxt: an XML parser context
2971: *
2972: * parse an XML declaration header for external entities
2973: *
2974: * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
2975: *
2976: * Returns the only valuable info for an external parsed entity, the encoding
2977: */
2978:
2979: CHAR *
2980: xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
2981: CHAR *version;
2982: CHAR *encoding = NULL;
2983:
2984: /*
2985: * We know that '<?xml' is here.
2986: */
2987: SKIP(5);
2988:
2989: if (!IS_BLANK(CUR)) {
2990: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2991: ctxt->sax->error(ctxt->userData, "Blank needed after '<?xml'\n");
2992: ctxt->wellFormed = 0;
2993: }
2994: SKIP_BLANKS;
2995:
2996: /*
2997: * We may have the VersionInfo here.
2998: */
2999: version = xmlParseVersionInfo(ctxt);
3000: /* TODO: we should actually inherit from the referencing doc if absent
3001: if (version == NULL)
3002: version = xmlCharStrdup(XML_DEFAULT_VERSION);
3003: ctxt->version = xmlStrdup(version);
3004: */
3005: if (version != NULL)
3006: free(version);
3007:
3008: /*
3009: * We must have the encoding declaration
3010: */
3011: if (!IS_BLANK(CUR)) {
3012: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3013: ctxt->sax->error(ctxt->userData, "Blank needed here\n");
3014: ctxt->wellFormed = 0;
3015: }
3016: encoding = xmlParseEncodingDecl(ctxt);
3017:
3018: SKIP_BLANKS;
3019: if ((CUR == '?') && (NXT(1) == '>')) {
3020: SKIP(2);
3021: } else if (CUR == '>') {
3022: /* Deprecated old WD ... */
3023: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3024: ctxt->sax->error(ctxt->userData, "XML declaration must end-up with '?>'\n");
3025: ctxt->wellFormed = 0;
3026: NEXT;
3027: } else {
3028: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3029: ctxt->sax->error(ctxt->userData, "parsing XML declaration: '?>' expected\n");
3030: ctxt->wellFormed = 0;
3031: MOVETO_ENDTAG(CUR_PTR);
3032: NEXT;
3033: }
3034: return(encoding);
3035: }
3036:
3037: /*
3038: * xmlParseConditionalSections
3039: * @ctxt: an XML parser context
3040: *
3041: * TODO : Conditionnal section are not yet supported !
3042: *
3043: * [61] conditionalSect ::= includeSect | ignoreSect
3044: * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
3045: * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
3046: * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
3047: * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
3048: */
3049:
3050: void
3051: xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
3052: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
3053: ctxt->sax->warning(ctxt->userData,
3054: "XML conditional section not supported\n");
3055: /*
3056: * Skip up to the end of the conditionnal section.
3057: */
3058: while ((CUR != 0) && ((CUR != ']') || (NXT(1) != ']') || (NXT(2) != '>')))
3059: NEXT;
3060: if (CUR == 0) {
3061: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3062: ctxt->sax->error(ctxt->userData,
3063: "XML conditional section not closed\n");
3064: ctxt->wellFormed = 0;
3065: }
3066: }
3067:
3068: /**
3069: * xmlParseExternalSubset
3070: * @ctxt: an XML parser context
3071: *
3072: * parse Markup declarations from an external subset
3073: *
3074: * [30] extSubset ::= textDecl? extSubsetDecl
3075: *
3076: * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
3077: *
3078: * TODO There is a check [ VC: Proper Declaration/PE Nesting ]
3079: */
3080: void
1.79 daniel 3081: xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const CHAR *ExternalID,
3082: const CHAR *SystemID) {
1.76 daniel 3083: if ((CUR == '<') && (NXT(1) == '?') &&
3084: (NXT(2) == 'x') && (NXT(3) == 'm') &&
3085: (NXT(4) == 'l')) {
3086: xmlParseTextDecl(ctxt);
3087: }
1.79 daniel 3088: if (ctxt->myDoc == NULL) {
3089: ctxt->myDoc = xmlNewDoc("1.0");
3090: }
3091: if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
3092: xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
3093:
1.76 daniel 3094: while (((CUR == '<') && (NXT(1) == '?')) ||
3095: ((CUR == '<') && (NXT(1) == '!')) ||
3096: IS_BLANK(CUR)) {
3097: if ((CUR == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
3098: xmlParseConditionalSections(ctxt);
3099: } else if (IS_BLANK(CUR)) {
3100: NEXT;
3101: } else if (CUR == '%') {
3102: xmlParsePEReference(ctxt);
3103: } else
3104: xmlParseMarkupDecl(ctxt);
1.77 daniel 3105:
3106: /*
3107: * Pop-up of finished entities.
3108: */
3109: while ((CUR == 0) && (ctxt->inputNr > 1))
3110: xmlPopInput(ctxt);
3111:
1.76 daniel 3112: }
3113:
3114: if (CUR != 0) {
3115: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3116: ctxt->sax->error(ctxt->userData,
3117: "Extra content at the end of the document\n");
3118: ctxt->wellFormed = 0;
3119: }
3120:
3121: }
3122:
3123: /**
1.50 daniel 3124: * xmlParseCharRef:
3125: * @ctxt: an XML parser context
3126: *
3127: * parse Reference declarations
1.24 daniel 3128: *
3129: * [66] CharRef ::= '&#' [0-9]+ ';' |
3130: * '&#x' [0-9a-fA-F]+ ';'
1.68 daniel 3131: *
1.77 daniel 3132: * Returns the value parsed (as an int)
1.24 daniel 3133: */
1.77 daniel 3134: int
1.55 daniel 3135: xmlParseCharRef(xmlParserCtxtPtr ctxt) {
1.29 daniel 3136: int val = 0;
1.24 daniel 3137:
1.40 daniel 3138: if ((CUR == '&') && (NXT(1) == '#') &&
3139: (NXT(2) == 'x')) {
3140: SKIP(3);
3141: while (CUR != ';') {
3142: if ((CUR >= '0') && (CUR <= '9'))
3143: val = val * 16 + (CUR - '0');
3144: else if ((CUR >= 'a') && (CUR <= 'f'))
3145: val = val * 16 + (CUR - 'a') + 10;
3146: else if ((CUR >= 'A') && (CUR <= 'F'))
3147: val = val * 16 + (CUR - 'A') + 10;
1.24 daniel 3148: else {
1.55 daniel 3149: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3150: ctxt->sax->error(ctxt->userData,
1.59 daniel 3151: "xmlParseCharRef: invalid hexadecimal value\n");
3152: ctxt->wellFormed = 0;
1.29 daniel 3153: val = 0;
1.24 daniel 3154: break;
3155: }
1.47 daniel 3156: NEXT;
1.24 daniel 3157: }
1.55 daniel 3158: if (CUR == ';')
1.40 daniel 3159: NEXT;
3160: } else if ((CUR == '&') && (NXT(1) == '#')) {
3161: SKIP(2);
3162: while (CUR != ';') {
3163: if ((CUR >= '0') && (CUR <= '9'))
1.55 daniel 3164: val = val * 10 + (CUR - '0');
1.24 daniel 3165: else {
1.55 daniel 3166: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3167: ctxt->sax->error(ctxt->userData,
1.58 daniel 3168: "xmlParseCharRef: invalid decimal value\n");
1.59 daniel 3169: ctxt->wellFormed = 0;
1.29 daniel 3170: val = 0;
1.24 daniel 3171: break;
3172: }
1.47 daniel 3173: NEXT;
1.24 daniel 3174: }
1.55 daniel 3175: if (CUR == ';')
1.40 daniel 3176: NEXT;
1.24 daniel 3177: } else {
1.55 daniel 3178: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3179: ctxt->sax->error(ctxt->userData, "xmlParseCharRef: invalid value\n");
1.59 daniel 3180: ctxt->wellFormed = 0;
1.24 daniel 3181: }
1.29 daniel 3182: /*
3183: * Check the value IS_CHAR ...
3184: */
1.44 daniel 3185: if (IS_CHAR(val)) {
1.77 daniel 3186: return(val);
1.44 daniel 3187: } else {
1.55 daniel 3188: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3189: ctxt->sax->error(ctxt->userData, "xmlParseCharRef: invalid CHAR value %d\n",
1.58 daniel 3190: val);
1.59 daniel 3191: ctxt->wellFormed = 0;
1.29 daniel 3192: }
1.77 daniel 3193: return(0);
3194: }
3195:
3196: /**
3197: * xmlParseReference:
3198: * @ctxt: an XML parser context
3199: *
3200: * parse and handle entity references in content, depending on the SAX
3201: * interface, this may end-up in a call to character() if this is a
1.79 daniel 3202: * CharRef, a predefined entity, if there is no reference() callback.
3203: * or if the parser was asked to switch to that mode.
1.77 daniel 3204: *
3205: * [67] Reference ::= EntityRef | CharRef
3206: */
3207: void
3208: xmlParseReference(xmlParserCtxtPtr ctxt) {
3209: xmlEntityPtr ent;
3210: CHAR *val;
3211: if (CUR != '&') return;
3212:
3213: if (NXT(1) == '#') {
3214: CHAR out[2];
3215: int val = xmlParseCharRef(ctxt);
3216: /* TODO: invalid for UTF-8 variable encoding !!! */
3217: out[0] = val;
3218: out[1] = 0;
3219: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
3220: ctxt->sax->characters(ctxt->userData, out, 1);
3221: } else {
3222: ent = xmlParseEntityRef(ctxt);
3223: if (ent == NULL) return;
3224: if ((ent->name != NULL) &&
3225: (ent->type != XML_INTERNAL_PREDEFINED_ENTITY) &&
1.79 daniel 3226: (ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
3227: (ctxt->replaceEntities == 0)) {
3228:
1.77 daniel 3229: /*
3230: * Create a node.
3231: */
3232: ctxt->sax->reference(ctxt->userData, ent->name);
3233: return;
3234: }
3235: val = ent->content;
3236: if (val == NULL) return;
3237: /*
3238: * inline the entity.
3239: */
3240: if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL))
3241: ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
3242: }
1.24 daniel 3243: }
3244:
1.50 daniel 3245: /**
3246: * xmlParseEntityRef:
3247: * @ctxt: an XML parser context
3248: *
3249: * parse ENTITY references declarations
1.24 daniel 3250: *
3251: * [68] EntityRef ::= '&' Name ';'
1.68 daniel 3252: *
1.77 daniel 3253: * Returns the xmlEntityPtr if found, or NULL otherwise.
1.24 daniel 3254: */
1.77 daniel 3255: xmlEntityPtr
1.55 daniel 3256: xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
1.84 daniel 3257: const CHAR *q; /* !!!!!!!!!!! Unused !!!!!!!!!! */
1.24 daniel 3258: CHAR *name;
1.72 daniel 3259: xmlEntityPtr ent = NULL;
1.24 daniel 3260:
1.50 daniel 3261: q = CUR_PTR;
1.40 daniel 3262: if (CUR == '&') {
3263: NEXT;
1.24 daniel 3264: name = xmlParseName(ctxt);
3265: if (name == NULL) {
1.55 daniel 3266: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3267: ctxt->sax->error(ctxt->userData, "xmlParseEntityRef: no name\n");
1.59 daniel 3268: ctxt->wellFormed = 0;
1.24 daniel 3269: } else {
1.40 daniel 3270: if (CUR == ';') {
3271: NEXT;
1.24 daniel 3272: /*
1.77 daniel 3273: * Ask first SAX for entity resolution, otherwise try the
3274: * predefined set.
3275: */
3276: if (ctxt->sax != NULL) {
3277: if (ctxt->sax->getEntity != NULL)
3278: ent = ctxt->sax->getEntity(ctxt->userData, name);
3279: if (ent == NULL)
3280: ent = xmlGetPredefinedEntity(name);
3281: }
3282:
3283: /*
1.59 daniel 3284: * Well Formedness Constraint if:
3285: * - standalone
3286: * or
3287: * - no external subset and no external parameter entities
3288: * referenced
3289: * then
3290: * the entity referenced must have been declared
3291: *
1.72 daniel 3292: * TODO: to be double checked !!! This is wrong !
1.59 daniel 3293: */
1.77 daniel 3294: if (ent == NULL) {
3295: if (ctxt->sax != NULL) {
1.72 daniel 3296: if (((ctxt->sax->isStandalone != NULL) &&
1.77 daniel 3297: ctxt->sax->isStandalone(ctxt->userData) == 1) ||
1.72 daniel 3298: (((ctxt->sax->hasInternalSubset == NULL) ||
1.74 daniel 3299: ctxt->sax->hasInternalSubset(ctxt->userData) == 0) &&
1.72 daniel 3300: ((ctxt->sax->hasExternalSubset == NULL) ||
1.74 daniel 3301: ctxt->sax->hasExternalSubset(ctxt->userData) == 0))) {
1.77 daniel 3302: if (ctxt->sax->error != NULL)
3303: ctxt->sax->error(ctxt->userData,
3304: "Entity '%s' not defined\n", name);
3305: ctxt->wellFormed = 0;
3306: }
3307: } else {
3308: fprintf(stderr, "Entity '%s' not defined\n", name);
3309: ctxt->wellFormed = 0;
1.59 daniel 3310: }
1.77 daniel 3311: }
1.59 daniel 3312:
3313: /*
3314: * Well Formedness Constraint :
3315: * The referenced entity must be a parsed entity.
3316: */
3317: if (ent != NULL) {
3318: switch (ent->type) {
3319: case XML_INTERNAL_PARAMETER_ENTITY:
3320: case XML_EXTERNAL_PARAMETER_ENTITY:
3321: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3322: ctxt->sax->error(ctxt->userData,
1.59 daniel 3323: "Attempt to reference the parameter entity '%s'\n", name);
3324: ctxt->wellFormed = 0;
3325: break;
3326:
3327: case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
3328: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3329: ctxt->sax->error(ctxt->userData,
1.59 daniel 3330: "Attempt to reference unparsed entity '%s'\n", name);
3331: ctxt->wellFormed = 0;
3332: break;
3333: }
3334: }
3335:
3336: /*
1.77 daniel 3337: * TODO: !!!
1.59 daniel 3338: * Well Formedness Constraint :
3339: * The referenced entity must not lead to recursion !
3340: */
3341:
1.77 daniel 3342:
1.24 daniel 3343: } else {
1.55 daniel 3344: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3345: ctxt->sax->error(ctxt->userData,
1.59 daniel 3346: "xmlParseEntityRef: expecting ';'\n");
3347: ctxt->wellFormed = 0;
1.24 daniel 3348: }
1.45 daniel 3349: free(name);
1.24 daniel 3350: }
3351: }
1.77 daniel 3352: return(ent);
1.24 daniel 3353: }
3354:
1.50 daniel 3355: /**
3356: * xmlParsePEReference:
3357: * @ctxt: an XML parser context
3358: *
3359: * parse PEReference declarations
1.77 daniel 3360: * The entity content is handled directly by pushing it's content as
3361: * a new input stream.
1.22 daniel 3362: *
3363: * [69] PEReference ::= '%' Name ';'
1.68 daniel 3364: *
1.22 daniel 3365: */
1.77 daniel 3366: void
1.55 daniel 3367: xmlParsePEReference(xmlParserCtxtPtr ctxt) {
1.22 daniel 3368: CHAR *name;
1.72 daniel 3369: xmlEntityPtr entity = NULL;
1.50 daniel 3370: xmlParserInputPtr input;
1.22 daniel 3371:
1.40 daniel 3372: if (CUR == '%') {
3373: NEXT;
1.22 daniel 3374: name = xmlParseName(ctxt);
3375: if (name == NULL) {
1.55 daniel 3376: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3377: ctxt->sax->error(ctxt->userData, "xmlParsePEReference: no name\n");
1.59 daniel 3378: ctxt->wellFormed = 0;
1.22 daniel 3379: } else {
1.40 daniel 3380: if (CUR == ';') {
3381: NEXT;
1.72 daniel 3382: if ((ctxt->sax != NULL) && (ctxt->sax->getEntity != NULL))
1.79 daniel 3383: entity = ctxt->sax->getEntity(ctxt->userData, name);
1.72 daniel 3384: /* TODO !!!! Must check that it's of the proper type !!! */
1.45 daniel 3385: if (entity == NULL) {
1.55 daniel 3386: if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
1.74 daniel 3387: ctxt->sax->warning(ctxt->userData,
1.59 daniel 3388: "xmlParsePEReference: %%%s; not found\n", name);
1.50 daniel 3389: } else {
3390: input = xmlNewEntityInputStream(ctxt, entity);
3391: xmlPushInput(ctxt, input);
1.45 daniel 3392: }
1.22 daniel 3393: } else {
1.55 daniel 3394: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3395: ctxt->sax->error(ctxt->userData,
1.59 daniel 3396: "xmlParsePEReference: expecting ';'\n");
3397: ctxt->wellFormed = 0;
1.22 daniel 3398: }
1.45 daniel 3399: free(name);
1.3 veillard 3400: }
3401: }
3402: }
3403:
1.50 daniel 3404: /**
3405: * xmlParseDocTypeDecl :
3406: * @ctxt: an XML parser context
3407: *
3408: * parse a DOCTYPE declaration
1.21 daniel 3409: *
1.22 daniel 3410: * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
3411: * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
1.21 daniel 3412: */
3413:
1.55 daniel 3414: void
3415: xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
1.21 daniel 3416: CHAR *name;
3417: CHAR *ExternalID = NULL;
1.39 daniel 3418: CHAR *URI = NULL;
1.21 daniel 3419:
3420: /*
3421: * We know that '<!DOCTYPE' has been detected.
3422: */
1.40 daniel 3423: SKIP(9);
1.21 daniel 3424:
1.42 daniel 3425: SKIP_BLANKS;
1.21 daniel 3426:
3427: /*
3428: * Parse the DOCTYPE name.
3429: */
3430: name = xmlParseName(ctxt);
3431: if (name == NULL) {
1.55 daniel 3432: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3433: ctxt->sax->error(ctxt->userData, "xmlParseDocTypeDecl : no DOCTYPE name !\n");
1.59 daniel 3434: ctxt->wellFormed = 0;
1.21 daniel 3435: }
3436:
1.42 daniel 3437: SKIP_BLANKS;
1.21 daniel 3438:
3439: /*
1.22 daniel 3440: * Check for SystemID and ExternalID
3441: */
1.67 daniel 3442: URI = xmlParseExternalID(ctxt, &ExternalID, 1);
1.42 daniel 3443: SKIP_BLANKS;
1.36 daniel 3444:
1.76 daniel 3445: /*
3446: * NOTE: the SAX callback may try to fetch the external subset
3447: * entity and fill it up !
3448: */
1.72 daniel 3449: if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL))
1.74 daniel 3450: ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
1.22 daniel 3451:
3452: /*
3453: * Is there any DTD definition ?
3454: */
1.40 daniel 3455: if (CUR == '[') {
3456: NEXT;
1.22 daniel 3457: /*
3458: * Parse the succession of Markup declarations and
3459: * PEReferences.
3460: * Subsequence (markupdecl | PEReference | S)*
3461: */
1.40 daniel 3462: while (CUR != ']') {
3463: const CHAR *check = CUR_PTR;
1.22 daniel 3464:
1.42 daniel 3465: SKIP_BLANKS;
1.22 daniel 3466: xmlParseMarkupDecl(ctxt);
1.50 daniel 3467: xmlParsePEReference(ctxt);
1.22 daniel 3468:
1.40 daniel 3469: if (CUR_PTR == check) {
1.55 daniel 3470: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3471: ctxt->sax->error(ctxt->userData,
1.31 daniel 3472: "xmlParseDocTypeDecl: error detected in Markup declaration\n");
1.59 daniel 3473: ctxt->wellFormed = 0;
1.22 daniel 3474: break;
3475: }
1.77 daniel 3476:
3477: /*
3478: * Pop-up of finished entities.
3479: */
3480: while ((CUR == 0) && (ctxt->inputNr > 1))
3481: xmlPopInput(ctxt);
3482:
1.22 daniel 3483: }
1.40 daniel 3484: if (CUR == ']') NEXT;
1.22 daniel 3485: }
3486:
3487: /*
3488: * We should be at the end of the DOCTYPE declaration.
1.21 daniel 3489: */
1.40 daniel 3490: if (CUR != '>') {
1.55 daniel 3491: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3492: ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n");
1.59 daniel 3493: ctxt->wellFormed = 0;
1.22 daniel 3494: /* We shouldn't try to resynchronize ... */
1.21 daniel 3495: }
1.40 daniel 3496: NEXT;
1.22 daniel 3497:
3498: /*
3499: * Cleanup, since we don't use all those identifiers
3500: * TODO : the DOCTYPE if available should be stored !
3501: */
1.39 daniel 3502: if (URI != NULL) free(URI);
1.22 daniel 3503: if (ExternalID != NULL) free(ExternalID);
3504: if (name != NULL) free(name);
1.21 daniel 3505: }
3506:
1.50 daniel 3507: /**
3508: * xmlParseAttribute:
3509: * @ctxt: an XML parser context
1.72 daniel 3510: * @value: a CHAR ** used to store the value of the attribute
1.50 daniel 3511: *
3512: * parse an attribute
1.3 veillard 3513: *
1.22 daniel 3514: * [41] Attribute ::= Name Eq AttValue
3515: *
3516: * [25] Eq ::= S? '=' S?
3517: *
1.29 daniel 3518: * With namespace:
3519: *
3520: * [NS 11] Attribute ::= QName Eq AttValue
1.43 daniel 3521: *
3522: * Also the case QName == xmlns:??? is handled independently as a namespace
3523: * definition.
1.69 daniel 3524: *
1.72 daniel 3525: * Returns the attribute name, and the value in *value.
1.3 veillard 3526: */
3527:
1.72 daniel 3528: CHAR *
3529: xmlParseAttribute(xmlParserCtxtPtr ctxt, CHAR **value) {
1.59 daniel 3530: CHAR *name, *val;
1.3 veillard 3531:
1.72 daniel 3532: *value = NULL;
3533: name = xmlParseName(ctxt);
1.22 daniel 3534: if (name == NULL) {
1.55 daniel 3535: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3536: ctxt->sax->error(ctxt->userData, "error parsing attribute name\n");
1.59 daniel 3537: ctxt->wellFormed = 0;
1.52 daniel 3538: return(NULL);
1.3 veillard 3539: }
3540:
3541: /*
1.29 daniel 3542: * read the value
1.3 veillard 3543: */
1.42 daniel 3544: SKIP_BLANKS;
1.40 daniel 3545: if (CUR == '=') {
3546: NEXT;
1.42 daniel 3547: SKIP_BLANKS;
1.72 daniel 3548: val = xmlParseAttValue(ctxt);
1.29 daniel 3549: } else {
1.55 daniel 3550: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3551: ctxt->sax->error(ctxt->userData,
1.59 daniel 3552: "Specification mandate value for attribute %s\n", name);
3553: ctxt->wellFormed = 0;
1.52 daniel 3554: return(NULL);
1.43 daniel 3555: }
3556:
1.72 daniel 3557: *value = val;
3558: return(name);
1.3 veillard 3559: }
3560:
1.50 daniel 3561: /**
3562: * xmlParseStartTag:
3563: * @ctxt: an XML parser context
3564: *
3565: * parse a start of tag either for rule element or
3566: * EmptyElement. In both case we don't parse the tag closing chars.
1.27 daniel 3567: *
3568: * [40] STag ::= '<' Name (S Attribute)* S? '>'
3569: *
1.29 daniel 3570: * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
3571: *
3572: * With namespace:
3573: *
3574: * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
3575: *
3576: * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
1.83 daniel 3577: *
3578: * Returns the element name parsed
1.2 veillard 3579: */
3580:
1.83 daniel 3581: CHAR *
1.69 daniel 3582: xmlParseStartTag(xmlParserCtxtPtr ctxt) {
1.72 daniel 3583: CHAR *name;
3584: CHAR *attname;
3585: CHAR *attvalue;
3586: const CHAR **atts = NULL;
3587: int nbatts = 0;
3588: int maxatts = 0;
3589: int i;
1.2 veillard 3590:
1.83 daniel 3591: if (CUR != '<') return(NULL);
1.40 daniel 3592: NEXT;
1.3 veillard 3593:
1.72 daniel 3594: name = xmlParseName(ctxt);
1.59 daniel 3595: if (name == NULL) {
3596: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3597: ctxt->sax->error(ctxt->userData,
1.59 daniel 3598: "xmlParseStartTag: invalid element name\n");
3599: ctxt->wellFormed = 0;
1.83 daniel 3600: return(NULL);
1.50 daniel 3601: }
3602:
3603: /*
1.3 veillard 3604: * Now parse the attributes, it ends up with the ending
3605: *
3606: * (S Attribute)* S?
3607: */
1.42 daniel 3608: SKIP_BLANKS;
1.40 daniel 3609: while ((IS_CHAR(CUR)) &&
3610: (CUR != '>') &&
3611: ((CUR != '/') || (NXT(1) != '>'))) {
3612: const CHAR *q = CUR_PTR;
1.29 daniel 3613:
1.72 daniel 3614: attname = xmlParseAttribute(ctxt, &attvalue);
3615: if ((attname != NULL) && (attvalue != NULL)) {
3616: /*
3617: * Well formedness requires at most one declaration of an attribute
3618: */
3619: for (i = 0; i < nbatts;i += 2) {
3620: if (!xmlStrcmp(atts[i], attname)) {
3621: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3622: ctxt->sax->error(ctxt->userData, "Attribute %s redefined\n",
1.72 daniel 3623: name);
3624: ctxt->wellFormed = 0;
3625: free(attname);
3626: free(attvalue);
3627: break;
3628: }
3629: }
3630:
3631: /*
3632: * Add the pair to atts
3633: */
3634: if (atts == NULL) {
3635: maxatts = 10;
3636: atts = (const CHAR **) malloc(maxatts * sizeof(CHAR *));
3637: if (atts == NULL) {
1.86 daniel 3638: fprintf(stderr, "malloc of %ld byte failed\n",
3639: maxatts * (long)sizeof(CHAR *));
1.83 daniel 3640: return(NULL);
1.72 daniel 3641: }
3642: } else if (nbatts + 2 < maxatts) {
3643: maxatts *= 2;
3644: atts = (const CHAR **) realloc(atts, maxatts * sizeof(CHAR *));
3645: if (atts == NULL) {
1.86 daniel 3646: fprintf(stderr, "realloc of %ld byte failed\n",
3647: maxatts * (long)sizeof(CHAR *));
1.83 daniel 3648: return(NULL);
1.72 daniel 3649: }
3650: }
3651: atts[nbatts++] = attname;
3652: atts[nbatts++] = attvalue;
3653: atts[nbatts] = NULL;
3654: atts[nbatts + 1] = NULL;
3655: }
3656:
1.42 daniel 3657: SKIP_BLANKS;
1.40 daniel 3658: if (q == CUR_PTR) {
1.55 daniel 3659: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3660: ctxt->sax->error(ctxt->userData,
1.31 daniel 3661: "xmlParseStartTag: problem parsing attributes\n");
1.59 daniel 3662: ctxt->wellFormed = 0;
1.29 daniel 3663: break;
1.3 veillard 3664: }
3665: }
3666:
1.43 daniel 3667: /*
1.72 daniel 3668: * SAX: Start of Element !
1.43 daniel 3669: */
1.72 daniel 3670: if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL))
1.74 daniel 3671: ctxt->sax->startElement(ctxt->userData, name, atts);
1.43 daniel 3672:
1.72 daniel 3673: if (atts != NULL) {
3674: for (i = 0;i < nbatts;i++) free((CHAR *) atts[i]);
3675: free(atts);
3676: }
1.83 daniel 3677: return(name);
1.3 veillard 3678: }
3679:
1.50 daniel 3680: /**
3681: * xmlParseEndTag:
3682: * @ctxt: an XML parser context
1.83 daniel 3683: * @tagname: the tag name as parsed in the opening tag.
1.50 daniel 3684: *
3685: * parse an end of tag
1.27 daniel 3686: *
3687: * [42] ETag ::= '</' Name S? '>'
1.29 daniel 3688: *
3689: * With namespace
3690: *
1.72 daniel 3691: * [NS 9] ETag ::= '</' QName S? '>'
1.7 veillard 3692: */
3693:
1.55 daniel 3694: void
1.83 daniel 3695: xmlParseEndTag(xmlParserCtxtPtr ctxt, CHAR *tagname) {
1.72 daniel 3696: CHAR *name;
1.7 veillard 3697:
1.40 daniel 3698: if ((CUR != '<') || (NXT(1) != '/')) {
1.55 daniel 3699: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3700: ctxt->sax->error(ctxt->userData, "xmlParseEndTag: '</' not found\n");
1.59 daniel 3701: ctxt->wellFormed = 0;
1.27 daniel 3702: return;
3703: }
1.40 daniel 3704: SKIP(2);
1.7 veillard 3705:
1.72 daniel 3706: name = xmlParseName(ctxt);
1.7 veillard 3707:
3708: /*
3709: * We should definitely be at the ending "S? '>'" part
3710: */
1.42 daniel 3711: SKIP_BLANKS;
1.40 daniel 3712: if ((!IS_CHAR(CUR)) || (CUR != '>')) {
1.55 daniel 3713: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3714: ctxt->sax->error(ctxt->userData, "End tag : expected '>'\n");
1.59 daniel 3715: ctxt->wellFormed = 0;
1.7 veillard 3716: } else
1.40 daniel 3717: NEXT;
1.7 veillard 3718:
1.72 daniel 3719: /*
1.83 daniel 3720: * Well formedness constraints, opening and closing must match.
3721: */
3722: if (xmlStrcmp(name, tagname)) {
3723: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3724: ctxt->sax->error(ctxt->userData,
3725: "Opening and ending tag mismatch: %s and %s\n", tagname, name);
3726: ctxt->wellFormed = 0;
3727: }
3728:
3729: /*
1.72 daniel 3730: * SAX: End of Tag
3731: */
3732: if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
1.74 daniel 3733: ctxt->sax->endElement(ctxt->userData, name);
1.72 daniel 3734:
3735: if (name != NULL)
3736: free(name);
3737:
1.7 veillard 3738: return;
3739: }
3740:
1.50 daniel 3741: /**
3742: * xmlParseCDSect:
3743: * @ctxt: an XML parser context
3744: *
3745: * Parse escaped pure raw content.
1.29 daniel 3746: *
3747: * [18] CDSect ::= CDStart CData CDEnd
3748: *
3749: * [19] CDStart ::= '<![CDATA['
3750: *
3751: * [20] Data ::= (Char* - (Char* ']]>' Char*))
3752: *
3753: * [21] CDEnd ::= ']]>'
1.3 veillard 3754: */
1.55 daniel 3755: void
3756: xmlParseCDSect(xmlParserCtxtPtr ctxt) {
1.17 daniel 3757: const CHAR *r, *s, *base;
1.3 veillard 3758:
1.40 daniel 3759: if ((CUR == '<') && (NXT(1) == '!') &&
3760: (NXT(2) == '[') && (NXT(3) == 'C') &&
3761: (NXT(4) == 'D') && (NXT(5) == 'A') &&
3762: (NXT(6) == 'T') && (NXT(7) == 'A') &&
3763: (NXT(8) == '[')) {
3764: SKIP(9);
1.29 daniel 3765: } else
1.45 daniel 3766: return;
1.40 daniel 3767: base = CUR_PTR;
3768: if (!IS_CHAR(CUR)) {
1.55 daniel 3769: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3770: ctxt->sax->error(ctxt->userData, "CData section not finished\n%.50s\n", base);
1.59 daniel 3771: ctxt->wellFormed = 0;
1.45 daniel 3772: return;
1.3 veillard 3773: }
1.40 daniel 3774: r = NEXT;
3775: if (!IS_CHAR(CUR)) {
1.55 daniel 3776: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3777: ctxt->sax->error(ctxt->userData, "CData section not finished\n%.50s\n", base);
1.59 daniel 3778: ctxt->wellFormed = 0;
1.45 daniel 3779: return;
1.3 veillard 3780: }
1.40 daniel 3781: s = NEXT;
3782: while (IS_CHAR(CUR) &&
3783: ((*r != ']') || (*s != ']') || (CUR != '>'))) {
3784: r++;s++;NEXT;
1.3 veillard 3785: }
1.40 daniel 3786: if (!IS_CHAR(CUR)) {
1.55 daniel 3787: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3788: ctxt->sax->error(ctxt->userData, "CData section not finished\n%.50s\n", base);
1.59 daniel 3789: ctxt->wellFormed = 0;
1.45 daniel 3790: return;
1.3 veillard 3791: }
1.16 daniel 3792:
1.45 daniel 3793: /*
3794: * Ok the segment [base CUR_PTR] is to be consumed as chars.
3795: */
3796: if (ctxt->sax != NULL) {
1.72 daniel 3797: if (areBlanks(ctxt, base, CUR_PTR - base)) {
3798: if (ctxt->sax->ignorableWhitespace != NULL)
1.74 daniel 3799: ctxt->sax->ignorableWhitespace(ctxt->userData, base,
1.72 daniel 3800: (CUR_PTR - base) - 2);
3801: } else {
3802: if (ctxt->sax->characters != NULL)
1.74 daniel 3803: ctxt->sax->characters(ctxt->userData, base, (CUR_PTR - base) - 2);
1.72 daniel 3804: }
1.45 daniel 3805: }
1.2 veillard 3806: }
3807:
1.50 daniel 3808: /**
3809: * xmlParseContent:
3810: * @ctxt: an XML parser context
3811: *
3812: * Parse a content:
1.2 veillard 3813: *
1.27 daniel 3814: * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
1.2 veillard 3815: */
3816:
1.55 daniel 3817: void
3818: xmlParseContent(xmlParserCtxtPtr ctxt) {
1.40 daniel 3819: while ((CUR != '<') || (NXT(1) != '/')) {
3820: const CHAR *test = CUR_PTR;
1.27 daniel 3821:
3822: /*
3823: * First case : a Processing Instruction.
3824: */
1.40 daniel 3825: if ((CUR == '<') && (NXT(1) == '?')) {
1.27 daniel 3826: xmlParsePI(ctxt);
3827: }
1.72 daniel 3828:
1.27 daniel 3829: /*
3830: * Second case : a CDSection
3831: */
1.40 daniel 3832: else if ((CUR == '<') && (NXT(1) == '!') &&
3833: (NXT(2) == '[') && (NXT(3) == 'C') &&
3834: (NXT(4) == 'D') && (NXT(5) == 'A') &&
3835: (NXT(6) == 'T') && (NXT(7) == 'A') &&
3836: (NXT(8) == '[')) {
1.45 daniel 3837: xmlParseCDSect(ctxt);
1.27 daniel 3838: }
1.72 daniel 3839:
1.27 daniel 3840: /*
3841: * Third case : a comment
3842: */
1.40 daniel 3843: else if ((CUR == '<') && (NXT(1) == '!') &&
3844: (NXT(2) == '-') && (NXT(3) == '-')) {
1.72 daniel 3845: xmlParseComment(ctxt, 1);
1.27 daniel 3846: }
1.72 daniel 3847:
1.27 daniel 3848: /*
3849: * Fourth case : a sub-element.
3850: */
1.40 daniel 3851: else if (CUR == '<') {
1.72 daniel 3852: xmlParseElement(ctxt);
1.45 daniel 3853: }
1.72 daniel 3854:
1.45 daniel 3855: /*
1.50 daniel 3856: * Fifth case : a reference. If if has not been resolved,
3857: * parsing returns it's Name, create the node
1.45 daniel 3858: */
3859: else if (CUR == '&') {
1.77 daniel 3860: xmlParseReference(ctxt);
1.27 daniel 3861: }
1.72 daniel 3862:
1.27 daniel 3863: /*
3864: * Last case, text. Note that References are handled directly.
3865: */
3866: else {
1.45 daniel 3867: xmlParseCharData(ctxt, 0);
1.3 veillard 3868: }
1.14 veillard 3869:
3870: /*
1.45 daniel 3871: * Pop-up of finished entities.
1.14 veillard 3872: */
1.69 daniel 3873: while ((CUR == 0) && (ctxt->inputNr > 1))
3874: xmlPopInput(ctxt);
1.45 daniel 3875:
1.40 daniel 3876: if (test == CUR_PTR) {
1.55 daniel 3877: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3878: ctxt->sax->error(ctxt->userData,
1.59 daniel 3879: "detected an error in element content\n");
3880: ctxt->wellFormed = 0;
1.29 daniel 3881: break;
3882: }
1.3 veillard 3883: }
1.2 veillard 3884: }
3885:
1.50 daniel 3886: /**
3887: * xmlParseElement:
3888: * @ctxt: an XML parser context
3889: *
3890: * parse an XML element, this is highly recursive
1.26 daniel 3891: *
3892: * [39] element ::= EmptyElemTag | STag content ETag
3893: *
3894: * [41] Attribute ::= Name Eq AttValue
1.2 veillard 3895: */
1.26 daniel 3896:
1.72 daniel 3897: void
1.69 daniel 3898: xmlParseElement(xmlParserCtxtPtr ctxt) {
1.40 daniel 3899: const CHAR *openTag = CUR_PTR;
1.83 daniel 3900: CHAR *name;
1.32 daniel 3901: xmlParserNodeInfo node_info;
1.2 veillard 3902:
1.32 daniel 3903: /* Capture start position */
1.40 daniel 3904: node_info.begin_pos = CUR_PTR - ctxt->input->base;
3905: node_info.begin_line = ctxt->input->line;
1.32 daniel 3906:
1.83 daniel 3907: name = xmlParseStartTag(ctxt);
3908: if (name == NULL) {
3909: return;
3910: }
1.2 veillard 3911:
3912: /*
3913: * Check for an Empty Element.
3914: */
1.40 daniel 3915: if ((CUR == '/') && (NXT(1) == '>')) {
3916: SKIP(2);
1.72 daniel 3917: if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
1.83 daniel 3918: ctxt->sax->endElement(ctxt->userData, name);
3919: free(name);
1.72 daniel 3920: return;
1.2 veillard 3921: }
1.40 daniel 3922: if (CUR == '>') NEXT;
1.2 veillard 3923: else {
1.55 daniel 3924: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3925: ctxt->sax->error(ctxt->userData, "Couldn't find end of Start Tag\n%.30s\n",
1.57 daniel 3926: openTag);
1.59 daniel 3927: ctxt->wellFormed = 0;
1.45 daniel 3928:
3929: /*
3930: * end of parsing of this node.
3931: */
3932: nodePop(ctxt);
1.83 daniel 3933: free(name);
1.72 daniel 3934: return;
1.2 veillard 3935: }
3936:
3937: /*
3938: * Parse the content of the element:
3939: */
1.45 daniel 3940: xmlParseContent(ctxt);
1.40 daniel 3941: if (!IS_CHAR(CUR)) {
1.55 daniel 3942: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 3943: ctxt->sax->error(ctxt->userData,
1.57 daniel 3944: "Premature end of data in tag %.30s\n", openTag);
1.59 daniel 3945: ctxt->wellFormed = 0;
1.45 daniel 3946:
3947: /*
3948: * end of parsing of this node.
3949: */
3950: nodePop(ctxt);
1.83 daniel 3951: free(name);
1.72 daniel 3952: return;
1.2 veillard 3953: }
3954:
3955: /*
1.27 daniel 3956: * parse the end of tag: '</' should be here.
1.2 veillard 3957: */
1.83 daniel 3958: xmlParseEndTag(ctxt, name);
3959: free(name);
1.2 veillard 3960: }
3961:
1.50 daniel 3962: /**
3963: * xmlParseVersionNum:
3964: * @ctxt: an XML parser context
3965: *
3966: * parse the XML version value.
1.29 daniel 3967: *
3968: * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
1.68 daniel 3969: *
3970: * Returns the string giving the XML version number, or NULL
1.29 daniel 3971: */
1.55 daniel 3972: CHAR *
3973: xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
1.40 daniel 3974: const CHAR *q = CUR_PTR;
1.29 daniel 3975: CHAR *ret;
3976:
1.40 daniel 3977: while (IS_CHAR(CUR) &&
3978: (((CUR >= 'a') && (CUR <= 'z')) ||
3979: ((CUR >= 'A') && (CUR <= 'Z')) ||
3980: ((CUR >= '0') && (CUR <= '9')) ||
3981: (CUR == '_') || (CUR == '.') ||
3982: (CUR == ':') || (CUR == '-'))) NEXT;
3983: ret = xmlStrndup(q, CUR_PTR - q);
1.29 daniel 3984: return(ret);
3985: }
3986:
1.50 daniel 3987: /**
3988: * xmlParseVersionInfo:
3989: * @ctxt: an XML parser context
3990: *
3991: * parse the XML version.
1.29 daniel 3992: *
3993: * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
3994: *
3995: * [25] Eq ::= S? '=' S?
1.50 daniel 3996: *
1.68 daniel 3997: * Returns the version string, e.g. "1.0"
1.29 daniel 3998: */
3999:
1.55 daniel 4000: CHAR *
4001: xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
1.29 daniel 4002: CHAR *version = NULL;
4003: const CHAR *q;
4004:
1.40 daniel 4005: if ((CUR == 'v') && (NXT(1) == 'e') &&
4006: (NXT(2) == 'r') && (NXT(3) == 's') &&
4007: (NXT(4) == 'i') && (NXT(5) == 'o') &&
4008: (NXT(6) == 'n')) {
4009: SKIP(7);
1.42 daniel 4010: SKIP_BLANKS;
1.40 daniel 4011: if (CUR != '=') {
1.55 daniel 4012: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4013: ctxt->sax->error(ctxt->userData, "xmlParseVersionInfo : expected '='\n");
1.59 daniel 4014: ctxt->wellFormed = 0;
1.31 daniel 4015: return(NULL);
4016: }
1.40 daniel 4017: NEXT;
1.42 daniel 4018: SKIP_BLANKS;
1.40 daniel 4019: if (CUR == '"') {
4020: NEXT;
4021: q = CUR_PTR;
1.29 daniel 4022: version = xmlParseVersionNum(ctxt);
1.55 daniel 4023: if (CUR != '"') {
4024: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4025: ctxt->sax->error(ctxt->userData, "String not closed\n%.50s\n", q);
1.59 daniel 4026: ctxt->wellFormed = 0;
1.55 daniel 4027: } else
1.40 daniel 4028: NEXT;
4029: } else if (CUR == '\''){
4030: NEXT;
4031: q = CUR_PTR;
1.29 daniel 4032: version = xmlParseVersionNum(ctxt);
1.55 daniel 4033: if (CUR != '\'') {
4034: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4035: ctxt->sax->error(ctxt->userData, "String not closed\n%.50s\n", q);
1.59 daniel 4036: ctxt->wellFormed = 0;
1.55 daniel 4037: } else
1.40 daniel 4038: NEXT;
1.31 daniel 4039: } else {
1.55 daniel 4040: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4041: ctxt->sax->error(ctxt->userData,
1.59 daniel 4042: "xmlParseVersionInfo : expected ' or \"\n");
4043: ctxt->wellFormed = 0;
1.29 daniel 4044: }
4045: }
4046: return(version);
4047: }
4048:
1.50 daniel 4049: /**
4050: * xmlParseEncName:
4051: * @ctxt: an XML parser context
4052: *
4053: * parse the XML encoding name
1.29 daniel 4054: *
4055: * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
1.50 daniel 4056: *
1.68 daniel 4057: * Returns the encoding name value or NULL
1.29 daniel 4058: */
1.55 daniel 4059: CHAR *
4060: xmlParseEncName(xmlParserCtxtPtr ctxt) {
1.40 daniel 4061: const CHAR *q = CUR_PTR;
1.29 daniel 4062: CHAR *ret = NULL;
4063:
1.40 daniel 4064: if (((CUR >= 'a') && (CUR <= 'z')) ||
4065: ((CUR >= 'A') && (CUR <= 'Z'))) {
4066: NEXT;
4067: while (IS_CHAR(CUR) &&
4068: (((CUR >= 'a') && (CUR <= 'z')) ||
4069: ((CUR >= 'A') && (CUR <= 'Z')) ||
4070: ((CUR >= '0') && (CUR <= '9')) ||
4071: (CUR == '-'))) NEXT;
4072: ret = xmlStrndup(q, CUR_PTR - q);
1.29 daniel 4073: } else {
1.55 daniel 4074: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4075: ctxt->sax->error(ctxt->userData, "Invalid XML encoding name\n");
1.59 daniel 4076: ctxt->wellFormed = 0;
1.29 daniel 4077: }
4078: return(ret);
4079: }
4080:
1.50 daniel 4081: /**
4082: * xmlParseEncodingDecl:
4083: * @ctxt: an XML parser context
4084: *
4085: * parse the XML encoding declaration
1.29 daniel 4086: *
4087: * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
1.50 daniel 4088: *
4089: * TODO: this should setup the conversion filters.
4090: *
1.68 daniel 4091: * Returns the encoding value or NULL
1.29 daniel 4092: */
4093:
1.55 daniel 4094: CHAR *
4095: xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
1.29 daniel 4096: CHAR *encoding = NULL;
4097: const CHAR *q;
4098:
1.42 daniel 4099: SKIP_BLANKS;
1.40 daniel 4100: if ((CUR == 'e') && (NXT(1) == 'n') &&
4101: (NXT(2) == 'c') && (NXT(3) == 'o') &&
4102: (NXT(4) == 'd') && (NXT(5) == 'i') &&
4103: (NXT(6) == 'n') && (NXT(7) == 'g')) {
4104: SKIP(8);
1.42 daniel 4105: SKIP_BLANKS;
1.40 daniel 4106: if (CUR != '=') {
1.55 daniel 4107: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4108: ctxt->sax->error(ctxt->userData, "xmlParseEncodingDecl : expected '='\n");
1.59 daniel 4109: ctxt->wellFormed = 0;
1.31 daniel 4110: return(NULL);
4111: }
1.40 daniel 4112: NEXT;
1.42 daniel 4113: SKIP_BLANKS;
1.40 daniel 4114: if (CUR == '"') {
4115: NEXT;
4116: q = CUR_PTR;
1.29 daniel 4117: encoding = xmlParseEncName(ctxt);
1.55 daniel 4118: if (CUR != '"') {
4119: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4120: ctxt->sax->error(ctxt->userData, "String not closed\n%.50s\n", q);
1.59 daniel 4121: ctxt->wellFormed = 0;
1.55 daniel 4122: } else
1.40 daniel 4123: NEXT;
4124: } else if (CUR == '\''){
4125: NEXT;
4126: q = CUR_PTR;
1.29 daniel 4127: encoding = xmlParseEncName(ctxt);
1.55 daniel 4128: if (CUR != '\'') {
4129: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4130: ctxt->sax->error(ctxt->userData, "String not closed\n%.50s\n", q);
1.59 daniel 4131: ctxt->wellFormed = 0;
1.55 daniel 4132: } else
1.40 daniel 4133: NEXT;
4134: } else if (CUR == '"'){
1.55 daniel 4135: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4136: ctxt->sax->error(ctxt->userData,
1.59 daniel 4137: "xmlParseEncodingDecl : expected ' or \"\n");
4138: ctxt->wellFormed = 0;
1.29 daniel 4139: }
4140: }
4141: return(encoding);
4142: }
4143:
1.50 daniel 4144: /**
4145: * xmlParseSDDecl:
4146: * @ctxt: an XML parser context
4147: *
4148: * parse the XML standalone declaration
1.29 daniel 4149: *
4150: * [32] SDDecl ::= S 'standalone' Eq
4151: * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
1.68 daniel 4152: *
4153: * Returns 1 if standalone, 0 otherwise
1.29 daniel 4154: */
4155:
1.55 daniel 4156: int
4157: xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
1.29 daniel 4158: int standalone = -1;
4159:
1.42 daniel 4160: SKIP_BLANKS;
1.40 daniel 4161: if ((CUR == 's') && (NXT(1) == 't') &&
4162: (NXT(2) == 'a') && (NXT(3) == 'n') &&
4163: (NXT(4) == 'd') && (NXT(5) == 'a') &&
4164: (NXT(6) == 'l') && (NXT(7) == 'o') &&
4165: (NXT(8) == 'n') && (NXT(9) == 'e')) {
4166: SKIP(10);
1.81 daniel 4167: SKIP_BLANKS;
1.40 daniel 4168: if (CUR != '=') {
1.55 daniel 4169: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4170: ctxt->sax->error(ctxt->userData,
1.59 daniel 4171: "XML standalone declaration : expected '='\n");
4172: ctxt->wellFormed = 0;
1.32 daniel 4173: return(standalone);
4174: }
1.40 daniel 4175: NEXT;
1.42 daniel 4176: SKIP_BLANKS;
1.40 daniel 4177: if (CUR == '\''){
4178: NEXT;
4179: if ((CUR == 'n') && (NXT(1) == 'o')) {
1.29 daniel 4180: standalone = 0;
1.40 daniel 4181: SKIP(2);
4182: } else if ((CUR == 'y') && (NXT(1) == 'e') &&
4183: (NXT(2) == 's')) {
1.29 daniel 4184: standalone = 1;
1.40 daniel 4185: SKIP(3);
1.29 daniel 4186: } else {
1.55 daniel 4187: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4188: ctxt->sax->error(ctxt->userData, "standalone accepts only 'yes' or 'no'\n");
1.59 daniel 4189: ctxt->wellFormed = 0;
1.29 daniel 4190: }
1.55 daniel 4191: if (CUR != '\'') {
4192: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4193: ctxt->sax->error(ctxt->userData, "String not closed\n");
1.59 daniel 4194: ctxt->wellFormed = 0;
1.55 daniel 4195: } else
1.40 daniel 4196: NEXT;
4197: } else if (CUR == '"'){
4198: NEXT;
4199: if ((CUR == 'n') && (NXT(1) == 'o')) {
1.29 daniel 4200: standalone = 0;
1.40 daniel 4201: SKIP(2);
4202: } else if ((CUR == 'y') && (NXT(1) == 'e') &&
4203: (NXT(2) == 's')) {
1.29 daniel 4204: standalone = 1;
1.40 daniel 4205: SKIP(3);
1.29 daniel 4206: } else {
1.55 daniel 4207: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4208: ctxt->sax->error(ctxt->userData,
1.59 daniel 4209: "standalone accepts only 'yes' or 'no'\n");
4210: ctxt->wellFormed = 0;
1.29 daniel 4211: }
1.55 daniel 4212: if (CUR != '"') {
4213: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4214: ctxt->sax->error(ctxt->userData, "String not closed\n");
1.59 daniel 4215: ctxt->wellFormed = 0;
1.55 daniel 4216: } else
1.40 daniel 4217: NEXT;
1.37 daniel 4218: } else {
1.55 daniel 4219: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4220: ctxt->sax->error(ctxt->userData, "Standalone value not found\n");
1.59 daniel 4221: ctxt->wellFormed = 0;
1.37 daniel 4222: }
1.29 daniel 4223: }
4224: return(standalone);
4225: }
4226:
1.50 daniel 4227: /**
4228: * xmlParseXMLDecl:
4229: * @ctxt: an XML parser context
4230: *
4231: * parse an XML declaration header
1.29 daniel 4232: *
4233: * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
1.1 veillard 4234: */
4235:
1.55 daniel 4236: void
4237: xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
1.1 veillard 4238: CHAR *version;
4239:
4240: /*
1.19 daniel 4241: * We know that '<?xml' is here.
1.1 veillard 4242: */
1.40 daniel 4243: SKIP(5);
1.1 veillard 4244:
1.59 daniel 4245: if (!IS_BLANK(CUR)) {
4246: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4247: ctxt->sax->error(ctxt->userData, "Blank needed after '<?xml'\n");
1.59 daniel 4248: ctxt->wellFormed = 0;
4249: }
1.42 daniel 4250: SKIP_BLANKS;
1.1 veillard 4251:
4252: /*
1.29 daniel 4253: * We should have the VersionInfo here.
1.1 veillard 4254: */
1.29 daniel 4255: version = xmlParseVersionInfo(ctxt);
4256: if (version == NULL)
1.45 daniel 4257: version = xmlCharStrdup(XML_DEFAULT_VERSION);
1.72 daniel 4258: ctxt->version = xmlStrdup(version);
1.45 daniel 4259: free(version);
1.29 daniel 4260:
4261: /*
4262: * We may have the encoding declaration
4263: */
1.59 daniel 4264: if (!IS_BLANK(CUR)) {
4265: if ((CUR == '?') && (NXT(1) == '>')) {
4266: SKIP(2);
4267: return;
4268: }
4269: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4270: ctxt->sax->error(ctxt->userData, "Blank needed here\n");
1.59 daniel 4271: ctxt->wellFormed = 0;
4272: }
1.72 daniel 4273: ctxt->encoding = xmlParseEncodingDecl(ctxt);
1.1 veillard 4274:
4275: /*
1.29 daniel 4276: * We may have the standalone status.
1.1 veillard 4277: */
1.72 daniel 4278: if ((ctxt->encoding != NULL) && (!IS_BLANK(CUR))) {
1.59 daniel 4279: if ((CUR == '?') && (NXT(1) == '>')) {
4280: SKIP(2);
4281: return;
4282: }
4283: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4284: ctxt->sax->error(ctxt->userData, "Blank needed here\n");
1.59 daniel 4285: ctxt->wellFormed = 0;
4286: }
4287: SKIP_BLANKS;
1.72 daniel 4288: ctxt->standalone = xmlParseSDDecl(ctxt);
1.1 veillard 4289:
1.42 daniel 4290: SKIP_BLANKS;
1.40 daniel 4291: if ((CUR == '?') && (NXT(1) == '>')) {
4292: SKIP(2);
4293: } else if (CUR == '>') {
1.31 daniel 4294: /* Deprecated old WD ... */
1.55 daniel 4295: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4296: ctxt->sax->error(ctxt->userData, "XML declaration must end-up with '?>'\n");
1.59 daniel 4297: ctxt->wellFormed = 0;
1.40 daniel 4298: NEXT;
1.29 daniel 4299: } else {
1.55 daniel 4300: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4301: ctxt->sax->error(ctxt->userData, "parsing XML declaration: '?>' expected\n");
1.59 daniel 4302: ctxt->wellFormed = 0;
1.40 daniel 4303: MOVETO_ENDTAG(CUR_PTR);
4304: NEXT;
1.29 daniel 4305: }
1.1 veillard 4306: }
4307:
1.50 daniel 4308: /**
4309: * xmlParseMisc:
4310: * @ctxt: an XML parser context
4311: *
4312: * parse an XML Misc* optionnal field.
1.21 daniel 4313: *
1.22 daniel 4314: * [27] Misc ::= Comment | PI | S
1.1 veillard 4315: */
4316:
1.55 daniel 4317: void
4318: xmlParseMisc(xmlParserCtxtPtr ctxt) {
1.40 daniel 4319: while (((CUR == '<') && (NXT(1) == '?')) ||
4320: ((CUR == '<') && (NXT(1) == '!') &&
4321: (NXT(2) == '-') && (NXT(3) == '-')) ||
4322: IS_BLANK(CUR)) {
4323: if ((CUR == '<') && (NXT(1) == '?')) {
1.16 daniel 4324: xmlParsePI(ctxt);
1.40 daniel 4325: } else if (IS_BLANK(CUR)) {
4326: NEXT;
1.1 veillard 4327: } else
1.31 daniel 4328: xmlParseComment(ctxt, 0);
1.1 veillard 4329: }
4330: }
4331:
1.50 daniel 4332: /**
4333: * xmlParseDocument :
4334: * @ctxt: an XML parser context
4335: *
4336: * parse an XML document (and build a tree if using the standard SAX
4337: * interface).
1.21 daniel 4338: *
1.22 daniel 4339: * [1] document ::= prolog element Misc*
1.29 daniel 4340: *
4341: * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
1.50 daniel 4342: *
1.68 daniel 4343: * Returns 0, -1 in case of error. the parser context is augmented
1.50 daniel 4344: * as a result of the parsing.
1.1 veillard 4345: */
4346:
1.55 daniel 4347: int
4348: xmlParseDocument(xmlParserCtxtPtr ctxt) {
1.45 daniel 4349: xmlDefaultSAXHandlerInit();
4350:
1.14 veillard 4351: /*
1.44 daniel 4352: * SAX: beginning of the document processing.
4353: */
1.72 daniel 4354: if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
1.74 daniel 4355: ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
1.44 daniel 4356:
4357: /*
1.14 veillard 4358: * We should check for encoding here and plug-in some
4359: * conversion code TODO !!!!
4360: */
1.1 veillard 4361:
4362: /*
4363: * Wipe out everything which is before the first '<'
4364: */
1.59 daniel 4365: if (IS_BLANK(CUR)) {
4366: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4367: ctxt->sax->error(ctxt->userData,
1.59 daniel 4368: "Extra spaces at the beginning of the document are not allowed\n");
4369: ctxt->wellFormed = 0;
4370: SKIP_BLANKS;
4371: }
4372:
4373: if (CUR == 0) {
4374: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4375: ctxt->sax->error(ctxt->userData, "Document is empty\n");
1.59 daniel 4376: ctxt->wellFormed = 0;
4377: }
1.1 veillard 4378:
4379: /*
4380: * Check for the XMLDecl in the Prolog.
4381: */
1.40 daniel 4382: if ((CUR == '<') && (NXT(1) == '?') &&
4383: (NXT(2) == 'x') && (NXT(3) == 'm') &&
4384: (NXT(4) == 'l')) {
1.19 daniel 4385: xmlParseXMLDecl(ctxt);
4386: /* SKIP_EOL(cur); */
1.42 daniel 4387: SKIP_BLANKS;
1.40 daniel 4388: } else if ((CUR == '<') && (NXT(1) == '?') &&
4389: (NXT(2) == 'X') && (NXT(3) == 'M') &&
4390: (NXT(4) == 'L')) {
1.19 daniel 4391: /*
4392: * The first drafts were using <?XML and the final W3C REC
4393: * now use <?xml ...
4394: */
1.16 daniel 4395: xmlParseXMLDecl(ctxt);
1.1 veillard 4396: /* SKIP_EOL(cur); */
1.42 daniel 4397: SKIP_BLANKS;
1.1 veillard 4398: } else {
1.72 daniel 4399: ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
1.1 veillard 4400: }
1.72 daniel 4401: if ((ctxt->sax) && (ctxt->sax->startDocument))
1.74 daniel 4402: ctxt->sax->startDocument(ctxt->userData);
1.1 veillard 4403:
4404: /*
4405: * The Misc part of the Prolog
4406: */
1.16 daniel 4407: xmlParseMisc(ctxt);
1.1 veillard 4408:
4409: /*
1.29 daniel 4410: * Then possibly doc type declaration(s) and more Misc
1.21 daniel 4411: * (doctypedecl Misc*)?
4412: */
1.40 daniel 4413: if ((CUR == '<') && (NXT(1) == '!') &&
4414: (NXT(2) == 'D') && (NXT(3) == 'O') &&
4415: (NXT(4) == 'C') && (NXT(5) == 'T') &&
4416: (NXT(6) == 'Y') && (NXT(7) == 'P') &&
4417: (NXT(8) == 'E')) {
1.22 daniel 4418: xmlParseDocTypeDecl(ctxt);
4419: xmlParseMisc(ctxt);
1.21 daniel 4420: }
4421:
4422: /*
4423: * Time to start parsing the tree itself
1.1 veillard 4424: */
1.72 daniel 4425: xmlParseElement(ctxt);
1.33 daniel 4426:
4427: /*
4428: * The Misc part at the end
4429: */
4430: xmlParseMisc(ctxt);
1.16 daniel 4431:
1.59 daniel 4432: if (CUR != 0) {
4433: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 4434: ctxt->sax->error(ctxt->userData,
1.59 daniel 4435: "Extra content at the end of the document\n");
4436: ctxt->wellFormed = 0;
4437: }
4438:
1.44 daniel 4439: /*
4440: * SAX: end of the document processing.
4441: */
1.72 daniel 4442: if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
1.74 daniel 4443: ctxt->sax->endDocument(ctxt->userData);
1.59 daniel 4444: if (! ctxt->wellFormed) return(-1);
1.16 daniel 4445: return(0);
4446: }
4447:
1.50 daniel 4448: /**
1.86 daniel 4449: * xmlCreateDocParserCtxt :
1.50 daniel 4450: * @cur: a pointer to an array of CHAR
4451: *
1.69 daniel 4452: * Create a parser context for an XML in-memory document.
4453: *
4454: * Returns the new parser context or NULL
1.16 daniel 4455: */
1.69 daniel 4456: xmlParserCtxtPtr
4457: xmlCreateDocParserCtxt(CHAR *cur) {
1.16 daniel 4458: xmlParserCtxtPtr ctxt;
1.40 daniel 4459: xmlParserInputPtr input;
1.75 daniel 4460: xmlCharEncoding enc;
1.16 daniel 4461:
4462: ctxt = (xmlParserCtxtPtr) malloc(sizeof(xmlParserCtxt));
4463: if (ctxt == NULL) {
4464: perror("malloc");
4465: return(NULL);
4466: }
1.40 daniel 4467: xmlInitParserCtxt(ctxt);
4468: input = (xmlParserInputPtr) malloc(sizeof(xmlParserInput));
4469: if (input == NULL) {
4470: perror("malloc");
4471: free(ctxt);
4472: return(NULL);
4473: }
4474:
1.75 daniel 4475: /*
4476: * plug some encoding conversion routines here. !!!
4477: */
4478: enc = xmlDetectCharEncoding(cur);
4479: xmlSwitchEncoding(ctxt, enc);
4480:
1.40 daniel 4481: input->filename = NULL;
4482: input->line = 1;
4483: input->col = 1;
4484: input->base = cur;
4485: input->cur = cur;
1.69 daniel 4486: input->free = NULL;
1.40 daniel 4487:
4488: inputPush(ctxt, input);
1.69 daniel 4489: return(ctxt);
4490: }
4491:
4492: /**
4493: * xmlSAXParseDoc :
4494: * @sax: the SAX handler block
4495: * @cur: a pointer to an array of CHAR
4496: * @recovery: work in recovery mode, i.e. tries to read no Well Formed
4497: * documents
4498: *
4499: * parse an XML in-memory document and build a tree.
4500: * It use the given SAX function block to handle the parsing callback.
4501: * If sax is NULL, fallback to the default DOM tree building routines.
4502: *
4503: * Returns the resulting document tree
4504: */
4505:
4506: xmlDocPtr
4507: xmlSAXParseDoc(xmlSAXHandlerPtr sax, CHAR *cur, int recovery) {
4508: xmlDocPtr ret;
4509: xmlParserCtxtPtr ctxt;
4510:
4511: if (cur == NULL) return(NULL);
1.16 daniel 4512:
4513:
1.69 daniel 4514: ctxt = xmlCreateDocParserCtxt(cur);
4515: if (ctxt == NULL) return(NULL);
1.74 daniel 4516: if (sax != NULL) {
4517: ctxt->sax = sax;
4518: ctxt->userData = NULL;
4519: }
1.69 daniel 4520:
1.16 daniel 4521: xmlParseDocument(ctxt);
1.72 daniel 4522: if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
1.59 daniel 4523: else {
4524: ret = NULL;
1.72 daniel 4525: xmlFreeDoc(ctxt->myDoc);
4526: ctxt->myDoc = NULL;
1.59 daniel 4527: }
1.86 daniel 4528: if (sax != NULL)
4529: ctxt->sax = NULL;
1.69 daniel 4530: xmlFreeParserCtxt(ctxt);
1.16 daniel 4531:
1.1 veillard 4532: return(ret);
4533: }
4534:
1.50 daniel 4535: /**
1.55 daniel 4536: * xmlParseDoc :
4537: * @cur: a pointer to an array of CHAR
4538: *
4539: * parse an XML in-memory document and build a tree.
4540: *
1.68 daniel 4541: * Returns the resulting document tree
1.55 daniel 4542: */
4543:
1.69 daniel 4544: xmlDocPtr
4545: xmlParseDoc(CHAR *cur) {
1.59 daniel 4546: return(xmlSAXParseDoc(NULL, cur, 0));
1.76 daniel 4547: }
4548:
4549: /**
4550: * xmlSAXParseDTD :
4551: * @sax: the SAX handler block
4552: * @ExternalID: a NAME* containing the External ID of the DTD
4553: * @SystemID: a NAME* containing the URL to the DTD
4554: *
4555: * Load and parse an external subset.
4556: *
4557: * Returns the resulting xmlDtdPtr or NULL in case of error.
4558: */
4559:
4560: xmlDtdPtr
4561: xmlSAXParseDTD(xmlSAXHandlerPtr sax, const CHAR *ExternalID,
4562: const CHAR *SystemID) {
4563: xmlDtdPtr ret = NULL;
4564: xmlParserCtxtPtr ctxt;
1.83 daniel 4565: xmlParserInputPtr input = NULL;
1.76 daniel 4566: xmlCharEncoding enc;
4567:
4568: if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
4569:
4570: ctxt = (xmlParserCtxtPtr) malloc(sizeof(xmlParserCtxt));
4571: if (ctxt == NULL) {
4572: perror("malloc");
4573: return(NULL);
4574: }
4575: xmlInitParserCtxt(ctxt);
4576:
4577: /*
4578: * Set-up the SAX context
4579: */
4580: if (ctxt == NULL) return(NULL);
4581: if (sax != NULL) {
4582: ctxt->sax = sax;
4583: ctxt->userData = NULL;
4584: }
4585:
4586: /*
4587: * Ask the Entity resolver to load the damn thing
4588: */
4589:
4590: if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
4591: input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID, SystemID);
4592: if (input == NULL) {
1.86 daniel 4593: if (sax != NULL) ctxt->sax = NULL;
1.76 daniel 4594: xmlFreeParserCtxt(ctxt);
4595: return(NULL);
4596: }
4597:
4598: /*
4599: * plug some encoding conversion routines here. !!!
4600: */
4601: xmlPushInput(ctxt, input);
4602: enc = xmlDetectCharEncoding(ctxt->input->cur);
4603: xmlSwitchEncoding(ctxt, enc);
4604:
4605: input->filename = xmlStrdup(SystemID);
4606: input->line = 1;
4607: input->col = 1;
4608: input->base = ctxt->input->cur;
4609: input->cur = ctxt->input->cur;
4610: input->free = NULL;
4611:
4612: /*
4613: * let's parse that entity knowing it's an external subset.
4614: */
1.79 daniel 4615: xmlParseExternalSubset(ctxt, ExternalID, SystemID);
1.76 daniel 4616:
4617: if (ctxt->myDoc != NULL) {
4618: if (ctxt->wellFormed) {
4619: ret = ctxt->myDoc->intSubset;
4620: ctxt->myDoc->intSubset = NULL;
4621: } else {
4622: ret = NULL;
4623: }
4624: xmlFreeDoc(ctxt->myDoc);
4625: ctxt->myDoc = NULL;
4626: }
1.86 daniel 4627: if (sax != NULL) ctxt->sax = NULL;
1.76 daniel 4628: xmlFreeParserCtxt(ctxt);
4629:
4630: return(ret);
4631: }
4632:
4633: /**
4634: * xmlParseDTD :
4635: * @ExternalID: a NAME* containing the External ID of the DTD
4636: * @SystemID: a NAME* containing the URL to the DTD
4637: *
4638: * Load and parse an external subset.
4639: *
4640: * Returns the resulting xmlDtdPtr or NULL in case of error.
4641: */
4642:
4643: xmlDtdPtr
4644: xmlParseDTD(const CHAR *ExternalID, const CHAR *SystemID) {
4645: return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
1.59 daniel 4646: }
4647:
4648: /**
4649: * xmlRecoverDoc :
4650: * @cur: a pointer to an array of CHAR
4651: *
4652: * parse an XML in-memory document and build a tree.
4653: * In the case the document is not Well Formed, a tree is built anyway
4654: *
1.68 daniel 4655: * Returns the resulting document tree
1.59 daniel 4656: */
4657:
1.69 daniel 4658: xmlDocPtr
4659: xmlRecoverDoc(CHAR *cur) {
1.59 daniel 4660: return(xmlSAXParseDoc(NULL, cur, 1));
1.55 daniel 4661: }
4662:
4663: /**
1.69 daniel 4664: * xmlCreateFileParserCtxt :
1.50 daniel 4665: * @filename: the filename
4666: *
1.69 daniel 4667: * Create a parser context for a file content.
4668: * Automatic support for ZLIB/Compress compressed document is provided
4669: * by default if found at compile-time.
1.50 daniel 4670: *
1.69 daniel 4671: * Returns the new parser context or NULL
1.9 httpng 4672: */
1.69 daniel 4673: xmlParserCtxtPtr
4674: xmlCreateFileParserCtxt(const char *filename)
4675: {
4676: xmlParserCtxtPtr ctxt;
1.20 daniel 4677: #ifdef HAVE_ZLIB_H
4678: gzFile input;
4679: #else
1.9 httpng 4680: int input;
1.20 daniel 4681: #endif
1.9 httpng 4682: int res;
1.55 daniel 4683: int len;
1.86 daniel 4684: int cnt;
1.9 httpng 4685: struct stat buf;
1.86 daniel 4686: char *buffer, *nbuf;
1.40 daniel 4687: xmlParserInputPtr inputStream;
1.75 daniel 4688: xmlCharEncoding enc;
1.9 httpng 4689:
1.86 daniel 4690: #define MINLEN 40000
1.9 httpng 4691:
1.86 daniel 4692: if (strcmp(filename,"-") == 0) {
1.20 daniel 4693: #ifdef HAVE_ZLIB_H
1.86 daniel 4694: input = gzdopen (fileno(stdin), "r");
4695: if (input == NULL) {
4696: fprintf (stderr, "Cannot read from stdin\n");
4697: perror ("gzdopen failed");
4698: return(NULL);
4699: }
1.20 daniel 4700: #else
1.86 daniel 4701: #ifdef WIN32
4702: input = -1;
4703: #else
4704: input = fileno(stdin);
1.20 daniel 4705: #endif
1.86 daniel 4706: if (input < 0) {
4707: fprintf (stderr, "Cannot read from stdin\n");
4708: perror ("open failed");
1.9 httpng 4709: return(NULL);
4710: }
1.86 daniel 4711: #endif
4712: len = MINLEN;
4713: } else {
1.20 daniel 4714: #ifdef HAVE_ZLIB_H
4715: input = gzopen (filename, "r");
4716: if (input == NULL) {
4717: fprintf (stderr, "Cannot read file %s :\n", filename);
4718: perror ("gzopen failed");
4719: return(NULL);
4720: }
4721: #else
1.72 daniel 4722: #ifdef WIN32
4723: input = _open (filename, O_RDONLY | _O_BINARY);
4724: #else
1.9 httpng 4725: input = open (filename, O_RDONLY);
1.72 daniel 4726: #endif
1.9 httpng 4727: if (input < 0) {
4728: fprintf (stderr, "Cannot read file %s :\n", filename);
4729: perror ("open failed");
4730: return(NULL);
4731: }
1.20 daniel 4732: #endif
1.86 daniel 4733: res = stat(filename, &buf);
4734: if (res < 0)
4735: return(NULL);
1.87 daniel 4736: len = buf.st_size;
1.86 daniel 4737: if (len < MINLEN)
4738: len = MINLEN;
4739: }
1.87 daniel 4740: buffer = (char *)malloc((len+1)*sizeof(char));
1.86 daniel 4741: if (buffer == NULL) {
4742: fprintf (stderr, "Cannot malloc\n");
4743: perror ("malloc failed");
4744: return(NULL);
4745: }
4746:
4747: cnt = 0;
4748: while(1) {
4749: if (cnt == len) {
4750: len *= 2;
1.87 daniel 4751: nbuf = (char *)realloc(buffer,(len+1)*sizeof(char));
1.86 daniel 4752: if (nbuf == NULL) {
4753: fprintf(stderr,"Cannot realloc\n");
4754: free(buffer);
4755: perror ("realloc failed");
4756: return(NULL);
4757: }
4758: buffer = nbuf;
4759: }
1.20 daniel 4760: #ifdef HAVE_ZLIB_H
1.86 daniel 4761: res = gzread(input, &buffer[cnt], len-cnt);
1.20 daniel 4762: #else
1.86 daniel 4763: res = read(input, &buffer[cnt], len-cnt);
1.20 daniel 4764: #endif
1.9 httpng 4765: if (res < 0) {
4766: fprintf (stderr, "Cannot read file %s :\n", filename);
1.20 daniel 4767: #ifdef HAVE_ZLIB_H
4768: perror ("gzread failed");
4769: #else
1.9 httpng 4770: perror ("read failed");
1.20 daniel 4771: #endif
1.9 httpng 4772: return(NULL);
4773: }
1.86 daniel 4774: if (res == 0)
4775: break;
4776: cnt += res;
4777: }
1.20 daniel 4778: #ifdef HAVE_ZLIB_H
4779: gzclose(input);
4780: #else
1.9 httpng 4781: close(input);
1.20 daniel 4782: #endif
4783:
1.86 daniel 4784: buffer[cnt] = '\0';
1.9 httpng 4785:
1.16 daniel 4786: ctxt = (xmlParserCtxtPtr) malloc(sizeof(xmlParserCtxt));
4787: if (ctxt == NULL) {
4788: perror("malloc");
4789: return(NULL);
4790: }
1.40 daniel 4791: xmlInitParserCtxt(ctxt);
4792: inputStream = (xmlParserInputPtr) malloc(sizeof(xmlParserInput));
4793: if (inputStream == NULL) {
4794: perror("malloc");
4795: free(ctxt);
4796: return(NULL);
4797: }
4798:
4799: inputStream->filename = strdup(filename);
4800: inputStream->line = 1;
4801: inputStream->col = 1;
1.45 daniel 4802:
4803: /*
1.75 daniel 4804: * plug some encoding conversion routines here. !!!
1.45 daniel 4805: */
1.75 daniel 4806: enc = xmlDetectCharEncoding(buffer);
4807: xmlSwitchEncoding(ctxt, enc);
4808:
1.40 daniel 4809: inputStream->base = buffer;
4810: inputStream->cur = buffer;
1.69 daniel 4811: inputStream->free = (xmlParserInputDeallocate) free;
1.16 daniel 4812:
1.40 daniel 4813: inputPush(ctxt, inputStream);
1.69 daniel 4814: return(ctxt);
4815: }
4816:
4817: /**
4818: * xmlSAXParseFile :
4819: * @sax: the SAX handler block
4820: * @filename: the filename
4821: * @recovery: work in recovery mode, i.e. tries to read no Well Formed
4822: * documents
4823: *
4824: * parse an XML file and build a tree. Automatic support for ZLIB/Compress
4825: * compressed document is provided by default if found at compile-time.
4826: * It use the given SAX function block to handle the parsing callback.
4827: * If sax is NULL, fallback to the default DOM tree building routines.
4828: *
4829: * Returns the resulting document tree
4830: */
4831:
1.79 daniel 4832: xmlDocPtr
4833: xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
1.69 daniel 4834: int recovery) {
4835: xmlDocPtr ret;
4836: xmlParserCtxtPtr ctxt;
4837:
4838: ctxt = xmlCreateFileParserCtxt(filename);
4839: if (ctxt == NULL) return(NULL);
1.74 daniel 4840: if (sax != NULL) {
4841: ctxt->sax = sax;
4842: ctxt->userData = NULL;
4843: }
1.16 daniel 4844:
4845: xmlParseDocument(ctxt);
1.40 daniel 4846:
1.72 daniel 4847: if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
1.59 daniel 4848: else {
4849: ret = NULL;
1.72 daniel 4850: xmlFreeDoc(ctxt->myDoc);
4851: ctxt->myDoc = NULL;
1.59 daniel 4852: }
1.86 daniel 4853: if (sax != NULL)
4854: ctxt->sax = NULL;
1.69 daniel 4855: xmlFreeParserCtxt(ctxt);
1.20 daniel 4856:
4857: return(ret);
4858: }
4859:
1.55 daniel 4860: /**
4861: * xmlParseFile :
4862: * @filename: the filename
4863: *
4864: * parse an XML file and build a tree. Automatic support for ZLIB/Compress
4865: * compressed document is provided by default if found at compile-time.
4866: *
1.68 daniel 4867: * Returns the resulting document tree
1.55 daniel 4868: */
4869:
1.79 daniel 4870: xmlDocPtr
4871: xmlParseFile(const char *filename) {
1.59 daniel 4872: return(xmlSAXParseFile(NULL, filename, 0));
4873: }
4874:
4875: /**
4876: * xmlRecoverFile :
4877: * @filename: the filename
4878: *
4879: * parse an XML file and build a tree. Automatic support for ZLIB/Compress
4880: * compressed document is provided by default if found at compile-time.
4881: * In the case the document is not Well Formed, a tree is built anyway
4882: *
1.68 daniel 4883: * Returns the resulting document tree
1.59 daniel 4884: */
4885:
1.79 daniel 4886: xmlDocPtr
4887: xmlRecoverFile(const char *filename) {
1.59 daniel 4888: return(xmlSAXParseFile(NULL, filename, 1));
1.55 daniel 4889: }
1.32 daniel 4890:
1.50 daniel 4891: /**
1.82 daniel 4892: * xmlSubstituteEntitiesDefault :
4893: * @val: int 0 or 1
1.79 daniel 4894: *
4895: * Set and return the previous value for default entity support.
4896: * Initially the parser always keep entity references instead of substituting
4897: * entity values in the output. This function has to be used to change the
4898: * default parser behaviour
4899: * SAX::subtituteEntities() has to be used for changing that on a file by
4900: * file basis.
4901: *
4902: * Returns the last value for 0 for no substitution, 1 for substitution.
4903: */
4904:
4905: int
4906: xmlSubstituteEntitiesDefault(int val) {
4907: int old = xmlSubstituteEntitiesDefaultValue;
4908:
4909: xmlSubstituteEntitiesDefaultValue = val;
4910: return(old);
4911: }
4912:
4913: /**
1.69 daniel 4914: * xmlCreateMemoryParserCtxt :
1.68 daniel 4915: * @buffer: an pointer to a char array
1.50 daniel 4916: * @size: the siwe of the array
4917: *
1.69 daniel 4918: * Create a parser context for an XML in-memory document.
1.50 daniel 4919: *
1.69 daniel 4920: * Returns the new parser context or NULL
1.20 daniel 4921: */
1.69 daniel 4922: xmlParserCtxtPtr
4923: xmlCreateMemoryParserCtxt(char *buffer, int size) {
1.20 daniel 4924: xmlParserCtxtPtr ctxt;
1.40 daniel 4925: xmlParserInputPtr input;
1.75 daniel 4926: xmlCharEncoding enc;
1.40 daniel 4927:
4928: buffer[size - 1] = '\0';
4929:
1.20 daniel 4930: ctxt = (xmlParserCtxtPtr) malloc(sizeof(xmlParserCtxt));
4931: if (ctxt == NULL) {
4932: perror("malloc");
4933: return(NULL);
4934: }
1.40 daniel 4935: xmlInitParserCtxt(ctxt);
4936: input = (xmlParserInputPtr) malloc(sizeof(xmlParserInput));
4937: if (input == NULL) {
4938: perror("malloc");
1.50 daniel 4939: free(ctxt->nodeTab);
4940: free(ctxt->inputTab);
1.40 daniel 4941: free(ctxt);
4942: return(NULL);
4943: }
1.20 daniel 4944:
1.40 daniel 4945: input->filename = NULL;
4946: input->line = 1;
4947: input->col = 1;
1.45 daniel 4948:
4949: /*
1.75 daniel 4950: * plug some encoding conversion routines here. !!!
1.45 daniel 4951: */
1.75 daniel 4952: enc = xmlDetectCharEncoding(buffer);
4953: xmlSwitchEncoding(ctxt, enc);
4954:
1.40 daniel 4955: input->base = buffer;
4956: input->cur = buffer;
1.69 daniel 4957: input->free = NULL;
1.20 daniel 4958:
1.40 daniel 4959: inputPush(ctxt, input);
1.69 daniel 4960: return(ctxt);
4961: }
4962:
4963: /**
4964: * xmlSAXParseMemory :
4965: * @sax: the SAX handler block
4966: * @buffer: an pointer to a char array
4967: * @size: the siwe of the array
4968: * @recovery: work in recovery mode, i.e. tries to read no Well Formed
4969: * documents
4970: *
4971: * parse an XML in-memory block and use the given SAX function block
4972: * to handle the parsing callback. If sax is NULL, fallback to the default
4973: * DOM tree building routines.
4974: *
4975: * Returns the resulting document tree
4976: */
4977: xmlDocPtr
4978: xmlSAXParseMemory(xmlSAXHandlerPtr sax, char *buffer, int size, int recovery) {
4979: xmlDocPtr ret;
4980: xmlParserCtxtPtr ctxt;
4981:
4982: ctxt = xmlCreateMemoryParserCtxt(buffer, size);
4983: if (ctxt == NULL) return(NULL);
1.74 daniel 4984: if (sax != NULL) {
4985: ctxt->sax = sax;
4986: ctxt->userData = NULL;
4987: }
1.20 daniel 4988:
4989: xmlParseDocument(ctxt);
1.40 daniel 4990:
1.72 daniel 4991: if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
1.59 daniel 4992: else {
4993: ret = NULL;
1.72 daniel 4994: xmlFreeDoc(ctxt->myDoc);
4995: ctxt->myDoc = NULL;
1.59 daniel 4996: }
1.86 daniel 4997: if (sax != NULL)
4998: ctxt->sax = NULL;
1.69 daniel 4999: xmlFreeParserCtxt(ctxt);
1.16 daniel 5000:
1.9 httpng 5001: return(ret);
1.17 daniel 5002: }
5003:
1.55 daniel 5004: /**
5005: * xmlParseMemory :
1.68 daniel 5006: * @buffer: an pointer to a char array
1.55 daniel 5007: * @size: the size of the array
5008: *
5009: * parse an XML in-memory block and build a tree.
5010: *
1.68 daniel 5011: * Returns the resulting document tree
1.55 daniel 5012: */
5013:
5014: xmlDocPtr xmlParseMemory(char *buffer, int size) {
1.59 daniel 5015: return(xmlSAXParseMemory(NULL, buffer, size, 0));
5016: }
5017:
5018: /**
5019: * xmlRecoverMemory :
1.68 daniel 5020: * @buffer: an pointer to a char array
1.59 daniel 5021: * @size: the size of the array
5022: *
5023: * parse an XML in-memory block and build a tree.
5024: * In the case the document is not Well Formed, a tree is built anyway
5025: *
1.68 daniel 5026: * Returns the resulting document tree
1.59 daniel 5027: */
5028:
5029: xmlDocPtr xmlRecoverMemory(char *buffer, int size) {
5030: return(xmlSAXParseMemory(NULL, buffer, size, 1));
1.55 daniel 5031: }
1.17 daniel 5032:
1.50 daniel 5033: /**
5034: * xmlInitParserCtxt:
5035: * @ctxt: an XML parser context
5036: *
5037: * Initialize a parser context
5038: */
5039:
1.55 daniel 5040: void
5041: xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
1.17 daniel 5042: {
1.86 daniel 5043: xmlSAXHandler *sax;
5044:
5045: sax = (xmlSAXHandler *) malloc(sizeof(xmlSAXHandler));
5046: if (sax == NULL) {
5047: fprintf(stderr, "xmlInitParserCtxt: out of memory\n");
5048: }
5049:
1.69 daniel 5050: /* Allocate the Input stack */
5051: ctxt->inputTab = (xmlParserInputPtr *) malloc(5 * sizeof(xmlParserInputPtr));
5052: ctxt->inputNr = 0;
5053: ctxt->inputMax = 5;
5054: ctxt->input = NULL;
1.72 daniel 5055: ctxt->version = NULL;
5056: ctxt->encoding = NULL;
5057: ctxt->standalone = -1;
1.69 daniel 5058:
5059: /* Allocate the Node stack */
5060: ctxt->nodeTab = (xmlNodePtr *) malloc(10 * sizeof(xmlNodePtr));
5061: ctxt->nodeNr = 0;
5062: ctxt->nodeMax = 10;
5063: ctxt->node = NULL;
5064:
1.86 daniel 5065: if (sax == NULL) ctxt->sax = &xmlDefaultSAXHandler;
5066: else {
5067: ctxt->sax = sax;
5068: memcpy(sax, &xmlDefaultSAXHandler, sizeof(xmlSAXHandler));
5069: }
1.74 daniel 5070: ctxt->userData = ctxt;
1.72 daniel 5071: ctxt->myDoc = NULL;
1.69 daniel 5072: ctxt->wellFormed = 1;
1.79 daniel 5073: ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue;
1.69 daniel 5074: ctxt->record_info = 0;
5075: xmlInitNodeInfoSeq(&ctxt->node_seq);
5076: }
5077:
5078: /**
5079: * xmlFreeParserCtxt:
5080: * @ctxt: an XML parser context
5081: *
5082: * Free all the memory used by a parser context. However the parsed
1.72 daniel 5083: * document in ctxt->myDoc is not freed.
1.69 daniel 5084: */
5085:
5086: void
5087: xmlFreeParserCtxt(xmlParserCtxtPtr ctxt)
5088: {
5089: xmlParserInputPtr input;
5090:
5091: if (ctxt == NULL) return;
5092:
5093: while ((input = inputPop(ctxt)) != NULL) {
5094: xmlFreeInputStream(input);
5095: }
5096:
5097: if (ctxt->nodeTab != NULL) free(ctxt->nodeTab);
5098: if (ctxt->inputTab != NULL) free(ctxt->inputTab);
1.73 daniel 5099: if (ctxt->version != NULL) free((char *) ctxt->version);
1.86 daniel 5100: if ((ctxt->sax != NULL) && (ctxt->sax != &xmlDefaultSAXHandler))
5101: free(ctxt->sax);
1.69 daniel 5102: free(ctxt);
1.17 daniel 5103: }
5104:
1.50 daniel 5105: /**
5106: * xmlClearParserCtxt:
5107: * @ctxt: an XML parser context
5108: *
5109: * Clear (release owned resources) and reinitialize a parser context
5110: */
1.17 daniel 5111:
1.55 daniel 5112: void
5113: xmlClearParserCtxt(xmlParserCtxtPtr ctxt)
1.17 daniel 5114: {
1.32 daniel 5115: xmlClearNodeInfoSeq(&ctxt->node_seq);
5116: xmlInitParserCtxt(ctxt);
1.17 daniel 5117: }
5118:
5119:
1.50 daniel 5120: /**
5121: * xmlSetupParserForBuffer:
5122: * @ctxt: an XML parser context
5123: * @buffer: a CHAR * buffer
5124: * @filename: a file name
5125: *
1.19 daniel 5126: * Setup the parser context to parse a new buffer; Clears any prior
5127: * contents from the parser context. The buffer parameter must not be
5128: * NULL, but the filename parameter can be
5129: */
1.55 daniel 5130: void
5131: xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const CHAR* buffer,
1.17 daniel 5132: const char* filename)
5133: {
1.40 daniel 5134: xmlParserInputPtr input;
5135:
5136: input = (xmlParserInputPtr) malloc(sizeof(xmlParserInput));
5137: if (input == NULL) {
5138: perror("malloc");
5139: free(ctxt);
5140: exit(1);
5141: }
5142:
1.17 daniel 5143: xmlClearParserCtxt(ctxt);
1.40 daniel 5144: if (input->filename != NULL)
5145: input->filename = strdup(filename);
5146: else
5147: input->filename = NULL;
5148: input->line = 1;
5149: input->col = 1;
5150: input->base = buffer;
5151: input->cur = buffer;
5152:
5153: inputPush(ctxt, input);
1.17 daniel 5154: }
5155:
1.32 daniel 5156:
1.50 daniel 5157: /**
5158: * xmlParserFindNodeInfo:
5159: * @ctxt: an XML parser context
5160: * @node: an XML node within the tree
5161: *
5162: * Find the parser node info struct for a given node
5163: *
1.68 daniel 5164: * Returns an xmlParserNodeInfo block pointer or NULL
1.32 daniel 5165: */
5166: const xmlParserNodeInfo* xmlParserFindNodeInfo(const xmlParserCtxt* ctx,
5167: const xmlNode* node)
5168: {
5169: unsigned long pos;
5170:
5171: /* Find position where node should be at */
5172: pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node);
5173: if ( ctx->node_seq.buffer[pos].node == node )
5174: return &ctx->node_seq.buffer[pos];
5175: else
5176: return NULL;
5177: }
5178:
5179:
1.50 daniel 5180: /**
5181: * xmlInitNodeInfoSeq :
5182: * @seq: a node info sequence pointer
5183: *
5184: * -- Initialize (set to initial state) node info sequence
1.32 daniel 5185: */
1.55 daniel 5186: void
5187: xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
1.32 daniel 5188: {
5189: seq->length = 0;
5190: seq->maximum = 0;
5191: seq->buffer = NULL;
5192: }
5193:
1.50 daniel 5194: /**
5195: * xmlClearNodeInfoSeq :
5196: * @seq: a node info sequence pointer
5197: *
5198: * -- Clear (release memory and reinitialize) node
1.32 daniel 5199: * info sequence
5200: */
1.55 daniel 5201: void
5202: xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
1.32 daniel 5203: {
5204: if ( seq->buffer != NULL )
5205: free(seq->buffer);
5206: xmlInitNodeInfoSeq(seq);
5207: }
5208:
5209:
1.50 daniel 5210: /**
5211: * xmlParserFindNodeInfoIndex:
5212: * @seq: a node info sequence pointer
5213: * @node: an XML node pointer
5214: *
5215: *
1.32 daniel 5216: * xmlParserFindNodeInfoIndex : Find the index that the info record for
5217: * the given node is or should be at in a sorted sequence
1.68 daniel 5218: *
5219: * Returns a long indicating the position of the record
1.32 daniel 5220: */
5221: unsigned long xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeq* seq,
5222: const xmlNode* node)
5223: {
5224: unsigned long upper, lower, middle;
5225: int found = 0;
5226:
5227: /* Do a binary search for the key */
5228: lower = 1;
5229: upper = seq->length;
5230: middle = 0;
5231: while ( lower <= upper && !found) {
5232: middle = lower + (upper - lower) / 2;
5233: if ( node == seq->buffer[middle - 1].node )
5234: found = 1;
5235: else if ( node < seq->buffer[middle - 1].node )
5236: upper = middle - 1;
5237: else
5238: lower = middle + 1;
5239: }
5240:
5241: /* Return position */
5242: if ( middle == 0 || seq->buffer[middle - 1].node < node )
5243: return middle;
5244: else
5245: return middle - 1;
5246: }
5247:
5248:
1.50 daniel 5249: /**
5250: * xmlParserAddNodeInfo:
5251: * @ctxt: an XML parser context
1.68 daniel 5252: * @info: a node info sequence pointer
1.50 daniel 5253: *
5254: * Insert node info record into the sorted sequence
1.32 daniel 5255: */
1.55 daniel 5256: void
5257: xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt,
1.68 daniel 5258: const xmlParserNodeInfo* info)
1.32 daniel 5259: {
5260: unsigned long pos;
5261: static unsigned int block_size = 5;
5262:
5263: /* Find pos and check to see if node is already in the sequence */
1.55 daniel 5264: pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, info->node);
5265: if ( pos < ctxt->node_seq.length
5266: && ctxt->node_seq.buffer[pos].node == info->node ) {
5267: ctxt->node_seq.buffer[pos] = *info;
1.32 daniel 5268: }
5269:
5270: /* Otherwise, we need to add new node to buffer */
5271: else {
5272: /* Expand buffer by 5 if needed */
1.55 daniel 5273: if ( ctxt->node_seq.length + 1 > ctxt->node_seq.maximum ) {
1.32 daniel 5274: xmlParserNodeInfo* tmp_buffer;
1.55 daniel 5275: unsigned int byte_size = (sizeof(*ctxt->node_seq.buffer)
5276: *(ctxt->node_seq.maximum + block_size));
1.32 daniel 5277:
1.55 daniel 5278: if ( ctxt->node_seq.buffer == NULL )
1.32 daniel 5279: tmp_buffer = (xmlParserNodeInfo*)malloc(byte_size);
5280: else
1.55 daniel 5281: tmp_buffer = (xmlParserNodeInfo*)realloc(ctxt->node_seq.buffer, byte_size);
1.32 daniel 5282:
5283: if ( tmp_buffer == NULL ) {
1.55 daniel 5284: if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1.74 daniel 5285: ctxt->sax->error(ctxt->userData, "Out of memory\n");
1.32 daniel 5286: return;
5287: }
1.55 daniel 5288: ctxt->node_seq.buffer = tmp_buffer;
5289: ctxt->node_seq.maximum += block_size;
1.32 daniel 5290: }
5291:
5292: /* If position is not at end, move elements out of the way */
1.55 daniel 5293: if ( pos != ctxt->node_seq.length ) {
1.32 daniel 5294: unsigned long i;
5295:
1.55 daniel 5296: for ( i = ctxt->node_seq.length; i > pos; i-- )
5297: ctxt->node_seq.buffer[i] = ctxt->node_seq.buffer[i - 1];
1.32 daniel 5298: }
5299:
5300: /* Copy element and increase length */
1.55 daniel 5301: ctxt->node_seq.buffer[pos] = *info;
5302: ctxt->node_seq.length++;
1.32 daniel 5303: }
5304: }
1.77 daniel 5305:
5306:
Webmaster