Annotation of XML/testHTML.c, revision 1.14
1.1 daniel 1: /*
2: * testHTML.c : a small tester program for HTML input.
3: *
4: * See Copyright for the status of this software.
5: *
6: * Daniel.Veillard@w3.org
7: */
8:
9: #ifdef WIN32
1.9 daniel 10: #include "win32config.h"
1.1 daniel 11: #else
1.4 daniel 12: #include "config.h"
1.1 daniel 13: #endif
1.3 daniel 14:
1.12 daniel 15: #include "xmlversion.h"
16: #ifdef LIBXML_HTML_ENABLED
17:
1.3 daniel 18: #include <stdio.h>
19: #include <string.h>
1.7 daniel 20: #include <stdarg.h>
21:
1.3 daniel 22:
23: #ifdef HAVE_SYS_TYPES_H
1.1 daniel 24: #include <sys/types.h>
1.3 daniel 25: #endif
1.1 daniel 26: #ifdef HAVE_SYS_STAT_H
27: #include <sys/stat.h>
28: #endif
29: #ifdef HAVE_FCNTL_H
30: #include <fcntl.h>
31: #endif
32: #ifdef HAVE_UNISTD_H
33: #include <unistd.h>
34: #endif
1.3 daniel 35: #ifdef HAVE_STDLIB_H
1.1 daniel 36: #include <stdlib.h>
1.3 daniel 37: #endif
1.1 daniel 38:
1.12 daniel 39: #include <libxml/xmlmemory.h>
40: #include <libxml/HTMLparser.h>
41: #include <libxml/HTMLtree.h>
42: #include <libxml/debugXML.h>
1.1 daniel 43:
1.12 daniel 44: #ifdef LIBXML_DEBUG_ENABLED
1.1 daniel 45: static int debug = 0; /* --debug: dump the tree with xmlDebugDumpDocument instead of serializing it */
1.12 daniel 46: #endif
1.1 daniel 47: static int copy = 0; /* --copy: run the parsed document through xmlCopyDoc before printing */
1.7 daniel 48: static int sax = 0; /* --sax: exercise the SAX callbacks instead of building and printing a tree */
49: static int repeat = 0; /* --repeat: each occurrence adds 100 parsing passes per file */
50: static int noout = 0; /* --noout: suppress the dump / the debug SAX trace */
1.10 daniel 51: static int push = 0; /* --push: feed the file through the push parser in chunks */
1.13 veillard 52: static char *encoding = NULL; /* --encode <name>: output encoding handed to htmlSaveFileEnc, NULL if unset */
1.1 daniel 53:
1.7 daniel 54: xmlSAXHandler emptySAXHandlerStruct = {
55: NULL, /* internalSubset */
56: NULL, /* isStandalone */
57: NULL, /* hasInternalSubset */
58: NULL, /* hasExternalSubset */
59: NULL, /* resolveEntity */
60: NULL, /* getEntity */
61: NULL, /* entityDecl */
62: NULL, /* notationDecl */
63: NULL, /* attributeDecl */
64: NULL, /* elementDecl */
65: NULL, /* unparsedEntityDecl */
66: NULL, /* setDocumentLocator */
67: NULL, /* startDocument */
68: NULL, /* endDocument */
69: NULL, /* startElement */
70: NULL, /* endElement */
71: NULL, /* reference */
72: NULL, /* characters */
73: NULL, /* ignorableWhitespace */
74: NULL, /* processingInstruction */
75: NULL, /* comment */
76: NULL, /* xmlParserWarning */
77: NULL, /* xmlParserError */
78: NULL, /* xmlParserError */
79: NULL, /* getParameterEntity */
80: };
81:
82: xmlSAXHandlerPtr emptySAXHandler = &emptySAXHandlerStruct;
83: extern xmlSAXHandlerPtr debugSAXHandler;
84:
85: /************************************************************************
86: * *
87: * Debug Handlers *
88: * *
89: ************************************************************************/
90:
/**
 * isStandaloneDebug:
 * @ctx:  opaque context pointer handed to the callback (unused)
 *
 * Debug SAX callback for the "is the document standalone?" query.
 * It only records on stdout that it has been invoked.
 *
 * Returns 0 unconditionally.
 */
int
isStandaloneDebug(void *ctx)
{
    (void) ctx;
    fputs("SAX.isStandalone()\n", stdout);
    return 0;
}
105:
/**
 * hasInternalSubsetDebug:
 * @ctx:  opaque context pointer handed to the callback (unused)
 *
 * Debug SAX callback for the "does the document have an internal subset?"
 * query.  It only records on stdout that it has been invoked.
 *
 * Returns 0 unconditionally.
 */
int
hasInternalSubsetDebug(void *ctx)
{
    (void) ctx;
    fputs("SAX.hasInternalSubset()\n", stdout);
    return 0;
}
120:
/**
 * hasExternalSubsetDebug:
 * @ctx:  opaque context pointer handed to the callback (unused)
 *
 * Debug SAX callback for the "does the document have an external subset?"
 * query.  It only records on stdout that it has been invoked.
 *
 * Returns 0 unconditionally.
 */
int
hasExternalSubsetDebug(void *ctx)
{
    (void) ctx;
    fputs("SAX.hasExternalSubset()\n", stdout);
    return 0;
}
135:
136: /**
137: * hasInternalSubsetDebug:
138: * @ctxt: An XML parser context
139: *
140: * Does this document has an internal subset
141: */
142: void
143: internalSubsetDebug(void *ctx, const xmlChar *name,
144: const xmlChar *ExternalID, const xmlChar *SystemID)
145: {
146: /* xmlDtdPtr externalSubset; */
147:
148: fprintf(stdout, "SAX.internalSubset(%s, %s, %s)\n",
149: name, ExternalID, SystemID);
150:
151: /***********
152: if ((ExternalID != NULL) || (SystemID != NULL)) {
153: externalSubset = xmlParseDTD(ExternalID, SystemID);
154: if (externalSubset != NULL) {
155: xmlFreeDtd(externalSubset);
156: }
157: }
158: ***********/
159: }
160:
161: /**
162: * resolveEntityDebug:
163: * @ctxt: An XML parser context
164: * @publicId: The public ID of the entity
165: * @systemId: The system ID of the entity
166: *
167: * Special entity resolver, better left to the parser, it has
168: * more context than the application layer.
169: * The default behaviour is to NOT resolve the entities, in that case
170: * the ENTITY_REF nodes are built in the structure (and the parameter
171: * values).
172: *
173: * Returns the xmlParserInputPtr if inlined or NULL for DOM behaviour.
174: */
175: xmlParserInputPtr
176: resolveEntityDebug(void *ctx, const xmlChar *publicId, const xmlChar *systemId)
177: {
178: /* xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx; */
179:
180:
181: fprintf(stdout, "SAX.resolveEntity(");
182: if (publicId != NULL)
183: fprintf(stdout, "%s", (char *)publicId);
184: else
185: fprintf(stdout, " ");
186: if (systemId != NULL)
187: fprintf(stdout, ", %s)\n", (char *)systemId);
188: else
189: fprintf(stdout, ", )\n");
190: /*********
191: if (systemId != NULL) {
192: return(xmlNewInputFromFile(ctxt, (char *) systemId));
193: }
194: *********/
195: return(NULL);
196: }
197:
198: /**
199: * getEntityDebug:
200: * @ctxt: An XML parser context
201: * @name: The entity name
202: *
203: * Get an entity by name
204: *
205: * Returns the xmlParserInputPtr if inlined or NULL for DOM behaviour.
206: */
207: xmlEntityPtr
208: getEntityDebug(void *ctx, const xmlChar *name)
209: {
210: fprintf(stdout, "SAX.getEntity(%s)\n", name);
211: return(NULL);
212: }
213:
214: /**
215: * getParameterEntityDebug:
216: * @ctxt: An XML parser context
217: * @name: The entity name
218: *
219: * Get a parameter entity by name
220: *
221: * Returns the xmlParserInputPtr
222: */
223: xmlEntityPtr
224: getParameterEntityDebug(void *ctx, const xmlChar *name)
225: {
226: fprintf(stdout, "SAX.getParameterEntity(%s)\n", name);
227: return(NULL);
228: }
229:
230:
231: /**
232: * entityDeclDebug:
233: * @ctxt: An XML parser context
234: * @name: the entity name
235: * @type: the entity type
236: * @publicId: The public ID of the entity
237: * @systemId: The system ID of the entity
238: * @content: the entity value (without processing).
239: *
240: * An entity definition has been parsed
241: */
242: void
243: entityDeclDebug(void *ctx, const xmlChar *name, int type,
244: const xmlChar *publicId, const xmlChar *systemId, xmlChar *content)
245: {
246: fprintf(stdout, "SAX.entityDecl(%s, %d, %s, %s, %s)\n",
247: name, type, publicId, systemId, content);
248: }
249:
250: /**
251: * attributeDeclDebug:
252: * @ctxt: An XML parser context
253: * @name: the attribute name
254: * @type: the attribute type
255: *
256: * An attribute definition has been parsed
257: */
258: void
259: attributeDeclDebug(void *ctx, const xmlChar *elem, const xmlChar *name,
260: int type, int def, const xmlChar *defaultValue,
261: xmlEnumerationPtr tree)
262: {
263: fprintf(stdout, "SAX.attributeDecl(%s, %s, %d, %d, %s, ...)\n",
264: elem, name, type, def, defaultValue);
265: }
266:
267: /**
268: * elementDeclDebug:
269: * @ctxt: An XML parser context
270: * @name: the element name
271: * @type: the element type
272: * @content: the element value (without processing).
273: *
274: * An element definition has been parsed
275: */
276: void
277: elementDeclDebug(void *ctx, const xmlChar *name, int type,
278: xmlElementContentPtr content)
279: {
280: fprintf(stdout, "SAX.elementDecl(%s, %d, ...)\n",
281: name, type);
282: }
283:
284: /**
285: * notationDeclDebug:
286: * @ctxt: An XML parser context
287: * @name: The name of the notation
288: * @publicId: The public ID of the entity
289: * @systemId: The system ID of the entity
290: *
291: * What to do when a notation declaration has been parsed.
292: */
293: void
294: notationDeclDebug(void *ctx, const xmlChar *name,
295: const xmlChar *publicId, const xmlChar *systemId)
296: {
297: fprintf(stdout, "SAX.notationDecl(%s, %s, %s)\n",
298: (char *) name, (char *) publicId, (char *) systemId);
299: }
300:
301: /**
302: * unparsedEntityDeclDebug:
303: * @ctxt: An XML parser context
304: * @name: The name of the entity
305: * @publicId: The public ID of the entity
306: * @systemId: The system ID of the entity
307: * @notationName: the name of the notation
308: *
309: * What to do when an unparsed entity declaration is parsed
310: */
311: void
312: unparsedEntityDeclDebug(void *ctx, const xmlChar *name,
313: const xmlChar *publicId, const xmlChar *systemId,
314: const xmlChar *notationName)
315: {
316: fprintf(stdout, "SAX.unparsedEntityDecl(%s, %s, %s, %s)\n",
317: (char *) name, (char *) publicId, (char *) systemId,
318: (char *) notationName);
319: }
320:
321: /**
322: * setDocumentLocatorDebug:
323: * @ctxt: An XML parser context
324: * @loc: A SAX Locator
325: *
326: * Receive the document locator at startup, actually xmlDefaultSAXLocator
327: * Everything is available on the context, so this is useless in our case.
328: */
329: void
330: setDocumentLocatorDebug(void *ctx, xmlSAXLocatorPtr loc)
331: {
332: fprintf(stdout, "SAX.setDocumentLocator()\n");
333: }
334:
/**
 * startDocumentDebug:
 * @ctx:  opaque context pointer handed to the callback (unused)
 *
 * Debug SAX callback invoked when document processing starts; records
 * the call on stdout.
 */
void
startDocumentDebug(void *ctx)
{
    (void) ctx;
    fputs("SAX.startDocument()\n", stdout);
}
346:
/**
 * endDocumentDebug:
 * @ctx:  opaque context pointer handed to the callback (unused)
 *
 * Debug SAX callback invoked when the end of the document has been
 * detected; records the call on stdout.
 */
void
endDocumentDebug(void *ctx)
{
    (void) ctx;
    fputs("SAX.endDocument()\n", stdout);
}
358:
359: /**
360: * startElementDebug:
361: * @ctxt: An XML parser context
362: * @name: The element name
363: *
364: * called when an opening tag has been processed.
365: */
366: void
367: startElementDebug(void *ctx, const xmlChar *name, const xmlChar **atts)
368: {
369: int i;
370:
371: fprintf(stdout, "SAX.startElement(%s", (char *) name);
372: if (atts != NULL) {
373: for (i = 0;(atts[i] != NULL);i++) {
374: fprintf(stdout, ", %s='", atts[i++]);
375: fprintf(stdout, "%s'", atts[i]);
376: }
377: }
378: fprintf(stdout, ")\n");
379: }
380:
381: /**
382: * endElementDebug:
383: * @ctxt: An XML parser context
384: * @name: The element name
385: *
386: * called when the end of an element has been detected.
387: */
388: void
389: endElementDebug(void *ctx, const xmlChar *name)
390: {
391: fprintf(stdout, "SAX.endElement(%s)\n", (char *) name);
392: }
393:
394: /**
395: * charactersDebug:
396: * @ctxt: An XML parser context
397: * @ch: a xmlChar string
398: * @len: the number of xmlChar
399: *
400: * receiving some chars from the parser.
401: * Question: how much at a time ???
402: */
403: void
404: charactersDebug(void *ctx, const xmlChar *ch, int len)
405: {
406: int i;
407:
408: fprintf(stdout, "SAX.characters(");
409: for (i = 0;(i < len) && (i < 30);i++)
410: fprintf(stdout, "%c", ch[i]);
411: fprintf(stdout, ", %d)\n", len);
412: }
413:
414: /**
415: * referenceDebug:
416: * @ctxt: An XML parser context
417: * @name: The entity name
418: *
419: * called when an entity reference is detected.
420: */
421: void
422: referenceDebug(void *ctx, const xmlChar *name)
423: {
424: fprintf(stdout, "SAX.reference(%s)\n", name);
425: }
426:
427: /**
428: * ignorableWhitespaceDebug:
429: * @ctxt: An XML parser context
430: * @ch: a xmlChar string
431: * @start: the first char in the string
432: * @len: the number of xmlChar
433: *
434: * receiving some ignorable whitespaces from the parser.
435: * Question: how much at a time ???
436: */
437: void
438: ignorableWhitespaceDebug(void *ctx, const xmlChar *ch, int len)
439: {
440: fprintf(stdout, "SAX.ignorableWhitespace(%.30s, %d)\n",
441: (char *) ch, len);
442: }
443:
444: /**
445: * processingInstructionDebug:
446: * @ctxt: An XML parser context
447: * @target: the target name
448: * @data: the PI data's
449: * @len: the number of xmlChar
450: *
451: * A processing instruction has been parsed.
452: */
453: void
454: processingInstructionDebug(void *ctx, const xmlChar *target,
455: const xmlChar *data)
456: {
457: fprintf(stdout, "SAX.processingInstruction(%s, %s)\n",
458: (char *) target, (char *) data);
459: }
460:
461: /**
462: * commentDebug:
463: * @ctxt: An XML parser context
464: * @value: the comment content
465: *
466: * A comment has been parsed.
467: */
468: void
469: commentDebug(void *ctx, const xmlChar *value)
470: {
471: fprintf(stdout, "SAX.comment(%s)\n", value);
472: }
473:
/**
 * warningDebug:
 * @ctx:  opaque context pointer handed to the callback (unused)
 * @msg:  printf-style format of the message
 * @...:  arguments consumed by @msg
 *
 * Debug SAX callback for warnings: writes "SAX.warning: " followed by
 * the formatted message on stdout.
 */
void
warningDebug(void *ctx, const char *msg, ...)
{
    va_list ap;

    (void) ctx;
    fputs("SAX.warning: ", stdout);
    va_start(ap, msg);
    vprintf(msg, ap);
    va_end(ap);
}
493:
/**
 * errorDebug:
 * @ctx:  opaque context pointer handed to the callback (unused)
 * @msg:  printf-style format of the message
 * @...:  arguments consumed by @msg
 *
 * Debug SAX callback for errors: writes "SAX.error: " followed by the
 * formatted message on stdout.
 */
void
errorDebug(void *ctx, const char *msg, ...)
{
    va_list ap;

    (void) ctx;
    fputs("SAX.error: ", stdout);
    va_start(ap, msg);
    vprintf(msg, ap);
    va_end(ap);
}
513:
/**
 * fatalErrorDebug:
 * @ctx:  opaque context pointer handed to the callback (unused)
 * @msg:  printf-style format of the message
 * @...:  arguments consumed by @msg
 *
 * Debug SAX callback for fatal errors: writes "SAX.fatalError: "
 * followed by the formatted message on stdout.
 */
void
fatalErrorDebug(void *ctx, const char *msg, ...)
{
    va_list ap;

    (void) ctx;
    fputs("SAX.fatalError: ", stdout);
    va_start(ap, msg);
    vprintf(msg, ap);
    va_end(ap);
}
533:
534: xmlSAXHandler debugSAXHandlerStruct = {
535: internalSubsetDebug,
536: isStandaloneDebug,
537: hasInternalSubsetDebug,
538: hasExternalSubsetDebug,
539: resolveEntityDebug,
540: getEntityDebug,
541: entityDeclDebug,
542: notationDeclDebug,
543: attributeDeclDebug,
544: elementDeclDebug,
545: unparsedEntityDeclDebug,
546: setDocumentLocatorDebug,
547: startDocumentDebug,
548: endDocumentDebug,
549: startElementDebug,
550: endElementDebug,
551: referenceDebug,
552: charactersDebug,
553: ignorableWhitespaceDebug,
554: processingInstructionDebug,
555: commentDebug,
556: warningDebug,
557: errorDebug,
558: fatalErrorDebug,
559: getParameterEntityDebug,
560: };
561:
562: xmlSAXHandlerPtr debugSAXHandler = &debugSAXHandlerStruct;
1.1 daniel 563: /************************************************************************
564: * *
565: * Debug *
566: * *
567: ************************************************************************/
568:
1.7 daniel 569: void parseSAXFile(char *filename) {
570: htmlDocPtr doc;
571: /*
572: * Empty callbacks for checking
573: */
574: doc = htmlSAXParseFile(filename, NULL, emptySAXHandler, NULL);
575: if (doc != NULL) {
576: fprintf(stdout, "htmlSAXParseFile returned non-NULL\n");
577: xmlFreeDoc(doc);
578: }
579:
580: if (!noout) {
581: /*
582: * Debug callback
583: */
584: doc = htmlSAXParseFile(filename, NULL, debugSAXHandler, NULL);
585: if (doc != NULL) {
586: fprintf(stdout, "htmlSAXParseFile returned non-NULL\n");
587: xmlFreeDoc(doc);
588: }
589: }
590: }
591:
1.1 daniel 592: void parseAndPrintFile(char *filename) {
1.11 daniel 593: htmlDocPtr doc = NULL, tmp;
1.1 daniel 594:
595: /*
596: * build an HTML tree from a string;
597: */
1.10 daniel 598: if (push) {
599: FILE *f;
600:
601: f = fopen(filename, "r");
602: if (f != NULL) {
603: int res, size = 3;
1.14 ! veillard 604: char chars[4096];
1.10 daniel 605: htmlParserCtxtPtr ctxt;
606:
1.14 ! veillard 607: /* if (repeat) */
! 608: size = 4096;
1.10 daniel 609: res = fread(chars, 1, 4, f);
610: if (res > 0) {
611: ctxt = htmlCreatePushParserCtxt(NULL, NULL,
612: chars, res, filename, 0);
613: while ((res = fread(chars, 1, size, f)) > 0) {
614: htmlParseChunk(ctxt, chars, res, 0);
615: }
616: htmlParseChunk(ctxt, chars, 0, 1);
617: doc = ctxt->myDoc;
618: htmlFreeParserCtxt(ctxt);
619: }
620: }
621: } else {
622: doc = htmlParseFile(filename, NULL);
623: }
624: if (doc == NULL) {
625: fprintf(stderr, "Could not parse %s\n", filename);
626: }
1.1 daniel 627:
628: /*
629: * test intermediate copy if needed.
630: */
631: if (copy) {
632: tmp = doc;
633: doc = xmlCopyDoc(doc, 1);
634: xmlFreeDoc(tmp);
635: }
636:
637: /*
638: * print it.
639: */
1.7 daniel 640: if (!noout) {
1.12 daniel 641: #ifdef LIBXML_DEBUG_ENABLED
1.13 veillard 642: if (!debug) {
643: if (encoding)
644: htmlSaveFileEnc("-", doc, encoding);
645: else
646: htmlDocDump(stdout, doc);
647: } else
1.7 daniel 648: xmlDebugDumpDocument(stdout, doc);
1.12 daniel 649: #else
1.13 veillard 650: if (encoding)
651: htmlSaveFileEnc("-", doc, encoding);
652: else
653: htmlDocDump(stdout, doc);
1.12 daniel 654: #endif
1.7 daniel 655: }
1.1 daniel 656:
657: /*
658: * free it.
659: */
660: xmlFreeDoc(doc);
661: }
662:
663: int main(int argc, char **argv) {
1.7 daniel 664: int i, count;
1.1 daniel 665: int files = 0;
666:
667: for (i = 1; i < argc ; i++) {
1.12 daniel 668: #ifdef LIBXML_DEBUG_ENABLED
1.1 daniel 669: if ((!strcmp(argv[i], "-debug")) || (!strcmp(argv[i], "--debug")))
670: debug++;
1.12 daniel 671: else
672: #endif
673: if ((!strcmp(argv[i], "-copy")) || (!strcmp(argv[i], "--copy")))
1.1 daniel 674: copy++;
1.10 daniel 675: else if ((!strcmp(argv[i], "-push")) || (!strcmp(argv[i], "--push")))
676: push++;
1.7 daniel 677: else if ((!strcmp(argv[i], "-sax")) || (!strcmp(argv[i], "--sax")))
678: sax++;
679: else if ((!strcmp(argv[i], "-noout")) || (!strcmp(argv[i], "--noout")))
680: noout++;
681: else if ((!strcmp(argv[i], "-repeat")) ||
682: (!strcmp(argv[i], "--repeat")))
683: repeat++;
1.13 veillard 684: else if ((!strcmp(argv[i], "-encode")) ||
685: (!strcmp(argv[i], "--encode"))) {
686: i++;
687: encoding = argv[i];
688: }
1.1 daniel 689: }
690: for (i = 1; i < argc ; i++) {
1.13 veillard 691: if ((!strcmp(argv[i], "-encode")) ||
692: (!strcmp(argv[i], "--encode"))) {
693: i++;
694: continue;
695: }
1.1 daniel 696: if (argv[i][0] != '-') {
1.7 daniel 697: if (repeat) {
698: for (count = 0;count < 100 * repeat;count++) {
699: if (sax)
700: parseSAXFile(argv[i]);
701: else
702: parseAndPrintFile(argv[i]);
703: }
704: } else {
705: if (sax)
706: parseSAXFile(argv[i]);
707: else
708: parseAndPrintFile(argv[i]);
709: }
1.1 daniel 710: files ++;
711: }
712: }
713: if (files == 0) {
1.7 daniel 714: printf("Usage : %s [--debug] [--copy] [--copy] HTMLfiles ...\n",
1.1 daniel 715: argv[0]);
716: printf("\tParse the HTML files and output the result of the parsing\n");
1.12 daniel 717: #ifdef LIBXML_DEBUG_ENABLED
1.1 daniel 718: printf("\t--debug : dump a debug tree of the in-memory document\n");
1.12 daniel 719: #endif
1.1 daniel 720: printf("\t--copy : used to test the internal copy implementation\n");
1.7 daniel 721: printf("\t--sax : debug the sequence of SAX callbacks\n");
1.10 daniel 722: printf("\t--repeat : parse the file 100 times, for timing\n");
1.7 daniel 723: printf("\t--noout : do not print the result\n");
1.10 daniel 724: printf("\t--push : use the push mode parser\n");
1.13 veillard 725: printf("\t--encode encoding : output in the given encoding\n");
1.1 daniel 726: }
1.8 daniel 727: xmlCleanupParser();
1.6 daniel 728: xmlMemoryDump();
1.1 daniel 729:
730: return(0);
731: }
1.12 daniel 732: #else /* !LIBXML_HTML_ENABLED */
733: #include <stdio.h>
/*
 * Fallback entry point used when the library was built without HTML
 * support: reports that fact on stdout and exits successfully.
 */
int main(int argc, char **argv) {
    (void) argc;
    fprintf(stdout, "%s : HTML support not compiled in\n", argv[0]);
    return 0;
}
738: #endif
Webmaster