Annotation of XML/testHTML.c, revision 1.8
1.1 daniel 1: /*
2: * testHTML.c : a small tester program for HTML input.
3: *
4: * See Copyright for the status of this software.
5: *
6: * Daniel.Veillard@w3.org
7: */
8:
9: #ifdef WIN32
10: #define HAVE_FCNTL_H
11: #include <io.h>
12: #else
1.4 daniel 13: #include "config.h"
1.1 daniel 14: #endif
1.3 daniel 15:
16: #include <stdio.h>
17: #include <string.h>
1.7 daniel 18: #include <stdarg.h>
19:
1.3 daniel 20:
21: #ifdef HAVE_SYS_TYPES_H
1.1 daniel 22: #include <sys/types.h>
1.3 daniel 23: #endif
1.1 daniel 24: #ifdef HAVE_SYS_STAT_H
25: #include <sys/stat.h>
26: #endif
27: #ifdef HAVE_FCNTL_H
28: #include <fcntl.h>
29: #endif
30: #ifdef HAVE_UNISTD_H
31: #include <unistd.h>
32: #endif
1.3 daniel 33: #ifdef HAVE_STDLIB_H
1.1 daniel 34: #include <stdlib.h>
1.3 daniel 35: #endif
1.1 daniel 36:
1.6 daniel 37: #include "xmlmemory.h"
1.1 daniel 38: #include "HTMLparser.h"
39: #include "HTMLtree.h"
40: #include "debugXML.h"
41:
/*
 * Command-line switches.  main() increments the matching counter each
 * time the option appears on the command line.
 */
static int debug = 0;   /* -debug / --debug   : dump the debug tree instead of HTML */
static int copy = 0;    /* -copy / --copy     : run the document through xmlCopyDoc() */
static int sax = 0;     /* -sax / --sax       : trace SAX callbacks instead of building a tree */
static int repeat = 0;  /* -repeat / --repeat : parse each file 100 * repeat times */
static int noout = 0;   /* -noout / --noout   : suppress the result output */
1.1 daniel 47:
48: /*
49: * Note: this is perfectly clean HTML, i.e. not a useful test.
1.5 daniel 50: static xmlChar buffer[] =
1.1 daniel 51: "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\"\n\
52: \"http://www.w3.org/TR/REC-html40/loose.dtd\">\n\
53: <html>\n\
54: <head>\n\
55: <title>This service is temporary down</title>\n\
56: </head>\n\
57: \n\
58: <body bgcolor=\"#FFFFFF\">\n\
59: <h1 align=\"center\">Sorry, this service is temporary down</h1>\n\
60: We are doing our best to get it back on-line,\n\
61: \n\
62: <p>The W3C system administrators</p>\n\
63: </body>\n\
64: </html>\n\
65: ";
1.2 daniel 66: */
1.1 daniel 67:
1.7 daniel 68: xmlSAXHandler emptySAXHandlerStruct = {
69: NULL, /* internalSubset */
70: NULL, /* isStandalone */
71: NULL, /* hasInternalSubset */
72: NULL, /* hasExternalSubset */
73: NULL, /* resolveEntity */
74: NULL, /* getEntity */
75: NULL, /* entityDecl */
76: NULL, /* notationDecl */
77: NULL, /* attributeDecl */
78: NULL, /* elementDecl */
79: NULL, /* unparsedEntityDecl */
80: NULL, /* setDocumentLocator */
81: NULL, /* startDocument */
82: NULL, /* endDocument */
83: NULL, /* startElement */
84: NULL, /* endElement */
85: NULL, /* reference */
86: NULL, /* characters */
87: NULL, /* ignorableWhitespace */
88: NULL, /* processingInstruction */
89: NULL, /* comment */
90: NULL, /* xmlParserWarning */
91: NULL, /* xmlParserError */
92: NULL, /* xmlParserError */
93: NULL, /* getParameterEntity */
94: };
95:
96: xmlSAXHandlerPtr emptySAXHandler = &emptySAXHandlerStruct;
97: extern xmlSAXHandlerPtr debugSAXHandler;
98:
99: /************************************************************************
100: * *
101: * Debug Handlers *
102: * *
103: ************************************************************************/
104:
/**
 * isStandaloneDebug:
 * @ctx:  the user data passed by the parser (unused)
 *
 * Trace callback for the "is this document standalone?" query.
 * Writes one line to stdout.
 *
 * Returns 0 unconditionally
 */
int
isStandaloneDebug(void *ctx)
{
    (void) ctx;

    fputs("SAX.isStandalone()\n", stdout);
    return 0;
}
119:
/**
 * hasInternalSubsetDebug:
 * @ctx:  the user data passed by the parser (unused)
 *
 * Trace callback for the "does the document have an internal subset?"
 * query.  Writes one line to stdout.
 *
 * Returns 0 unconditionally
 */
int
hasInternalSubsetDebug(void *ctx)
{
    (void) ctx;

    fputs("SAX.hasInternalSubset()\n", stdout);
    return 0;
}
134:
/**
 * hasExternalSubsetDebug:
 * @ctx:  the user data passed by the parser (unused)
 *
 * Trace callback for the "does the document have an external subset?"
 * query.  Writes one line to stdout.
 *
 * Returns 0 unconditionally
 */
int
hasExternalSubsetDebug(void *ctx)
{
    (void) ctx;

    fputs("SAX.hasExternalSubset()\n", stdout);
    return 0;
}
149:
150: /**
151: * hasInternalSubsetDebug:
152: * @ctxt: An XML parser context
153: *
154: * Does this document has an internal subset
155: */
156: void
157: internalSubsetDebug(void *ctx, const xmlChar *name,
158: const xmlChar *ExternalID, const xmlChar *SystemID)
159: {
160: /* xmlDtdPtr externalSubset; */
161:
162: fprintf(stdout, "SAX.internalSubset(%s, %s, %s)\n",
163: name, ExternalID, SystemID);
164:
165: /***********
166: if ((ExternalID != NULL) || (SystemID != NULL)) {
167: externalSubset = xmlParseDTD(ExternalID, SystemID);
168: if (externalSubset != NULL) {
169: xmlFreeDtd(externalSubset);
170: }
171: }
172: ***********/
173: }
174:
175: /**
176: * resolveEntityDebug:
177: * @ctxt: An XML parser context
178: * @publicId: The public ID of the entity
179: * @systemId: The system ID of the entity
180: *
181: * Special entity resolver, better left to the parser, it has
182: * more context than the application layer.
183: * The default behaviour is to NOT resolve the entities, in that case
184: * the ENTITY_REF nodes are built in the structure (and the parameter
185: * values).
186: *
187: * Returns the xmlParserInputPtr if inlined or NULL for DOM behaviour.
188: */
189: xmlParserInputPtr
190: resolveEntityDebug(void *ctx, const xmlChar *publicId, const xmlChar *systemId)
191: {
192: /* xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx; */
193:
194:
195: fprintf(stdout, "SAX.resolveEntity(");
196: if (publicId != NULL)
197: fprintf(stdout, "%s", (char *)publicId);
198: else
199: fprintf(stdout, " ");
200: if (systemId != NULL)
201: fprintf(stdout, ", %s)\n", (char *)systemId);
202: else
203: fprintf(stdout, ", )\n");
204: /*********
205: if (systemId != NULL) {
206: return(xmlNewInputFromFile(ctxt, (char *) systemId));
207: }
208: *********/
209: return(NULL);
210: }
211:
212: /**
213: * getEntityDebug:
214: * @ctxt: An XML parser context
215: * @name: The entity name
216: *
217: * Get an entity by name
218: *
219: * Returns the xmlParserInputPtr if inlined or NULL for DOM behaviour.
220: */
221: xmlEntityPtr
222: getEntityDebug(void *ctx, const xmlChar *name)
223: {
224: fprintf(stdout, "SAX.getEntity(%s)\n", name);
225: return(NULL);
226: }
227:
228: /**
229: * getParameterEntityDebug:
230: * @ctxt: An XML parser context
231: * @name: The entity name
232: *
233: * Get a parameter entity by name
234: *
235: * Returns the xmlParserInputPtr
236: */
237: xmlEntityPtr
238: getParameterEntityDebug(void *ctx, const xmlChar *name)
239: {
240: fprintf(stdout, "SAX.getParameterEntity(%s)\n", name);
241: return(NULL);
242: }
243:
244:
245: /**
246: * entityDeclDebug:
247: * @ctxt: An XML parser context
248: * @name: the entity name
249: * @type: the entity type
250: * @publicId: The public ID of the entity
251: * @systemId: The system ID of the entity
252: * @content: the entity value (without processing).
253: *
254: * An entity definition has been parsed
255: */
256: void
257: entityDeclDebug(void *ctx, const xmlChar *name, int type,
258: const xmlChar *publicId, const xmlChar *systemId, xmlChar *content)
259: {
260: fprintf(stdout, "SAX.entityDecl(%s, %d, %s, %s, %s)\n",
261: name, type, publicId, systemId, content);
262: }
263:
264: /**
265: * attributeDeclDebug:
266: * @ctxt: An XML parser context
267: * @name: the attribute name
268: * @type: the attribute type
269: *
270: * An attribute definition has been parsed
271: */
272: void
273: attributeDeclDebug(void *ctx, const xmlChar *elem, const xmlChar *name,
274: int type, int def, const xmlChar *defaultValue,
275: xmlEnumerationPtr tree)
276: {
277: fprintf(stdout, "SAX.attributeDecl(%s, %s, %d, %d, %s, ...)\n",
278: elem, name, type, def, defaultValue);
279: }
280:
281: /**
282: * elementDeclDebug:
283: * @ctxt: An XML parser context
284: * @name: the element name
285: * @type: the element type
286: * @content: the element value (without processing).
287: *
288: * An element definition has been parsed
289: */
290: void
291: elementDeclDebug(void *ctx, const xmlChar *name, int type,
292: xmlElementContentPtr content)
293: {
294: fprintf(stdout, "SAX.elementDecl(%s, %d, ...)\n",
295: name, type);
296: }
297:
298: /**
299: * notationDeclDebug:
300: * @ctxt: An XML parser context
301: * @name: The name of the notation
302: * @publicId: The public ID of the entity
303: * @systemId: The system ID of the entity
304: *
305: * What to do when a notation declaration has been parsed.
306: */
307: void
308: notationDeclDebug(void *ctx, const xmlChar *name,
309: const xmlChar *publicId, const xmlChar *systemId)
310: {
311: fprintf(stdout, "SAX.notationDecl(%s, %s, %s)\n",
312: (char *) name, (char *) publicId, (char *) systemId);
313: }
314:
315: /**
316: * unparsedEntityDeclDebug:
317: * @ctxt: An XML parser context
318: * @name: The name of the entity
319: * @publicId: The public ID of the entity
320: * @systemId: The system ID of the entity
321: * @notationName: the name of the notation
322: *
323: * What to do when an unparsed entity declaration is parsed
324: */
325: void
326: unparsedEntityDeclDebug(void *ctx, const xmlChar *name,
327: const xmlChar *publicId, const xmlChar *systemId,
328: const xmlChar *notationName)
329: {
330: fprintf(stdout, "SAX.unparsedEntityDecl(%s, %s, %s, %s)\n",
331: (char *) name, (char *) publicId, (char *) systemId,
332: (char *) notationName);
333: }
334:
335: /**
336: * setDocumentLocatorDebug:
337: * @ctxt: An XML parser context
338: * @loc: A SAX Locator
339: *
340: * Receive the document locator at startup, actually xmlDefaultSAXLocator
341: * Everything is available on the context, so this is useless in our case.
342: */
343: void
344: setDocumentLocatorDebug(void *ctx, xmlSAXLocatorPtr loc)
345: {
346: fprintf(stdout, "SAX.setDocumentLocator()\n");
347: }
348:
/**
 * startDocumentDebug:
 * @ctx:  the user data passed by the parser (unused)
 *
 * Trace callback for the start of document processing.  Writes one
 * line to stdout.
 */
void
startDocumentDebug(void *ctx)
{
    (void) ctx;

    fputs("SAX.startDocument()\n", stdout);
}
360:
/**
 * endDocumentDebug:
 * @ctx:  the user data passed by the parser (unused)
 *
 * Trace callback for the end of the document.  Writes one line to
 * stdout.
 */
void
endDocumentDebug(void *ctx)
{
    (void) ctx;

    fputs("SAX.endDocument()\n", stdout);
}
372:
373: /**
374: * startElementDebug:
375: * @ctxt: An XML parser context
376: * @name: The element name
377: *
378: * called when an opening tag has been processed.
379: */
380: void
381: startElementDebug(void *ctx, const xmlChar *name, const xmlChar **atts)
382: {
383: int i;
384:
385: fprintf(stdout, "SAX.startElement(%s", (char *) name);
386: if (atts != NULL) {
387: for (i = 0;(atts[i] != NULL);i++) {
388: fprintf(stdout, ", %s='", atts[i++]);
389: fprintf(stdout, "%s'", atts[i]);
390: }
391: }
392: fprintf(stdout, ")\n");
393: }
394:
395: /**
396: * endElementDebug:
397: * @ctxt: An XML parser context
398: * @name: The element name
399: *
400: * called when the end of an element has been detected.
401: */
402: void
403: endElementDebug(void *ctx, const xmlChar *name)
404: {
405: fprintf(stdout, "SAX.endElement(%s)\n", (char *) name);
406: }
407:
408: /**
409: * charactersDebug:
410: * @ctxt: An XML parser context
411: * @ch: a xmlChar string
412: * @len: the number of xmlChar
413: *
414: * receiving some chars from the parser.
415: * Question: how much at a time ???
416: */
417: void
418: charactersDebug(void *ctx, const xmlChar *ch, int len)
419: {
420: int i;
421:
422: fprintf(stdout, "SAX.characters(");
423: for (i = 0;(i < len) && (i < 30);i++)
424: fprintf(stdout, "%c", ch[i]);
425: fprintf(stdout, ", %d)\n", len);
426: }
427:
428: /**
429: * referenceDebug:
430: * @ctxt: An XML parser context
431: * @name: The entity name
432: *
433: * called when an entity reference is detected.
434: */
435: void
436: referenceDebug(void *ctx, const xmlChar *name)
437: {
438: fprintf(stdout, "SAX.reference(%s)\n", name);
439: }
440:
441: /**
442: * ignorableWhitespaceDebug:
443: * @ctxt: An XML parser context
444: * @ch: a xmlChar string
445: * @start: the first char in the string
446: * @len: the number of xmlChar
447: *
448: * receiving some ignorable whitespaces from the parser.
449: * Question: how much at a time ???
450: */
451: void
452: ignorableWhitespaceDebug(void *ctx, const xmlChar *ch, int len)
453: {
454: fprintf(stdout, "SAX.ignorableWhitespace(%.30s, %d)\n",
455: (char *) ch, len);
456: }
457:
458: /**
459: * processingInstructionDebug:
460: * @ctxt: An XML parser context
461: * @target: the target name
462: * @data: the PI data's
463: * @len: the number of xmlChar
464: *
465: * A processing instruction has been parsed.
466: */
467: void
468: processingInstructionDebug(void *ctx, const xmlChar *target,
469: const xmlChar *data)
470: {
471: fprintf(stdout, "SAX.processingInstruction(%s, %s)\n",
472: (char *) target, (char *) data);
473: }
474:
475: /**
476: * commentDebug:
477: * @ctxt: An XML parser context
478: * @value: the comment content
479: *
480: * A comment has been parsed.
481: */
482: void
483: commentDebug(void *ctx, const xmlChar *value)
484: {
485: fprintf(stdout, "SAX.comment(%s)\n", value);
486: }
487:
/**
 * warningDebug:
 * @ctx:  the user data passed by the parser (unused)
 * @msg:  printf-style format of the message
 * @...:  arguments consumed by @msg
 *
 * Trace callback for parser warnings.  Writes "SAX.warning: " followed
 * by the formatted message to stdout.
 */
void
warningDebug(void *ctx, const char *msg, ...)
{
    va_list ap;

    (void) ctx;

    va_start(ap, msg);
    fputs("SAX.warning: ", stdout);
    vprintf(msg, ap);
    va_end(ap);
}
507:
/**
 * errorDebug:
 * @ctx:  the user data passed by the parser (unused)
 * @msg:  printf-style format of the message
 * @...:  arguments consumed by @msg
 *
 * Trace callback for parser errors.  Writes "SAX.error: " followed by
 * the formatted message to stdout.
 */
void
errorDebug(void *ctx, const char *msg, ...)
{
    va_list ap;

    (void) ctx;

    va_start(ap, msg);
    fputs("SAX.error: ", stdout);
    vprintf(msg, ap);
    va_end(ap);
}
527:
/**
 * fatalErrorDebug:
 * @ctx:  the user data passed by the parser (unused)
 * @msg:  printf-style format of the message
 * @...:  arguments consumed by @msg
 *
 * Trace callback for fatal parser errors.  Writes "SAX.fatalError: "
 * followed by the formatted message to stdout.
 */
void
fatalErrorDebug(void *ctx, const char *msg, ...)
{
    va_list ap;

    (void) ctx;

    va_start(ap, msg);
    fputs("SAX.fatalError: ", stdout);
    vprintf(msg, ap);
    va_end(ap);
}
547:
548: xmlSAXHandler debugSAXHandlerStruct = {
549: internalSubsetDebug,
550: isStandaloneDebug,
551: hasInternalSubsetDebug,
552: hasExternalSubsetDebug,
553: resolveEntityDebug,
554: getEntityDebug,
555: entityDeclDebug,
556: notationDeclDebug,
557: attributeDeclDebug,
558: elementDeclDebug,
559: unparsedEntityDeclDebug,
560: setDocumentLocatorDebug,
561: startDocumentDebug,
562: endDocumentDebug,
563: startElementDebug,
564: endElementDebug,
565: referenceDebug,
566: charactersDebug,
567: ignorableWhitespaceDebug,
568: processingInstructionDebug,
569: commentDebug,
570: warningDebug,
571: errorDebug,
572: fatalErrorDebug,
573: getParameterEntityDebug,
574: };
575:
576: xmlSAXHandlerPtr debugSAXHandler = &debugSAXHandlerStruct;
1.1 daniel 577: /************************************************************************
578: * *
579: * Debug *
580: * *
581: ************************************************************************/
582:
1.7 daniel 583: void parseSAXFile(char *filename) {
584: htmlDocPtr doc;
585: /*
586: * Empty callbacks for checking
587: */
588: doc = htmlSAXParseFile(filename, NULL, emptySAXHandler, NULL);
589: if (doc != NULL) {
590: fprintf(stdout, "htmlSAXParseFile returned non-NULL\n");
591: xmlFreeDoc(doc);
592: }
593:
594: if (!noout) {
595: /*
596: * Debug callback
597: */
598: doc = htmlSAXParseFile(filename, NULL, debugSAXHandler, NULL);
599: if (doc != NULL) {
600: fprintf(stdout, "htmlSAXParseFile returned non-NULL\n");
601: xmlFreeDoc(doc);
602: }
603: }
604: }
605:
1.1 daniel 606: void parseAndPrintFile(char *filename) {
607: htmlDocPtr doc, tmp;
608:
609: /*
610: * build an HTML tree from a string;
611: */
612: doc = htmlParseFile(filename, NULL);
613:
614: /*
615: * test intermediate copy if needed.
616: */
617: if (copy) {
618: tmp = doc;
619: doc = xmlCopyDoc(doc, 1);
620: xmlFreeDoc(tmp);
621: }
622:
623: /*
624: * print it.
625: */
1.7 daniel 626: if (!noout) {
627: if (!debug)
628: htmlDocDump(stdout, doc);
629: else
630: xmlDebugDumpDocument(stdout, doc);
631: }
1.1 daniel 632:
633: /*
634: * free it.
635: */
636: xmlFreeDoc(doc);
637: }
638:
1.5 daniel 639: void parseAndPrintBuffer(xmlChar *buf) {
1.1 daniel 640: htmlDocPtr doc, tmp;
641:
642: /*
643: * build an HTML tree from a string;
644: */
645: doc = htmlParseDoc(buf, NULL);
646:
647: /*
648: * test intermediate copy if needed.
649: */
650: if (copy) {
651: tmp = doc;
652: doc = xmlCopyDoc(doc, 1);
653: xmlFreeDoc(tmp);
654: }
655:
656: /*
657: * print it.
658: */
659: if (!debug)
660: htmlDocDump(stdout, doc);
661: else
662: xmlDebugDumpDocument(stdout, doc);
663:
664: /*
665: * free it.
666: */
667: xmlFreeDoc(doc);
668: }
669:
670: int main(int argc, char **argv) {
1.7 daniel 671: int i, count;
1.1 daniel 672: int files = 0;
673:
674: for (i = 1; i < argc ; i++) {
675: if ((!strcmp(argv[i], "-debug")) || (!strcmp(argv[i], "--debug")))
676: debug++;
677: else if ((!strcmp(argv[i], "-copy")) || (!strcmp(argv[i], "--copy")))
678: copy++;
1.7 daniel 679: else if ((!strcmp(argv[i], "-sax")) || (!strcmp(argv[i], "--sax")))
680: sax++;
681: else if ((!strcmp(argv[i], "-noout")) || (!strcmp(argv[i], "--noout")))
682: noout++;
683: else if ((!strcmp(argv[i], "-repeat")) ||
684: (!strcmp(argv[i], "--repeat")))
685: repeat++;
1.1 daniel 686: }
687: for (i = 1; i < argc ; i++) {
688: if (argv[i][0] != '-') {
1.7 daniel 689: if (repeat) {
690: for (count = 0;count < 100 * repeat;count++) {
691: if (sax)
692: parseSAXFile(argv[i]);
693: else
694: parseAndPrintFile(argv[i]);
695: }
696: } else {
697: if (sax)
698: parseSAXFile(argv[i]);
699: else
700: parseAndPrintFile(argv[i]);
701: }
1.1 daniel 702: files ++;
703: }
704: }
705: if (files == 0) {
1.7 daniel 706: printf("Usage : %s [--debug] [--copy] [--copy] HTMLfiles ...\n",
1.1 daniel 707: argv[0]);
708: printf("\tParse the HTML files and output the result of the parsing\n");
709: printf("\t--debug : dump a debug tree of the in-memory document\n");
710: printf("\t--copy : used to test the internal copy implementation\n");
1.7 daniel 711: printf("\t--sax : debug the sequence of SAX callbacks\n");
712: printf("\t--repeat : parse the file 100 times, for timing or profiling\n");
713: printf("\t--noout : do not print the result\n");
1.1 daniel 714: }
1.8 ! daniel 715: xmlCleanupParser();
1.6 daniel 716: xmlMemoryDump();
1.1 daniel 717:
718: return(0);
719: }
Webmaster