Annotation of XML/testHTML.c, revision 1.9
1.1 daniel 1: /*
2: * testHTML.c : a small tester program for HTML input.
3: *
4: * See Copyright for the status of this software.
5: *
6: * Daniel.Veillard@w3.org
7: */
8:
9: #ifdef WIN32
1.9 ! daniel 10: #include "win32config.h"
1.1 daniel 11: #else
1.4 daniel 12: #include "config.h"
1.1 daniel 13: #endif
1.3 daniel 14:
15: #include <stdio.h>
16: #include <string.h>
1.7 daniel 17: #include <stdarg.h>
18:
1.3 daniel 19:
20: #ifdef HAVE_SYS_TYPES_H
1.1 daniel 21: #include <sys/types.h>
1.3 daniel 22: #endif
1.1 daniel 23: #ifdef HAVE_SYS_STAT_H
24: #include <sys/stat.h>
25: #endif
26: #ifdef HAVE_FCNTL_H
27: #include <fcntl.h>
28: #endif
29: #ifdef HAVE_UNISTD_H
30: #include <unistd.h>
31: #endif
1.3 daniel 32: #ifdef HAVE_STDLIB_H
1.1 daniel 33: #include <stdlib.h>
1.3 daniel 34: #endif
1.1 daniel 35:
1.6 daniel 36: #include "xmlmemory.h"
1.1 daniel 37: #include "HTMLparser.h"
38: #include "HTMLtree.h"
39: #include "debugXML.h"
40:
/*
 * Command-line switches.  Each one is a counter bumped by main() every
 * time the matching option appears on the command line.
 */
static int debug = 0;   /* --debug:  dump the tree with xmlDebugDumpDocument() */
static int copy = 0;    /* --copy:   run the document through xmlCopyDoc() first */
static int sax = 0;     /* --sax:    parse through the SAX handlers only */
static int repeat = 0;  /* --repeat: process each file 100 * repeat times */
static int noout = 0;   /* --noout:  suppress the printed result */
1.1 daniel 46:
47: /*
48: * Note: this is perfectly clean HTML, i.e. not a useful test.
1.5 daniel 49: static xmlChar buffer[] =
1.1 daniel 50: "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\"\n\
51: \"http://www.w3.org/TR/REC-html40/loose.dtd\">\n\
52: <html>\n\
53: <head>\n\
54: <title>This service is temporary down</title>\n\
55: </head>\n\
56: \n\
57: <body bgcolor=\"#FFFFFF\">\n\
58: <h1 align=\"center\">Sorry, this service is temporary down</h1>\n\
59: We are doing our best to get it back on-line,\n\
60: \n\
61: <p>The W3C system administrators</p>\n\
62: </body>\n\
63: </html>\n\
64: ";
1.2 daniel 65: */
1.1 daniel 66:
1.7 daniel 67: xmlSAXHandler emptySAXHandlerStruct = {
68: NULL, /* internalSubset */
69: NULL, /* isStandalone */
70: NULL, /* hasInternalSubset */
71: NULL, /* hasExternalSubset */
72: NULL, /* resolveEntity */
73: NULL, /* getEntity */
74: NULL, /* entityDecl */
75: NULL, /* notationDecl */
76: NULL, /* attributeDecl */
77: NULL, /* elementDecl */
78: NULL, /* unparsedEntityDecl */
79: NULL, /* setDocumentLocator */
80: NULL, /* startDocument */
81: NULL, /* endDocument */
82: NULL, /* startElement */
83: NULL, /* endElement */
84: NULL, /* reference */
85: NULL, /* characters */
86: NULL, /* ignorableWhitespace */
87: NULL, /* processingInstruction */
88: NULL, /* comment */
89: NULL, /* xmlParserWarning */
90: NULL, /* xmlParserError */
91: NULL, /* xmlParserError */
92: NULL, /* getParameterEntity */
93: };
94:
95: xmlSAXHandlerPtr emptySAXHandler = &emptySAXHandlerStruct;
96: extern xmlSAXHandlerPtr debugSAXHandler;
97:
98: /************************************************************************
99: * *
100: * Debug Handlers *
101: * *
102: ************************************************************************/
103:
/**
 * isStandaloneDebug:
 * @ctx:  opaque callback context (not used here)
 *
 * Debug stand-in for the "is this document standalone ?" SAX query.
 * It only logs the call on stdout.
 *
 * Returns 0 unconditionally.
 */
int
isStandaloneDebug(void *ctx)
{
    (void) ctx;

    fputs("SAX.isStandalone()\n", stdout);
    return 0;
}
118:
/**
 * hasInternalSubsetDebug:
 * @ctx:  opaque callback context (not used here)
 *
 * Debug stand-in for the "does this document have an internal subset ?"
 * SAX query.  It only logs the call on stdout.
 *
 * Returns 0 unconditionally.
 */
int
hasInternalSubsetDebug(void *ctx)
{
    (void) ctx;

    fputs("SAX.hasInternalSubset()\n", stdout);
    return 0;
}
133:
/**
 * hasExternalSubsetDebug:
 * @ctx:  opaque callback context (not used here)
 *
 * Debug stand-in for the "does this document have an external subset ?"
 * SAX query.  It only logs the call on stdout.
 *
 * Returns 0 unconditionally.
 */
int
hasExternalSubsetDebug(void *ctx)
{
    (void) ctx;

    fputs("SAX.hasExternalSubset()\n", stdout);
    return 0;
}
148:
149: /**
150: * hasInternalSubsetDebug:
151: * @ctxt: An XML parser context
152: *
153: * Does this document has an internal subset
154: */
155: void
156: internalSubsetDebug(void *ctx, const xmlChar *name,
157: const xmlChar *ExternalID, const xmlChar *SystemID)
158: {
159: /* xmlDtdPtr externalSubset; */
160:
161: fprintf(stdout, "SAX.internalSubset(%s, %s, %s)\n",
162: name, ExternalID, SystemID);
163:
164: /***********
165: if ((ExternalID != NULL) || (SystemID != NULL)) {
166: externalSubset = xmlParseDTD(ExternalID, SystemID);
167: if (externalSubset != NULL) {
168: xmlFreeDtd(externalSubset);
169: }
170: }
171: ***********/
172: }
173:
174: /**
175: * resolveEntityDebug:
176: * @ctxt: An XML parser context
177: * @publicId: The public ID of the entity
178: * @systemId: The system ID of the entity
179: *
180: * Special entity resolver, better left to the parser, it has
181: * more context than the application layer.
182: * The default behaviour is to NOT resolve the entities, in that case
183: * the ENTITY_REF nodes are built in the structure (and the parameter
184: * values).
185: *
186: * Returns the xmlParserInputPtr if inlined or NULL for DOM behaviour.
187: */
188: xmlParserInputPtr
189: resolveEntityDebug(void *ctx, const xmlChar *publicId, const xmlChar *systemId)
190: {
191: /* xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx; */
192:
193:
194: fprintf(stdout, "SAX.resolveEntity(");
195: if (publicId != NULL)
196: fprintf(stdout, "%s", (char *)publicId);
197: else
198: fprintf(stdout, " ");
199: if (systemId != NULL)
200: fprintf(stdout, ", %s)\n", (char *)systemId);
201: else
202: fprintf(stdout, ", )\n");
203: /*********
204: if (systemId != NULL) {
205: return(xmlNewInputFromFile(ctxt, (char *) systemId));
206: }
207: *********/
208: return(NULL);
209: }
210:
211: /**
212: * getEntityDebug:
213: * @ctxt: An XML parser context
214: * @name: The entity name
215: *
216: * Get an entity by name
217: *
218: * Returns the xmlParserInputPtr if inlined or NULL for DOM behaviour.
219: */
220: xmlEntityPtr
221: getEntityDebug(void *ctx, const xmlChar *name)
222: {
223: fprintf(stdout, "SAX.getEntity(%s)\n", name);
224: return(NULL);
225: }
226:
227: /**
228: * getParameterEntityDebug:
229: * @ctxt: An XML parser context
230: * @name: The entity name
231: *
232: * Get a parameter entity by name
233: *
234: * Returns the xmlParserInputPtr
235: */
236: xmlEntityPtr
237: getParameterEntityDebug(void *ctx, const xmlChar *name)
238: {
239: fprintf(stdout, "SAX.getParameterEntity(%s)\n", name);
240: return(NULL);
241: }
242:
243:
244: /**
245: * entityDeclDebug:
246: * @ctxt: An XML parser context
247: * @name: the entity name
248: * @type: the entity type
249: * @publicId: The public ID of the entity
250: * @systemId: The system ID of the entity
251: * @content: the entity value (without processing).
252: *
253: * An entity definition has been parsed
254: */
255: void
256: entityDeclDebug(void *ctx, const xmlChar *name, int type,
257: const xmlChar *publicId, const xmlChar *systemId, xmlChar *content)
258: {
259: fprintf(stdout, "SAX.entityDecl(%s, %d, %s, %s, %s)\n",
260: name, type, publicId, systemId, content);
261: }
262:
263: /**
264: * attributeDeclDebug:
265: * @ctxt: An XML parser context
266: * @name: the attribute name
267: * @type: the attribute type
268: *
269: * An attribute definition has been parsed
270: */
271: void
272: attributeDeclDebug(void *ctx, const xmlChar *elem, const xmlChar *name,
273: int type, int def, const xmlChar *defaultValue,
274: xmlEnumerationPtr tree)
275: {
276: fprintf(stdout, "SAX.attributeDecl(%s, %s, %d, %d, %s, ...)\n",
277: elem, name, type, def, defaultValue);
278: }
279:
280: /**
281: * elementDeclDebug:
282: * @ctxt: An XML parser context
283: * @name: the element name
284: * @type: the element type
285: * @content: the element value (without processing).
286: *
287: * An element definition has been parsed
288: */
289: void
290: elementDeclDebug(void *ctx, const xmlChar *name, int type,
291: xmlElementContentPtr content)
292: {
293: fprintf(stdout, "SAX.elementDecl(%s, %d, ...)\n",
294: name, type);
295: }
296:
297: /**
298: * notationDeclDebug:
299: * @ctxt: An XML parser context
300: * @name: The name of the notation
301: * @publicId: The public ID of the entity
302: * @systemId: The system ID of the entity
303: *
304: * What to do when a notation declaration has been parsed.
305: */
306: void
307: notationDeclDebug(void *ctx, const xmlChar *name,
308: const xmlChar *publicId, const xmlChar *systemId)
309: {
310: fprintf(stdout, "SAX.notationDecl(%s, %s, %s)\n",
311: (char *) name, (char *) publicId, (char *) systemId);
312: }
313:
314: /**
315: * unparsedEntityDeclDebug:
316: * @ctxt: An XML parser context
317: * @name: The name of the entity
318: * @publicId: The public ID of the entity
319: * @systemId: The system ID of the entity
320: * @notationName: the name of the notation
321: *
322: * What to do when an unparsed entity declaration is parsed
323: */
324: void
325: unparsedEntityDeclDebug(void *ctx, const xmlChar *name,
326: const xmlChar *publicId, const xmlChar *systemId,
327: const xmlChar *notationName)
328: {
329: fprintf(stdout, "SAX.unparsedEntityDecl(%s, %s, %s, %s)\n",
330: (char *) name, (char *) publicId, (char *) systemId,
331: (char *) notationName);
332: }
333:
334: /**
335: * setDocumentLocatorDebug:
336: * @ctxt: An XML parser context
337: * @loc: A SAX Locator
338: *
339: * Receive the document locator at startup, actually xmlDefaultSAXLocator
340: * Everything is available on the context, so this is useless in our case.
341: */
342: void
343: setDocumentLocatorDebug(void *ctx, xmlSAXLocatorPtr loc)
344: {
345: fprintf(stdout, "SAX.setDocumentLocator()\n");
346: }
347:
/**
 * startDocumentDebug:
 * @ctx:  opaque callback context (not used here)
 *
 * Debug callback for the start-of-document event: logs the call on
 * stdout.
 */
void
startDocumentDebug(void *ctx)
{
    (void) ctx;

    fputs("SAX.startDocument()\n", stdout);
}
359:
/**
 * endDocumentDebug:
 * @ctx:  opaque callback context (not used here)
 *
 * Debug callback for the end-of-document event: logs the call on
 * stdout.
 */
void
endDocumentDebug(void *ctx)
{
    (void) ctx;

    fputs("SAX.endDocument()\n", stdout);
}
371:
372: /**
373: * startElementDebug:
374: * @ctxt: An XML parser context
375: * @name: The element name
376: *
377: * called when an opening tag has been processed.
378: */
379: void
380: startElementDebug(void *ctx, const xmlChar *name, const xmlChar **atts)
381: {
382: int i;
383:
384: fprintf(stdout, "SAX.startElement(%s", (char *) name);
385: if (atts != NULL) {
386: for (i = 0;(atts[i] != NULL);i++) {
387: fprintf(stdout, ", %s='", atts[i++]);
388: fprintf(stdout, "%s'", atts[i]);
389: }
390: }
391: fprintf(stdout, ")\n");
392: }
393:
394: /**
395: * endElementDebug:
396: * @ctxt: An XML parser context
397: * @name: The element name
398: *
399: * called when the end of an element has been detected.
400: */
401: void
402: endElementDebug(void *ctx, const xmlChar *name)
403: {
404: fprintf(stdout, "SAX.endElement(%s)\n", (char *) name);
405: }
406:
407: /**
408: * charactersDebug:
409: * @ctxt: An XML parser context
410: * @ch: a xmlChar string
411: * @len: the number of xmlChar
412: *
413: * receiving some chars from the parser.
414: * Question: how much at a time ???
415: */
416: void
417: charactersDebug(void *ctx, const xmlChar *ch, int len)
418: {
419: int i;
420:
421: fprintf(stdout, "SAX.characters(");
422: for (i = 0;(i < len) && (i < 30);i++)
423: fprintf(stdout, "%c", ch[i]);
424: fprintf(stdout, ", %d)\n", len);
425: }
426:
427: /**
428: * referenceDebug:
429: * @ctxt: An XML parser context
430: * @name: The entity name
431: *
432: * called when an entity reference is detected.
433: */
434: void
435: referenceDebug(void *ctx, const xmlChar *name)
436: {
437: fprintf(stdout, "SAX.reference(%s)\n", name);
438: }
439:
440: /**
441: * ignorableWhitespaceDebug:
442: * @ctxt: An XML parser context
443: * @ch: a xmlChar string
444: * @start: the first char in the string
445: * @len: the number of xmlChar
446: *
447: * receiving some ignorable whitespaces from the parser.
448: * Question: how much at a time ???
449: */
450: void
451: ignorableWhitespaceDebug(void *ctx, const xmlChar *ch, int len)
452: {
453: fprintf(stdout, "SAX.ignorableWhitespace(%.30s, %d)\n",
454: (char *) ch, len);
455: }
456:
457: /**
458: * processingInstructionDebug:
459: * @ctxt: An XML parser context
460: * @target: the target name
461: * @data: the PI data's
462: * @len: the number of xmlChar
463: *
464: * A processing instruction has been parsed.
465: */
466: void
467: processingInstructionDebug(void *ctx, const xmlChar *target,
468: const xmlChar *data)
469: {
470: fprintf(stdout, "SAX.processingInstruction(%s, %s)\n",
471: (char *) target, (char *) data);
472: }
473:
474: /**
475: * commentDebug:
476: * @ctxt: An XML parser context
477: * @value: the comment content
478: *
479: * A comment has been parsed.
480: */
481: void
482: commentDebug(void *ctx, const xmlChar *value)
483: {
484: fprintf(stdout, "SAX.comment(%s)\n", value);
485: }
486:
/**
 * warningDebug:
 * @ctx:  opaque callback context (not used here)
 * @msg:  printf-style format of the message
 * @...:  arguments consumed by @msg
 *
 * Debug callback for parser warnings: writes "SAX.warning: " followed
 * by the formatted message to stdout.
 */
void
warningDebug(void *ctx, const char *msg, ...)
{
    va_list ap;

    (void) ctx;

    fputs("SAX.warning: ", stdout);

    va_start(ap, msg);
    vfprintf(stdout, msg, ap);
    va_end(ap);
}
506:
/**
 * errorDebug:
 * @ctx:  opaque callback context (not used here)
 * @msg:  printf-style format of the message
 * @...:  arguments consumed by @msg
 *
 * Debug callback for parser errors: writes "SAX.error: " followed by
 * the formatted message to stdout.
 */
void
errorDebug(void *ctx, const char *msg, ...)
{
    va_list ap;

    (void) ctx;

    fputs("SAX.error: ", stdout);

    va_start(ap, msg);
    vfprintf(stdout, msg, ap);
    va_end(ap);
}
526:
/**
 * fatalErrorDebug:
 * @ctx:  opaque callback context (not used here)
 * @msg:  printf-style format of the message
 * @...:  arguments consumed by @msg
 *
 * Debug callback for fatal parser errors: writes "SAX.fatalError: "
 * followed by the formatted message to stdout.
 */
void
fatalErrorDebug(void *ctx, const char *msg, ...)
{
    va_list ap;

    (void) ctx;

    fputs("SAX.fatalError: ", stdout);

    va_start(ap, msg);
    vfprintf(stdout, msg, ap);
    va_end(ap);
}
546:
547: xmlSAXHandler debugSAXHandlerStruct = {
548: internalSubsetDebug,
549: isStandaloneDebug,
550: hasInternalSubsetDebug,
551: hasExternalSubsetDebug,
552: resolveEntityDebug,
553: getEntityDebug,
554: entityDeclDebug,
555: notationDeclDebug,
556: attributeDeclDebug,
557: elementDeclDebug,
558: unparsedEntityDeclDebug,
559: setDocumentLocatorDebug,
560: startDocumentDebug,
561: endDocumentDebug,
562: startElementDebug,
563: endElementDebug,
564: referenceDebug,
565: charactersDebug,
566: ignorableWhitespaceDebug,
567: processingInstructionDebug,
568: commentDebug,
569: warningDebug,
570: errorDebug,
571: fatalErrorDebug,
572: getParameterEntityDebug,
573: };
574:
575: xmlSAXHandlerPtr debugSAXHandler = &debugSAXHandlerStruct;
1.1 daniel 576: /************************************************************************
577: * *
578: * Debug *
579: * *
580: ************************************************************************/
581:
1.7 daniel 582: void parseSAXFile(char *filename) {
583: htmlDocPtr doc;
584: /*
585: * Empty callbacks for checking
586: */
587: doc = htmlSAXParseFile(filename, NULL, emptySAXHandler, NULL);
588: if (doc != NULL) {
589: fprintf(stdout, "htmlSAXParseFile returned non-NULL\n");
590: xmlFreeDoc(doc);
591: }
592:
593: if (!noout) {
594: /*
595: * Debug callback
596: */
597: doc = htmlSAXParseFile(filename, NULL, debugSAXHandler, NULL);
598: if (doc != NULL) {
599: fprintf(stdout, "htmlSAXParseFile returned non-NULL\n");
600: xmlFreeDoc(doc);
601: }
602: }
603: }
604:
1.1 daniel 605: void parseAndPrintFile(char *filename) {
606: htmlDocPtr doc, tmp;
607:
608: /*
609: * build an HTML tree from a string;
610: */
611: doc = htmlParseFile(filename, NULL);
612:
613: /*
614: * test intermediate copy if needed.
615: */
616: if (copy) {
617: tmp = doc;
618: doc = xmlCopyDoc(doc, 1);
619: xmlFreeDoc(tmp);
620: }
621:
622: /*
623: * print it.
624: */
1.7 daniel 625: if (!noout) {
626: if (!debug)
627: htmlDocDump(stdout, doc);
628: else
629: xmlDebugDumpDocument(stdout, doc);
630: }
1.1 daniel 631:
632: /*
633: * free it.
634: */
635: xmlFreeDoc(doc);
636: }
637:
1.5 daniel 638: void parseAndPrintBuffer(xmlChar *buf) {
1.1 daniel 639: htmlDocPtr doc, tmp;
640:
641: /*
642: * build an HTML tree from a string;
643: */
644: doc = htmlParseDoc(buf, NULL);
645:
646: /*
647: * test intermediate copy if needed.
648: */
649: if (copy) {
650: tmp = doc;
651: doc = xmlCopyDoc(doc, 1);
652: xmlFreeDoc(tmp);
653: }
654:
655: /*
656: * print it.
657: */
658: if (!debug)
659: htmlDocDump(stdout, doc);
660: else
661: xmlDebugDumpDocument(stdout, doc);
662:
663: /*
664: * free it.
665: */
666: xmlFreeDoc(doc);
667: }
668:
669: int main(int argc, char **argv) {
1.7 daniel 670: int i, count;
1.1 daniel 671: int files = 0;
672:
673: for (i = 1; i < argc ; i++) {
674: if ((!strcmp(argv[i], "-debug")) || (!strcmp(argv[i], "--debug")))
675: debug++;
676: else if ((!strcmp(argv[i], "-copy")) || (!strcmp(argv[i], "--copy")))
677: copy++;
1.7 daniel 678: else if ((!strcmp(argv[i], "-sax")) || (!strcmp(argv[i], "--sax")))
679: sax++;
680: else if ((!strcmp(argv[i], "-noout")) || (!strcmp(argv[i], "--noout")))
681: noout++;
682: else if ((!strcmp(argv[i], "-repeat")) ||
683: (!strcmp(argv[i], "--repeat")))
684: repeat++;
1.1 daniel 685: }
686: for (i = 1; i < argc ; i++) {
687: if (argv[i][0] != '-') {
1.7 daniel 688: if (repeat) {
689: for (count = 0;count < 100 * repeat;count++) {
690: if (sax)
691: parseSAXFile(argv[i]);
692: else
693: parseAndPrintFile(argv[i]);
694: }
695: } else {
696: if (sax)
697: parseSAXFile(argv[i]);
698: else
699: parseAndPrintFile(argv[i]);
700: }
1.1 daniel 701: files ++;
702: }
703: }
704: if (files == 0) {
1.7 daniel 705: printf("Usage : %s [--debug] [--copy] [--copy] HTMLfiles ...\n",
1.1 daniel 706: argv[0]);
707: printf("\tParse the HTML files and output the result of the parsing\n");
708: printf("\t--debug : dump a debug tree of the in-memory document\n");
709: printf("\t--copy : used to test the internal copy implementation\n");
1.7 daniel 710: printf("\t--sax : debug the sequence of SAX callbacks\n");
711: printf("\t--repeat : parse the file 100 times, for timing or profiling\n");
712: printf("\t--noout : do not print the result\n");
1.1 daniel 713: }
1.8 daniel 714: xmlCleanupParser();
1.6 daniel 715: xmlMemoryDump();
1.1 daniel 716:
717: return(0);
718: }
Webmaster