Annotation of XML/testHTML.c, revision 1.11
1.1 daniel 1: /*
2: * testHTML.c : a small tester program for HTML input.
3: *
4: * See Copyright for the status of this software.
5: *
6: * Daniel.Veillard@w3.org
7: */
8:
9: #ifdef WIN32
1.9 daniel 10: #include "win32config.h"
1.1 daniel 11: #else
1.4 daniel 12: #include "config.h"
1.1 daniel 13: #endif
1.3 daniel 14:
15: #include <stdio.h>
16: #include <string.h>
1.7 daniel 17: #include <stdarg.h>
18:
1.3 daniel 19:
20: #ifdef HAVE_SYS_TYPES_H
1.1 daniel 21: #include <sys/types.h>
1.3 daniel 22: #endif
1.1 daniel 23: #ifdef HAVE_SYS_STAT_H
24: #include <sys/stat.h>
25: #endif
26: #ifdef HAVE_FCNTL_H
27: #include <fcntl.h>
28: #endif
29: #ifdef HAVE_UNISTD_H
30: #include <unistd.h>
31: #endif
1.3 daniel 32: #ifdef HAVE_STDLIB_H
1.1 daniel 33: #include <stdlib.h>
1.3 daniel 34: #endif
1.1 daniel 35:
1.6 daniel 36: #include "xmlmemory.h"
1.1 daniel 37: #include "HTMLparser.h"
38: #include "HTMLtree.h"
39: #include "debugXML.h"
40:
/*
 * Command line switches; each one counts how many times the matching
 * option was given on the command line (see main()).
 */

/* what to do with the parsed document */
static int debug = 0;	/* --debug : dump the tree with xmlDebugDumpDocument */
static int copy = 0;	/* --copy  : run the tree through xmlCopyDoc first */
static int noout = 0;	/* --noout : do not print the result */

/* how to parse */
static int sax = 0;	/* --sax   : use the SAX callback testers */
static int push = 0;	/* --push  : feed the parser chunk by chunk */
static int repeat = 0;	/* --repeat: parse each file 100 * repeat times */
1.1 daniel 47:
1.7 daniel 48: xmlSAXHandler emptySAXHandlerStruct = {
49: NULL, /* internalSubset */
50: NULL, /* isStandalone */
51: NULL, /* hasInternalSubset */
52: NULL, /* hasExternalSubset */
53: NULL, /* resolveEntity */
54: NULL, /* getEntity */
55: NULL, /* entityDecl */
56: NULL, /* notationDecl */
57: NULL, /* attributeDecl */
58: NULL, /* elementDecl */
59: NULL, /* unparsedEntityDecl */
60: NULL, /* setDocumentLocator */
61: NULL, /* startDocument */
62: NULL, /* endDocument */
63: NULL, /* startElement */
64: NULL, /* endElement */
65: NULL, /* reference */
66: NULL, /* characters */
67: NULL, /* ignorableWhitespace */
68: NULL, /* processingInstruction */
69: NULL, /* comment */
70: NULL, /* xmlParserWarning */
71: NULL, /* xmlParserError */
72: NULL, /* xmlParserError */
73: NULL, /* getParameterEntity */
74: };
75:
76: xmlSAXHandlerPtr emptySAXHandler = &emptySAXHandlerStruct;
77: extern xmlSAXHandlerPtr debugSAXHandler;
78:
79: /************************************************************************
80: * *
81: * Debug Handlers *
82: * *
83: ************************************************************************/
84:
/**
 * isStandaloneDebug:
 * @ctx:  the SAX user data (unused)
 *
 * Debug version of the isStandalone callback: it only traces the call
 * on stdout.
 *
 * Returns 0 in all cases
 */
int
isStandaloneDebug(void *ctx)
{
    (void) ctx;

    fputs("SAX.isStandalone()\n", stdout);
    return 0;
}
99:
/**
 * hasInternalSubsetDebug:
 * @ctx:  the SAX user data (unused)
 *
 * Debug version of the hasInternalSubset callback: it only traces the
 * call on stdout.
 *
 * Returns 0 in all cases
 */
int
hasInternalSubsetDebug(void *ctx)
{
    (void) ctx;

    fputs("SAX.hasInternalSubset()\n", stdout);
    return 0;
}
114:
/**
 * hasExternalSubsetDebug:
 * @ctx:  the SAX user data (unused)
 *
 * Debug version of the hasExternalSubset callback: it only traces the
 * call on stdout.
 *
 * Returns 0 in all cases
 */
int
hasExternalSubsetDebug(void *ctx)
{
    (void) ctx;

    fputs("SAX.hasExternalSubset()\n", stdout);
    return 0;
}
129:
130: /**
131: * hasInternalSubsetDebug:
132: * @ctxt: An XML parser context
133: *
134: * Does this document has an internal subset
135: */
136: void
137: internalSubsetDebug(void *ctx, const xmlChar *name,
138: const xmlChar *ExternalID, const xmlChar *SystemID)
139: {
140: /* xmlDtdPtr externalSubset; */
141:
142: fprintf(stdout, "SAX.internalSubset(%s, %s, %s)\n",
143: name, ExternalID, SystemID);
144:
145: /***********
146: if ((ExternalID != NULL) || (SystemID != NULL)) {
147: externalSubset = xmlParseDTD(ExternalID, SystemID);
148: if (externalSubset != NULL) {
149: xmlFreeDtd(externalSubset);
150: }
151: }
152: ***********/
153: }
154:
155: /**
156: * resolveEntityDebug:
157: * @ctxt: An XML parser context
158: * @publicId: The public ID of the entity
159: * @systemId: The system ID of the entity
160: *
161: * Special entity resolver, better left to the parser, it has
162: * more context than the application layer.
163: * The default behaviour is to NOT resolve the entities, in that case
164: * the ENTITY_REF nodes are built in the structure (and the parameter
165: * values).
166: *
167: * Returns the xmlParserInputPtr if inlined or NULL for DOM behaviour.
168: */
169: xmlParserInputPtr
170: resolveEntityDebug(void *ctx, const xmlChar *publicId, const xmlChar *systemId)
171: {
172: /* xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx; */
173:
174:
175: fprintf(stdout, "SAX.resolveEntity(");
176: if (publicId != NULL)
177: fprintf(stdout, "%s", (char *)publicId);
178: else
179: fprintf(stdout, " ");
180: if (systemId != NULL)
181: fprintf(stdout, ", %s)\n", (char *)systemId);
182: else
183: fprintf(stdout, ", )\n");
184: /*********
185: if (systemId != NULL) {
186: return(xmlNewInputFromFile(ctxt, (char *) systemId));
187: }
188: *********/
189: return(NULL);
190: }
191:
192: /**
193: * getEntityDebug:
194: * @ctxt: An XML parser context
195: * @name: The entity name
196: *
197: * Get an entity by name
198: *
199: * Returns the xmlParserInputPtr if inlined or NULL for DOM behaviour.
200: */
201: xmlEntityPtr
202: getEntityDebug(void *ctx, const xmlChar *name)
203: {
204: fprintf(stdout, "SAX.getEntity(%s)\n", name);
205: return(NULL);
206: }
207:
208: /**
209: * getParameterEntityDebug:
210: * @ctxt: An XML parser context
211: * @name: The entity name
212: *
213: * Get a parameter entity by name
214: *
215: * Returns the xmlParserInputPtr
216: */
217: xmlEntityPtr
218: getParameterEntityDebug(void *ctx, const xmlChar *name)
219: {
220: fprintf(stdout, "SAX.getParameterEntity(%s)\n", name);
221: return(NULL);
222: }
223:
224:
225: /**
226: * entityDeclDebug:
227: * @ctxt: An XML parser context
228: * @name: the entity name
229: * @type: the entity type
230: * @publicId: The public ID of the entity
231: * @systemId: The system ID of the entity
232: * @content: the entity value (without processing).
233: *
234: * An entity definition has been parsed
235: */
236: void
237: entityDeclDebug(void *ctx, const xmlChar *name, int type,
238: const xmlChar *publicId, const xmlChar *systemId, xmlChar *content)
239: {
240: fprintf(stdout, "SAX.entityDecl(%s, %d, %s, %s, %s)\n",
241: name, type, publicId, systemId, content);
242: }
243:
244: /**
245: * attributeDeclDebug:
246: * @ctxt: An XML parser context
247: * @name: the attribute name
248: * @type: the attribute type
249: *
250: * An attribute definition has been parsed
251: */
252: void
253: attributeDeclDebug(void *ctx, const xmlChar *elem, const xmlChar *name,
254: int type, int def, const xmlChar *defaultValue,
255: xmlEnumerationPtr tree)
256: {
257: fprintf(stdout, "SAX.attributeDecl(%s, %s, %d, %d, %s, ...)\n",
258: elem, name, type, def, defaultValue);
259: }
260:
261: /**
262: * elementDeclDebug:
263: * @ctxt: An XML parser context
264: * @name: the element name
265: * @type: the element type
266: * @content: the element value (without processing).
267: *
268: * An element definition has been parsed
269: */
270: void
271: elementDeclDebug(void *ctx, const xmlChar *name, int type,
272: xmlElementContentPtr content)
273: {
274: fprintf(stdout, "SAX.elementDecl(%s, %d, ...)\n",
275: name, type);
276: }
277:
278: /**
279: * notationDeclDebug:
280: * @ctxt: An XML parser context
281: * @name: The name of the notation
282: * @publicId: The public ID of the entity
283: * @systemId: The system ID of the entity
284: *
285: * What to do when a notation declaration has been parsed.
286: */
287: void
288: notationDeclDebug(void *ctx, const xmlChar *name,
289: const xmlChar *publicId, const xmlChar *systemId)
290: {
291: fprintf(stdout, "SAX.notationDecl(%s, %s, %s)\n",
292: (char *) name, (char *) publicId, (char *) systemId);
293: }
294:
295: /**
296: * unparsedEntityDeclDebug:
297: * @ctxt: An XML parser context
298: * @name: The name of the entity
299: * @publicId: The public ID of the entity
300: * @systemId: The system ID of the entity
301: * @notationName: the name of the notation
302: *
303: * What to do when an unparsed entity declaration is parsed
304: */
305: void
306: unparsedEntityDeclDebug(void *ctx, const xmlChar *name,
307: const xmlChar *publicId, const xmlChar *systemId,
308: const xmlChar *notationName)
309: {
310: fprintf(stdout, "SAX.unparsedEntityDecl(%s, %s, %s, %s)\n",
311: (char *) name, (char *) publicId, (char *) systemId,
312: (char *) notationName);
313: }
314:
315: /**
316: * setDocumentLocatorDebug:
317: * @ctxt: An XML parser context
318: * @loc: A SAX Locator
319: *
320: * Receive the document locator at startup, actually xmlDefaultSAXLocator
321: * Everything is available on the context, so this is useless in our case.
322: */
323: void
324: setDocumentLocatorDebug(void *ctx, xmlSAXLocatorPtr loc)
325: {
326: fprintf(stdout, "SAX.setDocumentLocator()\n");
327: }
328:
/**
 * startDocumentDebug:
 * @ctx:  the SAX user data (unused)
 *
 * Debug version of the startDocument callback: traces on stdout that
 * the document processing has started.
 */
void
startDocumentDebug(void *ctx)
{
    (void) ctx;

    fputs("SAX.startDocument()\n", stdout);
}
340:
/**
 * endDocumentDebug:
 * @ctx:  the SAX user data (unused)
 *
 * Debug version of the endDocument callback: traces on stdout that the
 * end of the document has been reached.
 */
void
endDocumentDebug(void *ctx)
{
    (void) ctx;

    fputs("SAX.endDocument()\n", stdout);
}
352:
353: /**
354: * startElementDebug:
355: * @ctxt: An XML parser context
356: * @name: The element name
357: *
358: * called when an opening tag has been processed.
359: */
360: void
361: startElementDebug(void *ctx, const xmlChar *name, const xmlChar **atts)
362: {
363: int i;
364:
365: fprintf(stdout, "SAX.startElement(%s", (char *) name);
366: if (atts != NULL) {
367: for (i = 0;(atts[i] != NULL);i++) {
368: fprintf(stdout, ", %s='", atts[i++]);
369: fprintf(stdout, "%s'", atts[i]);
370: }
371: }
372: fprintf(stdout, ")\n");
373: }
374:
375: /**
376: * endElementDebug:
377: * @ctxt: An XML parser context
378: * @name: The element name
379: *
380: * called when the end of an element has been detected.
381: */
382: void
383: endElementDebug(void *ctx, const xmlChar *name)
384: {
385: fprintf(stdout, "SAX.endElement(%s)\n", (char *) name);
386: }
387:
388: /**
389: * charactersDebug:
390: * @ctxt: An XML parser context
391: * @ch: a xmlChar string
392: * @len: the number of xmlChar
393: *
394: * receiving some chars from the parser.
395: * Question: how much at a time ???
396: */
397: void
398: charactersDebug(void *ctx, const xmlChar *ch, int len)
399: {
400: int i;
401:
402: fprintf(stdout, "SAX.characters(");
403: for (i = 0;(i < len) && (i < 30);i++)
404: fprintf(stdout, "%c", ch[i]);
405: fprintf(stdout, ", %d)\n", len);
406: }
407:
408: /**
409: * referenceDebug:
410: * @ctxt: An XML parser context
411: * @name: The entity name
412: *
413: * called when an entity reference is detected.
414: */
415: void
416: referenceDebug(void *ctx, const xmlChar *name)
417: {
418: fprintf(stdout, "SAX.reference(%s)\n", name);
419: }
420:
421: /**
422: * ignorableWhitespaceDebug:
423: * @ctxt: An XML parser context
424: * @ch: a xmlChar string
425: * @start: the first char in the string
426: * @len: the number of xmlChar
427: *
428: * receiving some ignorable whitespaces from the parser.
429: * Question: how much at a time ???
430: */
431: void
432: ignorableWhitespaceDebug(void *ctx, const xmlChar *ch, int len)
433: {
434: fprintf(stdout, "SAX.ignorableWhitespace(%.30s, %d)\n",
435: (char *) ch, len);
436: }
437:
438: /**
439: * processingInstructionDebug:
440: * @ctxt: An XML parser context
441: * @target: the target name
442: * @data: the PI data's
443: * @len: the number of xmlChar
444: *
445: * A processing instruction has been parsed.
446: */
447: void
448: processingInstructionDebug(void *ctx, const xmlChar *target,
449: const xmlChar *data)
450: {
451: fprintf(stdout, "SAX.processingInstruction(%s, %s)\n",
452: (char *) target, (char *) data);
453: }
454:
455: /**
456: * commentDebug:
457: * @ctxt: An XML parser context
458: * @value: the comment content
459: *
460: * A comment has been parsed.
461: */
462: void
463: commentDebug(void *ctx, const xmlChar *value)
464: {
465: fprintf(stdout, "SAX.comment(%s)\n", value);
466: }
467:
/**
 * warningDebug:
 * @ctx:  the SAX user data (unused)
 * @msg:  printf-like format of the message
 * @...:  extra parameters for the format
 *
 * Debug version of the warning callback: prints the formatted message
 * on stdout, prefixed with "SAX.warning: ".
 */
void
warningDebug(void *ctx, const char *msg, ...)
{
    va_list ap;

    (void) ctx;

    fputs("SAX.warning: ", stdout);

    va_start(ap, msg);
    vprintf(msg, ap);
    va_end(ap);
}
487:
/**
 * errorDebug:
 * @ctx:  the SAX user data (unused)
 * @msg:  printf-like format of the message
 * @...:  extra parameters for the format
 *
 * Debug version of the error callback: prints the formatted message on
 * stdout, prefixed with "SAX.error: ".
 */
void
errorDebug(void *ctx, const char *msg, ...)
{
    va_list ap;

    (void) ctx;

    fputs("SAX.error: ", stdout);

    va_start(ap, msg);
    vprintf(msg, ap);
    va_end(ap);
}
507:
/**
 * fatalErrorDebug:
 * @ctx:  the SAX user data (unused)
 * @msg:  printf-like format of the message
 * @...:  extra parameters for the format
 *
 * Debug version of the fatalError callback: prints the formatted
 * message on stdout, prefixed with "SAX.fatalError: ".
 */
void
fatalErrorDebug(void *ctx, const char *msg, ...)
{
    va_list ap;

    (void) ctx;

    fputs("SAX.fatalError: ", stdout);

    va_start(ap, msg);
    vprintf(msg, ap);
    va_end(ap);
}
527:
528: xmlSAXHandler debugSAXHandlerStruct = {
529: internalSubsetDebug,
530: isStandaloneDebug,
531: hasInternalSubsetDebug,
532: hasExternalSubsetDebug,
533: resolveEntityDebug,
534: getEntityDebug,
535: entityDeclDebug,
536: notationDeclDebug,
537: attributeDeclDebug,
538: elementDeclDebug,
539: unparsedEntityDeclDebug,
540: setDocumentLocatorDebug,
541: startDocumentDebug,
542: endDocumentDebug,
543: startElementDebug,
544: endElementDebug,
545: referenceDebug,
546: charactersDebug,
547: ignorableWhitespaceDebug,
548: processingInstructionDebug,
549: commentDebug,
550: warningDebug,
551: errorDebug,
552: fatalErrorDebug,
553: getParameterEntityDebug,
554: };
555:
556: xmlSAXHandlerPtr debugSAXHandler = &debugSAXHandlerStruct;
1.1 daniel 557: /************************************************************************
558: * *
559: * Debug *
560: * *
561: ************************************************************************/
562:
1.7 daniel 563: void parseSAXFile(char *filename) {
564: htmlDocPtr doc;
565: /*
566: * Empty callbacks for checking
567: */
568: doc = htmlSAXParseFile(filename, NULL, emptySAXHandler, NULL);
569: if (doc != NULL) {
570: fprintf(stdout, "htmlSAXParseFile returned non-NULL\n");
571: xmlFreeDoc(doc);
572: }
573:
574: if (!noout) {
575: /*
576: * Debug callback
577: */
578: doc = htmlSAXParseFile(filename, NULL, debugSAXHandler, NULL);
579: if (doc != NULL) {
580: fprintf(stdout, "htmlSAXParseFile returned non-NULL\n");
581: xmlFreeDoc(doc);
582: }
583: }
584: }
585:
1.1 daniel 586: void parseAndPrintFile(char *filename) {
1.11 ! daniel 587: htmlDocPtr doc = NULL, tmp;
1.1 daniel 588:
589: /*
590: * build an HTML tree from a string;
591: */
1.10 daniel 592: if (push) {
593: FILE *f;
594:
595: f = fopen(filename, "r");
596: if (f != NULL) {
597: int res, size = 3;
598: char chars[1024];
599: htmlParserCtxtPtr ctxt;
600:
601: if (repeat)
602: size = 1024;
603: res = fread(chars, 1, 4, f);
604: if (res > 0) {
605: ctxt = htmlCreatePushParserCtxt(NULL, NULL,
606: chars, res, filename, 0);
607: while ((res = fread(chars, 1, size, f)) > 0) {
608: htmlParseChunk(ctxt, chars, res, 0);
609: }
610: htmlParseChunk(ctxt, chars, 0, 1);
611: doc = ctxt->myDoc;
612: htmlFreeParserCtxt(ctxt);
613: }
614: }
615: } else {
616: doc = htmlParseFile(filename, NULL);
617: }
618: if (doc == NULL) {
619: fprintf(stderr, "Could not parse %s\n", filename);
620: }
1.1 daniel 621:
622: /*
623: * test intermediate copy if needed.
624: */
625: if (copy) {
626: tmp = doc;
627: doc = xmlCopyDoc(doc, 1);
628: xmlFreeDoc(tmp);
629: }
630:
631: /*
632: * print it.
633: */
1.7 daniel 634: if (!noout) {
635: if (!debug)
636: htmlDocDump(stdout, doc);
637: else
638: xmlDebugDumpDocument(stdout, doc);
639: }
1.1 daniel 640:
641: /*
642: * free it.
643: */
644: xmlFreeDoc(doc);
645: }
646:
647: int main(int argc, char **argv) {
1.7 daniel 648: int i, count;
1.1 daniel 649: int files = 0;
650:
651: for (i = 1; i < argc ; i++) {
652: if ((!strcmp(argv[i], "-debug")) || (!strcmp(argv[i], "--debug")))
653: debug++;
654: else if ((!strcmp(argv[i], "-copy")) || (!strcmp(argv[i], "--copy")))
655: copy++;
1.10 daniel 656: else if ((!strcmp(argv[i], "-push")) || (!strcmp(argv[i], "--push")))
657: push++;
1.7 daniel 658: else if ((!strcmp(argv[i], "-sax")) || (!strcmp(argv[i], "--sax")))
659: sax++;
660: else if ((!strcmp(argv[i], "-noout")) || (!strcmp(argv[i], "--noout")))
661: noout++;
662: else if ((!strcmp(argv[i], "-repeat")) ||
663: (!strcmp(argv[i], "--repeat")))
664: repeat++;
1.1 daniel 665: }
666: for (i = 1; i < argc ; i++) {
667: if (argv[i][0] != '-') {
1.7 daniel 668: if (repeat) {
669: for (count = 0;count < 100 * repeat;count++) {
670: if (sax)
671: parseSAXFile(argv[i]);
672: else
673: parseAndPrintFile(argv[i]);
674: }
675: } else {
676: if (sax)
677: parseSAXFile(argv[i]);
678: else
679: parseAndPrintFile(argv[i]);
680: }
1.1 daniel 681: files ++;
682: }
683: }
684: if (files == 0) {
1.7 daniel 685: printf("Usage : %s [--debug] [--copy] [--copy] HTMLfiles ...\n",
1.1 daniel 686: argv[0]);
687: printf("\tParse the HTML files and output the result of the parsing\n");
688: printf("\t--debug : dump a debug tree of the in-memory document\n");
689: printf("\t--copy : used to test the internal copy implementation\n");
1.7 daniel 690: printf("\t--sax : debug the sequence of SAX callbacks\n");
1.10 daniel 691: printf("\t--repeat : parse the file 100 times, for timing\n");
1.7 daniel 692: printf("\t--noout : do not print the result\n");
1.10 daniel 693: printf("\t--push : use the push mode parser\n");
1.1 daniel 694: }
1.8 daniel 695: xmlCleanupParser();
1.6 daniel 696: xmlMemoryDump();
1.1 daniel 697:
698: return(0);
699: }
Webmaster