Annotation of XML/HTMLtree.c, revision 1.16
1.1 daniel 1: /*
2: * HTMLtree.c : implementation of access functions for an HTML tree.
3: *
4: * See Copyright for the status of this software.
5: *
6: * Daniel.Veillard@w3.org
7: */
8:
1.5 daniel 9:
1.13 daniel 10: #ifdef WIN32
11: #include "win32config.h"
12: #else
1.1 daniel 13: #include "config.h"
1.5 daniel 14: #endif
1.1 daniel 15: #include <stdio.h>
1.5 daniel 16: #include <string.h> /* for memset() only ! */
17:
18: #ifdef HAVE_CTYPE_H
1.1 daniel 19: #include <ctype.h>
1.5 daniel 20: #endif
21: #ifdef HAVE_STDLIB_H
1.1 daniel 22: #include <stdlib.h>
1.5 daniel 23: #endif
1.1 daniel 24:
1.4 daniel 25: #include "xmlmemory.h"
1.1 daniel 26: #include "HTMLparser.h"
27: #include "HTMLtree.h"
28: #include "entities.h"
29: #include "valid.h"
30:
1.14 daniel 31: static void
32: htmlDocContentDump(xmlBufferPtr buf, xmlDocPtr cur);
33:
1.1 daniel 34: /**
35: * htmlDtdDump:
36: * @buf: the HTML buffer output
37: * @doc: the document
38: *
39: * Dump the HTML document DTD, if any.
40: */
41: static void
42: htmlDtdDump(xmlBufferPtr buf, xmlDocPtr doc) {
43: xmlDtdPtr cur = doc->intSubset;
44:
45: if (cur == NULL) {
46: fprintf(stderr, "htmlDtdDump : no internal subset\n");
47: return;
48: }
49: xmlBufferWriteChar(buf, "<!DOCTYPE ");
50: xmlBufferWriteCHAR(buf, cur->name);
51: if (cur->ExternalID != NULL) {
52: xmlBufferWriteChar(buf, " PUBLIC ");
53: xmlBufferWriteQuotedString(buf, cur->ExternalID);
1.2 daniel 54: if (cur->SystemID != NULL) {
55: xmlBufferWriteChar(buf, " ");
56: xmlBufferWriteQuotedString(buf, cur->SystemID);
57: }
1.1 daniel 58: } else if (cur->SystemID != NULL) {
59: xmlBufferWriteChar(buf, " SYSTEM ");
60: xmlBufferWriteQuotedString(buf, cur->SystemID);
61: }
62: xmlBufferWriteChar(buf, ">\n");
63: }
64:
65: /**
66: * htmlAttrDump:
67: * @buf: the HTML buffer output
68: * @doc: the document
69: * @cur: the attribute pointer
70: *
71: * Dump an HTML attribute
72: */
73: static void
74: htmlAttrDump(xmlBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur) {
1.6 daniel 75: xmlChar *value;
1.1 daniel 76:
77: if (cur == NULL) {
78: fprintf(stderr, "htmlAttrDump : property == NULL\n");
79: return;
80: }
81: xmlBufferWriteChar(buf, " ");
82: xmlBufferWriteCHAR(buf, cur->name);
83: value = xmlNodeListGetString(doc, cur->val, 0);
84: if (value) {
85: xmlBufferWriteChar(buf, "=");
86: xmlBufferWriteQuotedString(buf, value);
1.4 daniel 87: xmlFree(value);
1.1 daniel 88: } else {
89: xmlBufferWriteChar(buf, "=\"\"");
90: }
91: }
92:
93: /**
94: * htmlAttrListDump:
95: * @buf: the HTML buffer output
96: * @doc: the document
97: * @cur: the first attribute pointer
98: *
99: * Dump a list of HTML attributes
100: */
101: static void
102: htmlAttrListDump(xmlBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur) {
103: if (cur == NULL) {
104: fprintf(stderr, "htmlAttrListDump : property == NULL\n");
105: return;
106: }
107: while (cur != NULL) {
108: htmlAttrDump(buf, doc, cur);
109: cur = cur->next;
110: }
111: }
112:
113:
1.14 daniel 114: void
1.1 daniel 115: htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur);
116: /**
117: * htmlNodeListDump:
118: * @buf: the HTML buffer output
119: * @doc: the document
120: * @cur: the first node
121: *
122: * Dump an HTML node list, recursive behaviour,children are printed too.
123: */
124: static void
125: htmlNodeListDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
126: if (cur == NULL) {
127: fprintf(stderr, "htmlNodeListDump : node == NULL\n");
128: return;
129: }
130: while (cur != NULL) {
131: htmlNodeDump(buf, doc, cur);
132: cur = cur->next;
133: }
134: }
135:
136: /**
137: * htmlNodeDump:
138: * @buf: the HTML buffer output
139: * @doc: the document
140: * @cur: the current node
141: *
142: * Dump an HTML node, recursive behaviour,children are printed too.
143: */
1.14 daniel 144: void
1.1 daniel 145: htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
146: htmlElemDescPtr info;
147:
148: if (cur == NULL) {
149: fprintf(stderr, "htmlNodeDump : node == NULL\n");
150: return;
151: }
152: /*
153: * Special cases.
154: */
1.14 daniel 155: if (cur->type == XML_HTML_DOCUMENT_NODE) {
156: htmlDocContentDump(buf, (xmlDocPtr) cur);
157: return;
158: }
1.1 daniel 159: if (cur->type == HTML_TEXT_NODE) {
160: if (cur->content != NULL) {
1.6 daniel 161: xmlChar *buffer;
1.1 daniel 162:
163: /* uses the HTML encoding routine !!!!!!!!!! */
1.9 daniel 164: #ifndef XML_USE_BUFFER_CONTENT
1.1 daniel 165: buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
1.9 daniel 166: #else
167: buffer = xmlEncodeEntitiesReentrant(doc,
168: xmlBufferContent(cur->content));
169: #endif
1.1 daniel 170: if (buffer != NULL) {
171: xmlBufferWriteCHAR(buf, buffer);
1.4 daniel 172: xmlFree(buffer);
1.1 daniel 173: }
174: }
175: return;
176: }
177: if (cur->type == HTML_COMMENT_NODE) {
178: if (cur->content != NULL) {
179: xmlBufferWriteChar(buf, "<!--");
1.9 daniel 180: #ifndef XML_USE_BUFFER_CONTENT
1.1 daniel 181: xmlBufferWriteCHAR(buf, cur->content);
1.9 daniel 182: #else
183: xmlBufferWriteCHAR(buf, xmlBufferContent(cur->content));
184: #endif
1.1 daniel 185: xmlBufferWriteChar(buf, "-->");
186: }
187: return;
188: }
189: if (cur->type == HTML_ENTITY_REF_NODE) {
190: xmlBufferWriteChar(buf, "&");
191: xmlBufferWriteCHAR(buf, cur->name);
192: xmlBufferWriteChar(buf, ";");
193: return;
194: }
195:
196: /*
197: * Get specific HTmL info for taht node.
198: */
199: info = htmlTagLookup(cur->name);
200:
201: xmlBufferWriteChar(buf, "<");
202: xmlBufferWriteCHAR(buf, cur->name);
203: if (cur->properties != NULL)
204: htmlAttrListDump(buf, doc, cur->properties);
205:
1.7 daniel 206: if ((info != NULL) && (info->empty)) {
1.1 daniel 207: xmlBufferWriteChar(buf, ">");
208: if (cur->next != NULL) {
209: if ((cur->next->type != HTML_TEXT_NODE) &&
210: (cur->next->type != HTML_ENTITY_REF_NODE))
211: xmlBufferWriteChar(buf, "\n");
212: }
213: return;
214: }
215: if ((cur->content == NULL) && (cur->childs == NULL)) {
1.7 daniel 216: if ((info != NULL) && (info->endTag != 0))
1.1 daniel 217: xmlBufferWriteChar(buf, ">");
218: else {
219: xmlBufferWriteChar(buf, "></");
220: xmlBufferWriteCHAR(buf, cur->name);
221: xmlBufferWriteChar(buf, ">");
222: }
223: if (cur->next != NULL) {
224: if ((cur->next->type != HTML_TEXT_NODE) &&
225: (cur->next->type != HTML_ENTITY_REF_NODE))
226: xmlBufferWriteChar(buf, "\n");
227: }
228: return;
229: }
230: xmlBufferWriteChar(buf, ">");
231: if (cur->content != NULL) {
1.6 daniel 232: xmlChar *buffer;
1.1 daniel 233:
1.9 daniel 234: #ifndef XML_USE_BUFFER_CONTENT
235: buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
236: #else
237: buffer = xmlEncodeEntitiesReentrant(doc,
238: xmlBufferContent(cur->content));
239: #endif
1.1 daniel 240: if (buffer != NULL) {
241: xmlBufferWriteCHAR(buf, buffer);
1.4 daniel 242: xmlFree(buffer);
1.1 daniel 243: }
244: }
245: if (cur->childs != NULL) {
246: if ((cur->childs->type != HTML_TEXT_NODE) &&
1.10 daniel 247: (cur->childs->type != HTML_ENTITY_REF_NODE) &&
248: (cur->childs != cur->last))
1.1 daniel 249: xmlBufferWriteChar(buf, "\n");
250: htmlNodeListDump(buf, doc, cur->childs);
251: if ((cur->last->type != HTML_TEXT_NODE) &&
1.10 daniel 252: (cur->last->type != HTML_ENTITY_REF_NODE) &&
253: (cur->childs != cur->last))
1.1 daniel 254: xmlBufferWriteChar(buf, "\n");
255: }
1.11 daniel 256: if (!htmlIsAutoClosed(doc, cur)) {
257: xmlBufferWriteChar(buf, "</");
258: xmlBufferWriteCHAR(buf, cur->name);
259: xmlBufferWriteChar(buf, ">");
260: }
1.1 daniel 261: if (cur->next != NULL) {
262: if ((cur->next->type != HTML_TEXT_NODE) &&
263: (cur->next->type != HTML_ENTITY_REF_NODE))
264: xmlBufferWriteChar(buf, "\n");
265: }
266: }
267:
268: /**
1.16 ! daniel 269: * htmlNodeDumpFile:
! 270: * @out: the FILE pointer
! 271: * @doc: the document
! 272: * @cur: the current node
! 273: *
! 274: * Dump an HTML node, recursive behaviour,children are printed too.
! 275: */
! 276: void
! 277: htmlNodeDumpFile(FILE *out, xmlDocPtr doc, xmlNodePtr cur) {
! 278: xmlBufferPtr buf;
! 279:
! 280: buf = xmlBufferCreate();
! 281: if (buf == NULL) return;
! 282: htmlNodeDump(buf, doc, cur);
! 283: xmlBufferDump(out, buf);
! 284: xmlBufferFree(buf);
! 285: }
! 286:
! 287: /**
1.1 daniel 288: * htmlDocContentDump:
289: * @buf: the HTML buffer output
290: * @cur: the document
291: *
292: * Dump an HTML document.
293: */
294: static void
295: htmlDocContentDump(xmlBufferPtr buf, xmlDocPtr cur) {
1.12 daniel 296: int type;
297:
298: /*
299: * force to output the stuff as HTML, especially for entities
300: */
301: type = cur->type;
302: cur->type = XML_HTML_DOCUMENT_NODE;
1.1 daniel 303: if (cur->intSubset != NULL)
304: htmlDtdDump(buf, cur);
1.11 daniel 305: else {
306: /* Default to HTML-4.0 transitionnal @@@@ */
307: xmlBufferWriteChar(buf, "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\" \"http://www.w3.org/TR/REC-html40/loose.dtd\">");
308:
309: }
1.1 daniel 310: if (cur->root != NULL) {
1.8 daniel 311: htmlNodeListDump(buf, cur, cur->root);
1.1 daniel 312: }
313: xmlBufferWriteChar(buf, "\n");
1.12 daniel 314: cur->type = type;
1.1 daniel 315: }
316:
317: /**
318: * htmlDocDumpMemory:
319: * @cur: the document
320: * @mem: OUT: the memory pointer
321: * @size: OUT: the memory lenght
322: *
1.6 daniel 323: * Dump an HTML document in memory and return the xmlChar * and it's size.
1.1 daniel 324: * It's up to the caller to free the memory.
325: */
326: void
1.6 daniel 327: htmlDocDumpMemory(xmlDocPtr cur, xmlChar**mem, int *size) {
1.1 daniel 328: xmlBufferPtr buf;
329:
330: if (cur == NULL) {
331: #ifdef DEBUG_TREE
1.15 daniel 332: fprintf(stderr, "htmlxmlDocDumpMemory : document == NULL\n");
1.1 daniel 333: #endif
334: *mem = NULL;
335: *size = 0;
336: return;
337: }
338: buf = xmlBufferCreate();
339: if (buf == NULL) {
340: *mem = NULL;
341: *size = 0;
342: return;
343: }
344: htmlDocContentDump(buf, cur);
345: *mem = buf->content;
346: *size = buf->use;
347: memset(buf, -1, sizeof(xmlBuffer));
1.4 daniel 348: xmlFree(buf);
1.1 daniel 349: }
350:
351:
352: /**
353: * htmlDocDump:
354: * @f: the FILE*
355: * @cur: the document
356: *
357: * Dump an HTML document to an open FILE.
358: */
359: void
360: htmlDocDump(FILE *f, xmlDocPtr cur) {
361: xmlBufferPtr buf;
362:
363: if (cur == NULL) {
364: #ifdef DEBUG_TREE
1.15 daniel 365: fprintf(stderr, "htmlDocDump : document == NULL\n");
1.1 daniel 366: #endif
367: return;
368: }
369: buf = xmlBufferCreate();
370: if (buf == NULL) return;
371: htmlDocContentDump(buf, cur);
372: xmlBufferDump(f, buf);
373: xmlBufferFree(buf);
374: }
375:
376: /**
377: * htmlSaveFile:
378: * @filename: the filename
379: * @cur: the document
380: *
381: * Dump an HTML document to a file.
382: *
383: * returns: the number of byte written or -1 in case of failure.
384: */
385: int
386: htmlSaveFile(const char *filename, xmlDocPtr cur) {
387: xmlBufferPtr buf;
388: FILE *output = NULL;
389: int ret;
390:
391: /*
392: * save the content to a temp buffer.
393: */
394: buf = xmlBufferCreate();
395: if (buf == NULL) return(0);
396: htmlDocContentDump(buf, cur);
397:
398: output = fopen(filename, "w");
399: if (output == NULL) return(-1);
400: ret = xmlBufferDump(output, buf);
401: fclose(output);
402:
403: xmlBufferFree(buf);
1.6 daniel 404: return(ret * sizeof(xmlChar));
1.1 daniel 405: }
406:
Webmaster