Annotation of XML/HTMLtree.c, revision 1.20
1.1 daniel 1: /*
2: * HTMLtree.c : implementation of access functions for an HTML tree.
3: *
4: * See Copyright for the status of this software.
5: *
6: * Daniel.Veillard@w3.org
7: */
8:
1.5 daniel 9:
1.13 daniel 10: #ifdef WIN32
11: #include "win32config.h"
12: #else
1.1 daniel 13: #include "config.h"
1.5 daniel 14: #endif
1.18 daniel 15:
16: #include "xmlversion.h"
17: #ifdef LIBXML_HTML_ENABLED
18:
1.1 daniel 19: #include <stdio.h>
1.5 daniel 20: #include <string.h> /* for memset() only ! */
21:
22: #ifdef HAVE_CTYPE_H
1.1 daniel 23: #include <ctype.h>
1.5 daniel 24: #endif
25: #ifdef HAVE_STDLIB_H
1.1 daniel 26: #include <stdlib.h>
1.5 daniel 27: #endif
1.1 daniel 28:
1.18 daniel 29: #include <libxml/xmlmemory.h>
30: #include <libxml/HTMLparser.h>
31: #include <libxml/HTMLtree.h>
32: #include <libxml/entities.h>
33: #include <libxml/valid.h>
1.1 daniel 34:
1.14 daniel 35: static void
36: htmlDocContentDump(xmlBufferPtr buf, xmlDocPtr cur);
37:
1.1 daniel 38: /**
39: * htmlDtdDump:
40: * @buf: the HTML buffer output
41: * @doc: the document
42: *
43: * Dump the HTML document DTD, if any.
44: */
45: static void
46: htmlDtdDump(xmlBufferPtr buf, xmlDocPtr doc) {
47: xmlDtdPtr cur = doc->intSubset;
48:
49: if (cur == NULL) {
50: fprintf(stderr, "htmlDtdDump : no internal subset\n");
51: return;
52: }
53: xmlBufferWriteChar(buf, "<!DOCTYPE ");
54: xmlBufferWriteCHAR(buf, cur->name);
55: if (cur->ExternalID != NULL) {
56: xmlBufferWriteChar(buf, " PUBLIC ");
57: xmlBufferWriteQuotedString(buf, cur->ExternalID);
1.2 daniel 58: if (cur->SystemID != NULL) {
59: xmlBufferWriteChar(buf, " ");
60: xmlBufferWriteQuotedString(buf, cur->SystemID);
61: }
1.1 daniel 62: } else if (cur->SystemID != NULL) {
63: xmlBufferWriteChar(buf, " SYSTEM ");
64: xmlBufferWriteQuotedString(buf, cur->SystemID);
65: }
66: xmlBufferWriteChar(buf, ">\n");
67: }
68:
69: /**
70: * htmlAttrDump:
71: * @buf: the HTML buffer output
72: * @doc: the document
73: * @cur: the attribute pointer
74: *
75: * Dump an HTML attribute
76: */
77: static void
78: htmlAttrDump(xmlBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur) {
1.6 daniel 79: xmlChar *value;
1.1 daniel 80:
81: if (cur == NULL) {
82: fprintf(stderr, "htmlAttrDump : property == NULL\n");
83: return;
84: }
85: xmlBufferWriteChar(buf, " ");
86: xmlBufferWriteCHAR(buf, cur->name);
1.19 daniel 87: if (cur->children != NULL) {
88: value = xmlNodeListGetString(doc, cur->children, 0);
89: if (value) {
90: xmlBufferWriteChar(buf, "=");
91: xmlBufferWriteQuotedString(buf, value);
92: xmlFree(value);
93: } else {
94: xmlBufferWriteChar(buf, "=\"\"");
95: }
1.1 daniel 96: }
97: }
98:
99: /**
100: * htmlAttrListDump:
101: * @buf: the HTML buffer output
102: * @doc: the document
103: * @cur: the first attribute pointer
104: *
105: * Dump a list of HTML attributes
106: */
107: static void
108: htmlAttrListDump(xmlBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur) {
109: if (cur == NULL) {
110: fprintf(stderr, "htmlAttrListDump : property == NULL\n");
111: return;
112: }
113: while (cur != NULL) {
114: htmlAttrDump(buf, doc, cur);
115: cur = cur->next;
116: }
117: }
118:
119:
1.14 daniel 120: void
1.1 daniel 121: htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur);
122: /**
123: * htmlNodeListDump:
124: * @buf: the HTML buffer output
125: * @doc: the document
126: * @cur: the first node
127: *
128: * Dump an HTML node list, recursive behaviour,children are printed too.
129: */
130: static void
131: htmlNodeListDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
132: if (cur == NULL) {
133: fprintf(stderr, "htmlNodeListDump : node == NULL\n");
134: return;
135: }
136: while (cur != NULL) {
137: htmlNodeDump(buf, doc, cur);
138: cur = cur->next;
139: }
140: }
141:
142: /**
143: * htmlNodeDump:
144: * @buf: the HTML buffer output
145: * @doc: the document
146: * @cur: the current node
147: *
148: * Dump an HTML node, recursive behaviour,children are printed too.
149: */
1.14 daniel 150: void
1.1 daniel 151: htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
152: htmlElemDescPtr info;
153:
154: if (cur == NULL) {
155: fprintf(stderr, "htmlNodeDump : node == NULL\n");
156: return;
157: }
158: /*
159: * Special cases.
160: */
1.20 ! daniel 161: if (cur->type == XML_DTD_NODE)
! 162: return;
1.14 daniel 163: if (cur->type == XML_HTML_DOCUMENT_NODE) {
164: htmlDocContentDump(buf, (xmlDocPtr) cur);
165: return;
166: }
1.1 daniel 167: if (cur->type == HTML_TEXT_NODE) {
168: if (cur->content != NULL) {
1.6 daniel 169: xmlChar *buffer;
1.1 daniel 170:
171: /* uses the HTML encoding routine !!!!!!!!!! */
1.9 daniel 172: #ifndef XML_USE_BUFFER_CONTENT
1.1 daniel 173: buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
1.9 daniel 174: #else
175: buffer = xmlEncodeEntitiesReentrant(doc,
176: xmlBufferContent(cur->content));
177: #endif
1.1 daniel 178: if (buffer != NULL) {
179: xmlBufferWriteCHAR(buf, buffer);
1.4 daniel 180: xmlFree(buffer);
1.1 daniel 181: }
182: }
183: return;
184: }
185: if (cur->type == HTML_COMMENT_NODE) {
186: if (cur->content != NULL) {
187: xmlBufferWriteChar(buf, "<!--");
1.9 daniel 188: #ifndef XML_USE_BUFFER_CONTENT
1.1 daniel 189: xmlBufferWriteCHAR(buf, cur->content);
1.9 daniel 190: #else
191: xmlBufferWriteCHAR(buf, xmlBufferContent(cur->content));
192: #endif
1.1 daniel 193: xmlBufferWriteChar(buf, "-->");
194: }
195: return;
196: }
197: if (cur->type == HTML_ENTITY_REF_NODE) {
198: xmlBufferWriteChar(buf, "&");
199: xmlBufferWriteCHAR(buf, cur->name);
200: xmlBufferWriteChar(buf, ";");
201: return;
202: }
203:
204: /*
205: * Get specific HTmL info for taht node.
206: */
207: info = htmlTagLookup(cur->name);
208:
209: xmlBufferWriteChar(buf, "<");
210: xmlBufferWriteCHAR(buf, cur->name);
211: if (cur->properties != NULL)
212: htmlAttrListDump(buf, doc, cur->properties);
213:
1.7 daniel 214: if ((info != NULL) && (info->empty)) {
1.1 daniel 215: xmlBufferWriteChar(buf, ">");
216: if (cur->next != NULL) {
217: if ((cur->next->type != HTML_TEXT_NODE) &&
218: (cur->next->type != HTML_ENTITY_REF_NODE))
219: xmlBufferWriteChar(buf, "\n");
220: }
221: return;
222: }
1.17 daniel 223: if ((cur->content == NULL) && (cur->children == NULL)) {
1.7 daniel 224: if ((info != NULL) && (info->endTag != 0))
1.1 daniel 225: xmlBufferWriteChar(buf, ">");
226: else {
227: xmlBufferWriteChar(buf, "></");
228: xmlBufferWriteCHAR(buf, cur->name);
229: xmlBufferWriteChar(buf, ">");
230: }
231: if (cur->next != NULL) {
232: if ((cur->next->type != HTML_TEXT_NODE) &&
233: (cur->next->type != HTML_ENTITY_REF_NODE))
234: xmlBufferWriteChar(buf, "\n");
235: }
236: return;
237: }
238: xmlBufferWriteChar(buf, ">");
239: if (cur->content != NULL) {
1.6 daniel 240: xmlChar *buffer;
1.1 daniel 241:
1.9 daniel 242: #ifndef XML_USE_BUFFER_CONTENT
243: buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
244: #else
245: buffer = xmlEncodeEntitiesReentrant(doc,
246: xmlBufferContent(cur->content));
247: #endif
1.1 daniel 248: if (buffer != NULL) {
249: xmlBufferWriteCHAR(buf, buffer);
1.4 daniel 250: xmlFree(buffer);
1.1 daniel 251: }
252: }
1.17 daniel 253: if (cur->children != NULL) {
254: if ((cur->children->type != HTML_TEXT_NODE) &&
255: (cur->children->type != HTML_ENTITY_REF_NODE) &&
256: (cur->children != cur->last))
1.1 daniel 257: xmlBufferWriteChar(buf, "\n");
1.17 daniel 258: htmlNodeListDump(buf, doc, cur->children);
1.1 daniel 259: if ((cur->last->type != HTML_TEXT_NODE) &&
1.10 daniel 260: (cur->last->type != HTML_ENTITY_REF_NODE) &&
1.17 daniel 261: (cur->children != cur->last))
1.1 daniel 262: xmlBufferWriteChar(buf, "\n");
263: }
1.11 daniel 264: if (!htmlIsAutoClosed(doc, cur)) {
265: xmlBufferWriteChar(buf, "</");
266: xmlBufferWriteCHAR(buf, cur->name);
267: xmlBufferWriteChar(buf, ">");
268: }
1.1 daniel 269: if (cur->next != NULL) {
270: if ((cur->next->type != HTML_TEXT_NODE) &&
271: (cur->next->type != HTML_ENTITY_REF_NODE))
272: xmlBufferWriteChar(buf, "\n");
273: }
274: }
275:
276: /**
1.16 daniel 277: * htmlNodeDumpFile:
278: * @out: the FILE pointer
279: * @doc: the document
280: * @cur: the current node
281: *
282: * Dump an HTML node, recursive behaviour,children are printed too.
283: */
284: void
285: htmlNodeDumpFile(FILE *out, xmlDocPtr doc, xmlNodePtr cur) {
286: xmlBufferPtr buf;
287:
288: buf = xmlBufferCreate();
289: if (buf == NULL) return;
290: htmlNodeDump(buf, doc, cur);
291: xmlBufferDump(out, buf);
292: xmlBufferFree(buf);
293: }
294:
295: /**
1.1 daniel 296: * htmlDocContentDump:
297: * @buf: the HTML buffer output
298: * @cur: the document
299: *
300: * Dump an HTML document.
301: */
302: static void
303: htmlDocContentDump(xmlBufferPtr buf, xmlDocPtr cur) {
1.12 daniel 304: int type;
305:
306: /*
307: * force to output the stuff as HTML, especially for entities
308: */
309: type = cur->type;
310: cur->type = XML_HTML_DOCUMENT_NODE;
1.1 daniel 311: if (cur->intSubset != NULL)
312: htmlDtdDump(buf, cur);
1.11 daniel 313: else {
314: /* Default to HTML-4.0 transitionnal @@@@ */
315: xmlBufferWriteChar(buf, "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\" \"http://www.w3.org/TR/REC-html40/loose.dtd\">");
316:
317: }
1.17 daniel 318: if (cur->children != NULL) {
319: htmlNodeListDump(buf, cur, cur->children);
1.1 daniel 320: }
321: xmlBufferWriteChar(buf, "\n");
1.12 daniel 322: cur->type = type;
1.1 daniel 323: }
324:
325: /**
326: * htmlDocDumpMemory:
327: * @cur: the document
328: * @mem: OUT: the memory pointer
329: * @size: OUT: the memory lenght
330: *
1.6 daniel 331: * Dump an HTML document in memory and return the xmlChar * and it's size.
1.1 daniel 332: * It's up to the caller to free the memory.
333: */
334: void
1.6 daniel 335: htmlDocDumpMemory(xmlDocPtr cur, xmlChar**mem, int *size) {
1.1 daniel 336: xmlBufferPtr buf;
337:
338: if (cur == NULL) {
339: #ifdef DEBUG_TREE
1.15 daniel 340: fprintf(stderr, "htmlxmlDocDumpMemory : document == NULL\n");
1.1 daniel 341: #endif
342: *mem = NULL;
343: *size = 0;
344: return;
345: }
346: buf = xmlBufferCreate();
347: if (buf == NULL) {
348: *mem = NULL;
349: *size = 0;
350: return;
351: }
352: htmlDocContentDump(buf, cur);
353: *mem = buf->content;
354: *size = buf->use;
355: memset(buf, -1, sizeof(xmlBuffer));
1.4 daniel 356: xmlFree(buf);
1.1 daniel 357: }
358:
359:
360: /**
361: * htmlDocDump:
362: * @f: the FILE*
363: * @cur: the document
364: *
365: * Dump an HTML document to an open FILE.
366: */
367: void
368: htmlDocDump(FILE *f, xmlDocPtr cur) {
369: xmlBufferPtr buf;
370:
371: if (cur == NULL) {
372: #ifdef DEBUG_TREE
1.15 daniel 373: fprintf(stderr, "htmlDocDump : document == NULL\n");
1.1 daniel 374: #endif
375: return;
376: }
377: buf = xmlBufferCreate();
378: if (buf == NULL) return;
379: htmlDocContentDump(buf, cur);
380: xmlBufferDump(f, buf);
381: xmlBufferFree(buf);
382: }
383:
384: /**
385: * htmlSaveFile:
386: * @filename: the filename
387: * @cur: the document
388: *
389: * Dump an HTML document to a file.
390: *
391: * returns: the number of byte written or -1 in case of failure.
392: */
393: int
394: htmlSaveFile(const char *filename, xmlDocPtr cur) {
395: xmlBufferPtr buf;
396: FILE *output = NULL;
397: int ret;
398:
399: /*
400: * save the content to a temp buffer.
401: */
402: buf = xmlBufferCreate();
403: if (buf == NULL) return(0);
404: htmlDocContentDump(buf, cur);
405:
406: output = fopen(filename, "w");
407: if (output == NULL) return(-1);
408: ret = xmlBufferDump(output, buf);
409: fclose(output);
410:
411: xmlBufferFree(buf);
1.6 daniel 412: return(ret * sizeof(xmlChar));
1.1 daniel 413: }
414:
1.18 daniel 415: #endif /* LIBXML_HTML_ENABLED */
Webmaster