Annotation of XML/HTMLtree.c, revision 1.14
1.1 daniel 1: /*
2: * HTMLtree.c : implementation of access functions for an HTML tree.
3: *
4: * See Copyright for the status of this software.
5: *
6: * Daniel.Veillard@w3.org
7: */
8:
1.5 daniel 9:
1.13 daniel 10: #ifdef WIN32
11: #include "win32config.h"
12: #else
1.1 daniel 13: #include "config.h"
1.5 daniel 14: #endif
1.1 daniel 15: #include <stdio.h>
1.5 daniel 16: #include <string.h> /* for memset() only ! */
17:
18: #ifdef HAVE_CTYPE_H
1.1 daniel 19: #include <ctype.h>
1.5 daniel 20: #endif
21: #ifdef HAVE_STDLIB_H
1.1 daniel 22: #include <stdlib.h>
1.5 daniel 23: #endif
1.1 daniel 24:
1.4 daniel 25: #include "xmlmemory.h"
1.1 daniel 26: #include "HTMLparser.h"
27: #include "HTMLtree.h"
28: #include "entities.h"
29: #include "valid.h"
30:
1.14 ! daniel 31: static void
! 32: htmlDocContentDump(xmlBufferPtr buf, xmlDocPtr cur);
! 33:
1.1 daniel 34: /**
35: * htmlDtdDump:
36: * @buf: the HTML buffer output
37: * @doc: the document
38: *
39: * Dump the HTML document DTD, if any.
40: */
41: static void
42: htmlDtdDump(xmlBufferPtr buf, xmlDocPtr doc) {
43: xmlDtdPtr cur = doc->intSubset;
44:
45: if (cur == NULL) {
46: fprintf(stderr, "htmlDtdDump : no internal subset\n");
47: return;
48: }
49: xmlBufferWriteChar(buf, "<!DOCTYPE ");
50: xmlBufferWriteCHAR(buf, cur->name);
51: if (cur->ExternalID != NULL) {
52: xmlBufferWriteChar(buf, " PUBLIC ");
53: xmlBufferWriteQuotedString(buf, cur->ExternalID);
1.2 daniel 54: if (cur->SystemID != NULL) {
55: xmlBufferWriteChar(buf, " ");
56: xmlBufferWriteQuotedString(buf, cur->SystemID);
57: }
1.1 daniel 58: } else if (cur->SystemID != NULL) {
59: xmlBufferWriteChar(buf, " SYSTEM ");
60: xmlBufferWriteQuotedString(buf, cur->SystemID);
61: }
62: xmlBufferWriteChar(buf, ">\n");
63: }
64:
65: /**
66: * htmlAttrDump:
67: * @buf: the HTML buffer output
68: * @doc: the document
69: * @cur: the attribute pointer
70: *
71: * Dump an HTML attribute
72: */
73: static void
74: htmlAttrDump(xmlBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur) {
1.6 daniel 75: xmlChar *value;
1.1 daniel 76:
77: if (cur == NULL) {
78: fprintf(stderr, "htmlAttrDump : property == NULL\n");
79: return;
80: }
81: xmlBufferWriteChar(buf, " ");
82: xmlBufferWriteCHAR(buf, cur->name);
83: value = xmlNodeListGetString(doc, cur->val, 0);
84: if (value) {
85: xmlBufferWriteChar(buf, "=");
86: xmlBufferWriteQuotedString(buf, value);
1.4 daniel 87: xmlFree(value);
1.1 daniel 88: } else {
89: xmlBufferWriteChar(buf, "=\"\"");
90: }
91: }
92:
93: /**
94: * htmlAttrListDump:
95: * @buf: the HTML buffer output
96: * @doc: the document
97: * @cur: the first attribute pointer
98: *
99: * Dump a list of HTML attributes
100: */
101: static void
102: htmlAttrListDump(xmlBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur) {
103: if (cur == NULL) {
104: fprintf(stderr, "htmlAttrListDump : property == NULL\n");
105: return;
106: }
107: while (cur != NULL) {
108: htmlAttrDump(buf, doc, cur);
109: cur = cur->next;
110: }
111: }
112:
113:
1.14 ! daniel 114: void
1.1 daniel 115: htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur);
116: /**
117: * htmlNodeListDump:
118: * @buf: the HTML buffer output
119: * @doc: the document
120: * @cur: the first node
121: *
122: * Dump an HTML node list, recursive behaviour,children are printed too.
123: */
124: static void
125: htmlNodeListDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
126: if (cur == NULL) {
127: fprintf(stderr, "htmlNodeListDump : node == NULL\n");
128: return;
129: }
130: while (cur != NULL) {
131: htmlNodeDump(buf, doc, cur);
132: cur = cur->next;
133: }
134: }
135:
136: /**
137: * htmlNodeDump:
138: * @buf: the HTML buffer output
139: * @doc: the document
140: * @cur: the current node
141: *
142: * Dump an HTML node, recursive behaviour,children are printed too.
143: */
1.14 ! daniel 144: void
1.1 daniel 145: htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
146: htmlElemDescPtr info;
147:
148: if (cur == NULL) {
149: fprintf(stderr, "htmlNodeDump : node == NULL\n");
150: return;
151: }
152: /*
153: * Special cases.
154: */
1.14 ! daniel 155: if (cur->type == XML_HTML_DOCUMENT_NODE) {
! 156: htmlDocContentDump(buf, (xmlDocPtr) cur);
! 157: return;
! 158: }
1.1 daniel 159: if (cur->type == HTML_TEXT_NODE) {
160: if (cur->content != NULL) {
1.6 daniel 161: xmlChar *buffer;
1.1 daniel 162:
163: /* uses the HTML encoding routine !!!!!!!!!! */
1.9 daniel 164: #ifndef XML_USE_BUFFER_CONTENT
1.1 daniel 165: buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
1.9 daniel 166: #else
167: buffer = xmlEncodeEntitiesReentrant(doc,
168: xmlBufferContent(cur->content));
169: #endif
1.1 daniel 170: if (buffer != NULL) {
171: xmlBufferWriteCHAR(buf, buffer);
1.4 daniel 172: xmlFree(buffer);
1.1 daniel 173: }
174: }
175: return;
176: }
177: if (cur->type == HTML_COMMENT_NODE) {
178: if (cur->content != NULL) {
179: xmlBufferWriteChar(buf, "<!--");
1.9 daniel 180: #ifndef XML_USE_BUFFER_CONTENT
1.1 daniel 181: xmlBufferWriteCHAR(buf, cur->content);
1.9 daniel 182: #else
183: xmlBufferWriteCHAR(buf, xmlBufferContent(cur->content));
184: #endif
1.1 daniel 185: xmlBufferWriteChar(buf, "-->");
186: }
187: return;
188: }
189: if (cur->type == HTML_ENTITY_REF_NODE) {
190: xmlBufferWriteChar(buf, "&");
191: xmlBufferWriteCHAR(buf, cur->name);
192: xmlBufferWriteChar(buf, ";");
193: return;
194: }
195:
196: /*
197: * Get specific HTmL info for taht node.
198: */
199: info = htmlTagLookup(cur->name);
200:
201: xmlBufferWriteChar(buf, "<");
202: xmlBufferWriteCHAR(buf, cur->name);
203: if (cur->properties != NULL)
204: htmlAttrListDump(buf, doc, cur->properties);
205:
1.7 daniel 206: if ((info != NULL) && (info->empty)) {
1.1 daniel 207: xmlBufferWriteChar(buf, ">");
208: if (cur->next != NULL) {
209: if ((cur->next->type != HTML_TEXT_NODE) &&
210: (cur->next->type != HTML_ENTITY_REF_NODE))
211: xmlBufferWriteChar(buf, "\n");
212: }
213: return;
214: }
215: if ((cur->content == NULL) && (cur->childs == NULL)) {
1.7 daniel 216: if ((info != NULL) && (info->endTag != 0))
1.1 daniel 217: xmlBufferWriteChar(buf, ">");
218: else {
219: xmlBufferWriteChar(buf, "></");
220: xmlBufferWriteCHAR(buf, cur->name);
221: xmlBufferWriteChar(buf, ">");
222: }
223: if (cur->next != NULL) {
224: if ((cur->next->type != HTML_TEXT_NODE) &&
225: (cur->next->type != HTML_ENTITY_REF_NODE))
226: xmlBufferWriteChar(buf, "\n");
227: }
228: return;
229: }
230: xmlBufferWriteChar(buf, ">");
231: if (cur->content != NULL) {
1.6 daniel 232: xmlChar *buffer;
1.1 daniel 233:
1.9 daniel 234: #ifndef XML_USE_BUFFER_CONTENT
235: buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
236: #else
237: buffer = xmlEncodeEntitiesReentrant(doc,
238: xmlBufferContent(cur->content));
239: #endif
1.1 daniel 240: if (buffer != NULL) {
241: xmlBufferWriteCHAR(buf, buffer);
1.4 daniel 242: xmlFree(buffer);
1.1 daniel 243: }
244: }
245: if (cur->childs != NULL) {
246: if ((cur->childs->type != HTML_TEXT_NODE) &&
1.10 daniel 247: (cur->childs->type != HTML_ENTITY_REF_NODE) &&
248: (cur->childs != cur->last))
1.1 daniel 249: xmlBufferWriteChar(buf, "\n");
250: htmlNodeListDump(buf, doc, cur->childs);
251: if ((cur->last->type != HTML_TEXT_NODE) &&
1.10 daniel 252: (cur->last->type != HTML_ENTITY_REF_NODE) &&
253: (cur->childs != cur->last))
1.1 daniel 254: xmlBufferWriteChar(buf, "\n");
255: }
1.11 daniel 256: if (!htmlIsAutoClosed(doc, cur)) {
257: xmlBufferWriteChar(buf, "</");
258: xmlBufferWriteCHAR(buf, cur->name);
259: xmlBufferWriteChar(buf, ">");
260: }
1.1 daniel 261: if (cur->next != NULL) {
262: if ((cur->next->type != HTML_TEXT_NODE) &&
263: (cur->next->type != HTML_ENTITY_REF_NODE))
264: xmlBufferWriteChar(buf, "\n");
265: }
266: }
267:
268: /**
269: * htmlDocContentDump:
270: * @buf: the HTML buffer output
271: * @cur: the document
272: *
273: * Dump an HTML document.
274: */
275: static void
276: htmlDocContentDump(xmlBufferPtr buf, xmlDocPtr cur) {
1.12 daniel 277: int type;
278:
279: /*
280: * force to output the stuff as HTML, especially for entities
281: */
282: type = cur->type;
283: cur->type = XML_HTML_DOCUMENT_NODE;
1.1 daniel 284: if (cur->intSubset != NULL)
285: htmlDtdDump(buf, cur);
1.11 daniel 286: else {
287: /* Default to HTML-4.0 transitionnal @@@@ */
288: xmlBufferWriteChar(buf, "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\" \"http://www.w3.org/TR/REC-html40/loose.dtd\">");
289:
290: }
1.1 daniel 291: if (cur->root != NULL) {
1.8 daniel 292: htmlNodeListDump(buf, cur, cur->root);
1.1 daniel 293: }
294: xmlBufferWriteChar(buf, "\n");
1.12 daniel 295: cur->type = type;
1.1 daniel 296: }
297:
298: /**
299: * htmlDocDumpMemory:
300: * @cur: the document
301: * @mem: OUT: the memory pointer
302: * @size: OUT: the memory lenght
303: *
1.6 daniel 304: * Dump an HTML document in memory and return the xmlChar * and it's size.
1.1 daniel 305: * It's up to the caller to free the memory.
306: */
307: void
1.6 daniel 308: htmlDocDumpMemory(xmlDocPtr cur, xmlChar**mem, int *size) {
1.1 daniel 309: xmlBufferPtr buf;
310:
311: if (cur == NULL) {
312: #ifdef DEBUG_TREE
313: fprintf(stderr, "xmlDocDumpMemory : document == NULL\n");
314: #endif
315: *mem = NULL;
316: *size = 0;
317: return;
318: }
319: buf = xmlBufferCreate();
320: if (buf == NULL) {
321: *mem = NULL;
322: *size = 0;
323: return;
324: }
325: htmlDocContentDump(buf, cur);
326: *mem = buf->content;
327: *size = buf->use;
328: memset(buf, -1, sizeof(xmlBuffer));
1.4 daniel 329: xmlFree(buf);
1.1 daniel 330: }
331:
332:
333: /**
334: * htmlDocDump:
335: * @f: the FILE*
336: * @cur: the document
337: *
338: * Dump an HTML document to an open FILE.
339: */
340: void
341: htmlDocDump(FILE *f, xmlDocPtr cur) {
342: xmlBufferPtr buf;
343:
344: if (cur == NULL) {
345: #ifdef DEBUG_TREE
346: fprintf(stderr, "xmlDocDump : document == NULL\n");
347: #endif
348: return;
349: }
350: buf = xmlBufferCreate();
351: if (buf == NULL) return;
352: htmlDocContentDump(buf, cur);
353: xmlBufferDump(f, buf);
354: xmlBufferFree(buf);
355: }
356:
357: /**
358: * htmlSaveFile:
359: * @filename: the filename
360: * @cur: the document
361: *
362: * Dump an HTML document to a file.
363: *
364: * returns: the number of byte written or -1 in case of failure.
365: */
366: int
367: htmlSaveFile(const char *filename, xmlDocPtr cur) {
368: xmlBufferPtr buf;
369: FILE *output = NULL;
370: int ret;
371:
372: /*
373: * save the content to a temp buffer.
374: */
375: buf = xmlBufferCreate();
376: if (buf == NULL) return(0);
377: htmlDocContentDump(buf, cur);
378:
379: output = fopen(filename, "w");
380: if (output == NULL) return(-1);
381: ret = xmlBufferDump(output, buf);
382: fclose(output);
383:
384: xmlBufferFree(buf);
1.6 daniel 385: return(ret * sizeof(xmlChar));
1.1 daniel 386: }
387:
Webmaster