Annotation of XML/HTMLtree.c, revision 1.12
1.1 daniel 1: /*
2: * HTMLtree.c : implementation of access functions for an HTML tree.
3: *
4: * See Copyright for the status of this software.
5: *
6: * Daniel.Veillard@w3.org
7: */
8:
1.5 daniel 9:
10: #ifndef WIN32
1.1 daniel 11: #include "config.h"
1.5 daniel 12: #endif
1.1 daniel 13: #include <stdio.h>
1.5 daniel 14: #include <string.h> /* for memset() only ! */
15:
16: #ifdef HAVE_CTYPE_H
1.1 daniel 17: #include <ctype.h>
1.5 daniel 18: #endif
19: #ifdef HAVE_STDLIB_H
1.1 daniel 20: #include <stdlib.h>
1.5 daniel 21: #endif
1.1 daniel 22:
1.4 daniel 23: #include "xmlmemory.h"
1.1 daniel 24: #include "HTMLparser.h"
25: #include "HTMLtree.h"
26: #include "entities.h"
27: #include "valid.h"
28:
29: /**
30: * htmlDtdDump:
31: * @buf: the HTML buffer output
32: * @doc: the document
33: *
34: * Dump the HTML document DTD, if any.
35: */
36: static void
37: htmlDtdDump(xmlBufferPtr buf, xmlDocPtr doc) {
38: xmlDtdPtr cur = doc->intSubset;
39:
40: if (cur == NULL) {
41: fprintf(stderr, "htmlDtdDump : no internal subset\n");
42: return;
43: }
44: xmlBufferWriteChar(buf, "<!DOCTYPE ");
45: xmlBufferWriteCHAR(buf, cur->name);
46: if (cur->ExternalID != NULL) {
47: xmlBufferWriteChar(buf, " PUBLIC ");
48: xmlBufferWriteQuotedString(buf, cur->ExternalID);
1.2 daniel 49: if (cur->SystemID != NULL) {
50: xmlBufferWriteChar(buf, " ");
51: xmlBufferWriteQuotedString(buf, cur->SystemID);
52: }
1.1 daniel 53: } else if (cur->SystemID != NULL) {
54: xmlBufferWriteChar(buf, " SYSTEM ");
55: xmlBufferWriteQuotedString(buf, cur->SystemID);
56: }
57: xmlBufferWriteChar(buf, ">\n");
58: }
59:
60: /**
61: * htmlAttrDump:
62: * @buf: the HTML buffer output
63: * @doc: the document
64: * @cur: the attribute pointer
65: *
66: * Dump an HTML attribute
67: */
68: static void
69: htmlAttrDump(xmlBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur) {
1.6 daniel 70: xmlChar *value;
1.1 daniel 71:
72: if (cur == NULL) {
73: fprintf(stderr, "htmlAttrDump : property == NULL\n");
74: return;
75: }
76: xmlBufferWriteChar(buf, " ");
77: xmlBufferWriteCHAR(buf, cur->name);
78: value = xmlNodeListGetString(doc, cur->val, 0);
79: if (value) {
80: xmlBufferWriteChar(buf, "=");
81: xmlBufferWriteQuotedString(buf, value);
1.4 daniel 82: xmlFree(value);
1.1 daniel 83: } else {
84: xmlBufferWriteChar(buf, "=\"\"");
85: }
86: }
87:
88: /**
89: * htmlAttrListDump:
90: * @buf: the HTML buffer output
91: * @doc: the document
92: * @cur: the first attribute pointer
93: *
94: * Dump a list of HTML attributes
95: */
96: static void
97: htmlAttrListDump(xmlBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur) {
98: if (cur == NULL) {
99: fprintf(stderr, "htmlAttrListDump : property == NULL\n");
100: return;
101: }
102: while (cur != NULL) {
103: htmlAttrDump(buf, doc, cur);
104: cur = cur->next;
105: }
106: }
107:
108:
109: static void
110: htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur);
111: /**
112: * htmlNodeListDump:
113: * @buf: the HTML buffer output
114: * @doc: the document
115: * @cur: the first node
116: *
117: * Dump an HTML node list, recursive behaviour,children are printed too.
118: */
119: static void
120: htmlNodeListDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
121: if (cur == NULL) {
122: fprintf(stderr, "htmlNodeListDump : node == NULL\n");
123: return;
124: }
125: while (cur != NULL) {
126: htmlNodeDump(buf, doc, cur);
127: cur = cur->next;
128: }
129: }
130:
131: /**
132: * htmlNodeDump:
133: * @buf: the HTML buffer output
134: * @doc: the document
135: * @cur: the current node
136: *
137: * Dump an HTML node, recursive behaviour,children are printed too.
138: */
139: static void
140: htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
141: htmlElemDescPtr info;
142:
143: if (cur == NULL) {
144: fprintf(stderr, "htmlNodeDump : node == NULL\n");
145: return;
146: }
147: /*
148: * Special cases.
149: */
150: if (cur->type == HTML_TEXT_NODE) {
151: if (cur->content != NULL) {
1.6 daniel 152: xmlChar *buffer;
1.1 daniel 153:
154: /* uses the HTML encoding routine !!!!!!!!!! */
1.9 daniel 155: #ifndef XML_USE_BUFFER_CONTENT
1.1 daniel 156: buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
1.9 daniel 157: #else
158: buffer = xmlEncodeEntitiesReentrant(doc,
159: xmlBufferContent(cur->content));
160: #endif
1.1 daniel 161: if (buffer != NULL) {
162: xmlBufferWriteCHAR(buf, buffer);
1.4 daniel 163: xmlFree(buffer);
1.1 daniel 164: }
165: }
166: return;
167: }
168: if (cur->type == HTML_COMMENT_NODE) {
169: if (cur->content != NULL) {
170: xmlBufferWriteChar(buf, "<!--");
1.9 daniel 171: #ifndef XML_USE_BUFFER_CONTENT
1.1 daniel 172: xmlBufferWriteCHAR(buf, cur->content);
1.9 daniel 173: #else
174: xmlBufferWriteCHAR(buf, xmlBufferContent(cur->content));
175: #endif
1.1 daniel 176: xmlBufferWriteChar(buf, "-->");
177: }
178: return;
179: }
180: if (cur->type == HTML_ENTITY_REF_NODE) {
181: xmlBufferWriteChar(buf, "&");
182: xmlBufferWriteCHAR(buf, cur->name);
183: xmlBufferWriteChar(buf, ";");
184: return;
185: }
186:
187: /*
188: * Get specific HTmL info for taht node.
189: */
190: info = htmlTagLookup(cur->name);
191:
192: xmlBufferWriteChar(buf, "<");
193: xmlBufferWriteCHAR(buf, cur->name);
194: if (cur->properties != NULL)
195: htmlAttrListDump(buf, doc, cur->properties);
196:
1.7 daniel 197: if ((info != NULL) && (info->empty)) {
1.1 daniel 198: xmlBufferWriteChar(buf, ">");
199: if (cur->next != NULL) {
200: if ((cur->next->type != HTML_TEXT_NODE) &&
201: (cur->next->type != HTML_ENTITY_REF_NODE))
202: xmlBufferWriteChar(buf, "\n");
203: }
204: return;
205: }
206: if ((cur->content == NULL) && (cur->childs == NULL)) {
1.7 daniel 207: if ((info != NULL) && (info->endTag != 0))
1.1 daniel 208: xmlBufferWriteChar(buf, ">");
209: else {
210: xmlBufferWriteChar(buf, "></");
211: xmlBufferWriteCHAR(buf, cur->name);
212: xmlBufferWriteChar(buf, ">");
213: }
214: if (cur->next != NULL) {
215: if ((cur->next->type != HTML_TEXT_NODE) &&
216: (cur->next->type != HTML_ENTITY_REF_NODE))
217: xmlBufferWriteChar(buf, "\n");
218: }
219: return;
220: }
221: xmlBufferWriteChar(buf, ">");
222: if (cur->content != NULL) {
1.6 daniel 223: xmlChar *buffer;
1.1 daniel 224:
1.9 daniel 225: #ifndef XML_USE_BUFFER_CONTENT
226: buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
227: #else
228: buffer = xmlEncodeEntitiesReentrant(doc,
229: xmlBufferContent(cur->content));
230: #endif
1.1 daniel 231: if (buffer != NULL) {
232: xmlBufferWriteCHAR(buf, buffer);
1.4 daniel 233: xmlFree(buffer);
1.1 daniel 234: }
235: }
236: if (cur->childs != NULL) {
237: if ((cur->childs->type != HTML_TEXT_NODE) &&
1.10 daniel 238: (cur->childs->type != HTML_ENTITY_REF_NODE) &&
239: (cur->childs != cur->last))
1.1 daniel 240: xmlBufferWriteChar(buf, "\n");
241: htmlNodeListDump(buf, doc, cur->childs);
242: if ((cur->last->type != HTML_TEXT_NODE) &&
1.10 daniel 243: (cur->last->type != HTML_ENTITY_REF_NODE) &&
244: (cur->childs != cur->last))
1.1 daniel 245: xmlBufferWriteChar(buf, "\n");
246: }
1.11 daniel 247: if (!htmlIsAutoClosed(doc, cur)) {
248: xmlBufferWriteChar(buf, "</");
249: xmlBufferWriteCHAR(buf, cur->name);
250: xmlBufferWriteChar(buf, ">");
251: }
1.1 daniel 252: if (cur->next != NULL) {
253: if ((cur->next->type != HTML_TEXT_NODE) &&
254: (cur->next->type != HTML_ENTITY_REF_NODE))
255: xmlBufferWriteChar(buf, "\n");
256: }
257: }
258:
259: /**
260: * htmlDocContentDump:
261: * @buf: the HTML buffer output
262: * @cur: the document
263: *
264: * Dump an HTML document.
265: */
266: static void
267: htmlDocContentDump(xmlBufferPtr buf, xmlDocPtr cur) {
1.12 ! daniel 268: int type;
! 269:
! 270: /*
! 271: * force to output the stuff as HTML, especially for entities
! 272: */
! 273: type = cur->type;
! 274: cur->type = XML_HTML_DOCUMENT_NODE;
1.1 daniel 275: if (cur->intSubset != NULL)
276: htmlDtdDump(buf, cur);
1.11 daniel 277: else {
278: /* Default to HTML-4.0 transitionnal @@@@ */
279: xmlBufferWriteChar(buf, "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\" \"http://www.w3.org/TR/REC-html40/loose.dtd\">");
280:
281: }
1.1 daniel 282: if (cur->root != NULL) {
1.8 daniel 283: htmlNodeListDump(buf, cur, cur->root);
1.1 daniel 284: }
285: xmlBufferWriteChar(buf, "\n");
1.12 ! daniel 286: cur->type = type;
1.1 daniel 287: }
288:
289: /**
290: * htmlDocDumpMemory:
291: * @cur: the document
292: * @mem: OUT: the memory pointer
293: * @size: OUT: the memory lenght
294: *
1.6 daniel 295: * Dump an HTML document in memory and return the xmlChar * and it's size.
1.1 daniel 296: * It's up to the caller to free the memory.
297: */
298: void
1.6 daniel 299: htmlDocDumpMemory(xmlDocPtr cur, xmlChar**mem, int *size) {
1.1 daniel 300: xmlBufferPtr buf;
301:
302: if (cur == NULL) {
303: #ifdef DEBUG_TREE
304: fprintf(stderr, "xmlDocDumpMemory : document == NULL\n");
305: #endif
306: *mem = NULL;
307: *size = 0;
308: return;
309: }
310: buf = xmlBufferCreate();
311: if (buf == NULL) {
312: *mem = NULL;
313: *size = 0;
314: return;
315: }
316: htmlDocContentDump(buf, cur);
317: *mem = buf->content;
318: *size = buf->use;
319: memset(buf, -1, sizeof(xmlBuffer));
1.4 daniel 320: xmlFree(buf);
1.1 daniel 321: }
322:
323:
324: /**
325: * htmlDocDump:
326: * @f: the FILE*
327: * @cur: the document
328: *
329: * Dump an HTML document to an open FILE.
330: */
331: void
332: htmlDocDump(FILE *f, xmlDocPtr cur) {
333: xmlBufferPtr buf;
334:
335: if (cur == NULL) {
336: #ifdef DEBUG_TREE
337: fprintf(stderr, "xmlDocDump : document == NULL\n");
338: #endif
339: return;
340: }
341: buf = xmlBufferCreate();
342: if (buf == NULL) return;
343: htmlDocContentDump(buf, cur);
344: xmlBufferDump(f, buf);
345: xmlBufferFree(buf);
346: }
347:
348: /**
349: * htmlSaveFile:
350: * @filename: the filename
351: * @cur: the document
352: *
353: * Dump an HTML document to a file.
354: *
355: * returns: the number of byte written or -1 in case of failure.
356: */
357: int
358: htmlSaveFile(const char *filename, xmlDocPtr cur) {
359: xmlBufferPtr buf;
360: FILE *output = NULL;
361: int ret;
362:
363: /*
364: * save the content to a temp buffer.
365: */
366: buf = xmlBufferCreate();
367: if (buf == NULL) return(0);
368: htmlDocContentDump(buf, cur);
369:
370: output = fopen(filename, "w");
371: if (output == NULL) return(-1);
372: ret = xmlBufferDump(output, buf);
373: fclose(output);
374:
375: xmlBufferFree(buf);
1.6 daniel 376: return(ret * sizeof(xmlChar));
1.1 daniel 377: }
378:
Webmaster