Annotation of XML/HTMLtree.c, revision 1.6
1.1 daniel 1: /*
2: * HTMLtree.c : implementation of access functions for an HTML tree.
3: *
4: * See Copyright for the status of this software.
5: *
6: * Daniel.Veillard@w3.org
7: */
8:
1.5 daniel 9:
10: #ifndef WIN32
1.1 daniel 11: #include "config.h"
1.5 daniel 12: #endif
1.1 daniel 13: #include <stdio.h>
1.5 daniel 14: #include <string.h> /* for memset() only ! */
15:
16: #ifdef HAVE_CTYPE_H
1.1 daniel 17: #include <ctype.h>
1.5 daniel 18: #endif
19: #ifdef HAVE_STDLIB_H
1.1 daniel 20: #include <stdlib.h>
1.5 daniel 21: #endif
1.1 daniel 22:
1.4 daniel 23: #include "xmlmemory.h"
1.1 daniel 24: #include "HTMLparser.h"
25: #include "HTMLtree.h"
26: #include "entities.h"
27: #include "valid.h"
28:
29: /**
30: * htmlDtdDump:
31: * @buf: the HTML buffer output
32: * @doc: the document
33: *
34: * Dump the HTML document DTD, if any.
35: */
36: static void
37: htmlDtdDump(xmlBufferPtr buf, xmlDocPtr doc) {
38: xmlDtdPtr cur = doc->intSubset;
39:
40: if (cur == NULL) {
41: fprintf(stderr, "htmlDtdDump : no internal subset\n");
42: return;
43: }
44: xmlBufferWriteChar(buf, "<!DOCTYPE ");
45: xmlBufferWriteCHAR(buf, cur->name);
46: if (cur->ExternalID != NULL) {
47: xmlBufferWriteChar(buf, " PUBLIC ");
48: xmlBufferWriteQuotedString(buf, cur->ExternalID);
1.2 daniel 49: if (cur->SystemID != NULL) {
50: xmlBufferWriteChar(buf, " ");
51: xmlBufferWriteQuotedString(buf, cur->SystemID);
52: }
1.1 daniel 53: } else if (cur->SystemID != NULL) {
54: xmlBufferWriteChar(buf, " SYSTEM ");
55: xmlBufferWriteQuotedString(buf, cur->SystemID);
56: }
57: xmlBufferWriteChar(buf, ">\n");
58: }
59:
60: /**
61: * htmlAttrDump:
62: * @buf: the HTML buffer output
63: * @doc: the document
64: * @cur: the attribute pointer
65: *
66: * Dump an HTML attribute
67: */
68: static void
69: htmlAttrDump(xmlBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur) {
1.6 ! daniel 70: xmlChar *value;
1.1 daniel 71:
72: if (cur == NULL) {
73: fprintf(stderr, "htmlAttrDump : property == NULL\n");
74: return;
75: }
76: xmlBufferWriteChar(buf, " ");
77: xmlBufferWriteCHAR(buf, cur->name);
78: value = xmlNodeListGetString(doc, cur->val, 0);
79: if (value) {
80: xmlBufferWriteChar(buf, "=");
81: xmlBufferWriteQuotedString(buf, value);
1.4 daniel 82: xmlFree(value);
1.1 daniel 83: } else {
84: xmlBufferWriteChar(buf, "=\"\"");
85: }
86: }
87:
88: /**
89: * htmlAttrListDump:
90: * @buf: the HTML buffer output
91: * @doc: the document
92: * @cur: the first attribute pointer
93: *
94: * Dump a list of HTML attributes
95: */
96: static void
97: htmlAttrListDump(xmlBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur) {
98: if (cur == NULL) {
99: fprintf(stderr, "htmlAttrListDump : property == NULL\n");
100: return;
101: }
102: while (cur != NULL) {
103: htmlAttrDump(buf, doc, cur);
104: cur = cur->next;
105: }
106: }
107:
108:
109: static void
110: htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur);
111: /**
112: * htmlNodeListDump:
113: * @buf: the HTML buffer output
114: * @doc: the document
115: * @cur: the first node
116: *
117: * Dump an HTML node list, recursive behaviour,children are printed too.
118: */
119: static void
120: htmlNodeListDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
121: if (cur == NULL) {
122: fprintf(stderr, "htmlNodeListDump : node == NULL\n");
123: return;
124: }
125: while (cur != NULL) {
126: htmlNodeDump(buf, doc, cur);
127: cur = cur->next;
128: }
129: }
130:
131: /**
132: * htmlNodeDump:
133: * @buf: the HTML buffer output
134: * @doc: the document
135: * @cur: the current node
136: *
137: * Dump an HTML node, recursive behaviour,children are printed too.
138: */
139: static void
140: htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
141: htmlElemDescPtr info;
142:
143: if (cur == NULL) {
144: fprintf(stderr, "htmlNodeDump : node == NULL\n");
145: return;
146: }
147: /*
148: * Special cases.
149: */
150: if (cur->type == HTML_TEXT_NODE) {
151: if (cur->content != NULL) {
1.6 ! daniel 152: xmlChar *buffer;
1.1 daniel 153:
154: /* uses the HTML encoding routine !!!!!!!!!! */
155: buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
156: if (buffer != NULL) {
157: xmlBufferWriteCHAR(buf, buffer);
1.4 daniel 158: xmlFree(buffer);
1.1 daniel 159: }
160: }
161: return;
162: }
163: if (cur->type == HTML_COMMENT_NODE) {
164: if (cur->content != NULL) {
165: xmlBufferWriteChar(buf, "<!--");
166: xmlBufferWriteCHAR(buf, cur->content);
167: xmlBufferWriteChar(buf, "-->");
168: }
169: return;
170: }
171: if (cur->type == HTML_ENTITY_REF_NODE) {
172: xmlBufferWriteChar(buf, "&");
173: xmlBufferWriteCHAR(buf, cur->name);
174: xmlBufferWriteChar(buf, ";");
175: return;
176: }
177:
178: /*
179: * Get specific HTmL info for taht node.
180: */
181: info = htmlTagLookup(cur->name);
182:
183: xmlBufferWriteChar(buf, "<");
184: xmlBufferWriteCHAR(buf, cur->name);
185: if (cur->properties != NULL)
186: htmlAttrListDump(buf, doc, cur->properties);
187:
188: if (info->empty) {
189: xmlBufferWriteChar(buf, ">");
190: if (cur->next != NULL) {
191: if ((cur->next->type != HTML_TEXT_NODE) &&
192: (cur->next->type != HTML_ENTITY_REF_NODE))
193: xmlBufferWriteChar(buf, "\n");
194: }
195: return;
196: }
197: if ((cur->content == NULL) && (cur->childs == NULL)) {
198: if (info->endTag != 0)
199: xmlBufferWriteChar(buf, ">");
200: else {
201: xmlBufferWriteChar(buf, "></");
202: xmlBufferWriteCHAR(buf, cur->name);
203: xmlBufferWriteChar(buf, ">");
204: }
205: if (cur->next != NULL) {
206: if ((cur->next->type != HTML_TEXT_NODE) &&
207: (cur->next->type != HTML_ENTITY_REF_NODE))
208: xmlBufferWriteChar(buf, "\n");
209: }
210: return;
211: }
212: xmlBufferWriteChar(buf, ">");
213: if (cur->content != NULL) {
1.6 ! daniel 214: xmlChar *buffer;
1.1 daniel 215:
216: buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
217: if (buffer != NULL) {
218: xmlBufferWriteCHAR(buf, buffer);
1.4 daniel 219: xmlFree(buffer);
1.1 daniel 220: }
221: }
222: if (cur->childs != NULL) {
223: if ((cur->childs->type != HTML_TEXT_NODE) &&
224: (cur->childs->type != HTML_ENTITY_REF_NODE))
225: xmlBufferWriteChar(buf, "\n");
226: htmlNodeListDump(buf, doc, cur->childs);
227: if ((cur->last->type != HTML_TEXT_NODE) &&
228: (cur->last->type != HTML_ENTITY_REF_NODE))
229: xmlBufferWriteChar(buf, "\n");
230: }
231: xmlBufferWriteChar(buf, "</");
232: xmlBufferWriteCHAR(buf, cur->name);
233: xmlBufferWriteChar(buf, ">");
234: if (cur->next != NULL) {
235: if ((cur->next->type != HTML_TEXT_NODE) &&
236: (cur->next->type != HTML_ENTITY_REF_NODE))
237: xmlBufferWriteChar(buf, "\n");
238: }
239: }
240:
241: /**
242: * htmlDocContentDump:
243: * @buf: the HTML buffer output
244: * @cur: the document
245: *
246: * Dump an HTML document.
247: */
248: static void
249: htmlDocContentDump(xmlBufferPtr buf, xmlDocPtr cur) {
250: if (cur->intSubset != NULL)
251: htmlDtdDump(buf, cur);
252: if (cur->root != NULL) {
253: htmlNodeDump(buf, cur, cur->root);
254: }
255: xmlBufferWriteChar(buf, "\n");
256: }
257:
258: /**
259: * htmlDocDumpMemory:
260: * @cur: the document
261: * @mem: OUT: the memory pointer
262: * @size: OUT: the memory lenght
263: *
1.6 ! daniel 264: * Dump an HTML document in memory and return the xmlChar * and it's size.
1.1 daniel 265: * It's up to the caller to free the memory.
266: */
267: void
1.6 ! daniel 268: htmlDocDumpMemory(xmlDocPtr cur, xmlChar**mem, int *size) {
1.1 daniel 269: xmlBufferPtr buf;
270:
271: if (cur == NULL) {
272: #ifdef DEBUG_TREE
273: fprintf(stderr, "xmlDocDumpMemory : document == NULL\n");
274: #endif
275: *mem = NULL;
276: *size = 0;
277: return;
278: }
279: buf = xmlBufferCreate();
280: if (buf == NULL) {
281: *mem = NULL;
282: *size = 0;
283: return;
284: }
285: htmlDocContentDump(buf, cur);
286: *mem = buf->content;
287: *size = buf->use;
288: memset(buf, -1, sizeof(xmlBuffer));
1.4 daniel 289: xmlFree(buf);
1.1 daniel 290: }
291:
292:
293: /**
294: * htmlDocDump:
295: * @f: the FILE*
296: * @cur: the document
297: *
298: * Dump an HTML document to an open FILE.
299: */
300: void
301: htmlDocDump(FILE *f, xmlDocPtr cur) {
302: xmlBufferPtr buf;
303:
304: if (cur == NULL) {
305: #ifdef DEBUG_TREE
306: fprintf(stderr, "xmlDocDump : document == NULL\n");
307: #endif
308: return;
309: }
310: buf = xmlBufferCreate();
311: if (buf == NULL) return;
312: htmlDocContentDump(buf, cur);
313: xmlBufferDump(f, buf);
314: xmlBufferFree(buf);
315: }
316:
317: /**
318: * htmlSaveFile:
319: * @filename: the filename
320: * @cur: the document
321: *
322: * Dump an HTML document to a file.
323: *
324: * returns: the number of byte written or -1 in case of failure.
325: */
326: int
327: htmlSaveFile(const char *filename, xmlDocPtr cur) {
328: xmlBufferPtr buf;
329: FILE *output = NULL;
330: int ret;
331:
332: /*
333: * save the content to a temp buffer.
334: */
335: buf = xmlBufferCreate();
336: if (buf == NULL) return(0);
337: htmlDocContentDump(buf, cur);
338:
339: output = fopen(filename, "w");
340: if (output == NULL) return(-1);
341: ret = xmlBufferDump(output, buf);
342: fclose(output);
343:
344: xmlBufferFree(buf);
1.6 ! daniel 345: return(ret * sizeof(xmlChar));
1.1 daniel 346: }
347:
Webmaster