Annotation of XML/HTMLtree.c, revision 1.2
1.1 daniel 1: /*
2: * HTMLtree.c : implementation of access functions for an HTML tree.
3: *
4: * See Copyright for the status of this software.
5: *
6: * Daniel.Veillard@w3.org
7: */
8:
9: #include "config.h"
10: #include <stdio.h>
11: #include <ctype.h>
12: #include <stdlib.h>
13: #include <string.h> /* for memset() only ! */
14:
15: #include "HTMLparser.h"
16: #include "HTMLtree.h"
17: #include "entities.h"
18: #include "valid.h"
19:
20: /**
21: * htmlDtdDump:
22: * @buf: the HTML buffer output
23: * @doc: the document
24: *
25: * Dump the HTML document DTD, if any.
26: */
27: static void
28: htmlDtdDump(xmlBufferPtr buf, xmlDocPtr doc) {
29: xmlDtdPtr cur = doc->intSubset;
30:
31: if (cur == NULL) {
32: fprintf(stderr, "htmlDtdDump : no internal subset\n");
33: return;
34: }
35: xmlBufferWriteChar(buf, "<!DOCTYPE ");
36: xmlBufferWriteCHAR(buf, cur->name);
37: if (cur->ExternalID != NULL) {
38: xmlBufferWriteChar(buf, " PUBLIC ");
39: xmlBufferWriteQuotedString(buf, cur->ExternalID);
1.2 ! daniel 40: if (cur->SystemID != NULL) {
! 41: xmlBufferWriteChar(buf, " ");
! 42: xmlBufferWriteQuotedString(buf, cur->SystemID);
! 43: }
1.1 daniel 44: } else if (cur->SystemID != NULL) {
45: xmlBufferWriteChar(buf, " SYSTEM ");
46: xmlBufferWriteQuotedString(buf, cur->SystemID);
47: }
48: xmlBufferWriteChar(buf, ">\n");
49: }
50:
51: /**
52: * htmlAttrDump:
53: * @buf: the HTML buffer output
54: * @doc: the document
55: * @cur: the attribute pointer
56: *
57: * Dump an HTML attribute
58: */
59: static void
60: htmlAttrDump(xmlBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur) {
61: CHAR *value;
62:
63: if (cur == NULL) {
64: fprintf(stderr, "htmlAttrDump : property == NULL\n");
65: return;
66: }
67: xmlBufferWriteChar(buf, " ");
68: xmlBufferWriteCHAR(buf, cur->name);
69: value = xmlNodeListGetString(doc, cur->val, 0);
70: if (value) {
71: xmlBufferWriteChar(buf, "=");
72: xmlBufferWriteQuotedString(buf, value);
73: free(value);
74: } else {
75: xmlBufferWriteChar(buf, "=\"\"");
76: }
77: }
78:
79: /**
80: * htmlAttrListDump:
81: * @buf: the HTML buffer output
82: * @doc: the document
83: * @cur: the first attribute pointer
84: *
85: * Dump a list of HTML attributes
86: */
87: static void
88: htmlAttrListDump(xmlBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur) {
89: if (cur == NULL) {
90: fprintf(stderr, "htmlAttrListDump : property == NULL\n");
91: return;
92: }
93: while (cur != NULL) {
94: htmlAttrDump(buf, doc, cur);
95: cur = cur->next;
96: }
97: }
98:
99:
100: static void
101: htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur);
102: /**
103: * htmlNodeListDump:
104: * @buf: the HTML buffer output
105: * @doc: the document
106: * @cur: the first node
107: *
108: * Dump an HTML node list, recursive behaviour,children are printed too.
109: */
110: static void
111: htmlNodeListDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
112: if (cur == NULL) {
113: fprintf(stderr, "htmlNodeListDump : node == NULL\n");
114: return;
115: }
116: while (cur != NULL) {
117: htmlNodeDump(buf, doc, cur);
118: cur = cur->next;
119: }
120: }
121:
122: /**
123: * htmlNodeDump:
124: * @buf: the HTML buffer output
125: * @doc: the document
126: * @cur: the current node
127: *
128: * Dump an HTML node, recursive behaviour,children are printed too.
129: */
130: static void
131: htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
132: int i;
133: htmlElemDescPtr info;
134:
135: if (cur == NULL) {
136: fprintf(stderr, "htmlNodeDump : node == NULL\n");
137: return;
138: }
139: /*
140: * Special cases.
141: */
142: if (cur->type == HTML_TEXT_NODE) {
143: if (cur->content != NULL) {
144: CHAR *buffer;
145:
146: /* uses the HTML encoding routine !!!!!!!!!! */
147: buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
148: if (buffer != NULL) {
149: xmlBufferWriteCHAR(buf, buffer);
150: free(buffer);
151: }
152: }
153: return;
154: }
155: if (cur->type == HTML_COMMENT_NODE) {
156: if (cur->content != NULL) {
157: xmlBufferWriteChar(buf, "<!--");
158: xmlBufferWriteCHAR(buf, cur->content);
159: xmlBufferWriteChar(buf, "-->");
160: }
161: return;
162: }
163: if (cur->type == HTML_ENTITY_REF_NODE) {
164: xmlBufferWriteChar(buf, "&");
165: xmlBufferWriteCHAR(buf, cur->name);
166: xmlBufferWriteChar(buf, ";");
167: return;
168: }
169:
170: /*
171: * Get specific HTmL info for taht node.
172: */
173: info = htmlTagLookup(cur->name);
174:
175: xmlBufferWriteChar(buf, "<");
176: xmlBufferWriteCHAR(buf, cur->name);
177: if (cur->properties != NULL)
178: htmlAttrListDump(buf, doc, cur->properties);
179:
180: if (info->empty) {
181: xmlBufferWriteChar(buf, ">");
182: if (cur->next != NULL) {
183: if ((cur->next->type != HTML_TEXT_NODE) &&
184: (cur->next->type != HTML_ENTITY_REF_NODE))
185: xmlBufferWriteChar(buf, "\n");
186: }
187: return;
188: }
189: if ((cur->content == NULL) && (cur->childs == NULL)) {
190: if (info->endTag != 0)
191: xmlBufferWriteChar(buf, ">");
192: else {
193: xmlBufferWriteChar(buf, "></");
194: xmlBufferWriteCHAR(buf, cur->name);
195: xmlBufferWriteChar(buf, ">");
196: }
197: if (cur->next != NULL) {
198: if ((cur->next->type != HTML_TEXT_NODE) &&
199: (cur->next->type != HTML_ENTITY_REF_NODE))
200: xmlBufferWriteChar(buf, "\n");
201: }
202: return;
203: }
204: xmlBufferWriteChar(buf, ">");
205: if (cur->content != NULL) {
206: CHAR *buffer;
207:
208: buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
209: if (buffer != NULL) {
210: xmlBufferWriteCHAR(buf, buffer);
211: free(buffer);
212: }
213: }
214: if (cur->childs != NULL) {
215: if ((cur->childs->type != HTML_TEXT_NODE) &&
216: (cur->childs->type != HTML_ENTITY_REF_NODE))
217: xmlBufferWriteChar(buf, "\n");
218: htmlNodeListDump(buf, doc, cur->childs);
219: if ((cur->last->type != HTML_TEXT_NODE) &&
220: (cur->last->type != HTML_ENTITY_REF_NODE))
221: xmlBufferWriteChar(buf, "\n");
222: }
223: xmlBufferWriteChar(buf, "</");
224: xmlBufferWriteCHAR(buf, cur->name);
225: xmlBufferWriteChar(buf, ">");
226: if (cur->next != NULL) {
227: if ((cur->next->type != HTML_TEXT_NODE) &&
228: (cur->next->type != HTML_ENTITY_REF_NODE))
229: xmlBufferWriteChar(buf, "\n");
230: }
231: }
232:
233: /**
234: * htmlDocContentDump:
235: * @buf: the HTML buffer output
236: * @cur: the document
237: *
238: * Dump an HTML document.
239: */
240: static void
241: htmlDocContentDump(xmlBufferPtr buf, xmlDocPtr cur) {
242: if (cur->intSubset != NULL)
243: htmlDtdDump(buf, cur);
244: if (cur->root != NULL) {
245: htmlNodeDump(buf, cur, cur->root);
246: }
247: xmlBufferWriteChar(buf, "\n");
248: }
249:
250: /**
251: * htmlDocDumpMemory:
252: * @cur: the document
253: * @mem: OUT: the memory pointer
254: * @size: OUT: the memory lenght
255: *
256: * Dump an HTML document in memory and return the CHAR * and it's size.
257: * It's up to the caller to free the memory.
258: */
259: void
260: htmlDocDumpMemory(xmlDocPtr cur, CHAR**mem, int *size) {
261: xmlBufferPtr buf;
262:
263: if (cur == NULL) {
264: #ifdef DEBUG_TREE
265: fprintf(stderr, "xmlDocDumpMemory : document == NULL\n");
266: #endif
267: *mem = NULL;
268: *size = 0;
269: return;
270: }
271: buf = xmlBufferCreate();
272: if (buf == NULL) {
273: *mem = NULL;
274: *size = 0;
275: return;
276: }
277: htmlDocContentDump(buf, cur);
278: *mem = buf->content;
279: *size = buf->use;
280: memset(buf, -1, sizeof(xmlBuffer));
281: free(buf);
282: }
283:
284:
285: /**
286: * htmlDocDump:
287: * @f: the FILE*
288: * @cur: the document
289: *
290: * Dump an HTML document to an open FILE.
291: */
292: void
293: htmlDocDump(FILE *f, xmlDocPtr cur) {
294: xmlBufferPtr buf;
295:
296: if (cur == NULL) {
297: #ifdef DEBUG_TREE
298: fprintf(stderr, "xmlDocDump : document == NULL\n");
299: #endif
300: return;
301: }
302: buf = xmlBufferCreate();
303: if (buf == NULL) return;
304: htmlDocContentDump(buf, cur);
305: xmlBufferDump(f, buf);
306: xmlBufferFree(buf);
307: }
308:
309: /**
310: * htmlSaveFile:
311: * @filename: the filename
312: * @cur: the document
313: *
314: * Dump an HTML document to a file.
315: *
316: * returns: the number of byte written or -1 in case of failure.
317: */
318: int
319: htmlSaveFile(const char *filename, xmlDocPtr cur) {
320: xmlBufferPtr buf;
321: FILE *output = NULL;
322: int ret;
323:
324: /*
325: * save the content to a temp buffer.
326: */
327: buf = xmlBufferCreate();
328: if (buf == NULL) return(0);
329: htmlDocContentDump(buf, cur);
330:
331: output = fopen(filename, "w");
332: if (output == NULL) return(-1);
333: ret = xmlBufferDump(output, buf);
334: fclose(output);
335:
336: xmlBufferFree(buf);
337: return(ret * sizeof(CHAR));
338: }
339:
Webmaster