Annotation of XML/HTMLtree.c, revision 1.1
1.1 ! daniel 1: /*
! 2: * HTMLtree.c : implementation of access functions for an HTML tree.
! 3: *
! 4: * See Copyright for the status of this software.
! 5: *
! 6: * Daniel.Veillard@w3.org
! 7: */
! 8:
! 9: #include "config.h"
! 10: #include <stdio.h>
! 11: #include <ctype.h>
! 12: #include <stdlib.h>
! 13: #include <string.h> /* for memset() only ! */
! 14:
! 15: #include "HTMLparser.h"
! 16: #include "HTMLtree.h"
! 17: #include "entities.h"
! 18: #include "valid.h"
! 19:
! 20: /**
! 21: * htmlDtdDump:
! 22: * @buf: the HTML buffer output
! 23: * @doc: the document
! 24: *
! 25: * Dump the HTML document DTD, if any.
! 26: */
! 27: static void
! 28: htmlDtdDump(xmlBufferPtr buf, xmlDocPtr doc) {
! 29: xmlDtdPtr cur = doc->intSubset;
! 30:
! 31: if (cur == NULL) {
! 32: fprintf(stderr, "htmlDtdDump : no internal subset\n");
! 33: return;
! 34: }
! 35: xmlBufferWriteChar(buf, "<!DOCTYPE ");
! 36: xmlBufferWriteCHAR(buf, cur->name);
! 37: if (cur->ExternalID != NULL) {
! 38: xmlBufferWriteChar(buf, " PUBLIC ");
! 39: xmlBufferWriteQuotedString(buf, cur->ExternalID);
! 40: xmlBufferWriteChar(buf, " ");
! 41: xmlBufferWriteQuotedString(buf, cur->SystemID);
! 42: } else if (cur->SystemID != NULL) {
! 43: xmlBufferWriteChar(buf, " SYSTEM ");
! 44: xmlBufferWriteQuotedString(buf, cur->SystemID);
! 45: }
! 46: xmlBufferWriteChar(buf, ">\n");
! 47: }
! 48:
! 49: /**
! 50: * htmlAttrDump:
! 51: * @buf: the HTML buffer output
! 52: * @doc: the document
! 53: * @cur: the attribute pointer
! 54: *
! 55: * Dump an HTML attribute
! 56: */
! 57: static void
! 58: htmlAttrDump(xmlBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur) {
! 59: CHAR *value;
! 60:
! 61: if (cur == NULL) {
! 62: fprintf(stderr, "htmlAttrDump : property == NULL\n");
! 63: return;
! 64: }
! 65: xmlBufferWriteChar(buf, " ");
! 66: xmlBufferWriteCHAR(buf, cur->name);
! 67: value = xmlNodeListGetString(doc, cur->val, 0);
! 68: if (value) {
! 69: xmlBufferWriteChar(buf, "=");
! 70: xmlBufferWriteQuotedString(buf, value);
! 71: free(value);
! 72: } else {
! 73: xmlBufferWriteChar(buf, "=\"\"");
! 74: }
! 75: }
! 76:
! 77: /**
! 78: * htmlAttrListDump:
! 79: * @buf: the HTML buffer output
! 80: * @doc: the document
! 81: * @cur: the first attribute pointer
! 82: *
! 83: * Dump a list of HTML attributes
! 84: */
! 85: static void
! 86: htmlAttrListDump(xmlBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur) {
! 87: if (cur == NULL) {
! 88: fprintf(stderr, "htmlAttrListDump : property == NULL\n");
! 89: return;
! 90: }
! 91: while (cur != NULL) {
! 92: htmlAttrDump(buf, doc, cur);
! 93: cur = cur->next;
! 94: }
! 95: }
! 96:
! 97:
! 98: static void
! 99: htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur);
! 100: /**
! 101: * htmlNodeListDump:
! 102: * @buf: the HTML buffer output
! 103: * @doc: the document
! 104: * @cur: the first node
! 105: *
! 106: * Dump an HTML node list, recursive behaviour,children are printed too.
! 107: */
! 108: static void
! 109: htmlNodeListDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
! 110: if (cur == NULL) {
! 111: fprintf(stderr, "htmlNodeListDump : node == NULL\n");
! 112: return;
! 113: }
! 114: while (cur != NULL) {
! 115: htmlNodeDump(buf, doc, cur);
! 116: cur = cur->next;
! 117: }
! 118: }
! 119:
! 120: /**
! 121: * htmlNodeDump:
! 122: * @buf: the HTML buffer output
! 123: * @doc: the document
! 124: * @cur: the current node
! 125: *
! 126: * Dump an HTML node, recursive behaviour,children are printed too.
! 127: */
! 128: static void
! 129: htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
! 130: int i;
! 131: htmlElemDescPtr info;
! 132:
! 133: if (cur == NULL) {
! 134: fprintf(stderr, "htmlNodeDump : node == NULL\n");
! 135: return;
! 136: }
! 137: /*
! 138: * Special cases.
! 139: */
! 140: if (cur->type == HTML_TEXT_NODE) {
! 141: if (cur->content != NULL) {
! 142: CHAR *buffer;
! 143:
! 144: /* uses the HTML encoding routine !!!!!!!!!! */
! 145: buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
! 146: if (buffer != NULL) {
! 147: xmlBufferWriteCHAR(buf, buffer);
! 148: free(buffer);
! 149: }
! 150: }
! 151: return;
! 152: }
! 153: if (cur->type == HTML_COMMENT_NODE) {
! 154: if (cur->content != NULL) {
! 155: xmlBufferWriteChar(buf, "<!--");
! 156: xmlBufferWriteCHAR(buf, cur->content);
! 157: xmlBufferWriteChar(buf, "-->");
! 158: }
! 159: return;
! 160: }
! 161: if (cur->type == HTML_ENTITY_REF_NODE) {
! 162: xmlBufferWriteChar(buf, "&");
! 163: xmlBufferWriteCHAR(buf, cur->name);
! 164: xmlBufferWriteChar(buf, ";");
! 165: return;
! 166: }
! 167:
! 168: /*
! 169: * Get specific HTmL info for taht node.
! 170: */
! 171: info = htmlTagLookup(cur->name);
! 172:
! 173: xmlBufferWriteChar(buf, "<");
! 174: xmlBufferWriteCHAR(buf, cur->name);
! 175: if (cur->properties != NULL)
! 176: htmlAttrListDump(buf, doc, cur->properties);
! 177:
! 178: if (info->empty) {
! 179: xmlBufferWriteChar(buf, ">");
! 180: if (cur->next != NULL) {
! 181: if ((cur->next->type != HTML_TEXT_NODE) &&
! 182: (cur->next->type != HTML_ENTITY_REF_NODE))
! 183: xmlBufferWriteChar(buf, "\n");
! 184: }
! 185: return;
! 186: }
! 187: if ((cur->content == NULL) && (cur->childs == NULL)) {
! 188: if (info->endTag != 0)
! 189: xmlBufferWriteChar(buf, ">");
! 190: else {
! 191: xmlBufferWriteChar(buf, "></");
! 192: xmlBufferWriteCHAR(buf, cur->name);
! 193: xmlBufferWriteChar(buf, ">");
! 194: }
! 195: if (cur->next != NULL) {
! 196: if ((cur->next->type != HTML_TEXT_NODE) &&
! 197: (cur->next->type != HTML_ENTITY_REF_NODE))
! 198: xmlBufferWriteChar(buf, "\n");
! 199: }
! 200: return;
! 201: }
! 202: xmlBufferWriteChar(buf, ">");
! 203: if (cur->content != NULL) {
! 204: CHAR *buffer;
! 205:
! 206: buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
! 207: if (buffer != NULL) {
! 208: xmlBufferWriteCHAR(buf, buffer);
! 209: free(buffer);
! 210: }
! 211: }
! 212: if (cur->childs != NULL) {
! 213: if ((cur->childs->type != HTML_TEXT_NODE) &&
! 214: (cur->childs->type != HTML_ENTITY_REF_NODE))
! 215: xmlBufferWriteChar(buf, "\n");
! 216: htmlNodeListDump(buf, doc, cur->childs);
! 217: if ((cur->last->type != HTML_TEXT_NODE) &&
! 218: (cur->last->type != HTML_ENTITY_REF_NODE))
! 219: xmlBufferWriteChar(buf, "\n");
! 220: }
! 221: xmlBufferWriteChar(buf, "</");
! 222: xmlBufferWriteCHAR(buf, cur->name);
! 223: xmlBufferWriteChar(buf, ">");
! 224: if (cur->next != NULL) {
! 225: if ((cur->next->type != HTML_TEXT_NODE) &&
! 226: (cur->next->type != HTML_ENTITY_REF_NODE))
! 227: xmlBufferWriteChar(buf, "\n");
! 228: }
! 229: }
! 230:
! 231: /**
! 232: * htmlDocContentDump:
! 233: * @buf: the HTML buffer output
! 234: * @cur: the document
! 235: *
! 236: * Dump an HTML document.
! 237: */
! 238: static void
! 239: htmlDocContentDump(xmlBufferPtr buf, xmlDocPtr cur) {
! 240: if (cur->intSubset != NULL)
! 241: htmlDtdDump(buf, cur);
! 242: if (cur->root != NULL) {
! 243: htmlNodeDump(buf, cur, cur->root);
! 244: }
! 245: xmlBufferWriteChar(buf, "\n");
! 246: }
! 247:
! 248: /**
! 249: * htmlDocDumpMemory:
! 250: * @cur: the document
! 251: * @mem: OUT: the memory pointer
! 252: * @size: OUT: the memory lenght
! 253: *
! 254: * Dump an HTML document in memory and return the CHAR * and it's size.
! 255: * It's up to the caller to free the memory.
! 256: */
! 257: void
! 258: htmlDocDumpMemory(xmlDocPtr cur, CHAR**mem, int *size) {
! 259: xmlBufferPtr buf;
! 260:
! 261: if (cur == NULL) {
! 262: #ifdef DEBUG_TREE
! 263: fprintf(stderr, "xmlDocDumpMemory : document == NULL\n");
! 264: #endif
! 265: *mem = NULL;
! 266: *size = 0;
! 267: return;
! 268: }
! 269: buf = xmlBufferCreate();
! 270: if (buf == NULL) {
! 271: *mem = NULL;
! 272: *size = 0;
! 273: return;
! 274: }
! 275: htmlDocContentDump(buf, cur);
! 276: *mem = buf->content;
! 277: *size = buf->use;
! 278: memset(buf, -1, sizeof(xmlBuffer));
! 279: free(buf);
! 280: }
! 281:
! 282:
! 283: /**
! 284: * htmlDocDump:
! 285: * @f: the FILE*
! 286: * @cur: the document
! 287: *
! 288: * Dump an HTML document to an open FILE.
! 289: */
! 290: void
! 291: htmlDocDump(FILE *f, xmlDocPtr cur) {
! 292: xmlBufferPtr buf;
! 293:
! 294: if (cur == NULL) {
! 295: #ifdef DEBUG_TREE
! 296: fprintf(stderr, "xmlDocDump : document == NULL\n");
! 297: #endif
! 298: return;
! 299: }
! 300: buf = xmlBufferCreate();
! 301: if (buf == NULL) return;
! 302: htmlDocContentDump(buf, cur);
! 303: xmlBufferDump(f, buf);
! 304: xmlBufferFree(buf);
! 305: }
! 306:
! 307: /**
! 308: * htmlSaveFile:
! 309: * @filename: the filename
! 310: * @cur: the document
! 311: *
! 312: * Dump an HTML document to a file.
! 313: *
! 314: * returns: the number of byte written or -1 in case of failure.
! 315: */
! 316: int
! 317: htmlSaveFile(const char *filename, xmlDocPtr cur) {
! 318: xmlBufferPtr buf;
! 319: FILE *output = NULL;
! 320: int ret;
! 321:
! 322: /*
! 323: * save the content to a temp buffer.
! 324: */
! 325: buf = xmlBufferCreate();
! 326: if (buf == NULL) return(0);
! 327: htmlDocContentDump(buf, cur);
! 328:
! 329: output = fopen(filename, "w");
! 330: if (output == NULL) return(-1);
! 331: ret = xmlBufferDump(output, buf);
! 332: fclose(output);
! 333:
! 334: xmlBufferFree(buf);
! 335: return(ret * sizeof(CHAR));
! 336: }
! 337:
Webmaster