/* * HTMLtree.c : implemetation of access function for an HTML tree. * * See Copyright for the status of this software. * * Daniel.Veillard@w3.org */ #ifdef WIN32 #include "win32config.h" #else #include "config.h" #endif #include #ifdef LIBXML_HTML_ENABLED #include #include /* for memset() only ! */ #ifdef HAVE_CTYPE_H #include #endif #ifdef HAVE_STDLIB_H #include #endif #include #include #include #include #include #include /************************************************************************ * * * Getting/Setting encoding meta tags * * * ************************************************************************/ /** * htmlGetMetaEncoding: * @doc: the document * * Encoding definition lookup in the Meta tags * * Returns the current encoding as flagged in the HTML source */ const xmlChar * htmlGetMetaEncoding(htmlDocPtr doc) { htmlNodePtr cur; const xmlChar *content; const xmlChar *encoding; if (doc == NULL) return(NULL); cur = doc->children; /* * Search the html */ while (cur != NULL) { if (cur->name != NULL) { if (xmlStrEqual(cur->name, BAD_CAST"html")) break; if (xmlStrEqual(cur->name, BAD_CAST"head")) goto found_head; if (xmlStrEqual(cur->name, BAD_CAST"meta")) goto found_meta; } cur = cur->next; } if (cur == NULL) return(NULL); cur = cur->children; /* * Search the head */ while (cur != NULL) { if (cur->name != NULL) { if (xmlStrEqual(cur->name, BAD_CAST"head")) break; if (xmlStrEqual(cur->name, BAD_CAST"meta")) goto found_meta; } cur = cur->next; } if (cur == NULL) return(NULL); found_head: cur = cur->children; /* * Search the meta elements */ found_meta: while (cur != NULL) { if (cur->name != NULL) { if (xmlStrEqual(cur->name, BAD_CAST"meta")) { xmlAttrPtr attr = cur->properties; int http; const xmlChar *value; content = NULL; http = 0; while (attr != NULL) { if ((attr->children != NULL) && (attr->children->type == XML_TEXT_NODE) && (attr->children->next == NULL)) { #ifndef XML_USE_BUFFER_CONTENT value = attr->children->content; #else value = xmlBufferContent(attr->children->content); #endif if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv")) && (!xmlStrcasecmp(value, BAD_CAST"Content-Type"))) http = 1; else if ((value != NULL) && (!xmlStrcasecmp(attr->name, BAD_CAST"content"))) content = value; if ((http != 0) && (content != NULL)) goto found_content; } attr = attr->next; } } } cur = cur->next; } return(NULL); found_content: encoding = xmlStrstr(content, BAD_CAST"charset="); if (encoding == NULL) encoding = xmlStrstr(content, BAD_CAST"Charset="); if (encoding == NULL) encoding = xmlStrstr(content, BAD_CAST"CHARSET="); if (encoding != NULL) { encoding += 8; } else { encoding = xmlStrstr(content, BAD_CAST"charset ="); if (encoding == NULL) encoding = xmlStrstr(content, BAD_CAST"Charset ="); if (encoding == NULL) encoding = xmlStrstr(content, BAD_CAST"CHARSET ="); if (encoding != NULL) encoding += 9; } if (encoding != NULL) { while ((*encoding == ' ') || (*encoding == '\t')) encoding++; } return(encoding); } /** * htmlSetMetaEncoding: * @doc: the document * @encoding: the encoding string * * Sets the current encoding in the Meta tags * NOTE: this will not change the document content encoding, just * the META flag associated. * * Returns 0 in case of success and -1 in case of error */ int htmlSetMetaEncoding(htmlDocPtr doc, const xmlChar *encoding) { htmlNodePtr cur, meta; const xmlChar *content; char newcontent[100]; if (doc == NULL) return(-1); if (encoding != NULL) { #ifdef HAVE_SNPRINTF snprintf(newcontent, sizeof(newcontent), "text/html; charset=%s", encoding); #else sprintf(newcontent, "text/html; charset=%s", encoding); #endif newcontent[sizeof(newcontent) - 1] = 0; } cur = doc->children; /* * Search the html */ while (cur != NULL) { if (cur->name != NULL) { if (xmlStrEqual(cur->name, BAD_CAST"html")) break; if (xmlStrEqual(cur->name, BAD_CAST"body")) { if (encoding == NULL) return(0); meta = xmlNewDocNode(doc, NULL, BAD_CAST"head", NULL); xmlAddPrevSibling(cur, meta); cur = meta; meta = xmlNewDocNode(doc, NULL, BAD_CAST"meta", NULL); xmlAddChild(cur, meta); xmlNewProp(meta, BAD_CAST"http-equiv", BAD_CAST"Content-Type"); xmlNewProp(meta, BAD_CAST"content", BAD_CAST newcontent); return(0); } if (xmlStrEqual(cur->name, BAD_CAST"head")) goto found_head; if (xmlStrEqual(cur->name, BAD_CAST"meta")) goto found_meta; } cur = cur->next; } if (cur == NULL) return(-1); cur = cur->children; /* * Search the head */ while (cur != NULL) { if (cur->name != NULL) { if (xmlStrEqual(cur->name, BAD_CAST"head")) break; if (xmlStrEqual(cur->name, BAD_CAST"body")) { if (encoding == NULL) return(0); meta = xmlNewDocNode(doc, NULL, BAD_CAST"head", NULL); xmlAddPrevSibling(cur, meta); cur = meta; meta = xmlNewDocNode(doc, NULL, BAD_CAST"meta", NULL); xmlAddChild(cur, meta); xmlNewProp(meta, BAD_CAST"http-equiv", BAD_CAST"Content-Type"); xmlNewProp(meta, BAD_CAST"content", BAD_CAST newcontent); return(0); } if (xmlStrEqual(cur->name, BAD_CAST"meta")) goto found_meta; } cur = cur->next; } if (cur == NULL) return(-1); found_head: if (cur->children == NULL) { if (encoding == NULL) return(0); meta = xmlNewDocNode(doc, NULL, BAD_CAST"meta", NULL); xmlAddChild(cur, meta); xmlNewProp(meta, BAD_CAST"http-equiv", BAD_CAST"Content-Type"); xmlNewProp(meta, BAD_CAST"content", BAD_CAST newcontent); return(0); } cur = cur->children; found_meta: if (encoding != NULL) { /* * Create a new Meta element with the right aatributes */ meta = xmlNewDocNode(doc, NULL, BAD_CAST"meta", NULL); xmlAddPrevSibling(cur, meta); xmlNewProp(meta, BAD_CAST"http-equiv", BAD_CAST"Content-Type"); xmlNewProp(meta, BAD_CAST"content", BAD_CAST newcontent); } /* * Search and destroy all the remaining the meta elements carrying * encoding informations */ while (cur != NULL) { if (cur->name != NULL) { if (xmlStrEqual(cur->name, BAD_CAST"meta")) { xmlAttrPtr attr = cur->properties; int http; const xmlChar *value; content = NULL; http = 0; while (attr != NULL) { if ((attr->children != NULL) && (attr->children->type == XML_TEXT_NODE) && (attr->children->next == NULL)) { #ifndef XML_USE_BUFFER_CONTENT value = attr->children->content; #else value = xmlBufferContent(attr->children->content); #endif if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv")) && (!xmlStrcasecmp(value, BAD_CAST"Content-Type"))) http = 1; else if ((value != NULL) && (!xmlStrcasecmp(attr->name, BAD_CAST"content"))) content = value; if ((http != 0) && (content != NULL)) break; } attr = attr->next; } if ((http != 0) && (content != NULL)) { meta = cur; cur = cur->next; xmlUnlinkNode(meta); xmlFreeNode(meta); continue; } } } cur = cur->next; } return(0); } /************************************************************************ * * * Dumping HTML tree content to a simple buffer * * * ************************************************************************/ static void htmlDocContentDump(xmlBufferPtr buf, xmlDocPtr cur); /** * htmlDtdDump: * @buf: the HTML buffer output * @doc: the document * * Dump the HTML document DTD, if any. */ static void htmlDtdDump(xmlBufferPtr buf, xmlDocPtr doc) { xmlDtdPtr cur = doc->intSubset; if (cur == NULL) { xmlGenericError(xmlGenericErrorContext, "htmlDtdDump : no internal subset\n"); return; } xmlBufferWriteChar(buf, "name); if (cur->ExternalID != NULL) { xmlBufferWriteChar(buf, " PUBLIC "); xmlBufferWriteQuotedString(buf, cur->ExternalID); if (cur->SystemID != NULL) { xmlBufferWriteChar(buf, " "); xmlBufferWriteQuotedString(buf, cur->SystemID); } } else if (cur->SystemID != NULL) { xmlBufferWriteChar(buf, " SYSTEM "); xmlBufferWriteQuotedString(buf, cur->SystemID); } xmlBufferWriteChar(buf, ">\n"); } /** * htmlAttrDump: * @buf: the HTML buffer output * @doc: the document * @cur: the attribute pointer * * Dump an HTML attribute */ static void htmlAttrDump(xmlBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur) { xmlChar *value; if (cur == NULL) { xmlGenericError(xmlGenericErrorContext, "htmlAttrDump : property == NULL\n"); return; } xmlBufferWriteChar(buf, " "); xmlBufferWriteCHAR(buf, cur->name); if (cur->children != NULL) { value = xmlNodeListGetString(doc, cur->children, 0); if (value) { xmlBufferWriteChar(buf, "="); xmlBufferWriteQuotedString(buf, value); xmlFree(value); } else { xmlBufferWriteChar(buf, "=\"\""); } } } /** * htmlAttrListDump: * @buf: the HTML buffer output * @doc: the document * @cur: the first attribute pointer * * Dump a list of HTML attributes */ static void htmlAttrListDump(xmlBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur) { if (cur == NULL) { xmlGenericError(xmlGenericErrorContext, "htmlAttrListDump : property == NULL\n"); return; } while (cur != NULL) { htmlAttrDump(buf, doc, cur); cur = cur->next; } } void htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur); /** * htmlNodeListDump: * @buf: the HTML buffer output * @doc: the document * @cur: the first node * * Dump an HTML node list, recursive behaviour,children are printed too. */ static void htmlNodeListDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) { if (cur == NULL) { xmlGenericError(xmlGenericErrorContext, "htmlNodeListDump : node == NULL\n"); return; } while (cur != NULL) { htmlNodeDump(buf, doc, cur); cur = cur->next; } } /** * htmlNodeDump: * @buf: the HTML buffer output * @doc: the document * @cur: the current node * * Dump an HTML node, recursive behaviour,children are printed too. */ void htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) { htmlElemDescPtr info; if (cur == NULL) { xmlGenericError(xmlGenericErrorContext, "htmlNodeDump : node == NULL\n"); return; } /* * Special cases. */ if (cur->type == XML_DTD_NODE) return; if (cur->type == XML_HTML_DOCUMENT_NODE) { htmlDocContentDump(buf, (xmlDocPtr) cur); return; } if (cur->type == HTML_TEXT_NODE) { if (cur->content != NULL) { xmlChar *buffer; #ifndef XML_USE_BUFFER_CONTENT buffer = xmlEncodeEntitiesReentrant(doc, cur->content); #else buffer = xmlEncodeEntitiesReentrant(doc, xmlBufferContent(cur->content)); #endif if (buffer != NULL) { xmlBufferWriteCHAR(buf, buffer); xmlFree(buffer); } } return; } if (cur->type == HTML_COMMENT_NODE) { if (cur->content != NULL) { xmlBufferWriteChar(buf, ""); } return; } if (cur->type == HTML_ENTITY_REF_NODE) { xmlBufferWriteChar(buf, "&"); xmlBufferWriteCHAR(buf, cur->name); xmlBufferWriteChar(buf, ";"); return; } /* * Get specific HTmL info for taht node. */ info = htmlTagLookup(cur->name); xmlBufferWriteChar(buf, "<"); xmlBufferWriteCHAR(buf, cur->name); if (cur->properties != NULL) htmlAttrListDump(buf, doc, cur->properties); if ((info != NULL) && (info->empty)) { xmlBufferWriteChar(buf, ">"); if (cur->next != NULL) { if ((cur->next->type != HTML_TEXT_NODE) && (cur->next->type != HTML_ENTITY_REF_NODE)) xmlBufferWriteChar(buf, "\n"); } return; } if ((cur->content == NULL) && (cur->children == NULL)) { if ((info != NULL) && (info->endTag != 0)) xmlBufferWriteChar(buf, ">"); else { xmlBufferWriteChar(buf, ">name); xmlBufferWriteChar(buf, ">"); } if (cur->next != NULL) { if ((cur->next->type != HTML_TEXT_NODE) && (cur->next->type != HTML_ENTITY_REF_NODE)) xmlBufferWriteChar(buf, "\n"); } return; } xmlBufferWriteChar(buf, ">"); if (cur->content != NULL) { xmlChar *buffer; #ifndef XML_USE_BUFFER_CONTENT buffer = xmlEncodeEntitiesReentrant(doc, cur->content); #else buffer = xmlEncodeEntitiesReentrant(doc, xmlBufferContent(cur->content)); #endif if (buffer != NULL) { xmlBufferWriteCHAR(buf, buffer); xmlFree(buffer); } } if (cur->children != NULL) { if ((cur->children->type != HTML_TEXT_NODE) && (cur->children->type != HTML_ENTITY_REF_NODE) && (cur->children != cur->last)) xmlBufferWriteChar(buf, "\n"); htmlNodeListDump(buf, doc, cur->children); if ((cur->last->type != HTML_TEXT_NODE) && (cur->last->type != HTML_ENTITY_REF_NODE) && (cur->children != cur->last)) xmlBufferWriteChar(buf, "\n"); } if (!htmlIsAutoClosed(doc, cur)) { xmlBufferWriteChar(buf, "name); xmlBufferWriteChar(buf, ">"); } if (cur->next != NULL) { if ((cur->next->type != HTML_TEXT_NODE) && (cur->next->type != HTML_ENTITY_REF_NODE)) xmlBufferWriteChar(buf, "\n"); } } /** * htmlNodeDumpFile: * @out: the FILE pointer * @doc: the document * @cur: the current node * * Dump an HTML node, recursive behaviour,children are printed too. */ void htmlNodeDumpFile(FILE *out, xmlDocPtr doc, xmlNodePtr cur) { xmlBufferPtr buf; buf = xmlBufferCreate(); if (buf == NULL) return; htmlNodeDump(buf, doc, cur); xmlBufferDump(out, buf); xmlBufferFree(buf); } /** * htmlDocContentDump: * @buf: the HTML buffer output * @cur: the document * * Dump an HTML document. */ static void htmlDocContentDump(xmlBufferPtr buf, xmlDocPtr cur) { int type; /* * force to output the stuff as HTML, especially for entities */ type = cur->type; cur->type = XML_HTML_DOCUMENT_NODE; if (cur->intSubset != NULL) htmlDtdDump(buf, cur); else { /* Default to HTML-4.0 transitionnal @@@@ */ xmlBufferWriteChar(buf, ""); } if (cur->children != NULL) { htmlNodeListDump(buf, cur, cur->children); } xmlBufferWriteChar(buf, "\n"); cur->type = (xmlElementType) type; } /** * htmlDocDumpMemory: * @cur: the document * @mem: OUT: the memory pointer * @size: OUT: the memory lenght * * Dump an HTML document in memory and return the xmlChar * and it's size. * It's up to the caller to free the memory. */ void htmlDocDumpMemory(xmlDocPtr cur, xmlChar**mem, int *size) { xmlBufferPtr buf; if (cur == NULL) { #ifdef DEBUG_TREE xmlGenericError(xmlGenericErrorContext, "htmlxmlDocDumpMemory : document == NULL\n"); #endif *mem = NULL; *size = 0; return; } buf = xmlBufferCreate(); if (buf == NULL) { *mem = NULL; *size = 0; return; } htmlDocContentDump(buf, cur); *mem = buf->content; *size = buf->use; memset(buf, -1, sizeof(xmlBuffer)); xmlFree(buf); } /************************************************************************ * * * Dumping HTML tree content to an I/O output buffer * * * ************************************************************************/ static void htmlDocContentDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr cur, const char *encoding); /** * htmlDtdDump: * @buf: the HTML buffer output * @doc: the document * * Dump the HTML document DTD, if any. */ static void htmlDtdDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, const char *encoding) { xmlDtdPtr cur = doc->intSubset; if (cur == NULL) { xmlGenericError(xmlGenericErrorContext, "htmlDtdDump : no internal subset\n"); return; } xmlOutputBufferWriteString(buf, "name); if (cur->ExternalID != NULL) { xmlOutputBufferWriteString(buf, " PUBLIC "); xmlBufferWriteQuotedString(buf->buffer, cur->ExternalID); if (cur->SystemID != NULL) { xmlOutputBufferWriteString(buf, " "); xmlBufferWriteQuotedString(buf->buffer, cur->SystemID); } } else if (cur->SystemID != NULL) { xmlOutputBufferWriteString(buf, " SYSTEM "); xmlBufferWriteQuotedString(buf->buffer, cur->SystemID); } xmlOutputBufferWriteString(buf, ">\n"); } /** * htmlAttrDump: * @buf: the HTML buffer output * @doc: the document * @cur: the attribute pointer * * Dump an HTML attribute */ static void htmlAttrDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur, const char *encoding) { xmlChar *value; if (cur == NULL) { xmlGenericError(xmlGenericErrorContext, "htmlAttrDump : property == NULL\n"); return; } xmlOutputBufferWriteString(buf, " "); xmlOutputBufferWriteString(buf, (const char *)cur->name); if (cur->children != NULL) { value = xmlNodeListGetString(doc, cur->children, 0); if (value) { xmlOutputBufferWriteString(buf, "="); xmlBufferWriteQuotedString(buf->buffer, value); xmlFree(value); } else { xmlOutputBufferWriteString(buf, "=\"\""); } } } /** * htmlAttrListDump: * @buf: the HTML buffer output * @doc: the document * @cur: the first attribute pointer * * Dump a list of HTML attributes */ static void htmlAttrListDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur, const char *encoding) { if (cur == NULL) { xmlGenericError(xmlGenericErrorContext, "htmlAttrListDump : property == NULL\n"); return; } while (cur != NULL) { htmlAttrDumpOutput(buf, doc, cur, encoding); cur = cur->next; } } void htmlNodeDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur, const char *encoding); /** * htmlNodeListDump: * @buf: the HTML buffer output * @doc: the document * @cur: the first node * * Dump an HTML node list, recursive behaviour,children are printed too. */ static void htmlNodeListDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur, const char *encoding) { if (cur == NULL) { xmlGenericError(xmlGenericErrorContext, "htmlNodeListDump : node == NULL\n"); return; } while (cur != NULL) { htmlNodeDumpOutput(buf, doc, cur, encoding); cur = cur->next; } } /** * htmlNodeDump: * @buf: the HTML buffer output * @doc: the document * @cur: the current node * * Dump an HTML node, recursive behaviour,children are printed too. */ void htmlNodeDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur, const char *encoding) { htmlElemDescPtr info; if (cur == NULL) { xmlGenericError(xmlGenericErrorContext, "htmlNodeDump : node == NULL\n"); return; } /* * Special cases. */ if (cur->type == XML_DTD_NODE) return; if (cur->type == XML_HTML_DOCUMENT_NODE) { htmlDocContentDumpOutput(buf, (xmlDocPtr) cur, encoding); return; } if (cur->type == HTML_TEXT_NODE) { if (cur->content != NULL) { xmlChar *buffer; #ifndef XML_USE_BUFFER_CONTENT buffer = xmlEncodeEntitiesReentrant(doc, cur->content); #else buffer = xmlEncodeEntitiesReentrant(doc, xmlBufferContent(cur->content)); #endif if (buffer != NULL) { xmlOutputBufferWriteString(buf, (const char *)buffer); xmlFree(buffer); } } return; } if (cur->type == HTML_COMMENT_NODE) { if (cur->content != NULL) { xmlOutputBufferWriteString(buf, ""); } return; } if (cur->type == HTML_ENTITY_REF_NODE) { xmlOutputBufferWriteString(buf, "&"); xmlOutputBufferWriteString(buf, (const char *)cur->name); xmlOutputBufferWriteString(buf, ";"); return; } if (cur->type == HTML_PRESERVE_NODE) { if (cur->content != NULL) { #ifndef XML_USE_BUFFER_CONTENT xmlOutputBufferWriteString(buf, (const char *)cur->content); #else xmlOutputBufferWriteString(buf, xmlBufferContent(cur->content)); #endif } return; } /* * Get specific HTmL info for taht node. */ info = htmlTagLookup(cur->name); xmlOutputBufferWriteString(buf, "<"); xmlOutputBufferWriteString(buf, (const char *)cur->name); if (cur->properties != NULL) htmlAttrListDumpOutput(buf, doc, cur->properties, encoding); if ((info != NULL) && (info->empty)) { xmlOutputBufferWriteString(buf, ">"); if (cur->next != NULL) { if ((cur->next->type != HTML_TEXT_NODE) && (cur->next->type != HTML_ENTITY_REF_NODE)) xmlOutputBufferWriteString(buf, "\n"); } return; } if ((cur->content == NULL) && (cur->children == NULL)) { if ((info != NULL) && (info->endTag != 0) && (strcmp(info->name, "html")) && (strcmp(info->name, "body"))) { xmlOutputBufferWriteString(buf, ">"); } else { xmlOutputBufferWriteString(buf, ">name); xmlOutputBufferWriteString(buf, ">"); } if (cur->next != NULL) { if ((cur->next->type != HTML_TEXT_NODE) && (cur->next->type != HTML_ENTITY_REF_NODE)) xmlOutputBufferWriteString(buf, "\n"); } return; } xmlOutputBufferWriteString(buf, ">"); if (cur->content != NULL) { #if 0 xmlChar *buffer; #ifndef XML_USE_BUFFER_CONTENT buffer = xmlEncodeEntitiesReentrant(doc, cur->content); #else buffer = xmlEncodeEntitiesReentrant(doc, xmlBufferContent(cur->content)); #endif if (buffer != NULL) { xmlOutputBufferWriteString(buf, buffer); xmlFree(buffer); } #else /* * Uses the OutputBuffer property to automatically convert * invalids to charrefs */ #ifndef XML_USE_BUFFER_CONTENT xmlOutputBufferWriteString(buf, (const char *) cur->content); #else xmlOutputBufferWriteString(buf, (const char *) xmlBufferContent(cur->content)); #endif #endif } if (cur->children != NULL) { if ((cur->children->type != HTML_TEXT_NODE) && (cur->children->type != HTML_ENTITY_REF_NODE) && (cur->children != cur->last)) xmlOutputBufferWriteString(buf, "\n"); htmlNodeListDumpOutput(buf, doc, cur->children, encoding); if ((cur->last->type != HTML_TEXT_NODE) && (cur->last->type != HTML_ENTITY_REF_NODE) && (cur->children != cur->last)) xmlOutputBufferWriteString(buf, "\n"); } if (!htmlIsAutoClosed(doc, cur)) { xmlOutputBufferWriteString(buf, "name); xmlOutputBufferWriteString(buf, ">"); } if (cur->next != NULL) { if ((cur->next->type != HTML_TEXT_NODE) && (cur->next->type != HTML_ENTITY_REF_NODE)) xmlOutputBufferWriteString(buf, "\n"); } } /** * htmlDocContentDump: * @buf: the HTML buffer output * @cur: the document * * Dump an HTML document. */ static void htmlDocContentDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr cur, const char *encoding) { int type; /* * force to output the stuff as HTML, especially for entities */ type = cur->type; cur->type = XML_HTML_DOCUMENT_NODE; if (cur->intSubset != NULL) htmlDtdDumpOutput(buf, cur, NULL); else { /* Default to HTML-4.0 transitionnal @@@@ */ xmlOutputBufferWriteString(buf, ""); } if (cur->children != NULL) { htmlNodeListDumpOutput(buf, cur, cur->children, encoding); } xmlOutputBufferWriteString(buf, "\n"); cur->type = (xmlElementType) type; } /************************************************************************ * * * Saving functions front-ends * * * ************************************************************************/ /** * htmlDocDump: * @f: the FILE* * @cur: the document * * Dump an HTML document to an open FILE. * * returns: the number of byte written or -1 in case of failure. */ int htmlDocDump(FILE *f, xmlDocPtr cur) { xmlOutputBufferPtr buf; xmlCharEncodingHandlerPtr handler = NULL; const char *encoding; int ret; if (cur == NULL) { #ifdef DEBUG_TREE xmlGenericError(xmlGenericErrorContext, "htmlDocDump : document == NULL\n"); #endif return(-1); } encoding = (const char *) htmlGetMetaEncoding(cur); if (encoding != NULL) { xmlCharEncoding enc; enc = xmlParseCharEncoding(encoding); if (enc != cur->charset) { if (cur->charset != XML_CHAR_ENCODING_UTF8) { /* * Not supported yet */ return(-1); } handler = xmlFindCharEncodingHandler(encoding); if (handler == NULL) return(-1); } } /* * Fallback to HTML or ASCII when the encoding is unspecified */ if (handler == NULL) handler = xmlFindCharEncodingHandler("HTML"); if (handler == NULL) handler = xmlFindCharEncodingHandler("ascii"); buf = xmlOutputBufferCreateFile(f, handler); if (buf == NULL) return(-1); htmlDocContentDumpOutput(buf, cur, NULL); ret = xmlOutputBufferClose(buf); return(ret); } /** * htmlSaveFile: * @filename: the filename (or URL) * @cur: the document * * Dump an HTML document to a file. If @filename is "-" the stdout file is * used. * returns: the number of byte written or -1 in case of failure. */ int htmlSaveFile(const char *filename, xmlDocPtr cur) { xmlOutputBufferPtr buf; xmlCharEncodingHandlerPtr handler = NULL; const char *encoding; int ret; encoding = (const char *) htmlGetMetaEncoding(cur); if (encoding != NULL) { xmlCharEncoding enc; enc = xmlParseCharEncoding(encoding); if (enc != cur->charset) { if (cur->charset != XML_CHAR_ENCODING_UTF8) { /* * Not supported yet */ return(-1); } handler = xmlFindCharEncodingHandler(encoding); if (handler == NULL) return(-1); } } /* * Fallback to HTML or ASCII when the encoding is unspecified */ if (handler == NULL) handler = xmlFindCharEncodingHandler("HTML"); if (handler == NULL) handler = xmlFindCharEncodingHandler("ascii"); /* * save the content to a temp buffer. */ buf = xmlOutputBufferCreateFilename(filename, handler, cur->compression); if (buf == NULL) return(0); htmlDocContentDumpOutput(buf, cur, NULL); ret = xmlOutputBufferClose(buf); return(ret); } /** * htmlSaveFileEnc: * @filename: the filename * @cur: the document * * Dump an HTML document to a file using a given encoding. * * returns: the number of byte written or -1 in case of failure. */ int htmlSaveFileEnc(const char *filename, xmlDocPtr cur, const char *encoding) { xmlOutputBufferPtr buf; xmlCharEncodingHandlerPtr handler = NULL; int ret; if (encoding != NULL) { xmlCharEncoding enc; enc = xmlParseCharEncoding(encoding); if (enc != cur->charset) { if (cur->charset != XML_CHAR_ENCODING_UTF8) { /* * Not supported yet */ return(-1); } handler = xmlFindCharEncodingHandler(encoding); if (handler == NULL) return(-1); htmlSetMetaEncoding(cur, (const xmlChar *) encoding); } } /* * Fallback to HTML or ASCII when the encoding is unspecified */ if (handler == NULL) handler = xmlFindCharEncodingHandler("HTML"); if (handler == NULL) handler = xmlFindCharEncodingHandler("ascii"); /* * save the content to a temp buffer. */ buf = xmlOutputBufferCreateFilename(filename, handler, 0); if (buf == NULL) return(0); htmlDocContentDumpOutput(buf, cur, encoding); ret = xmlOutputBufferClose(buf); return(ret); } #endif /* LIBXML_HTML_ENABLED */