Annotation of XML/HTMLtree.c, revision 1.20

1.1       daniel      1: /*
                      2:  * HTMLtree.c : implementation of access functions for an HTML tree.
                      3:  *
                      4:  * See Copyright for the status of this software.
                      5:  *
                      6:  * Daniel.Veillard@w3.org
                      7:  */
                      8: 
1.5       daniel      9: 
1.13      daniel     10: #ifdef WIN32
                     11: #include "win32config.h"
                     12: #else
1.1       daniel     13: #include "config.h"
1.5       daniel     14: #endif
1.18      daniel     15: 
                     16: #include "xmlversion.h"
                     17: #ifdef LIBXML_HTML_ENABLED
                     18: 
1.1       daniel     19: #include <stdio.h>
1.5       daniel     20: #include <string.h> /* for memset() only ! */
                     21: 
                     22: #ifdef HAVE_CTYPE_H
1.1       daniel     23: #include <ctype.h>
1.5       daniel     24: #endif
                     25: #ifdef HAVE_STDLIB_H
1.1       daniel     26: #include <stdlib.h>
1.5       daniel     27: #endif
1.1       daniel     28: 
1.18      daniel     29: #include <libxml/xmlmemory.h>
                     30: #include <libxml/HTMLparser.h>
                     31: #include <libxml/HTMLtree.h>
                     32: #include <libxml/entities.h>
                     33: #include <libxml/valid.h>
1.1       daniel     34: 
1.14      daniel     35: static void
                     36: htmlDocContentDump(xmlBufferPtr buf, xmlDocPtr cur);
                     37: 
1.1       daniel     38: /**
                     39:  * htmlDtdDump:
                     40:  * @buf:  the HTML buffer output
                     41:  * @doc:  the document
                     42:  * 
                     43:  * Dump the HTML document DTD, if any.
                     44:  */
                     45: static void
                     46: htmlDtdDump(xmlBufferPtr buf, xmlDocPtr doc) {
                     47:     xmlDtdPtr cur = doc->intSubset;
                     48: 
                     49:     if (cur == NULL) {
                     50:         fprintf(stderr, "htmlDtdDump : no internal subset\n");
                     51:        return;
                     52:     }
                     53:     xmlBufferWriteChar(buf, "<!DOCTYPE ");
                     54:     xmlBufferWriteCHAR(buf, cur->name);
                     55:     if (cur->ExternalID != NULL) {
                     56:        xmlBufferWriteChar(buf, " PUBLIC ");
                     57:        xmlBufferWriteQuotedString(buf, cur->ExternalID);
1.2       daniel     58:        if (cur->SystemID != NULL) {
                     59:            xmlBufferWriteChar(buf, " ");
                     60:            xmlBufferWriteQuotedString(buf, cur->SystemID);
                     61:        } 
1.1       daniel     62:     }  else if (cur->SystemID != NULL) {
                     63:        xmlBufferWriteChar(buf, " SYSTEM ");
                     64:        xmlBufferWriteQuotedString(buf, cur->SystemID);
                     65:     }
                     66:     xmlBufferWriteChar(buf, ">\n");
                     67: }
                     68: 
                     69: /**
                     70:  * htmlAttrDump:
                     71:  * @buf:  the HTML buffer output
                     72:  * @doc:  the document
                     73:  * @cur:  the attribute pointer
                     74:  *
                     75:  * Dump an HTML attribute
                     76:  */
                     77: static void
                     78: htmlAttrDump(xmlBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur) {
1.6       daniel     79:     xmlChar *value;
1.1       daniel     80: 
                     81:     if (cur == NULL) {
                     82:         fprintf(stderr, "htmlAttrDump : property == NULL\n");
                     83:        return;
                     84:     }
                     85:     xmlBufferWriteChar(buf, " ");
                     86:     xmlBufferWriteCHAR(buf, cur->name);
1.19      daniel     87:     if (cur->children != NULL) {
                     88:        value = xmlNodeListGetString(doc, cur->children, 0);
                     89:        if (value) {
                     90:            xmlBufferWriteChar(buf, "=");
                     91:            xmlBufferWriteQuotedString(buf, value);
                     92:            xmlFree(value);
                     93:        } else  {
                     94:            xmlBufferWriteChar(buf, "=\"\"");
                     95:        }
1.1       daniel     96:     }
                     97: }
                     98: 
                     99: /**
                    100:  * htmlAttrListDump:
                    101:  * @buf:  the HTML buffer output
                    102:  * @doc:  the document
                    103:  * @cur:  the first attribute pointer
                    104:  *
                    105:  * Dump a list of HTML attributes
                    106:  */
                    107: static void
                    108: htmlAttrListDump(xmlBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur) {
                    109:     if (cur == NULL) {
                    110:         fprintf(stderr, "htmlAttrListDump : property == NULL\n");
                    111:        return;
                    112:     }
                    113:     while (cur != NULL) {
                    114:         htmlAttrDump(buf, doc, cur);
                    115:        cur = cur->next;
                    116:     }
                    117: }
                    118: 
                    119: 
1.14      daniel    120: void
1.1       daniel    121: htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur);
                    122: /**
                    123:  * htmlNodeListDump:
                    124:  * @buf:  the HTML buffer output
                    125:  * @doc:  the document
                    126:  * @cur:  the first node
                    127:  *
                    128:  * Dump an HTML node list, recursive behaviour,children are printed too.
                    129:  */
                    130: static void
                    131: htmlNodeListDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
                    132:     if (cur == NULL) {
                    133:         fprintf(stderr, "htmlNodeListDump : node == NULL\n");
                    134:        return;
                    135:     }
                    136:     while (cur != NULL) {
                    137:         htmlNodeDump(buf, doc, cur);
                    138:        cur = cur->next;
                    139:     }
                    140: }
                    141: 
                    142: /**
                    143:  * htmlNodeDump:
                    144:  * @buf:  the HTML buffer output
                    145:  * @doc:  the document
                    146:  * @cur:  the current node
                    147:  *
                    148:  * Dump an HTML node, recursive behaviour,children are printed too.
                    149:  */
1.14      daniel    150: void
1.1       daniel    151: htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
                    152:     htmlElemDescPtr info;
                    153: 
                    154:     if (cur == NULL) {
                    155:         fprintf(stderr, "htmlNodeDump : node == NULL\n");
                    156:        return;
                    157:     }
                    158:     /*
                    159:      * Special cases.
                    160:      */
1.20    ! daniel    161:     if (cur->type == XML_DTD_NODE)
        !           162:        return;
1.14      daniel    163:     if (cur->type == XML_HTML_DOCUMENT_NODE) {
                    164:        htmlDocContentDump(buf, (xmlDocPtr) cur);
                    165:        return;
                    166:     }
1.1       daniel    167:     if (cur->type == HTML_TEXT_NODE) {
                    168:        if (cur->content != NULL) {
1.6       daniel    169:             xmlChar *buffer;
1.1       daniel    170: 
                    171:            /* uses the HTML encoding routine !!!!!!!!!! */
1.9       daniel    172: #ifndef XML_USE_BUFFER_CONTENT
1.1       daniel    173:             buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
1.9       daniel    174: #else
                    175:             buffer = xmlEncodeEntitiesReentrant(doc, 
                    176:                                                 xmlBufferContent(cur->content));
                    177: #endif 
1.1       daniel    178:            if (buffer != NULL) {
                    179:                xmlBufferWriteCHAR(buf, buffer);
1.4       daniel    180:                xmlFree(buffer);
1.1       daniel    181:            }
                    182:        }
                    183:        return;
                    184:     }
                    185:     if (cur->type == HTML_COMMENT_NODE) {
                    186:        if (cur->content != NULL) {
                    187:            xmlBufferWriteChar(buf, "<!--");
1.9       daniel    188: #ifndef XML_USE_BUFFER_CONTENT
1.1       daniel    189:            xmlBufferWriteCHAR(buf, cur->content);
1.9       daniel    190: #else
                    191:            xmlBufferWriteCHAR(buf, xmlBufferContent(cur->content));
                    192: #endif
1.1       daniel    193:            xmlBufferWriteChar(buf, "-->");
                    194:        }
                    195:        return;
                    196:     }
                    197:     if (cur->type == HTML_ENTITY_REF_NODE) {
                    198:         xmlBufferWriteChar(buf, "&");
                    199:        xmlBufferWriteCHAR(buf, cur->name);
                    200:         xmlBufferWriteChar(buf, ";");
                    201:        return;
                    202:     }
                    203: 
                    204:     /*
                    205:      * Get specific HTmL info for taht node.
                    206:      */
                    207:     info = htmlTagLookup(cur->name);
                    208: 
                    209:     xmlBufferWriteChar(buf, "<");
                    210:     xmlBufferWriteCHAR(buf, cur->name);
                    211:     if (cur->properties != NULL)
                    212:         htmlAttrListDump(buf, doc, cur->properties);
                    213: 
1.7       daniel    214:     if ((info != NULL) && (info->empty)) {
1.1       daniel    215:         xmlBufferWriteChar(buf, ">");
                    216:        if (cur->next != NULL) {
                    217:            if ((cur->next->type != HTML_TEXT_NODE) &&
                    218:                (cur->next->type != HTML_ENTITY_REF_NODE))
                    219:                xmlBufferWriteChar(buf, "\n");
                    220:        }
                    221:        return;
                    222:     }
1.17      daniel    223:     if ((cur->content == NULL) && (cur->children == NULL)) {
1.7       daniel    224:         if ((info != NULL) && (info->endTag != 0))
1.1       daniel    225:            xmlBufferWriteChar(buf, ">");
                    226:        else {
                    227:            xmlBufferWriteChar(buf, "></");
                    228:            xmlBufferWriteCHAR(buf, cur->name);
                    229:            xmlBufferWriteChar(buf, ">");
                    230:        }
                    231:        if (cur->next != NULL) {
                    232:            if ((cur->next->type != HTML_TEXT_NODE) &&
                    233:                (cur->next->type != HTML_ENTITY_REF_NODE))
                    234:                xmlBufferWriteChar(buf, "\n");
                    235:        }
                    236:        return;
                    237:     }
                    238:     xmlBufferWriteChar(buf, ">");
                    239:     if (cur->content != NULL) {
1.6       daniel    240:        xmlChar *buffer;
1.1       daniel    241: 
1.9       daniel    242: #ifndef XML_USE_BUFFER_CONTENT
                    243:     buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
                    244: #else
                    245:     buffer = xmlEncodeEntitiesReentrant(doc, 
                    246:                                         xmlBufferContent(cur->content));
                    247: #endif
1.1       daniel    248:        if (buffer != NULL) {
                    249:            xmlBufferWriteCHAR(buf, buffer);
1.4       daniel    250:            xmlFree(buffer);
1.1       daniel    251:        }
                    252:     }
1.17      daniel    253:     if (cur->children != NULL) {
                    254:         if ((cur->children->type != HTML_TEXT_NODE) &&
                    255:            (cur->children->type != HTML_ENTITY_REF_NODE) &&
                    256:            (cur->children != cur->last))
1.1       daniel    257:            xmlBufferWriteChar(buf, "\n");
1.17      daniel    258:        htmlNodeListDump(buf, doc, cur->children);
1.1       daniel    259:         if ((cur->last->type != HTML_TEXT_NODE) &&
1.10      daniel    260:            (cur->last->type != HTML_ENTITY_REF_NODE) &&
1.17      daniel    261:            (cur->children != cur->last))
1.1       daniel    262:            xmlBufferWriteChar(buf, "\n");
                    263:     }
1.11      daniel    264:     if (!htmlIsAutoClosed(doc, cur)) {
                    265:        xmlBufferWriteChar(buf, "</");
                    266:        xmlBufferWriteCHAR(buf, cur->name);
                    267:        xmlBufferWriteChar(buf, ">");
                    268:     }
1.1       daniel    269:     if (cur->next != NULL) {
                    270:         if ((cur->next->type != HTML_TEXT_NODE) &&
                    271:            (cur->next->type != HTML_ENTITY_REF_NODE))
                    272:            xmlBufferWriteChar(buf, "\n");
                    273:     }
                    274: }
                    275: 
                    276: /**
1.16      daniel    277:  * htmlNodeDumpFile:
                    278:  * @out:  the FILE pointer
                    279:  * @doc:  the document
                    280:  * @cur:  the current node
                    281:  *
                    282:  * Dump an HTML node, recursive behaviour,children are printed too.
                    283:  */
                    284: void
                    285: htmlNodeDumpFile(FILE *out, xmlDocPtr doc, xmlNodePtr cur) {
                    286:     xmlBufferPtr buf;
                    287: 
                    288:     buf = xmlBufferCreate();
                    289:     if (buf == NULL) return;
                    290:     htmlNodeDump(buf, doc, cur);
                    291:     xmlBufferDump(out, buf);
                    292:     xmlBufferFree(buf);
                    293: }
                    294: 
                    295: /**
1.1       daniel    296:  * htmlDocContentDump:
                    297:  * @buf:  the HTML buffer output
                    298:  * @cur:  the document
                    299:  *
                    300:  * Dump an HTML document.
                    301:  */
                    302: static void
                    303: htmlDocContentDump(xmlBufferPtr buf, xmlDocPtr cur) {
1.12      daniel    304:     int type;
                    305: 
                    306:     /*
                    307:      * force to output the stuff as HTML, especially for entities
                    308:      */
                    309:     type = cur->type;
                    310:     cur->type = XML_HTML_DOCUMENT_NODE;
1.1       daniel    311:     if (cur->intSubset != NULL)
                    312:         htmlDtdDump(buf, cur);
1.11      daniel    313:     else {
                    314:        /* Default to HTML-4.0 transitionnal @@@@ */
                    315:        xmlBufferWriteChar(buf, "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\" \"http://www.w3.org/TR/REC-html40/loose.dtd\">");
                    316: 
                    317:     }
1.17      daniel    318:     if (cur->children != NULL) {
                    319:         htmlNodeListDump(buf, cur, cur->children);
1.1       daniel    320:     }
                    321:     xmlBufferWriteChar(buf, "\n");
1.12      daniel    322:     cur->type = type;
1.1       daniel    323: }
                    324: 
                    325: /**
                    326:  * htmlDocDumpMemory:
                    327:  * @cur:  the document
                    328:  * @mem:  OUT: the memory pointer
                    329:  * @size:  OUT: the memory lenght
                    330:  *
1.6       daniel    331:  * Dump an HTML document in memory and return the xmlChar * and it's size.
1.1       daniel    332:  * It's up to the caller to free the memory.
                    333:  */
                    334: void
1.6       daniel    335: htmlDocDumpMemory(xmlDocPtr cur, xmlChar**mem, int *size) {
1.1       daniel    336:     xmlBufferPtr buf;
                    337: 
                    338:     if (cur == NULL) {
                    339: #ifdef DEBUG_TREE
1.15      daniel    340:         fprintf(stderr, "htmlxmlDocDumpMemory : document == NULL\n");
1.1       daniel    341: #endif
                    342:        *mem = NULL;
                    343:        *size = 0;
                    344:        return;
                    345:     }
                    346:     buf = xmlBufferCreate();
                    347:     if (buf == NULL) {
                    348:        *mem = NULL;
                    349:        *size = 0;
                    350:        return;
                    351:     }
                    352:     htmlDocContentDump(buf, cur);
                    353:     *mem = buf->content;
                    354:     *size = buf->use;
                    355:     memset(buf, -1, sizeof(xmlBuffer));
1.4       daniel    356:     xmlFree(buf);
1.1       daniel    357: }
                    358: 
                    359: 
                    360: /**
                    361:  * htmlDocDump:
                    362:  * @f:  the FILE*
                    363:  * @cur:  the document
                    364:  *
                    365:  * Dump an HTML document to an open FILE.
                    366:  */
                    367: void
                    368: htmlDocDump(FILE *f, xmlDocPtr cur) {
                    369:     xmlBufferPtr buf;
                    370: 
                    371:     if (cur == NULL) {
                    372: #ifdef DEBUG_TREE
1.15      daniel    373:         fprintf(stderr, "htmlDocDump : document == NULL\n");
1.1       daniel    374: #endif
                    375:        return;
                    376:     }
                    377:     buf = xmlBufferCreate();
                    378:     if (buf == NULL) return;
                    379:     htmlDocContentDump(buf, cur);
                    380:     xmlBufferDump(f, buf);
                    381:     xmlBufferFree(buf);
                    382: }
                    383: 
                    384: /**
                    385:  * htmlSaveFile:
                    386:  * @filename:  the filename
                    387:  * @cur:  the document
                    388:  *
                    389:  * Dump an HTML document to a file.
                    390:  * 
                    391:  * returns: the number of byte written or -1 in case of failure.
                    392:  */
                    393: int
                    394: htmlSaveFile(const char *filename, xmlDocPtr cur) {
                    395:     xmlBufferPtr buf;
                    396:     FILE *output = NULL;
                    397:     int ret;
                    398: 
                    399:     /* 
                    400:      * save the content to a temp buffer.
                    401:      */
                    402:     buf = xmlBufferCreate();
                    403:     if (buf == NULL) return(0);
                    404:     htmlDocContentDump(buf, cur);
                    405: 
                    406:     output = fopen(filename, "w");
                    407:     if (output == NULL) return(-1);
                    408:     ret = xmlBufferDump(output, buf);
                    409:     fclose(output);
                    410: 
                    411:     xmlBufferFree(buf);
1.6       daniel    412:     return(ret * sizeof(xmlChar));
1.1       daniel    413: }
                    414: 
1.18      daniel    415: #endif /* LIBXML_HTML_ENABLED */

Webmaster