Annotation of XML/HTMLtree.c, revision 1.12

1.1       daniel      1: /*
                      2:  * HTMLtree.c : implementation of access functions for an HTML tree.
                      3:  *
                      4:  * See Copyright for the status of this software.
                      5:  *
                      6:  * Daniel.Veillard@w3.org
                      7:  */
                      8: 
1.5       daniel      9: 
                     10: #ifndef WIN32
1.1       daniel     11: #include "config.h"
1.5       daniel     12: #endif
1.1       daniel     13: #include <stdio.h>
1.5       daniel     14: #include <string.h> /* for memset() only ! */
                     15: 
                     16: #ifdef HAVE_CTYPE_H
1.1       daniel     17: #include <ctype.h>
1.5       daniel     18: #endif
                     19: #ifdef HAVE_STDLIB_H
1.1       daniel     20: #include <stdlib.h>
1.5       daniel     21: #endif
1.1       daniel     22: 
1.4       daniel     23: #include "xmlmemory.h"
1.1       daniel     24: #include "HTMLparser.h"
                     25: #include "HTMLtree.h"
                     26: #include "entities.h"
                     27: #include "valid.h"
                     28: 
                     29: /**
                     30:  * htmlDtdDump:
                     31:  * @buf:  the HTML buffer output
                     32:  * @doc:  the document
                     33:  * 
                     34:  * Dump the HTML document DTD, if any.
                     35:  */
                     36: static void
                     37: htmlDtdDump(xmlBufferPtr buf, xmlDocPtr doc) {
                     38:     xmlDtdPtr cur = doc->intSubset;
                     39: 
                     40:     if (cur == NULL) {
                     41:         fprintf(stderr, "htmlDtdDump : no internal subset\n");
                     42:        return;
                     43:     }
                     44:     xmlBufferWriteChar(buf, "<!DOCTYPE ");
                     45:     xmlBufferWriteCHAR(buf, cur->name);
                     46:     if (cur->ExternalID != NULL) {
                     47:        xmlBufferWriteChar(buf, " PUBLIC ");
                     48:        xmlBufferWriteQuotedString(buf, cur->ExternalID);
1.2       daniel     49:        if (cur->SystemID != NULL) {
                     50:            xmlBufferWriteChar(buf, " ");
                     51:            xmlBufferWriteQuotedString(buf, cur->SystemID);
                     52:        } 
1.1       daniel     53:     }  else if (cur->SystemID != NULL) {
                     54:        xmlBufferWriteChar(buf, " SYSTEM ");
                     55:        xmlBufferWriteQuotedString(buf, cur->SystemID);
                     56:     }
                     57:     xmlBufferWriteChar(buf, ">\n");
                     58: }
                     59: 
                     60: /**
                     61:  * htmlAttrDump:
                     62:  * @buf:  the HTML buffer output
                     63:  * @doc:  the document
                     64:  * @cur:  the attribute pointer
                     65:  *
                     66:  * Dump an HTML attribute
                     67:  */
                     68: static void
                     69: htmlAttrDump(xmlBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur) {
1.6       daniel     70:     xmlChar *value;
1.1       daniel     71: 
                     72:     if (cur == NULL) {
                     73:         fprintf(stderr, "htmlAttrDump : property == NULL\n");
                     74:        return;
                     75:     }
                     76:     xmlBufferWriteChar(buf, " ");
                     77:     xmlBufferWriteCHAR(buf, cur->name);
                     78:     value = xmlNodeListGetString(doc, cur->val, 0);
                     79:     if (value) {
                     80:        xmlBufferWriteChar(buf, "=");
                     81:        xmlBufferWriteQuotedString(buf, value);
1.4       daniel     82:        xmlFree(value);
1.1       daniel     83:     } else  {
                     84:        xmlBufferWriteChar(buf, "=\"\"");
                     85:     }
                     86: }
                     87: 
                     88: /**
                     89:  * htmlAttrListDump:
                     90:  * @buf:  the HTML buffer output
                     91:  * @doc:  the document
                     92:  * @cur:  the first attribute pointer
                     93:  *
                     94:  * Dump a list of HTML attributes
                     95:  */
                     96: static void
                     97: htmlAttrListDump(xmlBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur) {
                     98:     if (cur == NULL) {
                     99:         fprintf(stderr, "htmlAttrListDump : property == NULL\n");
                    100:        return;
                    101:     }
                    102:     while (cur != NULL) {
                    103:         htmlAttrDump(buf, doc, cur);
                    104:        cur = cur->next;
                    105:     }
                    106: }
                    107: 
                    108: 
                    109: static void
                    110: htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur);
                    111: /**
                    112:  * htmlNodeListDump:
                    113:  * @buf:  the HTML buffer output
                    114:  * @doc:  the document
                    115:  * @cur:  the first node
                    116:  *
                    117:  * Dump an HTML node list, recursive behaviour,children are printed too.
                    118:  */
                    119: static void
                    120: htmlNodeListDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
                    121:     if (cur == NULL) {
                    122:         fprintf(stderr, "htmlNodeListDump : node == NULL\n");
                    123:        return;
                    124:     }
                    125:     while (cur != NULL) {
                    126:         htmlNodeDump(buf, doc, cur);
                    127:        cur = cur->next;
                    128:     }
                    129: }
                    130: 
                    131: /**
                    132:  * htmlNodeDump:
                    133:  * @buf:  the HTML buffer output
                    134:  * @doc:  the document
                    135:  * @cur:  the current node
                    136:  *
                    137:  * Dump an HTML node, recursive behaviour,children are printed too.
                    138:  */
                    139: static void
                    140: htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
                    141:     htmlElemDescPtr info;
                    142: 
                    143:     if (cur == NULL) {
                    144:         fprintf(stderr, "htmlNodeDump : node == NULL\n");
                    145:        return;
                    146:     }
                    147:     /*
                    148:      * Special cases.
                    149:      */
                    150:     if (cur->type == HTML_TEXT_NODE) {
                    151:        if (cur->content != NULL) {
1.6       daniel    152:             xmlChar *buffer;
1.1       daniel    153: 
                    154:            /* uses the HTML encoding routine !!!!!!!!!! */
1.9       daniel    155: #ifndef XML_USE_BUFFER_CONTENT
1.1       daniel    156:             buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
1.9       daniel    157: #else
                    158:             buffer = xmlEncodeEntitiesReentrant(doc, 
                    159:                                                 xmlBufferContent(cur->content));
                    160: #endif 
1.1       daniel    161:            if (buffer != NULL) {
                    162:                xmlBufferWriteCHAR(buf, buffer);
1.4       daniel    163:                xmlFree(buffer);
1.1       daniel    164:            }
                    165:        }
                    166:        return;
                    167:     }
                    168:     if (cur->type == HTML_COMMENT_NODE) {
                    169:        if (cur->content != NULL) {
                    170:            xmlBufferWriteChar(buf, "<!--");
1.9       daniel    171: #ifndef XML_USE_BUFFER_CONTENT
1.1       daniel    172:            xmlBufferWriteCHAR(buf, cur->content);
1.9       daniel    173: #else
                    174:            xmlBufferWriteCHAR(buf, xmlBufferContent(cur->content));
                    175: #endif
1.1       daniel    176:            xmlBufferWriteChar(buf, "-->");
                    177:        }
                    178:        return;
                    179:     }
                    180:     if (cur->type == HTML_ENTITY_REF_NODE) {
                    181:         xmlBufferWriteChar(buf, "&");
                    182:        xmlBufferWriteCHAR(buf, cur->name);
                    183:         xmlBufferWriteChar(buf, ";");
                    184:        return;
                    185:     }
                    186: 
                    187:     /*
                    188:      * Get specific HTmL info for taht node.
                    189:      */
                    190:     info = htmlTagLookup(cur->name);
                    191: 
                    192:     xmlBufferWriteChar(buf, "<");
                    193:     xmlBufferWriteCHAR(buf, cur->name);
                    194:     if (cur->properties != NULL)
                    195:         htmlAttrListDump(buf, doc, cur->properties);
                    196: 
1.7       daniel    197:     if ((info != NULL) && (info->empty)) {
1.1       daniel    198:         xmlBufferWriteChar(buf, ">");
                    199:        if (cur->next != NULL) {
                    200:            if ((cur->next->type != HTML_TEXT_NODE) &&
                    201:                (cur->next->type != HTML_ENTITY_REF_NODE))
                    202:                xmlBufferWriteChar(buf, "\n");
                    203:        }
                    204:        return;
                    205:     }
                    206:     if ((cur->content == NULL) && (cur->childs == NULL)) {
1.7       daniel    207:         if ((info != NULL) && (info->endTag != 0))
1.1       daniel    208:            xmlBufferWriteChar(buf, ">");
                    209:        else {
                    210:            xmlBufferWriteChar(buf, "></");
                    211:            xmlBufferWriteCHAR(buf, cur->name);
                    212:            xmlBufferWriteChar(buf, ">");
                    213:        }
                    214:        if (cur->next != NULL) {
                    215:            if ((cur->next->type != HTML_TEXT_NODE) &&
                    216:                (cur->next->type != HTML_ENTITY_REF_NODE))
                    217:                xmlBufferWriteChar(buf, "\n");
                    218:        }
                    219:        return;
                    220:     }
                    221:     xmlBufferWriteChar(buf, ">");
                    222:     if (cur->content != NULL) {
1.6       daniel    223:        xmlChar *buffer;
1.1       daniel    224: 
1.9       daniel    225: #ifndef XML_USE_BUFFER_CONTENT
                    226:     buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
                    227: #else
                    228:     buffer = xmlEncodeEntitiesReentrant(doc, 
                    229:                                         xmlBufferContent(cur->content));
                    230: #endif
1.1       daniel    231:        if (buffer != NULL) {
                    232:            xmlBufferWriteCHAR(buf, buffer);
1.4       daniel    233:            xmlFree(buffer);
1.1       daniel    234:        }
                    235:     }
                    236:     if (cur->childs != NULL) {
                    237:         if ((cur->childs->type != HTML_TEXT_NODE) &&
1.10      daniel    238:            (cur->childs->type != HTML_ENTITY_REF_NODE) &&
                    239:            (cur->childs != cur->last))
1.1       daniel    240:            xmlBufferWriteChar(buf, "\n");
                    241:        htmlNodeListDump(buf, doc, cur->childs);
                    242:         if ((cur->last->type != HTML_TEXT_NODE) &&
1.10      daniel    243:            (cur->last->type != HTML_ENTITY_REF_NODE) &&
                    244:            (cur->childs != cur->last))
1.1       daniel    245:            xmlBufferWriteChar(buf, "\n");
                    246:     }
1.11      daniel    247:     if (!htmlIsAutoClosed(doc, cur)) {
                    248:        xmlBufferWriteChar(buf, "</");
                    249:        xmlBufferWriteCHAR(buf, cur->name);
                    250:        xmlBufferWriteChar(buf, ">");
                    251:     }
1.1       daniel    252:     if (cur->next != NULL) {
                    253:         if ((cur->next->type != HTML_TEXT_NODE) &&
                    254:            (cur->next->type != HTML_ENTITY_REF_NODE))
                    255:            xmlBufferWriteChar(buf, "\n");
                    256:     }
                    257: }
                    258: 
                    259: /**
                    260:  * htmlDocContentDump:
                    261:  * @buf:  the HTML buffer output
                    262:  * @cur:  the document
                    263:  *
                    264:  * Dump an HTML document.
                    265:  */
                    266: static void
                    267: htmlDocContentDump(xmlBufferPtr buf, xmlDocPtr cur) {
1.12    ! daniel    268:     int type;
        !           269: 
        !           270:     /*
        !           271:      * force to output the stuff as HTML, especially for entities
        !           272:      */
        !           273:     type = cur->type;
        !           274:     cur->type = XML_HTML_DOCUMENT_NODE;
1.1       daniel    275:     if (cur->intSubset != NULL)
                    276:         htmlDtdDump(buf, cur);
1.11      daniel    277:     else {
                    278:        /* Default to HTML-4.0 transitionnal @@@@ */
                    279:        xmlBufferWriteChar(buf, "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\" \"http://www.w3.org/TR/REC-html40/loose.dtd\">");
                    280: 
                    281:     }
1.1       daniel    282:     if (cur->root != NULL) {
1.8       daniel    283:         htmlNodeListDump(buf, cur, cur->root);
1.1       daniel    284:     }
                    285:     xmlBufferWriteChar(buf, "\n");
1.12    ! daniel    286:     cur->type = type;
1.1       daniel    287: }
                    288: 
                    289: /**
                    290:  * htmlDocDumpMemory:
                    291:  * @cur:  the document
                    292:  * @mem:  OUT: the memory pointer
                    293:  * @size:  OUT: the memory lenght
                    294:  *
1.6       daniel    295:  * Dump an HTML document in memory and return the xmlChar * and it's size.
1.1       daniel    296:  * It's up to the caller to free the memory.
                    297:  */
                    298: void
1.6       daniel    299: htmlDocDumpMemory(xmlDocPtr cur, xmlChar**mem, int *size) {
1.1       daniel    300:     xmlBufferPtr buf;
                    301: 
                    302:     if (cur == NULL) {
                    303: #ifdef DEBUG_TREE
                    304:         fprintf(stderr, "xmlDocDumpMemory : document == NULL\n");
                    305: #endif
                    306:        *mem = NULL;
                    307:        *size = 0;
                    308:        return;
                    309:     }
                    310:     buf = xmlBufferCreate();
                    311:     if (buf == NULL) {
                    312:        *mem = NULL;
                    313:        *size = 0;
                    314:        return;
                    315:     }
                    316:     htmlDocContentDump(buf, cur);
                    317:     *mem = buf->content;
                    318:     *size = buf->use;
                    319:     memset(buf, -1, sizeof(xmlBuffer));
1.4       daniel    320:     xmlFree(buf);
1.1       daniel    321: }
                    322: 
                    323: 
                    324: /**
                    325:  * htmlDocDump:
                    326:  * @f:  the FILE*
                    327:  * @cur:  the document
                    328:  *
                    329:  * Dump an HTML document to an open FILE.
                    330:  */
                    331: void
                    332: htmlDocDump(FILE *f, xmlDocPtr cur) {
                    333:     xmlBufferPtr buf;
                    334: 
                    335:     if (cur == NULL) {
                    336: #ifdef DEBUG_TREE
                    337:         fprintf(stderr, "xmlDocDump : document == NULL\n");
                    338: #endif
                    339:        return;
                    340:     }
                    341:     buf = xmlBufferCreate();
                    342:     if (buf == NULL) return;
                    343:     htmlDocContentDump(buf, cur);
                    344:     xmlBufferDump(f, buf);
                    345:     xmlBufferFree(buf);
                    346: }
                    347: 
                    348: /**
                    349:  * htmlSaveFile:
                    350:  * @filename:  the filename
                    351:  * @cur:  the document
                    352:  *
                    353:  * Dump an HTML document to a file.
                    354:  * 
                    355:  * returns: the number of byte written or -1 in case of failure.
                    356:  */
                    357: int
                    358: htmlSaveFile(const char *filename, xmlDocPtr cur) {
                    359:     xmlBufferPtr buf;
                    360:     FILE *output = NULL;
                    361:     int ret;
                    362: 
                    363:     /* 
                    364:      * save the content to a temp buffer.
                    365:      */
                    366:     buf = xmlBufferCreate();
                    367:     if (buf == NULL) return(0);
                    368:     htmlDocContentDump(buf, cur);
                    369: 
                    370:     output = fopen(filename, "w");
                    371:     if (output == NULL) return(-1);
                    372:     ret = xmlBufferDump(output, buf);
                    373:     fclose(output);
                    374: 
                    375:     xmlBufferFree(buf);
1.6       daniel    376:     return(ret * sizeof(xmlChar));
1.1       daniel    377: }
                    378: 

Webmaster