Annotation of XML/HTMLtree.c, revision 1.14

1.1       daniel      1: /*
                      2:  * HTMLtree.c : implementation of access functions for an HTML tree.
                      3:  *
                      4:  * See Copyright for the status of this software.
                      5:  *
                      6:  * Daniel.Veillard@w3.org
                      7:  */
                      8: 
1.5       daniel      9: 
1.13      daniel     10: #ifdef WIN32
                     11: #include "win32config.h"
                     12: #else
1.1       daniel     13: #include "config.h"
1.5       daniel     14: #endif
1.1       daniel     15: #include <stdio.h>
1.5       daniel     16: #include <string.h> /* for memset() only ! */
                     17: 
                     18: #ifdef HAVE_CTYPE_H
1.1       daniel     19: #include <ctype.h>
1.5       daniel     20: #endif
                     21: #ifdef HAVE_STDLIB_H
1.1       daniel     22: #include <stdlib.h>
1.5       daniel     23: #endif
1.1       daniel     24: 
1.4       daniel     25: #include "xmlmemory.h"
1.1       daniel     26: #include "HTMLparser.h"
                     27: #include "HTMLtree.h"
                     28: #include "entities.h"
                     29: #include "valid.h"
                     30: 
1.14    ! daniel     31: static void
        !            32: htmlDocContentDump(xmlBufferPtr buf, xmlDocPtr cur);
        !            33: 
1.1       daniel     34: /**
                     35:  * htmlDtdDump:
                     36:  * @buf:  the HTML buffer output
                     37:  * @doc:  the document
                     38:  * 
                     39:  * Dump the HTML document DTD, if any.
                     40:  */
                     41: static void
                     42: htmlDtdDump(xmlBufferPtr buf, xmlDocPtr doc) {
                     43:     xmlDtdPtr cur = doc->intSubset;
                     44: 
                     45:     if (cur == NULL) {
                     46:         fprintf(stderr, "htmlDtdDump : no internal subset\n");
                     47:        return;
                     48:     }
                     49:     xmlBufferWriteChar(buf, "<!DOCTYPE ");
                     50:     xmlBufferWriteCHAR(buf, cur->name);
                     51:     if (cur->ExternalID != NULL) {
                     52:        xmlBufferWriteChar(buf, " PUBLIC ");
                     53:        xmlBufferWriteQuotedString(buf, cur->ExternalID);
1.2       daniel     54:        if (cur->SystemID != NULL) {
                     55:            xmlBufferWriteChar(buf, " ");
                     56:            xmlBufferWriteQuotedString(buf, cur->SystemID);
                     57:        } 
1.1       daniel     58:     }  else if (cur->SystemID != NULL) {
                     59:        xmlBufferWriteChar(buf, " SYSTEM ");
                     60:        xmlBufferWriteQuotedString(buf, cur->SystemID);
                     61:     }
                     62:     xmlBufferWriteChar(buf, ">\n");
                     63: }
                     64: 
                     65: /**
                     66:  * htmlAttrDump:
                     67:  * @buf:  the HTML buffer output
                     68:  * @doc:  the document
                     69:  * @cur:  the attribute pointer
                     70:  *
                     71:  * Dump an HTML attribute
                     72:  */
                     73: static void
                     74: htmlAttrDump(xmlBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur) {
1.6       daniel     75:     xmlChar *value;
1.1       daniel     76: 
                     77:     if (cur == NULL) {
                     78:         fprintf(stderr, "htmlAttrDump : property == NULL\n");
                     79:        return;
                     80:     }
                     81:     xmlBufferWriteChar(buf, " ");
                     82:     xmlBufferWriteCHAR(buf, cur->name);
                     83:     value = xmlNodeListGetString(doc, cur->val, 0);
                     84:     if (value) {
                     85:        xmlBufferWriteChar(buf, "=");
                     86:        xmlBufferWriteQuotedString(buf, value);
1.4       daniel     87:        xmlFree(value);
1.1       daniel     88:     } else  {
                     89:        xmlBufferWriteChar(buf, "=\"\"");
                     90:     }
                     91: }
                     92: 
                     93: /**
                     94:  * htmlAttrListDump:
                     95:  * @buf:  the HTML buffer output
                     96:  * @doc:  the document
                     97:  * @cur:  the first attribute pointer
                     98:  *
                     99:  * Dump a list of HTML attributes
                    100:  */
                    101: static void
                    102: htmlAttrListDump(xmlBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur) {
                    103:     if (cur == NULL) {
                    104:         fprintf(stderr, "htmlAttrListDump : property == NULL\n");
                    105:        return;
                    106:     }
                    107:     while (cur != NULL) {
                    108:         htmlAttrDump(buf, doc, cur);
                    109:        cur = cur->next;
                    110:     }
                    111: }
                    112: 
                    113: 
1.14    ! daniel    114: void
1.1       daniel    115: htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur);
                    116: /**
                    117:  * htmlNodeListDump:
                    118:  * @buf:  the HTML buffer output
                    119:  * @doc:  the document
                    120:  * @cur:  the first node
                    121:  *
                    122:  * Dump an HTML node list, recursive behaviour,children are printed too.
                    123:  */
                    124: static void
                    125: htmlNodeListDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
                    126:     if (cur == NULL) {
                    127:         fprintf(stderr, "htmlNodeListDump : node == NULL\n");
                    128:        return;
                    129:     }
                    130:     while (cur != NULL) {
                    131:         htmlNodeDump(buf, doc, cur);
                    132:        cur = cur->next;
                    133:     }
                    134: }
                    135: 
                    136: /**
                    137:  * htmlNodeDump:
                    138:  * @buf:  the HTML buffer output
                    139:  * @doc:  the document
                    140:  * @cur:  the current node
                    141:  *
                    142:  * Dump an HTML node, recursive behaviour,children are printed too.
                    143:  */
1.14    ! daniel    144: void
1.1       daniel    145: htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
                    146:     htmlElemDescPtr info;
                    147: 
                    148:     if (cur == NULL) {
                    149:         fprintf(stderr, "htmlNodeDump : node == NULL\n");
                    150:        return;
                    151:     }
                    152:     /*
                    153:      * Special cases.
                    154:      */
1.14    ! daniel    155:     if (cur->type == XML_HTML_DOCUMENT_NODE) {
        !           156:        htmlDocContentDump(buf, (xmlDocPtr) cur);
        !           157:        return;
        !           158:     }
1.1       daniel    159:     if (cur->type == HTML_TEXT_NODE) {
                    160:        if (cur->content != NULL) {
1.6       daniel    161:             xmlChar *buffer;
1.1       daniel    162: 
                    163:            /* uses the HTML encoding routine !!!!!!!!!! */
1.9       daniel    164: #ifndef XML_USE_BUFFER_CONTENT
1.1       daniel    165:             buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
1.9       daniel    166: #else
                    167:             buffer = xmlEncodeEntitiesReentrant(doc, 
                    168:                                                 xmlBufferContent(cur->content));
                    169: #endif 
1.1       daniel    170:            if (buffer != NULL) {
                    171:                xmlBufferWriteCHAR(buf, buffer);
1.4       daniel    172:                xmlFree(buffer);
1.1       daniel    173:            }
                    174:        }
                    175:        return;
                    176:     }
                    177:     if (cur->type == HTML_COMMENT_NODE) {
                    178:        if (cur->content != NULL) {
                    179:            xmlBufferWriteChar(buf, "<!--");
1.9       daniel    180: #ifndef XML_USE_BUFFER_CONTENT
1.1       daniel    181:            xmlBufferWriteCHAR(buf, cur->content);
1.9       daniel    182: #else
                    183:            xmlBufferWriteCHAR(buf, xmlBufferContent(cur->content));
                    184: #endif
1.1       daniel    185:            xmlBufferWriteChar(buf, "-->");
                    186:        }
                    187:        return;
                    188:     }
                    189:     if (cur->type == HTML_ENTITY_REF_NODE) {
                    190:         xmlBufferWriteChar(buf, "&");
                    191:        xmlBufferWriteCHAR(buf, cur->name);
                    192:         xmlBufferWriteChar(buf, ";");
                    193:        return;
                    194:     }
                    195: 
                    196:     /*
                    197:      * Get specific HTmL info for taht node.
                    198:      */
                    199:     info = htmlTagLookup(cur->name);
                    200: 
                    201:     xmlBufferWriteChar(buf, "<");
                    202:     xmlBufferWriteCHAR(buf, cur->name);
                    203:     if (cur->properties != NULL)
                    204:         htmlAttrListDump(buf, doc, cur->properties);
                    205: 
1.7       daniel    206:     if ((info != NULL) && (info->empty)) {
1.1       daniel    207:         xmlBufferWriteChar(buf, ">");
                    208:        if (cur->next != NULL) {
                    209:            if ((cur->next->type != HTML_TEXT_NODE) &&
                    210:                (cur->next->type != HTML_ENTITY_REF_NODE))
                    211:                xmlBufferWriteChar(buf, "\n");
                    212:        }
                    213:        return;
                    214:     }
                    215:     if ((cur->content == NULL) && (cur->childs == NULL)) {
1.7       daniel    216:         if ((info != NULL) && (info->endTag != 0))
1.1       daniel    217:            xmlBufferWriteChar(buf, ">");
                    218:        else {
                    219:            xmlBufferWriteChar(buf, "></");
                    220:            xmlBufferWriteCHAR(buf, cur->name);
                    221:            xmlBufferWriteChar(buf, ">");
                    222:        }
                    223:        if (cur->next != NULL) {
                    224:            if ((cur->next->type != HTML_TEXT_NODE) &&
                    225:                (cur->next->type != HTML_ENTITY_REF_NODE))
                    226:                xmlBufferWriteChar(buf, "\n");
                    227:        }
                    228:        return;
                    229:     }
                    230:     xmlBufferWriteChar(buf, ">");
                    231:     if (cur->content != NULL) {
1.6       daniel    232:        xmlChar *buffer;
1.1       daniel    233: 
1.9       daniel    234: #ifndef XML_USE_BUFFER_CONTENT
                    235:     buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
                    236: #else
                    237:     buffer = xmlEncodeEntitiesReentrant(doc, 
                    238:                                         xmlBufferContent(cur->content));
                    239: #endif
1.1       daniel    240:        if (buffer != NULL) {
                    241:            xmlBufferWriteCHAR(buf, buffer);
1.4       daniel    242:            xmlFree(buffer);
1.1       daniel    243:        }
                    244:     }
                    245:     if (cur->childs != NULL) {
                    246:         if ((cur->childs->type != HTML_TEXT_NODE) &&
1.10      daniel    247:            (cur->childs->type != HTML_ENTITY_REF_NODE) &&
                    248:            (cur->childs != cur->last))
1.1       daniel    249:            xmlBufferWriteChar(buf, "\n");
                    250:        htmlNodeListDump(buf, doc, cur->childs);
                    251:         if ((cur->last->type != HTML_TEXT_NODE) &&
1.10      daniel    252:            (cur->last->type != HTML_ENTITY_REF_NODE) &&
                    253:            (cur->childs != cur->last))
1.1       daniel    254:            xmlBufferWriteChar(buf, "\n");
                    255:     }
1.11      daniel    256:     if (!htmlIsAutoClosed(doc, cur)) {
                    257:        xmlBufferWriteChar(buf, "</");
                    258:        xmlBufferWriteCHAR(buf, cur->name);
                    259:        xmlBufferWriteChar(buf, ">");
                    260:     }
1.1       daniel    261:     if (cur->next != NULL) {
                    262:         if ((cur->next->type != HTML_TEXT_NODE) &&
                    263:            (cur->next->type != HTML_ENTITY_REF_NODE))
                    264:            xmlBufferWriteChar(buf, "\n");
                    265:     }
                    266: }
                    267: 
                    268: /**
                    269:  * htmlDocContentDump:
                    270:  * @buf:  the HTML buffer output
                    271:  * @cur:  the document
                    272:  *
                    273:  * Dump an HTML document.
                    274:  */
                    275: static void
                    276: htmlDocContentDump(xmlBufferPtr buf, xmlDocPtr cur) {
1.12      daniel    277:     int type;
                    278: 
                    279:     /*
                    280:      * force to output the stuff as HTML, especially for entities
                    281:      */
                    282:     type = cur->type;
                    283:     cur->type = XML_HTML_DOCUMENT_NODE;
1.1       daniel    284:     if (cur->intSubset != NULL)
                    285:         htmlDtdDump(buf, cur);
1.11      daniel    286:     else {
                    287:        /* Default to HTML-4.0 transitionnal @@@@ */
                    288:        xmlBufferWriteChar(buf, "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\" \"http://www.w3.org/TR/REC-html40/loose.dtd\">");
                    289: 
                    290:     }
1.1       daniel    291:     if (cur->root != NULL) {
1.8       daniel    292:         htmlNodeListDump(buf, cur, cur->root);
1.1       daniel    293:     }
                    294:     xmlBufferWriteChar(buf, "\n");
1.12      daniel    295:     cur->type = type;
1.1       daniel    296: }
                    297: 
                    298: /**
                    299:  * htmlDocDumpMemory:
                    300:  * @cur:  the document
                    301:  * @mem:  OUT: the memory pointer
                    302:  * @size:  OUT: the memory lenght
                    303:  *
1.6       daniel    304:  * Dump an HTML document in memory and return the xmlChar * and it's size.
1.1       daniel    305:  * It's up to the caller to free the memory.
                    306:  */
                    307: void
1.6       daniel    308: htmlDocDumpMemory(xmlDocPtr cur, xmlChar**mem, int *size) {
1.1       daniel    309:     xmlBufferPtr buf;
                    310: 
                    311:     if (cur == NULL) {
                    312: #ifdef DEBUG_TREE
                    313:         fprintf(stderr, "xmlDocDumpMemory : document == NULL\n");
                    314: #endif
                    315:        *mem = NULL;
                    316:        *size = 0;
                    317:        return;
                    318:     }
                    319:     buf = xmlBufferCreate();
                    320:     if (buf == NULL) {
                    321:        *mem = NULL;
                    322:        *size = 0;
                    323:        return;
                    324:     }
                    325:     htmlDocContentDump(buf, cur);
                    326:     *mem = buf->content;
                    327:     *size = buf->use;
                    328:     memset(buf, -1, sizeof(xmlBuffer));
1.4       daniel    329:     xmlFree(buf);
1.1       daniel    330: }
                    331: 
                    332: 
                    333: /**
                    334:  * htmlDocDump:
                    335:  * @f:  the FILE*
                    336:  * @cur:  the document
                    337:  *
                    338:  * Dump an HTML document to an open FILE.
                    339:  */
                    340: void
                    341: htmlDocDump(FILE *f, xmlDocPtr cur) {
                    342:     xmlBufferPtr buf;
                    343: 
                    344:     if (cur == NULL) {
                    345: #ifdef DEBUG_TREE
                    346:         fprintf(stderr, "xmlDocDump : document == NULL\n");
                    347: #endif
                    348:        return;
                    349:     }
                    350:     buf = xmlBufferCreate();
                    351:     if (buf == NULL) return;
                    352:     htmlDocContentDump(buf, cur);
                    353:     xmlBufferDump(f, buf);
                    354:     xmlBufferFree(buf);
                    355: }
                    356: 
                    357: /**
                    358:  * htmlSaveFile:
                    359:  * @filename:  the filename
                    360:  * @cur:  the document
                    361:  *
                    362:  * Dump an HTML document to a file.
                    363:  * 
                    364:  * returns: the number of byte written or -1 in case of failure.
                    365:  */
                    366: int
                    367: htmlSaveFile(const char *filename, xmlDocPtr cur) {
                    368:     xmlBufferPtr buf;
                    369:     FILE *output = NULL;
                    370:     int ret;
                    371: 
                    372:     /* 
                    373:      * save the content to a temp buffer.
                    374:      */
                    375:     buf = xmlBufferCreate();
                    376:     if (buf == NULL) return(0);
                    377:     htmlDocContentDump(buf, cur);
                    378: 
                    379:     output = fopen(filename, "w");
                    380:     if (output == NULL) return(-1);
                    381:     ret = xmlBufferDump(output, buf);
                    382:     fclose(output);
                    383: 
                    384:     xmlBufferFree(buf);
1.6       daniel    385:     return(ret * sizeof(xmlChar));
1.1       daniel    386: }
                    387: 

Webmaster