Annotation of XML/xmlIO.c, revision 1.5

1.1       daniel      1: /*
                      2:  * xmlIO.c : implementation of the I/O interfaces used by the parser
                      3:  *
                      4:  * See Copyright for the status of this software.
                      5:  *
                      6:  * Daniel.Veillard@w3.org
                      7:  */
                      8: 
1.4       daniel      9: /*
                     10:  * TODO: plug-in a generic transfer library, like libwww if available
                     11:  */
                     12: 
1.1       daniel     13: #include "config.h"
                     14: 
                     15: #include <sys/types.h>
                     16: #include <sys/stat.h>
                     17: #include <fcntl.h>
                     18: #include <malloc.h>
                     19: #ifdef HAVE_UNISTD_H
                     20: #include <unistd.h>
                     21: #endif
                     22: #ifdef HAVE_ZLIB_H
                     23: #include <zlib.h>
                     24: #endif
1.4       daniel     25: #include <string.h>
1.1       daniel     26: 
1.5     ! daniel     27: #include "parser.h"
1.1       daniel     28: #include "xmlIO.h"
                     29: 
                     30: /* #define DEBUG_INPUT */
1.2       daniel     31: /* #define VERBOSE_FAILURE */
1.5     ! daniel     32: /* #define DEBUG_EXTERNAL_ENTITIES */
1.1       daniel     33: 
                     34: #ifdef DEBUG_INPUT
                     35: #define MINLEN 40
                     36: #else
                     37: #define MINLEN 4000
                     38: #endif
                     39: 
                     40: /**
                     41:  * xmlAllocParserInputBuffer:
                     42:  * @enc:  the charset encoding if known
                     43:  *
                     44:  * Create a buffered parser input for progressive parsing
                     45:  *
                     46:  * Returns the new parser input or NULL
                     47:  */
                     48: xmlParserInputBufferPtr
                     49: xmlAllocParserInputBuffer(xmlCharEncoding enc) {
                     50:     xmlParserInputBufferPtr ret;
                     51: 
                     52:     ret = (xmlParserInputBufferPtr) malloc(sizeof(xmlParserInputBuffer));
                     53:     if (ret == NULL) {
                     54:         fprintf(stderr, "xmlAllocParserInputBuffer : out of memory!\n");
                     55:        return(NULL);
                     56:     }
1.3       veillard   57:     memset(ret, 0, (size_t) sizeof(xmlParserInputBuffer));
1.1       daniel     58:     ret->buffer = xmlBufferCreate();
                     59:     ret->encoder = xmlGetCharEncodingHandler(enc);
                     60:     ret->fd = -1;
                     61: 
                     62:     return(ret);
                     63: }
                     64: 
                     65: /**
                     66:  * xmlFreeParserInputBuffer:
                     67:  * @in:  a buffered parser input
                     68:  *
                     69:  * Free up the memory used by a buffered parser input
                     70:  */
                     71: void
                     72: xmlFreeParserInputBuffer(xmlParserInputBufferPtr in) {
                     73:     if (in->buffer != NULL) {
                     74:         xmlBufferFree(in->buffer);
                     75:        in->buffer = NULL;
                     76:     }
                     77: #ifdef HAVE_ZLIB_H
                     78:     if (in->gzfile != NULL)
                     79:         gzclose(in->gzfile);
                     80: #endif
                     81:     if (in->fd >= 0)
                     82:         close(in->fd);
1.3       veillard   83:     memset(in, 0xbe, (size_t) sizeof(xmlParserInputBuffer));
1.1       daniel     84:     free(in);
                     85: }
                     86: 
                     87: /**
                     88:  * xmlParserInputBufferCreateFilename:
                     89:  * @filename:  a C string containing the filename
                     90:  * @enc:  the charset encoding if known
                     91:  *
                     92:  * Create a buffered parser input for the progressive parsing of a file
                     93:  * If filename is "-' then we use stdin as the input.
                     94:  * Automatic support for ZLIB/Compress compressed document is provided
                     95:  * by default if found at compile-time.
                     96:  *
                     97:  * Returns the new parser input or NULL
                     98:  */
                     99: xmlParserInputBufferPtr
                    100: xmlParserInputBufferCreateFilename(const char *filename, xmlCharEncoding enc) {
                    101:     xmlParserInputBufferPtr ret;
                    102: #ifdef HAVE_ZLIB_H
                    103:     gzFile input;
                    104: #else
                    105:     int input = -1;
                    106: #endif
                    107: 
                    108:     if (filename == NULL) return(NULL);
                    109: 
                    110:     if (!strcmp(filename, "-")) {
                    111: #ifdef HAVE_ZLIB_H
                    112:         input = gzdopen (fileno(stdin), "r");
                    113:         if (input == NULL) {
1.2       daniel    114: #ifdef VERBOSE_FAILURE
1.1       daniel    115:             fprintf (stderr, "Cannot read from stdin\n");
                    116:             perror ("gzdopen failed");
1.2       daniel    117: #endif
1.1       daniel    118:             return(NULL);
                    119:        }
                    120: #else
                    121: #ifdef WIN32
                    122:         input = -1;
                    123: #else
                    124:         input = fileno(stdin);
                    125: #endif
                    126:         if (input < 0) {
1.2       daniel    127: #ifdef VERBOSE_FAILURE
1.1       daniel    128:             fprintf (stderr, "Cannot read from stdin\n");
                    129:             perror ("open failed");
1.2       daniel    130: #endif
                    131:            return(NULL);
1.1       daniel    132:        }
                    133: #endif
                    134:     } else {
                    135: #ifdef HAVE_ZLIB_H
                    136:        input = gzopen (filename, "r");
                    137:        if (input == NULL) {
1.2       daniel    138: #ifdef VERBOSE_FAILURE
1.1       daniel    139:            fprintf (stderr, "Cannot read file %s :\n", filename);
                    140:            perror ("gzopen failed");
1.2       daniel    141: #endif
1.1       daniel    142:            return(NULL);
                    143:        }
                    144: #else
                    145: #ifdef WIN32
                    146:        input = _open (filename, O_RDONLY | _O_BINARY);
                    147: #else
                    148:        input = open (filename, O_RDONLY);
                    149: #endif
                    150:        if (input < 0) {
1.2       daniel    151: #ifdef VERBOSE_FAILURE
1.1       daniel    152:            fprintf (stderr, "Cannot read file %s :\n", filename);
                    153:            perror ("open failed");
1.2       daniel    154: #endif
1.1       daniel    155:            return(NULL);
                    156:        }
                    157: #endif
                    158:     }
                    159:     /* 
                    160:      * TODO : get the 4 first bytes and 
                    161:      * if enc == XML_CHAR_ENCODING_NONE
                    162:      * plug some encoding conversion routines here. !!!
                    163:      * enc = xmlDetectCharEncoding(buffer);
                    164:      */
                    165: 
                    166:     ret = xmlAllocParserInputBuffer(enc);
                    167:     if (ret != NULL) {
                    168: #ifdef HAVE_ZLIB_H
                    169:         ret->gzfile = input;
                    170: #else
                    171:         ret->fd = input;
                    172: #endif
                    173:     }
                    174:     xmlParserInputBufferRead(ret, 4);
                    175: 
                    176:     return(ret);
                    177: }
                    178: 
                    179: /**
                    180:  * xmlParserInputBufferCreateFile:
                    181:  * @file:  a FILE* 
                    182:  * @enc:  the charset encoding if known
                    183:  *
                    184:  * Create a buffered parser input for the progressive parsing of a FILE *
                    185:  * buffered C I/O
                    186:  *
                    187:  * Returns the new parser input or NULL
                    188:  */
                    189: xmlParserInputBufferPtr
                    190: xmlParserInputBufferCreateFile(FILE *file, xmlCharEncoding enc) {
                    191:     xmlParserInputBufferPtr ret;
                    192: 
                    193:     if (file == NULL) return(NULL);
                    194: 
                    195:     ret = xmlAllocParserInputBuffer(enc);
                    196:     if (ret != NULL)
                    197:         ret->file = file;
                    198: 
                    199:     return(ret);
                    200: }
                    201: 
                    202: /**
                    203:  * xmlParserInputBufferCreateFd:
                    204:  * @fd:  a file descriptor number
                    205:  * @enc:  the charset encoding if known
                    206:  *
                    207:  * Create a buffered parser input for the progressive parsing for the input
                    208:  * from a file descriptor
                    209:  *
                    210:  * Returns the new parser input or NULL
                    211:  */
                    212: xmlParserInputBufferPtr
                    213: xmlParserInputBufferCreateFd(int fd, xmlCharEncoding enc) {
                    214:     xmlParserInputBufferPtr ret;
                    215: 
                    216:     if (fd < 0) return(NULL);
                    217: 
                    218:     ret = xmlAllocParserInputBuffer(enc);
                    219:     if (ret != NULL)
                    220:         ret->fd = fd;
                    221: 
                    222:     return(ret);
                    223: }
                    224: 
                    225: /**
                    226:  * xmlParserInputBufferGrow:
                    227:  * @in:  a buffered parser input
                    228:  * @len:  indicative value of the amount of chars to read
                    229:  *
                    230:  * Grow up the content of the input buffer, the old data are preserved
                    231:  * This routine handle the I18N transcoding to internal UTF-8
                    232:  * TODO: one should be able to remove one copy
                    233:  *
                    234:  * Returns the number of chars read and stored in the buffer, or -1
                    235:  *         in case of error.
                    236:  */
                    237: int
                    238: xmlParserInputBufferGrow(xmlParserInputBufferPtr in, int len) {
                    239:     char *buffer = NULL;
                    240: #ifdef HAVE_ZLIB_H
                    241:     gzFile input = (gzFile) in->gzfile;
                    242: #endif
                    243:     int res = 0;
                    244:     int nbchars = 0;
                    245:     int buffree;
                    246: 
                    247:     if ((len <= MINLEN) && (len != 4)) 
                    248:         len = MINLEN;
                    249:     buffree = in->buffer->size - in->buffer->use;
                    250:     if (buffree <= 0) {
                    251:         fprintf(stderr, "xmlParserInputBufferGrow : buffer full !\n");
                    252:        return(0);
                    253:     }
                    254:     if (len > buffree) 
                    255:         len = buffree;
                    256: 
                    257:     buffer = malloc((len + 1) * sizeof(char));
                    258:     if (buffer == NULL) {
                    259:         fprintf(stderr, "xmlParserInputBufferGrow : out of memory !\n");
                    260:        return(-1);
                    261:     }
                    262:     if (in->file != NULL) {
                    263:        res = fread(&buffer[0], 1, len, in->file);
                    264: #ifdef HAVE_ZLIB_H
                    265:     } else if (in->gzfile != NULL) {
                    266:        res = gzread(input, &buffer[0], len);
                    267: #endif
                    268:     } else if (in->fd >= 0) {
                    269:        res = read(in->fd, &buffer[0], len);
                    270:     } else {
                    271:         fprintf(stderr, "xmlParserInputBufferGrow : no input !\n");
                    272:        free(buffer);
                    273:        return(-1);
                    274:     }
                    275:     if (res == 0) {
                    276:        free(buffer);
                    277:         return(0);
                    278:     }
                    279:     if (res < 0) {
                    280:        perror ("read error");
                    281:        free(buffer);
                    282:        return(-1);
                    283:     }
                    284:     if (in->encoder != NULL) {
                    285:         CHAR *buf;
                    286: 
                    287:        buf = (CHAR *) malloc((res + 1) * 2 * sizeof(CHAR));
                    288:        if (buf == NULL) {
                    289:            fprintf(stderr, "xmlParserInputBufferGrow : out of memory !\n");
                    290:            free(buffer);
                    291:            return(-1);
                    292:        }
                    293:        nbchars = in->encoder->input(buf, (res + 1) * 2 * sizeof(CHAR),
                    294:                                     buffer, res);
1.3       veillard  295:         buf[nbchars] = 0;
1.1       daniel    296:         xmlBufferAdd(in->buffer, (CHAR *) buf, nbchars);
                    297:        free(buf);
                    298:     } else {
                    299:        nbchars = res;
1.3       veillard  300:         buffer[nbchars] = 0;
1.1       daniel    301:         xmlBufferAdd(in->buffer, (CHAR *) buffer, nbchars);
                    302:     }
                    303: #ifdef DEBUG_INPUT
                    304:     fprintf(stderr, "I/O: read %d chars, buffer %d/%d\n",
                    305:             nbchars, in->buffer->use, in->buffer->size);
                    306: #endif
                    307:     free(buffer);
                    308:     return(nbchars);
                    309: }
                    310: 
                    311: /**
                    312:  * xmlParserInputBufferRead:
                    313:  * @in:  a buffered parser input
                    314:  * @len:  indicative value of the amount of chars to read
                    315:  *
                    316:  * Refresh the content of the input buffer, the old data are considered
                    317:  * consumed
                    318:  * This routine handle the I18N transcoding to internal UTF-8
                    319:  *
                    320:  * Returns the number of chars read and stored in the buffer, or -1
                    321:  *         in case of error.
                    322:  */
                    323: int
                    324: xmlParserInputBufferRead(xmlParserInputBufferPtr in, int len) {
                    325:     /* xmlBufferEmpty(in->buffer); */
                    326:     return(xmlParserInputBufferGrow(in, len));
1.4       daniel    327: }
                    328: 
                    329: /*
                    330:  * xmlParserGetDirectory:
                    331:  * @filename:  the path to a file
                    332:  *
                    333:  * lookup the directory for that file
                    334:  *
                    335:  * Returns a new allocated string containing the directory, or NULL.
                    336:  */
                    337: char *
                    338: xmlParserGetDirectory(const char *filename) {
                    339:     char *ret = NULL;
                    340:     char dir[1024];
                    341:     char *cur;
                    342:     char sep = '/';
                    343: 
                    344:     if (filename == NULL) return(NULL);
                    345: #ifdef WIN32
                    346:     sep = '\\';
                    347: #endif
                    348: 
                    349:     strncpy(dir, filename, 1023);
                    350:     dir[1023] = 0;
                    351:     cur = &dir[strlen(dir)];
                    352:     while (cur > dir) {
                    353:          if (*cur == sep) break;
                    354:         cur --;
                    355:     }
                    356:     if (*cur == sep) {
                    357:         if (cur == dir) dir[1] = 0;
                    358:        else *cur = 0;
                    359:        ret = strdup(dir);
                    360:     } else {
                    361:         if (getcwd(dir, 1024) != NULL) {
                    362:            dir[1023] = 0;
                    363:            ret = strdup(dir);
                    364:        }
                    365:     }
                    366:     return(ret);
1.5     ! daniel    367: }
        !           368: 
        !           369: /****************************************************************
        !           370:  *                                                             *
        !           371:  *             External entities loading                       *
        !           372:  *                                                             *
        !           373:  ****************************************************************/
        !           374: 
        !           375: /*
        !           376:  * xmlDefaultExternalEntityLoader:
        !           377:  * @URL:  the URL for the entity to load
        !           378:  * @ID:  the System ID for the entity to load
        !           379:  * @context:  the context in which the entity is called or NULL
        !           380:  *
        !           381:  * By default we don't load external entitites, yet.
        !           382:  * TODO: get a sample http implementation and scan for existing one
        !           383:  *       at compile time.
        !           384:  *
        !           385:  * Returns a new allocated xmlParserInputPtr, or NULL.
        !           386:  */
        !           387: static
        !           388: xmlParserInputPtr
        !           389: xmlDefaultExternalEntityLoader(const char *URL, const char *ID,
        !           390:                                xmlParserInputPtr context) {
        !           391: #ifdef DEBUG_EXTERNAL_ENTITIES
        !           392:     fprintf(stderr, "xmlDefaultExternalEntityLoader(%s, xxx)\n", URL);
        !           393: #endif
        !           394:     return(NULL);
        !           395: }
        !           396: 
        !           397: static xmlExternalEntityLoader xmlCurrentExternalEntityLoader =
        !           398:        xmlDefaultExternalEntityLoader;
        !           399: 
        !           400: /*
        !           401:  * xmlSetExternalEntityLoader:
        !           402:  * @f:  the new entity resolver function
        !           403:  *
        !           404:  * Changes the defaultexternal entity resolver function for the application
        !           405:  */
        !           406: void
        !           407: xmlSetExternalEntityLoader(xmlExternalEntityLoader f) {
        !           408:     xmlCurrentExternalEntityLoader = f;
        !           409: }
        !           410: 
        !           411: /*
        !           412:  * xmlGetExternalEntityLoader:
        !           413:  *
        !           414:  * Get the default external entity resolver function for the application
        !           415:  *
        !           416:  * Returns the xmlExternalEntityLoader function pointer
        !           417:  */
        !           418: xmlExternalEntityLoader
        !           419: xmlGetExternalEntityLoader(void) {
        !           420:     return(xmlCurrentExternalEntityLoader);
        !           421: }
        !           422: 
        !           423: /*
        !           424:  * xmlLoadExternalEntity:
        !           425:  * @URL:  the URL for the entity to load
        !           426:  * @ID:  the System ID for the entity to load
        !           427:  * @context:  the context in which the entity is called or NULL
        !           428:  *
        !           429:  * Load an external entity, note that the use of this function for
        !           430:  * unparsed entities may generate problems
        !           431:  * TODO: a more generic API must be designed
        !           432:  *
        !           433:  * Returns the xmlParserInputPtr or NULL
        !           434:  */
        !           435: xmlParserInputPtr
        !           436: xmlLoadExternalEntity(const char *URL, const char *ID,
        !           437:                       xmlParserInputPtr context) {
        !           438:     return(xmlCurrentExternalEntityLoader(URL, ID, context));
1.1       daniel    439: }
                    440: 

Webmaster