Annotation of XML/xmlIO.c, revision 1.3

1.1       daniel      1: /*
                      2:  * xmlIO.c : implementation of the I/O interfaces used by the parser
                      3:  *
                      4:  * See Copyright for the status of this software.
                      5:  *
                      6:  * Daniel.Veillard@w3.org
                      7:  */
                      8: 
                      9: #include "config.h"
                     10: 
                     11: #include <sys/types.h>
                     12: #include <sys/stat.h>
                     13: #include <fcntl.h>
                     14: #include <malloc.h>
                     15: #ifdef HAVE_UNISTD_H
                     16: #include <unistd.h>
                     17: #endif
                     18: #ifdef HAVE_ZLIB_H
                     19: #include <zlib.h>
                     20: #endif
                     21: 
                     22: #include "xmlIO.h"
                     23: 
                     24: /* #define DEBUG_INPUT */
1.2       daniel     25: /* #define VERBOSE_FAILURE */
1.1       daniel     26: 
                     27: #ifdef DEBUG_INPUT
                     28: #define MINLEN 40
                     29: #else
                     30: #define MINLEN 4000
                     31: #endif
                     32: 
                     33: /**
                     34:  * xmlAllocParserInputBuffer:
                     35:  * @enc:  the charset encoding if known
                     36:  *
                     37:  * Create a buffered parser input for progressive parsing
                     38:  *
                     39:  * Returns the new parser input or NULL
                     40:  */
                     41: xmlParserInputBufferPtr
                     42: xmlAllocParserInputBuffer(xmlCharEncoding enc) {
                     43:     xmlParserInputBufferPtr ret;
                     44: 
                     45:     ret = (xmlParserInputBufferPtr) malloc(sizeof(xmlParserInputBuffer));
                     46:     if (ret == NULL) {
                     47:         fprintf(stderr, "xmlAllocParserInputBuffer : out of memory!\n");
                     48:        return(NULL);
                     49:     }
1.3     ! veillard   50:     memset(ret, 0, (size_t) sizeof(xmlParserInputBuffer));
1.1       daniel     51:     ret->buffer = xmlBufferCreate();
                     52:     ret->encoder = xmlGetCharEncodingHandler(enc);
                     53:     ret->fd = -1;
                     54: 
                     55:     return(ret);
                     56: }
                     57: 
                     58: /**
                     59:  * xmlFreeParserInputBuffer:
                     60:  * @in:  a buffered parser input
                     61:  *
                     62:  * Free up the memory used by a buffered parser input
                     63:  */
                     64: void
                     65: xmlFreeParserInputBuffer(xmlParserInputBufferPtr in) {
                     66:     if (in->buffer != NULL) {
                     67:         xmlBufferFree(in->buffer);
                     68:        in->buffer = NULL;
                     69:     }
                     70: #ifdef HAVE_ZLIB_H
                     71:     if (in->gzfile != NULL)
                     72:         gzclose(in->gzfile);
                     73: #endif
                     74:     if (in->fd >= 0)
                     75:         close(in->fd);
1.3     ! veillard   76:     memset(in, 0xbe, (size_t) sizeof(xmlParserInputBuffer));
1.1       daniel     77:     free(in);
                     78: }
                     79: 
                     80: /**
                     81:  * xmlParserInputBufferCreateFilename:
                     82:  * @filename:  a C string containing the filename
                     83:  * @enc:  the charset encoding if known
                     84:  *
                     85:  * Create a buffered parser input for the progressive parsing of a file
                     86:  * If filename is "-' then we use stdin as the input.
                     87:  * Automatic support for ZLIB/Compress compressed document is provided
                     88:  * by default if found at compile-time.
                     89:  *
                     90:  * Returns the new parser input or NULL
                     91:  */
                     92: xmlParserInputBufferPtr
                     93: xmlParserInputBufferCreateFilename(const char *filename, xmlCharEncoding enc) {
                     94:     xmlParserInputBufferPtr ret;
                     95: #ifdef HAVE_ZLIB_H
                     96:     gzFile input;
                     97: #else
                     98:     int input = -1;
                     99: #endif
                    100: 
                    101:     if (filename == NULL) return(NULL);
                    102: 
                    103:     if (!strcmp(filename, "-")) {
                    104: #ifdef HAVE_ZLIB_H
                    105:         input = gzdopen (fileno(stdin), "r");
                    106:         if (input == NULL) {
1.2       daniel    107: #ifdef VERBOSE_FAILURE
1.1       daniel    108:             fprintf (stderr, "Cannot read from stdin\n");
                    109:             perror ("gzdopen failed");
1.2       daniel    110: #endif
1.1       daniel    111:             return(NULL);
                    112:        }
                    113: #else
                    114: #ifdef WIN32
                    115:         input = -1;
                    116: #else
                    117:         input = fileno(stdin);
                    118: #endif
                    119:         if (input < 0) {
1.2       daniel    120: #ifdef VERBOSE_FAILURE
1.1       daniel    121:             fprintf (stderr, "Cannot read from stdin\n");
                    122:             perror ("open failed");
1.2       daniel    123: #endif
                    124:            return(NULL);
1.1       daniel    125:        }
                    126: #endif
                    127:     } else {
                    128: #ifdef HAVE_ZLIB_H
                    129:        input = gzopen (filename, "r");
                    130:        if (input == NULL) {
1.2       daniel    131: #ifdef VERBOSE_FAILURE
1.1       daniel    132:            fprintf (stderr, "Cannot read file %s :\n", filename);
                    133:            perror ("gzopen failed");
1.2       daniel    134: #endif
1.1       daniel    135:            return(NULL);
                    136:        }
                    137: #else
                    138: #ifdef WIN32
                    139:        input = _open (filename, O_RDONLY | _O_BINARY);
                    140: #else
                    141:        input = open (filename, O_RDONLY);
                    142: #endif
                    143:        if (input < 0) {
1.2       daniel    144: #ifdef VERBOSE_FAILURE
1.1       daniel    145:            fprintf (stderr, "Cannot read file %s :\n", filename);
                    146:            perror ("open failed");
1.2       daniel    147: #endif
1.1       daniel    148:            return(NULL);
                    149:        }
                    150: #endif
                    151:     }
                    152:     /* 
                    153:      * TODO : get the 4 first bytes and 
                    154:      * if enc == XML_CHAR_ENCODING_NONE
                    155:      * plug some encoding conversion routines here. !!!
                    156:      * enc = xmlDetectCharEncoding(buffer);
                    157:      */
                    158: 
                    159:     ret = xmlAllocParserInputBuffer(enc);
                    160:     if (ret != NULL) {
                    161: #ifdef HAVE_ZLIB_H
                    162:         ret->gzfile = input;
                    163: #else
                    164:         ret->fd = input;
                    165: #endif
                    166:     }
                    167:     xmlParserInputBufferRead(ret, 4);
                    168: 
                    169:     return(ret);
                    170: }
                    171: 
                    172: /**
                    173:  * xmlParserInputBufferCreateFile:
                    174:  * @file:  a FILE* 
                    175:  * @enc:  the charset encoding if known
                    176:  *
                    177:  * Create a buffered parser input for the progressive parsing of a FILE *
                    178:  * buffered C I/O
                    179:  *
                    180:  * Returns the new parser input or NULL
                    181:  */
                    182: xmlParserInputBufferPtr
                    183: xmlParserInputBufferCreateFile(FILE *file, xmlCharEncoding enc) {
                    184:     xmlParserInputBufferPtr ret;
                    185: 
                    186:     if (file == NULL) return(NULL);
                    187: 
                    188:     ret = xmlAllocParserInputBuffer(enc);
                    189:     if (ret != NULL)
                    190:         ret->file = file;
                    191: 
                    192:     return(ret);
                    193: }
                    194: 
                    195: /**
                    196:  * xmlParserInputBufferCreateFd:
                    197:  * @fd:  a file descriptor number
                    198:  * @enc:  the charset encoding if known
                    199:  *
                    200:  * Create a buffered parser input for the progressive parsing for the input
                    201:  * from a file descriptor
                    202:  *
                    203:  * Returns the new parser input or NULL
                    204:  */
                    205: xmlParserInputBufferPtr
                    206: xmlParserInputBufferCreateFd(int fd, xmlCharEncoding enc) {
                    207:     xmlParserInputBufferPtr ret;
                    208: 
                    209:     if (fd < 0) return(NULL);
                    210: 
                    211:     ret = xmlAllocParserInputBuffer(enc);
                    212:     if (ret != NULL)
                    213:         ret->fd = fd;
                    214: 
                    215:     return(ret);
                    216: }
                    217: 
                    218: /**
                    219:  * xmlParserInputBufferGrow:
                    220:  * @in:  a buffered parser input
                    221:  * @len:  indicative value of the amount of chars to read
                    222:  *
                    223:  * Grow up the content of the input buffer, the old data are preserved
                    224:  * This routine handle the I18N transcoding to internal UTF-8
                    225:  * TODO: one should be able to remove one copy
                    226:  *
                    227:  * Returns the number of chars read and stored in the buffer, or -1
                    228:  *         in case of error.
                    229:  */
                    230: int
                    231: xmlParserInputBufferGrow(xmlParserInputBufferPtr in, int len) {
                    232:     char *buffer = NULL;
                    233: #ifdef HAVE_ZLIB_H
                    234:     gzFile input = (gzFile) in->gzfile;
                    235: #endif
                    236:     int res = 0;
                    237:     int nbchars = 0;
                    238:     int buffree;
                    239: 
                    240:     if ((len <= MINLEN) && (len != 4)) 
                    241:         len = MINLEN;
                    242:     buffree = in->buffer->size - in->buffer->use;
                    243:     if (buffree <= 0) {
                    244:         fprintf(stderr, "xmlParserInputBufferGrow : buffer full !\n");
                    245:        return(0);
                    246:     }
                    247:     if (len > buffree) 
                    248:         len = buffree;
                    249: 
                    250:     buffer = malloc((len + 1) * sizeof(char));
                    251:     if (buffer == NULL) {
                    252:         fprintf(stderr, "xmlParserInputBufferGrow : out of memory !\n");
                    253:        return(-1);
                    254:     }
                    255:     if (in->file != NULL) {
                    256:        res = fread(&buffer[0], 1, len, in->file);
                    257: #ifdef HAVE_ZLIB_H
                    258:     } else if (in->gzfile != NULL) {
                    259:        res = gzread(input, &buffer[0], len);
                    260: #endif
                    261:     } else if (in->fd >= 0) {
                    262:        res = read(in->fd, &buffer[0], len);
                    263:     } else {
                    264:         fprintf(stderr, "xmlParserInputBufferGrow : no input !\n");
                    265:        free(buffer);
                    266:        return(-1);
                    267:     }
                    268:     if (res == 0) {
                    269:        free(buffer);
                    270:         return(0);
                    271:     }
                    272:     if (res < 0) {
                    273:        perror ("read error");
                    274:        free(buffer);
                    275:        return(-1);
                    276:     }
                    277:     if (in->encoder != NULL) {
                    278:         CHAR *buf;
                    279: 
                    280:        buf = (CHAR *) malloc((res + 1) * 2 * sizeof(CHAR));
                    281:        if (buf == NULL) {
                    282:            fprintf(stderr, "xmlParserInputBufferGrow : out of memory !\n");
                    283:            free(buffer);
                    284:            return(-1);
                    285:        }
                    286:        nbchars = in->encoder->input(buf, (res + 1) * 2 * sizeof(CHAR),
                    287:                                     buffer, res);
1.3     ! veillard  288:         buf[nbchars] = 0;
1.1       daniel    289:         xmlBufferAdd(in->buffer, (CHAR *) buf, nbchars);
                    290:        free(buf);
                    291:     } else {
                    292:        nbchars = res;
1.3     ! veillard  293:         buffer[nbchars] = 0;
1.1       daniel    294:         xmlBufferAdd(in->buffer, (CHAR *) buffer, nbchars);
                    295:     }
                    296: #ifdef DEBUG_INPUT
                    297:     fprintf(stderr, "I/O: read %d chars, buffer %d/%d\n",
                    298:             nbchars, in->buffer->use, in->buffer->size);
                    299: #endif
                    300:     free(buffer);
                    301:     return(nbchars);
                    302: }
                    303: 
                    304: /**
                    305:  * xmlParserInputBufferRead:
                    306:  * @in:  a buffered parser input
                    307:  * @len:  indicative value of the amount of chars to read
                    308:  *
                    309:  * Refresh the content of the input buffer, the old data are considered
                    310:  * consumed
                    311:  * This routine handle the I18N transcoding to internal UTF-8
                    312:  *
                    313:  * Returns the number of chars read and stored in the buffer, or -1
                    314:  *         in case of error.
                    315:  */
                    316: int
                    317: xmlParserInputBufferRead(xmlParserInputBufferPtr in, int len) {
                    318:     /* xmlBufferEmpty(in->buffer); */
                    319:     return(xmlParserInputBufferGrow(in, len));
                    320: }
                    321: 

Webmaster