Annotation of XML/xmlIO.c, revision 1.3
1.1 daniel 1: /*
2: * xmlIO.c : implementation of the I/O interfaces used by the parser
3: *
4: * See Copyright for the status of this software.
5: *
6: * Daniel.Veillard@w3.org
7: */
8:
9: #include "config.h"
10:
11: #include <sys/types.h>
12: #include <sys/stat.h>
13: #include <fcntl.h>
14: #include <malloc.h>
15: #ifdef HAVE_UNISTD_H
16: #include <unistd.h>
17: #endif
18: #ifdef HAVE_ZLIB_H
19: #include <zlib.h>
20: #endif
21:
22: #include "xmlIO.h"
23:
24: /* #define DEBUG_INPUT */
1.2 daniel 25: /* #define VERBOSE_FAILURE */
1.1 daniel 26:
27: #ifdef DEBUG_INPUT
28: #define MINLEN 40
29: #else
30: #define MINLEN 4000
31: #endif
32:
33: /**
34: * xmlAllocParserInputBuffer:
35: * @enc: the charset encoding if known
36: *
37: * Create a buffered parser input for progressive parsing
38: *
39: * Returns the new parser input or NULL
40: */
41: xmlParserInputBufferPtr
42: xmlAllocParserInputBuffer(xmlCharEncoding enc) {
43: xmlParserInputBufferPtr ret;
44:
45: ret = (xmlParserInputBufferPtr) malloc(sizeof(xmlParserInputBuffer));
46: if (ret == NULL) {
47: fprintf(stderr, "xmlAllocParserInputBuffer : out of memory!\n");
48: return(NULL);
49: }
1.3 ! veillard 50: memset(ret, 0, (size_t) sizeof(xmlParserInputBuffer));
1.1 daniel 51: ret->buffer = xmlBufferCreate();
52: ret->encoder = xmlGetCharEncodingHandler(enc);
53: ret->fd = -1;
54:
55: return(ret);
56: }
57:
58: /**
59: * xmlFreeParserInputBuffer:
60: * @in: a buffered parser input
61: *
62: * Free up the memory used by a buffered parser input
63: */
64: void
65: xmlFreeParserInputBuffer(xmlParserInputBufferPtr in) {
66: if (in->buffer != NULL) {
67: xmlBufferFree(in->buffer);
68: in->buffer = NULL;
69: }
70: #ifdef HAVE_ZLIB_H
71: if (in->gzfile != NULL)
72: gzclose(in->gzfile);
73: #endif
74: if (in->fd >= 0)
75: close(in->fd);
1.3 ! veillard 76: memset(in, 0xbe, (size_t) sizeof(xmlParserInputBuffer));
1.1 daniel 77: free(in);
78: }
79:
80: /**
81: * xmlParserInputBufferCreateFilename:
82: * @filename: a C string containing the filename
83: * @enc: the charset encoding if known
84: *
85: * Create a buffered parser input for the progressive parsing of a file
86: * If filename is "-' then we use stdin as the input.
87: * Automatic support for ZLIB/Compress compressed document is provided
88: * by default if found at compile-time.
89: *
90: * Returns the new parser input or NULL
91: */
92: xmlParserInputBufferPtr
93: xmlParserInputBufferCreateFilename(const char *filename, xmlCharEncoding enc) {
94: xmlParserInputBufferPtr ret;
95: #ifdef HAVE_ZLIB_H
96: gzFile input;
97: #else
98: int input = -1;
99: #endif
100:
101: if (filename == NULL) return(NULL);
102:
103: if (!strcmp(filename, "-")) {
104: #ifdef HAVE_ZLIB_H
105: input = gzdopen (fileno(stdin), "r");
106: if (input == NULL) {
1.2 daniel 107: #ifdef VERBOSE_FAILURE
1.1 daniel 108: fprintf (stderr, "Cannot read from stdin\n");
109: perror ("gzdopen failed");
1.2 daniel 110: #endif
1.1 daniel 111: return(NULL);
112: }
113: #else
114: #ifdef WIN32
115: input = -1;
116: #else
117: input = fileno(stdin);
118: #endif
119: if (input < 0) {
1.2 daniel 120: #ifdef VERBOSE_FAILURE
1.1 daniel 121: fprintf (stderr, "Cannot read from stdin\n");
122: perror ("open failed");
1.2 daniel 123: #endif
124: return(NULL);
1.1 daniel 125: }
126: #endif
127: } else {
128: #ifdef HAVE_ZLIB_H
129: input = gzopen (filename, "r");
130: if (input == NULL) {
1.2 daniel 131: #ifdef VERBOSE_FAILURE
1.1 daniel 132: fprintf (stderr, "Cannot read file %s :\n", filename);
133: perror ("gzopen failed");
1.2 daniel 134: #endif
1.1 daniel 135: return(NULL);
136: }
137: #else
138: #ifdef WIN32
139: input = _open (filename, O_RDONLY | _O_BINARY);
140: #else
141: input = open (filename, O_RDONLY);
142: #endif
143: if (input < 0) {
1.2 daniel 144: #ifdef VERBOSE_FAILURE
1.1 daniel 145: fprintf (stderr, "Cannot read file %s :\n", filename);
146: perror ("open failed");
1.2 daniel 147: #endif
1.1 daniel 148: return(NULL);
149: }
150: #endif
151: }
152: /*
153: * TODO : get the 4 first bytes and
154: * if enc == XML_CHAR_ENCODING_NONE
155: * plug some encoding conversion routines here. !!!
156: * enc = xmlDetectCharEncoding(buffer);
157: */
158:
159: ret = xmlAllocParserInputBuffer(enc);
160: if (ret != NULL) {
161: #ifdef HAVE_ZLIB_H
162: ret->gzfile = input;
163: #else
164: ret->fd = input;
165: #endif
166: }
167: xmlParserInputBufferRead(ret, 4);
168:
169: return(ret);
170: }
171:
172: /**
173: * xmlParserInputBufferCreateFile:
174: * @file: a FILE*
175: * @enc: the charset encoding if known
176: *
177: * Create a buffered parser input for the progressive parsing of a FILE *
178: * buffered C I/O
179: *
180: * Returns the new parser input or NULL
181: */
182: xmlParserInputBufferPtr
183: xmlParserInputBufferCreateFile(FILE *file, xmlCharEncoding enc) {
184: xmlParserInputBufferPtr ret;
185:
186: if (file == NULL) return(NULL);
187:
188: ret = xmlAllocParserInputBuffer(enc);
189: if (ret != NULL)
190: ret->file = file;
191:
192: return(ret);
193: }
194:
195: /**
196: * xmlParserInputBufferCreateFd:
197: * @fd: a file descriptor number
198: * @enc: the charset encoding if known
199: *
200: * Create a buffered parser input for the progressive parsing for the input
201: * from a file descriptor
202: *
203: * Returns the new parser input or NULL
204: */
205: xmlParserInputBufferPtr
206: xmlParserInputBufferCreateFd(int fd, xmlCharEncoding enc) {
207: xmlParserInputBufferPtr ret;
208:
209: if (fd < 0) return(NULL);
210:
211: ret = xmlAllocParserInputBuffer(enc);
212: if (ret != NULL)
213: ret->fd = fd;
214:
215: return(ret);
216: }
217:
218: /**
219: * xmlParserInputBufferGrow:
220: * @in: a buffered parser input
221: * @len: indicative value of the amount of chars to read
222: *
223: * Grow up the content of the input buffer, the old data are preserved
224: * This routine handle the I18N transcoding to internal UTF-8
225: * TODO: one should be able to remove one copy
226: *
227: * Returns the number of chars read and stored in the buffer, or -1
228: * in case of error.
229: */
230: int
231: xmlParserInputBufferGrow(xmlParserInputBufferPtr in, int len) {
232: char *buffer = NULL;
233: #ifdef HAVE_ZLIB_H
234: gzFile input = (gzFile) in->gzfile;
235: #endif
236: int res = 0;
237: int nbchars = 0;
238: int buffree;
239:
240: if ((len <= MINLEN) && (len != 4))
241: len = MINLEN;
242: buffree = in->buffer->size - in->buffer->use;
243: if (buffree <= 0) {
244: fprintf(stderr, "xmlParserInputBufferGrow : buffer full !\n");
245: return(0);
246: }
247: if (len > buffree)
248: len = buffree;
249:
250: buffer = malloc((len + 1) * sizeof(char));
251: if (buffer == NULL) {
252: fprintf(stderr, "xmlParserInputBufferGrow : out of memory !\n");
253: return(-1);
254: }
255: if (in->file != NULL) {
256: res = fread(&buffer[0], 1, len, in->file);
257: #ifdef HAVE_ZLIB_H
258: } else if (in->gzfile != NULL) {
259: res = gzread(input, &buffer[0], len);
260: #endif
261: } else if (in->fd >= 0) {
262: res = read(in->fd, &buffer[0], len);
263: } else {
264: fprintf(stderr, "xmlParserInputBufferGrow : no input !\n");
265: free(buffer);
266: return(-1);
267: }
268: if (res == 0) {
269: free(buffer);
270: return(0);
271: }
272: if (res < 0) {
273: perror ("read error");
274: free(buffer);
275: return(-1);
276: }
277: if (in->encoder != NULL) {
278: CHAR *buf;
279:
280: buf = (CHAR *) malloc((res + 1) * 2 * sizeof(CHAR));
281: if (buf == NULL) {
282: fprintf(stderr, "xmlParserInputBufferGrow : out of memory !\n");
283: free(buffer);
284: return(-1);
285: }
286: nbchars = in->encoder->input(buf, (res + 1) * 2 * sizeof(CHAR),
287: buffer, res);
1.3 ! veillard 288: buf[nbchars] = 0;
1.1 daniel 289: xmlBufferAdd(in->buffer, (CHAR *) buf, nbchars);
290: free(buf);
291: } else {
292: nbchars = res;
1.3 ! veillard 293: buffer[nbchars] = 0;
1.1 daniel 294: xmlBufferAdd(in->buffer, (CHAR *) buffer, nbchars);
295: }
296: #ifdef DEBUG_INPUT
297: fprintf(stderr, "I/O: read %d chars, buffer %d/%d\n",
298: nbchars, in->buffer->use, in->buffer->size);
299: #endif
300: free(buffer);
301: return(nbchars);
302: }
303:
304: /**
305: * xmlParserInputBufferRead:
306: * @in: a buffered parser input
307: * @len: indicative value of the amount of chars to read
308: *
309: * Refresh the content of the input buffer, the old data are considered
310: * consumed
311: * This routine handle the I18N transcoding to internal UTF-8
312: *
313: * Returns the number of chars read and stored in the buffer, or -1
314: * in case of error.
315: */
316: int
317: xmlParserInputBufferRead(xmlParserInputBufferPtr in, int len) {
318: /* xmlBufferEmpty(in->buffer); */
319: return(xmlParserInputBufferGrow(in, len));
320: }
321:
Webmaster