/*
* parser.c : an XML 1.0 non-verifying parser
*/
#include <config.h>
#include <stdio.h>
#include <ctype.h>
#include <string.h>
#include <malloc.h>
#include <sys/stat.h>
#ifdef HAVE_FCNTL_H
#include <fcntl.h>
#endif
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#include "parser.h"
#include "tree.h"
/*
 * A few macros needed to help building the parser.
 *
 * Each IS_xxx(c) macro evaluates to non-zero when the character code `c`
 * belongs to the named class. The argument may be evaluated several times,
 * so it must be free of side effects.
 */
#ifdef UNICODE
/*
 * UNICODE version of the macros. Incomplete now !!!!
 *
 * SKIP_BLANKS is deliberately not defined here: it is defined once, after
 * this conditional, on top of IS_BLANK. A second definition with a different
 * body would be an invalid macro redefinition.
 */
#define IS_CHAR(c) \
    (((c) == 0x09) || ((c) == 0x0a) || ((c) == 0x0d) || \
     (((c) >= 0x20) && ((c) != 0xFFFE) && ((c) != 0xFFFF)))
/*
 * Only the Latin-1 letters are covered so far. 0xaa, 0xb5 and 0xba are the
 * three isolated Latin-1 letters; the original range test
 * ">= 0xaa && <= 0x5b" could never be true.
 */
#define IS_BASECHAR(c) \
    ((((c) >= 0x41) && ((c) <= 0x5a)) || \
     (((c) >= 0x61) && ((c) <= 0x7a)) || \
     ((c) == 0xaa) || ((c) == 0xb5) || ((c) == 0xba) || \
     (((c) >= 0xc0) && ((c) <= 0xd6)) || \
     (((c) >= 0xd8) && ((c) <= 0xf6)) || \
     (((c) >= 0xf8) && ((c) <= 0xff)))
/* Only the ASCII digits are covered so far. */
#define IS_DIGIT(c) (((c) >= 0x30) && ((c) <= 0x39))
/* Not implemented yet: no character is reported as combining. */
#define IS_COMBINING(c) 0
#define IS_IGNORABLE(c) \
    ((((c) >= 0x200c) && ((c) <= 0x200f)) || \
     (((c) >= 0x202a) && ((c) <= 0x202e)) || \
     (((c) >= 0x206a) && ((c) <= 0x206f)) || \
     ((c) == 0xfeff))
#define IS_EXTENDER(c) \
    (((c) == 0xb7) || ((c) == 0x2d0) || ((c) == 0x2d1) || \
     ((c) == 0x387) || ((c) == 0x640) || ((c) == 0xe46) || \
     ((c) == 0xec6) || ((c) == 0x3005) || \
     (((c) >= 0x3031) && ((c) <= 0x3035)) || \
     (((c) >= 0x309b) && ((c) <= 0x309e)) || \
     (((c) >= 0x30fc) && ((c) <= 0x30fe)) || \
     (((c) >= 0xff70) && ((c) <= 0xff9e)) || \
     ((c) == 0xff9f))
#define IS_IDEOGRAPHIC(c) \
    ((((c) >= 0x4e00) && ((c) <= 0x9fa5)) || \
     (((c) >= 0xf900) && ((c) <= 0xfa2d)) || \
     (((c) >= 0x3021) && ((c) <= 0x3029)) || \
     ((c) == 0x3007))
#define IS_LETTER(c) (IS_BASECHAR(c) || IS_IDEOGRAPHIC(c))
/* White space: space, tab, line feed and carriage return. */
#define IS_BLANK(c) \
    (((c) == 0x20) || ((c) == 0x09) || ((c) == 0x0a) || ((c) == 0x0d))
#else
/*
 * 8bits / ASCII version of the macros.
 */
#define IS_CHAR(c) \
    (((c) == 0x09) || ((c) == 0x0a) || ((c) == 0x0d) || ((c) >= 0x20))
/*
 * Latin-1 letters. 0xaa, 0xb5 and 0xba are the three isolated Latin-1
 * letters; the original range test ">= 0xaa && <= 0x5b" could never be true.
 */
#define IS_BASECHAR(c) \
    ((((c) >= 0x41) && ((c) <= 0x5a)) || \
     (((c) >= 0x61) && ((c) <= 0x7a)) || \
     ((c) == 0xaa) || ((c) == 0xb5) || ((c) == 0xba) || \
     (((c) >= 0xc0) && ((c) <= 0xd6)) || \
     (((c) >= 0xd8) && ((c) <= 0xf6)) || \
     (((c) >= 0xf8) && ((c) <= 0xff)))
#define IS_DIGIT(c) (((c) >= 0x30) && ((c) <= 0x39))
#define IS_LETTER(c) IS_BASECHAR(c)
#define IS_COMBINING(c) 0
#define IS_IGNORABLE(c) 0
#define IS_EXTENDER(c) ((c) == 0xb7)
/* White space: space, tab, line feed and carriage return. */
#define IS_BLANK(c) \
    (((c) == 0x20) || ((c) == 0x09) || ((c) == 0x0a) || ((c) == 0x0d))
#endif
/*
 * Pointer-advancing helpers. `p` must be a modifiable pointer lvalue; it is
 * evaluated several times.
 *
 * SKIP_EOL skips exactly one line ending: CR, LF, CR LF or LF CR. The
 * original tested 0x13 and 0x10 (decimal 13 and 10 written as hex), so it
 * never matched a real line ending. It is wrapped in do/while(0) so that it
 * behaves as a single statement.
 *
 * SKIP_BLANKS, MOVETO_ENDTAG and MOVETO_STARTTAG keep their historical form
 * (a complete `while` statement including its ';'), so they must not be used
 * as the unbraced body of an if/else.
 */
#define SKIP_EOL(p) \
    do { \
        if (*(p) == 0x0d) { (p)++; if (*(p) == 0x0a) (p)++; } \
        else if (*(p) == 0x0a) { (p)++; if (*(p) == 0x0d) (p)++; } \
    } while (0)
/* Advance p past any run of blank characters. */
#define SKIP_BLANKS(p) \
    while (IS_BLANK(*(p))) (p)++;
/* Advance p to the next '>' or to the first non-CHAR (end of input). */
#define MOVETO_ENDTAG(p) \
    while (IS_CHAR(*(p)) && (*(p) != '>')) (p)++;
/* Advance p to the next '<' or to the first non-CHAR (end of input). */
#define MOVETO_STARTTAG(p) \
    while (IS_CHAR(*(p)) && (*(p) != '<')) (p)++;
/*
 * Forward declaration needed for recursive behaviour: xmlParseContent calls
 * xmlParseElement for nested elements, and xmlParseElement (defined further
 * down) calls xmlParseContent for its children.
 */
xmlNodePtr xmlParseElement(CHAR **p, xmlDocPtr doc);
/*
 * xmlHandleData : application-specific filtering of a piece of text.
 *
 * For example in WebDav, any piece made only of blanks is eliminated.
 *
 * in: the text to examine; it is released with free() when discarded.
 *
 * Returns `in` unchanged as soon as a non-blank character is found.
 * Returns NULL when `in` is NULL, or when every character up to the first
 * non-CHAR (the end of the string) is blank; in that second case `in` has
 * been freed.
 */
CHAR *xmlHandleData(CHAR *in) {
    CHAR *scan;

    if (in == NULL)
        return(NULL);

    for (scan = in; IS_CHAR(*scan); scan++) {
        if (!IS_BLANK(*scan))
            return(in);        /* real content: keep the text */
    }

    /* Nothing but blanks: drop it. */
    free(in);
    return(NULL);
}
/*
 * xmlStrndup : a strndup for arrays of CHAR's
 *
 * cur: first CHAR to copy.
 * len: number of CHARs to copy.
 *
 * Returns a newly malloc'ed, zero-terminated copy of the first `len` CHARs
 * of `cur` (to be released with free()), or NULL when `cur` is NULL, `len`
 * is negative, or the allocation fails. An allocation failure is reported
 * on stderr.
 */
CHAR *xmlStrndup(const CHAR *cur, int len) {
    CHAR *ret;
    size_t bytes;

    if ((cur == NULL) || (len < 0))
        return(NULL);

    bytes = ((size_t) len + 1) * sizeof(CHAR);
    ret = malloc(bytes);
    if (ret == NULL) {
        /* The size is a size_t: print it as unsigned long, not with %d. */
        fprintf(stderr, "malloc of %lu byte failed\n",
                (unsigned long) bytes);
        return(NULL);
    }
    memcpy(ret, cur, (size_t) len * sizeof(CHAR));
    ret[len] = 0;
    return(ret);
}
/*
 * xmlStrdup : a strdup for CHAR's
 *
 * The end of the string is the first character for which IS_CHAR is false,
 * which is how the rest of this file detects the end of input.
 *
 * Returns whatever xmlStrndup returns for that span: a newly allocated copy,
 * or NULL on allocation failure.
 */
CHAR *xmlStrdup(const CHAR *cur) {
    const CHAR *end;

    for (end = cur; IS_CHAR(*end); end++)
        ;
    return(xmlStrndup(cur, end - cur));
}
/*
 * xmlParseName : parse an XML name.
 *
 * Name ::= (Letter | '_') (NameChar)*
 *
 * p: in/out cursor. On success it is moved just past the name.
 *
 * Returns a newly allocated copy of the name (from xmlStrndup), or NULL
 * when *p does not start a name; in that case *p is left untouched.
 */
CHAR *xmlParseName(CHAR **p) {
    CHAR *start = *p;
    CHAR *end;
    CHAR *name;

    if (!IS_LETTER(*start) && (*start != '_'))
        return(NULL);

    /* Consume every following name character. */
    for (end = start + 1;
         (IS_LETTER(*end)) || (IS_DIGIT(*end)) ||
         (*end == '.') || (*end == '-') || (*end == '_') ||
         (*end == ':') ||
         (IS_COMBINING(*end)) || (IS_IGNORABLE(*end)) ||
         (IS_EXTENDER(*end));
         end++)
        ;

    name = xmlStrndup(start, end - start);
    *p = end;
    return(name);
}
/*
 * Parse and return a string between quotes or doublequotes.
 *
 * p: in/out cursor, expected to point at the opening ' or ".
 *
 * Returns a newly allocated copy of the text between the quotes and moves
 * *p past the closing quote. Returns NULL when *p is not a quote (*p is
 * left unchanged) or when the closing quote is missing (an error is printed
 * on stderr and *p is left where the scan stopped).
 */
CHAR *xmlParseQuotedString(CHAR **p) {
    CHAR *cur = *p;
    CHAR *start;
    CHAR *ret = NULL;
    CHAR quote = *cur;

    if ((quote == '"') || (quote == '\'')) {
        cur++;
        start = cur;
        /* Look for the same quote character that opened the string. */
        while (IS_CHAR(*cur) && (*cur != quote))
            cur++;
        if (*cur == quote) {
            ret = xmlStrndup(start, cur - start);
            cur++;
        } else if (quote == '"') {
            fprintf(stderr, "String not closed \"%.50s\n", start);
        } else {
            fprintf(stderr, "String not closed '%.50s\n", start);
        }
    }
    *p = cur;
    return(ret);
}
/*
 * Skip an XML (SGML) comment <!-- .... -->
 *
 * p: in/out cursor. Nothing happens unless *p points at "<!--".
 *
 * On success *p is moved just past the closing "-->". When the comment is
 * not terminated an error is printed on stderr and *p is left on the first
 * character following "<!--".
 */
void xmlParserSkipComment(CHAR **p) {
    CHAR *cur = *p, *q, *r, *start;

    /*
     * An extra check may avoid errors and isn't that costly !
     */
    if ((cur[0] != '<') || (cur[1] != '!') ||
        (cur[2] != '-') || (cur[3] != '-')) return;
    cur += 4;
    start = cur;

    /*
     * The scan below looks at three consecutive characters. Make sure the
     * first two exist before stepping over them, otherwise a truncated
     * input such as "<!--" would be read past its terminator.
     */
    if (!IS_CHAR(cur[0]) || !IS_CHAR(cur[1])) {
        fprintf(stderr, "Comment not terminated <!--%.50s\n", start);
        *p = start;
        return;
    }

    /* q, r and cur form a sliding window looking for "-->". */
    q = cur;
    r = cur + 1;
    cur += 2;
    while (IS_CHAR(*cur) &&
           ((*cur != '>') || (*r != '-') || (*q != '-'))) {
        cur++; r++; q++;
    }
    if (!IS_CHAR(*cur)) {
        fprintf(stderr, "Comment not terminated <!--%.50s\n", start);
        *p = start;
    } else {
        cur++;        /* step over the '>' */
        *p = cur;
    }
}
/*
 * xmlParseNamespace: parse specific '<?namespace ...' constructs.
 *
 * p:   in/out cursor. It must point at the word "namespace" (the caller has
 *      already consumed "<?"). On return it is just past the closing '>',
 *      or at the end of the input when no '>' was found.
 * doc: the document the namespace is registered on.
 *
 * Recognizes the attributes href="..." and AS="...". When an href was found,
 * xmlNewDtd(doc, href, AS) is called. Unexpected characters are echoed on
 * stderr after a one-time "found garbage" header per run of garbage.
 */
void xmlParseNamespace(CHAR **p, xmlDocPtr doc) {
    CHAR *cur = *p;
    CHAR *href = NULL;
    CHAR *AS = NULL;
    int garbage = 0;

    /*
     * We know that 'namespace' is here.
     */
    cur += 9;
    SKIP_BLANKS(cur);

    while (IS_CHAR(*cur) && (*cur != '>')) {
        /*
         * We can have 'href' or 'AS' attributes.
         */
        if ((cur[0] == 'h') && (cur[1] == 'r') && (cur[2] == 'e') &&
            (cur[3] == 'f')) {
            garbage = 0;
            cur += 4;
            SKIP_BLANKS(cur);
            if (*cur != '=') continue;
            cur++;
            SKIP_BLANKS(cur);
            free(href);        /* a repeated attribute replaces the previous one */
            href = xmlParseQuotedString(&cur);
            SKIP_BLANKS(cur);
        } else if ((cur[0] == 'A') && (cur[1] == 'S')) {
            garbage = 0;
            cur += 2;
            SKIP_BLANKS(cur);
            if (*cur != '=') continue;
            cur++;
            SKIP_BLANKS(cur);
            free(AS);        /* a repeated attribute replaces the previous one */
            AS = xmlParseQuotedString(&cur);
            SKIP_BLANKS(cur);
        } else if ((cur[0] == '?') && (cur[1] == '>')) {
            garbage = 0;
            cur++;        /* now on the '>', which ends the loop */
        } else {
            /*
             * Found garbage when parsing the namespace. The header is
             * printed once per run: the flag is raised here and cleared
             * when something valid is recognized again.
             */
            if (!garbage) fprintf(stderr,
                                  "\nxmlParseNamespace found garbage: ");
            garbage = 1;
            fprintf(stderr, "%c", *cur);
            cur++;
        }
    }

    MOVETO_ENDTAG(cur);
    /* Only step over a real '>', never over the end of the input. */
    if (*cur == '>') cur++;

    /*
     * Register the DTD.
     */
    if (href != NULL)
        xmlNewDtd(doc, href, AS);
    free(AS);
    free(href);
    *p = cur;
}
/*
 * xmlParsePI: parse an XML Processing Instruction.
 *
 * p:   in/out cursor. Nothing happens unless *p points at "<?". On return
 *      it is just past the closing '>' of the instruction, or at the end of
 *      the input when no '>' was found.
 * doc: the document being built; passed on to xmlParseNamespace.
 *
 * Only '<?namespace ...' is interpreted. Any other instruction is reported
 * on stderr and skipped.
 */
void xmlParsePI(CHAR **p, xmlDocPtr doc) {
    CHAR *cur = *p;

    if ((cur[0] == '<') && (cur[1] == '?')) {
        /*
         * this is a Processing Instruction.
         */
        cur += 2;

        /*
         * Special for WebDav, support for the Processing Instruction
         * '<?namespace ...' construct in the header of the XML document.
         */
        if ((cur[0] == 'n') && (cur[1] == 'a') &&
            (cur[2] == 'm') && (cur[3] == 'e') &&
            (cur[4] == 's') && (cur[5] == 'p') &&
            (cur[6] == 'a') && (cur[7] == 'c') &&
            (cur[8] == 'e')) {
            xmlParseNamespace(&cur, doc);
        } else {
            /*
             * Unknown PI, ignore it ! Use a precision (%.30s), not a field
             * width (%30s), so that at most 30 characters are printed
             * instead of the whole remaining document.
             */
            fprintf(stderr, "xmlParsePI : skipping unknown PI %.30s\n", cur);
            MOVETO_ENDTAG(cur);
            /* Only step over a real '>', never over the end of the input. */
            if (*cur == '>') cur++;
        }
    }
    *p = cur;
}
/*
 * xmlParseAttribute: parse one attribute of a start tag.
 *
 * Attribute ::= Name Eq AttValue
 *
 * p:    in/out cursor. Nothing happens (and *p is untouched) unless *p
 *       starts a name. Otherwise *p ends up after the name, after the '='
 *       if any, and after the quoted value if any.
 * node: the element receiving the attribute through xmlNewProp.
 *
 * The parser is lax: blanks around '=' are accepted and an attribute without
 * a value is registered with a NULL value. A value that is not quoted is
 * reported on stderr and not consumed.
 *
 * NOTE(review): `name` and `value` are handed to xmlNewProp and never freed
 * here. Whether xmlNewProp copies them or takes ownership is not visible in
 * this file -- confirm against tree.c.
 */
void xmlParseAttribute(CHAR **p, xmlNodePtr node) {
    CHAR *cur = *p;
    CHAR *start;
    CHAR *name;
    CHAR *value = NULL;

    if (!IS_LETTER(*cur) && (*cur != '_'))
        return;

    /* Attribute name. */
    start = cur;
    for (cur++;
         (IS_LETTER(*cur)) || (IS_DIGIT(*cur)) ||
         (*cur == '.') || (*cur == '-') || (*cur == '_') ||
         (*cur == ':') ||
         (IS_COMBINING(*cur)) || (IS_IGNORABLE(*cur)) ||
         (IS_EXTENDER(*cur));
         cur++)
        ;
    name = xmlStrndup(start, cur - start);

    /* Optional "= value" part. */
    SKIP_BLANKS(cur);
    if (*cur == '=') {
        cur++;
        SKIP_BLANKS(cur);
        if ((*cur == '\'') || (*cur == '"')) {
            value = xmlParseQuotedString(&cur);
        } else {
            fprintf(stderr, "Quotes were expected for attribute value %.20s\n",
                    start);
        }
    }

    /*
     * Add the attribute to the node.
     */
    if (name != NULL)
        xmlNewProp(node, name, value);
    *p = cur;
}
/*
 * xmlParseStartTag: parse a start of tag.
 *
 * In theory one should just parse a Name, but with the addition of the
 * namespaces needed for WebDav the element name may carry a namespace
 * prefix:
 *
 *   QName     ::= (NSPart ':')? LocalPart
 *   NSPart    ::= Name
 *   LocalPart ::= Name
 *   STag      ::= '<' QName (S Attribute)* S? '>'
 *
 * instead of:
 *
 *   STag      ::= '<' Name (S Attribute)* S? '>'
 *
 * p:   in/out cursor, expected to point at '<'.
 * doc: the document, used to look up the DTD bound to the prefix.
 *
 * Returns the node created by xmlNewNode, with its attributes attached.
 * On return *p points at the closing '>' or "/>" of the tag (neither is
 * consumed), or at the end of the input. Returns NULL with *p untouched
 * when *p is not '<' followed by the start of a name, and NULL with *p
 * advanced when a prefix is not followed by a local name.
 *
 * NOTE(review): the element name string is handed to xmlNewNode and never
 * freed here; ownership is not visible in this file -- confirm against
 * tree.c.
 */
xmlNodePtr xmlParseStartTag(CHAR **p, xmlDocPtr doc) {
    CHAR *cur = *p;
    CHAR *start;
    CHAR *prefix;
    CHAR *local;
    xmlDtdPtr dtd = NULL;
    xmlNodePtr node = NULL;

    if (*cur != '<')
        return(NULL);
    cur++;
    if (!IS_LETTER(*cur) && (*cur != '_'))
        return(NULL);

    /*
     * First name component. ':' is not accepted here so that a namespace
     * prefix can be detected.
     */
    start = cur;
    for (cur++;
         (IS_LETTER(*cur)) || (IS_DIGIT(*cur)) ||
         (*cur == '.') || (*cur == '-') || (*cur == '_') ||
         (IS_COMBINING(*cur)) || (IS_IGNORABLE(*cur)) ||
         (IS_EXTENDER(*cur));
         cur++)
        ;

    if (*cur != ':') {
        /* Plain, unprefixed name. */
        local = xmlStrndup(start, cur - start);
    } else {
        prefix = xmlStrndup(start, cur - start);
        cur++;        /* skip the colon */
        if (!IS_LETTER(*cur) && (*cur != '_')) {
            fprintf(stderr,
                "Start tag : no element name after namespace identifier %.20s\n",
                    start);
            free(prefix);
            *p = cur;
            return(NULL);
        }

        /* Local part; further ':' characters are accepted in it. */
        start = cur;
        for (cur++;
             (IS_LETTER(*cur)) || (IS_DIGIT(*cur)) ||
             (*cur == '.') || (*cur == '-') || (*cur == '_') ||
             (*cur == ':') ||
             (IS_COMBINING(*cur)) || (IS_IGNORABLE(*cur)) ||
             (IS_EXTENDER(*cur));
             cur++)
            ;
        local = xmlStrndup(start, cur - start);

        /*
         * Search the DTD associated to the prefix.
         */
        dtd = xmlSearchDtd(doc, prefix);
        if (dtd == NULL)
            fprintf(stderr, "Start tag : Couldn't find namespace %s\n", prefix);
        free(prefix);
    }

    node = xmlNewNode(dtd, local, NULL);

    /*
     * Now parse the attributes, up to the end of the tag:
     *
     *   (S Attribute)* S?
     */
    SKIP_BLANKS(cur);
    while ((IS_CHAR(*cur)) &&
           (*cur != '>') &&
           !((cur[0] == '/') && (cur[1] == '>'))) {
        if (IS_LETTER(*cur) || (*cur == '_')) {
            xmlParseAttribute(&cur, node);
        } else {
            /* Unexpected character: silently skipped. We should warn !!! */
            cur++;
        }
        SKIP_BLANKS(cur);
    }

    *p = cur;
    return(node);
}
/*
 * xmlParseEndTag: parse an end of tag, note that the '</' part has
 * already been read.
 *
 * In theory one should just parse a Name, but with the addition of the
 * namespaces needed for WebDav the element name may carry a namespace
 * prefix:
 *
 *   QName     ::= (NSPart ':')? LocalPart
 *   NSPart    ::= Name
 *   LocalPart ::= Name
 *   ETag      ::= '</' QName S? '>'
 *
 * instead of:
 *
 *   ETag      ::= '</' Name S? '>'
 *
 * p:      in/out cursor, pointing just after "</".
 * doc:    the document, used to look up the DTD bound to the prefix.
 * dtdPtr: out, the DTD found for the prefix, or NULL.
 * tagPtr: out, a newly allocated copy of the local name (from xmlStrndup),
 *         or NULL when no name could be read. Nothing in this function
 *         frees it.
 *
 * Both outputs are reset to NULL first and stay NULL on the early returns
 * (no name at *p, or a prefix without a local name). When the closing '>'
 * is missing an error is printed and *p is left on the offending character.
 */
void xmlParseEndTag(CHAR **p, xmlDocPtr doc, xmlDtdPtr *dtdPtr, CHAR **tagPtr) {
    CHAR *cur = *p;
    CHAR *start;
    CHAR *prefix;
    CHAR *local;
    xmlDtdPtr dtd = NULL;

    *dtdPtr = NULL;
    *tagPtr = NULL;

    if (!IS_LETTER(*cur) && (*cur != '_'))
        return;

    /*
     * First name component. ':' is not accepted here so that a namespace
     * prefix can be detected.
     */
    start = cur;
    for (cur++;
         (IS_LETTER(*cur)) || (IS_DIGIT(*cur)) ||
         (*cur == '.') || (*cur == '-') || (*cur == '_') ||
         (IS_COMBINING(*cur)) || (IS_IGNORABLE(*cur)) ||
         (IS_EXTENDER(*cur));
         cur++)
        ;

    if (*cur != ':') {
        /* Plain, unprefixed name. */
        local = xmlStrndup(start, cur - start);
    } else {
        prefix = xmlStrndup(start, cur - start);
        cur++;        /* skip the colon */
        if (!IS_LETTER(*cur) && (*cur != '_')) {
            fprintf(stderr,
                "End tag : no element name after namespace identifier %.20s\n",
                    start);
            free(prefix);
            *p = cur;
            return;
        }

        /* Local part; further ':' characters are accepted in it. */
        start = cur;
        for (cur++;
             (IS_LETTER(*cur)) || (IS_DIGIT(*cur)) ||
             (*cur == '.') || (*cur == '-') || (*cur == '_') ||
             (*cur == ':') ||
             (IS_COMBINING(*cur)) || (IS_IGNORABLE(*cur)) ||
             (IS_EXTENDER(*cur));
             cur++)
            ;
        local = xmlStrndup(start, cur - start);

        /*
         * Search the DTD associated to the prefix.
         */
        dtd = xmlSearchDtd(doc, prefix);
        if (dtd == NULL)
            fprintf(stderr, "End tag : Couldn't find namespace %s\n", prefix);
        free(prefix);
    }

    *dtdPtr = dtd;
    *tagPtr = local;

    /*
     * We should definitely be at the ending "S? '>'" part.
     */
    SKIP_BLANKS(cur);
    if (IS_CHAR(*cur) && (*cur == '>')) {
        cur++;
    } else {
        /*
         * Skipping to the next '>' would probably be overkill, especially
         * when the '>' is just missing, so the cursor is left in place.
         */
        fprintf(stderr, "End tag : expected '>', got %.20s\n", cur);
    }
    *p = cur;
}
/*
 * xmlParseCDSect: escaped pure raw content.
 *
 * p: in/out cursor, pointing just after the opening "<![CDATA[".
 *
 * Returns a newly allocated copy (from xmlStrndup) of the raw text up to,
 * but not including, the closing "]]>", and moves *p just past that "]]>".
 * When the section is not terminated an error is printed on stderr, NULL is
 * returned and *p is left unchanged.
 */
CHAR *xmlParseCDSect(CHAR **p) {
    CHAR *cur = *p, *r, *s, *base, *ret;

    base = cur;
    if (!IS_CHAR(*cur)) {
        fprintf(stderr, "CData section not finished : %.20s\n", base);
        return(NULL);
    }
    r = cur++;
    if (!IS_CHAR(*cur)) {
        fprintf(stderr, "CData section not finished : %.20s\n", base);
        return(NULL);
    }
    s = cur++;

    /* r, s and cur form a sliding window looking for "]]>". */
    while (IS_CHAR(*cur) &&
           ((*r != ']') || (*s != ']') || (*cur != '>'))) {
        r++; s++; cur++;
    }
    if (!IS_CHAR(*cur)) {
        fprintf(stderr, "CData section not finished : %.20s\n", base);
        return(NULL);
    }

    /*
     * r is on the first ']' of the terminator, so the content stops there.
     * Copying up to `cur` would append "]]" to the data, and leaving *p on
     * the '>' would turn that '>' into text content.
     */
    ret = xmlStrndup(base, r - base);
    *p = cur + 1;
    return(ret);
}
/*
 * xmlParseContent: a content is
 *   (element | PCData | Reference | CDSect | PI | Comment)
 *
 *   element   : starts by '<'
 *   PCData    : any CHAR but '&' or '<'
 *   Reference : starts by '&'
 *   CDSect    : starts by '<![CDATA['
 *   PI        : starts by '<?'
 *   Comment   : starts by '<!--'
 *
 * p:    in/out cursor; on return it is past whatever was consumed.
 * doc:  the document being built.
 * node: the element whose content is being parsed. Text is stored as its
 *       content when it has no child yet.
 *
 * Returns a new child node (sub-element, or text node when `node` already
 * has children) for the caller to attach, or NULL when nothing has to be
 * attached. A leading PI is processed first and parsing then continues with
 * whatever follows it, as before.
 *
 * Unless *p is at the end of the input or at an end tag "</" (which is left
 * for the caller), the cursor always advances: comments are skipped, and a
 * '<' that starts nothing parsable is reported and dropped. Previously both
 * cases left *p unchanged, which made the caller's loop spin forever.
 *
 * NOTE(review): `data` is handed to xmlNodeSetContent / xmlNewText and
 * never freed here; ownership is not visible in this file -- confirm
 * against tree.c.
 */
xmlNodePtr xmlParseContent(CHAR **p, xmlDocPtr doc, xmlNodePtr node) {
    CHAR *cur = *p, *q, *data = NULL;
    xmlNodePtr ret = NULL;

    /*
     * First case : a Processing Instruction.
     */
    if ((cur[0] == '<') && (cur[1] == '?')) {
        xmlParsePI(&cur, doc);
    }

    /*
     * Second case : a comment, skipped up to and including "-->". An
     * unterminated comment swallows the rest of the input.
     */
    if ((cur[0] == '<') && (cur[1] == '!') &&
        (cur[2] == '-') && (cur[3] == '-')) {
        cur += 4;
        q = cur;
        while (IS_CHAR(*cur) &&
               ((cur[0] != '-') || (cur[1] != '-') || (cur[2] != '>')))
            cur++;
        if (IS_CHAR(*cur)) {
            cur += 3;
        } else {
            fprintf(stderr, "Comment not terminated <!--%.50s\n", q);
        }
    }
    /*
     * Third case : a CDSection
     */
    else if ((cur[0] == '<') && (cur[1] == '!') && (cur[2] == '[') &&
             (cur[3] == 'C') && (cur[4] == 'D') && (cur[5] == 'A') &&
             (cur[6] == 'T') && (cur[7] == 'A') && (cur[8] == '[')) {
        cur += 9;
        data = xmlParseCDSect(&cur);
    }
    /*
     * An end tag (only reachable here right after a PI): it belongs to the
     * enclosing element, so it is left for the caller.
     */
    else if ((cur[0] == '<') && (cur[1] == '/')) {
        /* nothing to do */
    }
    /*
     * Fourth case : a sub-element.
     */
    else if (cur[0] == '<') {
        q = cur;
        ret = xmlParseElement(&cur, doc);
        if ((ret == NULL) && (cur == q)) {
            /* Not an element and nothing consumed: force progress. */
            fprintf(stderr,
                    "xmlParseContent : skipping unparsable markup %.30s\n", q);
            cur++;
        }
    }
    /*
     * Last case, text. Note that References are handled directly.
     */
    else {
        q = cur;
        while (IS_CHAR(*cur) && (*cur != '<')) cur++;
        if (!IS_CHAR(*cur)) {
            fprintf(stderr, "Truncated content : %.50s\n", q);
            *p = cur;
            return(NULL);
        }
        data = xmlStrndup(q, cur - q);
        /* Should apply the &...; reduction !!!! */
    }

    /*
     * Handle the data if any. If there is no child
     * add it as content, otherwise create a new node of type text.
     */
    if (data != NULL)
        data = xmlHandleData(data);
    if (data != NULL) {
        if (node->childs == NULL)
            xmlNodeSetContent(node, data);
        else {
            ret = xmlNewText(data);
        }
    }
    *p = cur;
    return(ret);
}
/*
 * xmlParseElement: parse an XML element
 *
 * p:   in/out cursor, expected to point at the '<' of the start tag. On
 *      return it is past what was consumed.
 * doc: the document being built.
 *
 * Returns the element node with its attributes and children, or NULL when
 * no start tag could be parsed. Malformed input (unterminated start tag,
 * premature end of data, mismatched end tag) is reported on stderr and the
 * node built so far is still returned.
 */
xmlNodePtr xmlParseElement(CHAR **p, xmlDocPtr doc) {
    CHAR *cur = *p;
    xmlNodePtr ret, child;
    CHAR *openTag = *p;
    CHAR *closeTag = *p;
    CHAR *before;

    ret = xmlParseStartTag(&cur, doc);
    if (ret == NULL) {
        *p = cur;
        return(NULL);
    }

    /*
     * Check for an Empty Element.
     */
    if ((cur[0] == '/') && (cur[1] == '>')) {
        cur += 2;
        *p = cur;
        return(ret);
    }
    if (cur[0] == '>') cur++;
    else {
        fprintf(stderr, "Couldn't find end of Start Tag %.30s\n", *p);
        *p = cur;
        return(ret);
    }

    /*
     * Parse the content of the element:
     *   (element | PCData | Reference | CDSect | PI | Comment) *
     *
     *   element   : starts by '<'
     *   PCData    : any CHAR but '&' or '<'
     *   Reference : starts by '&'
     *   CDSect    : starts by '<![CDATA['
     *   PI        : starts by '<?'
     *
     * The loop stops upon detection of an end of tag '</'
     */
    while ((IS_CHAR(cur[0])) && ((cur[0] != '<') || (cur[1] != '/'))) {
        before = cur;
        child = xmlParseContent(&cur, doc, ret);
        if (child != NULL)
            xmlAddChild(ret, child);
        else if (cur == before) {
            /*
             * Nothing was produced and nothing was consumed: skip one
             * character so that the loop cannot spin forever on input the
             * content parser does not understand.
             */
            fprintf(stderr, "Skipping unparsable content in tag %.30s\n", *p);
            cur++;
        }
    }
    if (!IS_CHAR(cur[0])) {
        fprintf(stderr, "Premature end of data in tag %.30s\n", *p);
        *p = cur;
        return(ret);
    }

    /*
     * parse the end of tag : '</' has been detected.
     */
    closeTag = cur;        /* so that diagnostics show the real end tag */
    cur += 2;
    if (*cur == '>') cur++; /* simplified closing </> */
    else {
        CHAR *endTag = NULL;
        xmlDtdPtr endDtd = NULL;

        xmlParseEndTag(&cur, doc, &endDtd, &endTag);
        if (endTag == NULL) {
            /* xmlParseEndTag could not read a name: nothing to compare. */
            fprintf(stderr, "Couldn't parse End Tag:\n");
            fprintf(stderr, "\t%.30s\n\t%.30s\n", openTag, closeTag);
        } else {
            /*
             * Check that the Name in the ETag is the same as in the STag.
             */
            if (endDtd != ret->dtd) {
                fprintf(stderr, "Start and End tags don't use the same DTD:\n");
                fprintf(stderr, "\t%.30s\n\t%.30s\n", openTag, closeTag);
            }
            if ((ret->name == NULL) ||
                strcmp((const char *) ret->name, (const char *) endTag)) {
                fprintf(stderr, "Start and End tags don't use the same name:\n");
                fprintf(stderr, "\t%.30s\n\t%.30s\n", openTag, closeTag);
            }
            /* The copy made by xmlParseEndTag is only needed for the check. */
            free(endTag);
        }
    }
    *p = cur;
    return(ret);
}
/*
 * xmlParseXMLDecl: parse an XML declaration header
 *
 * p: in/out cursor. It must point at "<?XML" (checked by the caller). On
 *    return it is just past the '>' closing the declaration, or at the end
 *    of the input when no '>' was found.
 *
 * Returns the document created by xmlNewDoc, using the version found in a
 * version="..." attribute, or XML_DEFAULT_VERSION when there is none.
 */
xmlDocPtr xmlParseXMLDecl(CHAR **p) {
    CHAR *cur = *p;
    CHAR *version;
    xmlDocPtr ret;

    /*
     * We know that '<?XML' is here.
     */
    cur += 5;

    /*
     * Parse the version info
     */
    SKIP_BLANKS(cur);

    /*
     * We should have 'version=' here !
     */
    if ((cur[0] == 'v') && (cur[1] == 'e') && (cur[2] == 'r') &&
        (cur[3] == 's') && (cur[4] == 'i') && (cur[5] == 'o') &&
        (cur[6] == 'n') && (cur[7] == '=')) {
        cur += 8;
        version = xmlParseQuotedString(&cur);
        if (version == NULL)
            ret = xmlNewDoc(XML_DEFAULT_VERSION);
        else {
            ret = xmlNewDoc(version);
            free(version);
        }
    } else {
        ret = xmlNewDoc(XML_DEFAULT_VERSION);
    }

    /*
     * We should check for encoding !!!!
     */

    /*
     * We should check for Required Markup Declaration !!!!
     */
    MOVETO_ENDTAG(cur);
    /* Only step over a real '>', never over the end of the input. */
    if (*cur == '>') cur++;

    *p = cur;
    return(ret);
}
/*
 * xmlParseMisc: parse an XML Misc optional field.
 *   (Comment | PI | S)*
 *
 * p:   in/out cursor; on return it is past every leading comment,
 *      processing instruction and blank.
 * doc: the document being built; passed on to xmlParsePI.
 */
void xmlParseMisc(CHAR **p, xmlDocPtr doc) {
    CHAR *cur = *p;

    /*
     * The comment test must look at cur[3]. Testing cur[2] twice let
     * "<!-x" into the loop, where xmlParserSkipComment refuses it without
     * advancing, so the loop never ended.
     */
    while (((cur[0] == '<') && (cur[1] == '?')) ||
           ((cur[0] == '<') && (cur[1] == '!') &&
            (cur[2] == '-') && (cur[3] == '-')) ||
           IS_BLANK(*cur)) {
        if ((cur[0] == '<') && (cur[1] == '?')) {
            xmlParsePI(&cur, doc);
        } else if (IS_BLANK(*cur)) {
            cur++;
        } else
            xmlParserSkipComment(&cur);
    }
    *p = cur;
}
/*
 * xmlParseDoc : parse an XML document and build a tree.
 *
 * cur: the document text, terminated by a non-CHAR (zero) character.
 *
 * Returns the document, with ->root set to the result of parsing the root
 * element (NULL when no element could be parsed). Returns NULL when `cur`
 * is NULL or when no document could be created.
 */
xmlDocPtr xmlParseDoc(CHAR *cur) {
    xmlDocPtr ret;

    if (cur == NULL)
        return(NULL);

    /*
     * Skip the blanks in front of the document.
     */
    SKIP_BLANKS(cur);

    /*
     * Check for the XMLDecl in the Prolog.
     */
    if ((cur[0] == '<') && (cur[1] == '?') &&
        (cur[2] == 'X') && (cur[3] == 'M') &&
        (cur[4] == 'L')) {
        ret = xmlParseXMLDecl(&cur);
        SKIP_BLANKS(cur);
    } else {
        ret = xmlNewDoc(XML_DEFAULT_VERSION);
    }
    /* Without a document there is nothing to attach the tree to. */
    if (ret == NULL)
        return(NULL);

    /*
     * The Misc part of the Prolog
     *   (Comment | PI | S) *
     */
    xmlParseMisc(&cur, ret);

    /*
     * Time to start parsing
     */
    ret->root = xmlParseElement(&cur, ret);

    return(ret);
}
/*
 * xmlParseFile : parse an XML file and build a tree.
 *
 * filename: path of the file to load.
 *
 * The whole file is read into a zero-filled buffer, parsed with xmlParseDoc
 * and the buffer is released.
 *
 * Returns the parsed document, or NULL when the file cannot be stat'ed,
 * opened or read, or when memory is exhausted. Open and read failures are
 * reported on stderr.
 */
xmlDocPtr xmlParseFile(const char *filename) {
    xmlDocPtr ret;
    int input;
    long res;
    struct stat buf;
    char *buffer;
    size_t size;
    size_t total = 0;

    if (filename == NULL)
        return(NULL);

    if (stat(filename, &buf) < 0)
        return(NULL);
    size = (size_t) buf.st_size;

    /*
     * 100 spare bytes are kept after the data and the whole buffer is
     * cleared -- presumably so that the parser's look-ahead stays inside
     * zeroed memory. The original cleared only sizeof(buffer) bytes, i.e.
     * the size of the pointer.
     */
    buffer = malloc(size + 100);
    if (buffer == NULL) {
        perror("malloc");
        return(NULL);
    }
    memset(buffer, 0, size + 100);

    input = open(filename, O_RDONLY);
    if (input < 0) {
        fprintf(stderr, "Cannot read file %s :\n", filename);
        perror("open failed");
        free(buffer);
        return(NULL);
    }

    /* read() may return less than requested: loop until done or EOF. */
    while (total < size) {
        res = read(input, buffer + total, size - total);
        if (res < 0) {
            fprintf(stderr, "Cannot read file %s :\n", filename);
            perror("read failed");
            close(input);
            free(buffer);
            return(NULL);
        }
        if (res == 0)
            break;        /* file is shorter than stat() reported */
        total += (size_t) res;
    }
    close(input);

    /* Terminate after what was really read. */
    buffer[total] = '\0';

    ret = xmlParseDoc(buffer);
    free(buffer);
    return(ret);
}
/* Webmaster -- stray text, apparently a web page footer; not part of the parser source. */