File:  [Public] / XML / entities.c
Revision 1.28: download - view: text, annotated - select for diffs
Wed Jul 7 09:16:19 1999 UTC (24 years, 10 months ago) by daniel
Branches: MAIN
CVS tags: HEAD
Adding HTML support, testcases, and updates from bugs.gnome.org, Daniel

/*
 * entities.c : implementation for the XML entities handking
 *
 * See Copyright for the status of this software.
 *
 * Daniel.Veillard@w3.org
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "entities.h"

/*
 * The XML predefined entities.
 */

struct xmlPredefinedEntityValue {
    const char *name;
    const char *value;
};
struct xmlPredefinedEntityValue xmlPredefinedEntityValues[] = {
    { "lt", "<" },
    { "gt", ">" },
    { "apos", "'" },
    { "quot", "\"" },
    { "amp", "&" }
};

xmlEntitiesTablePtr xmlPredefinedEntities = NULL;

/*
 * xmlFreeEntity : clean-up an entity record.
 */
void xmlFreeEntity(xmlEntityPtr entity) {
    if (entity == NULL) return;

    if (entity->name != NULL)
	free((char *) entity->name);
    if (entity->ExternalID != NULL)
        free((char *) entity->ExternalID);
    if (entity->SystemID != NULL)
        free((char *) entity->SystemID);
    if (entity->content != NULL)
        free((char *) entity->content);
    if (entity->orig != NULL)
        free((char *) entity->orig);
    memset(entity, -1, sizeof(xmlEntity));
}

/*
 * xmlAddEntity : register a new entity for an entities table.
 *
 * TODO !!! We should check here that the combination of type
 *          ExternalID and SystemID is valid.
 */
static void
xmlAddEntity(xmlEntitiesTablePtr table, const CHAR *name, int type,
              const CHAR *ExternalID, const CHAR *SystemID, CHAR *content) {
    int i;
    xmlEntityPtr cur;
    int len;

    for (i = 0;i < table->nb_entities;i++) {
        cur = &table->table[i];
	if (!xmlStrcmp(cur->name, name)) {
	    /*
	     * The entity is already defined in this Dtd, the spec says to NOT
	     * override it ... Is it worth a Warning ??? !!!
	     */
	    return;
	}
    }
    if (table->nb_entities >= table->max_entities) {
        /*
	 * need more elements.
	 */
	table->max_entities *= 2;
	table->table = (xmlEntityPtr) 
	    realloc(table->table, table->max_entities * sizeof(xmlEntity));
	if (table->table) {
	    perror("realloc failed");
	    exit(1);
	}
    }
    cur = &table->table[table->nb_entities];
    cur->name = xmlStrdup(name);
    for (len = 0;name[0] != 0;name++)len++;
    cur->len = len;
    cur->type = type;
    if (ExternalID != NULL)
	cur->ExternalID = xmlStrdup(ExternalID);
    else
        cur->ExternalID = NULL;
    if (SystemID != NULL)
	cur->SystemID = xmlStrdup(SystemID);
    else
        cur->SystemID = NULL;
    if (content != NULL)
	cur->content = xmlStrdup(content);
    else
        cur->content = NULL;
    cur->orig = NULL;
    table->nb_entities++;
}

/**
 * xmlInitializePredefinedEntities:
 *
 * Set up the predefined entities.
 */
void xmlInitializePredefinedEntities(void) {
    int i;
    CHAR name[50];
    CHAR value[50];
    const char *in;
    CHAR *out;

    if (xmlPredefinedEntities != NULL) return;

    xmlPredefinedEntities = xmlCreateEntitiesTable();
    for (i = 0;i < sizeof(xmlPredefinedEntityValues) / 
                   sizeof(xmlPredefinedEntityValues[0]);i++) {
        in = xmlPredefinedEntityValues[i].name;
	out = &name[0];
	for (;(*out++ = (CHAR) *in);)in++;
        in = xmlPredefinedEntityValues[i].value;
	out = &value[0];
	for (;(*out++ = (CHAR) *in);)in++;
        xmlAddEntity(xmlPredefinedEntities, (const CHAR *) &name[0],
	             XML_INTERNAL_PREDEFINED_ENTITY, NULL, NULL,
		     &value[0]);
    }
}

/**
 * xmlGetPredefinedEntity:
 * @name:  the entity name
 *
 * Check whether this name is an predefined entity.
 *
 * Returns NULL if not, othervise the entity
 */
xmlEntityPtr
xmlGetPredefinedEntity(const CHAR *name) {
    int i;
    xmlEntityPtr cur;

    if (xmlPredefinedEntities == NULL)
        xmlInitializePredefinedEntities();
    for (i = 0;i < xmlPredefinedEntities->nb_entities;i++) {
	cur = &xmlPredefinedEntities->table[i];
	if (!xmlStrcmp(cur->name, name)) return(cur);
    }
    return(NULL);
}

/**
 * xmlAddDtdEntity:
 * @doc:  the document
 * @name:  the entity name
 * @type:  the entity type XML_xxx_yyy_ENTITY
 * @ExternalID:  the entity external ID if available
 * @SystemID:  the entity system ID if available
 * @content:  the entity content
 *
 * Register a new entity for this document DTD.
 */
void
xmlAddDtdEntity(xmlDocPtr doc, const CHAR *name, int type,
              const CHAR *ExternalID, const CHAR *SystemID, CHAR *content) {
    xmlEntitiesTablePtr table;

    if (doc->extSubset == NULL) {
        fprintf(stderr,
	        "xmlAddDtdEntity: document without external subset !\n");
	return;
    }
    table = (xmlEntitiesTablePtr) doc->extSubset->entities;
    if (table == NULL) {
        table = xmlCreateEntitiesTable();
	doc->extSubset->entities = table;
    }
    xmlAddEntity(table, name, type, ExternalID, SystemID, content);
}

/**
 * xmlAddDocEntity:
 * @doc:  the document
 * @name:  the entity name
 * @type:  the entity type XML_xxx_yyy_ENTITY
 * @ExternalID:  the entity external ID if available
 * @SystemID:  the entity system ID if available
 * @content:  the entity content
 *
 * Register a new entity for this document.
 */
void
xmlAddDocEntity(xmlDocPtr doc, const CHAR *name, int type,
              const CHAR *ExternalID, const CHAR *SystemID, CHAR *content) {
    xmlEntitiesTablePtr table;

    if (doc == NULL) {
        fprintf(stderr,
	        "xmlAddDocEntity: document is NULL !\n");
	return;
    }
    if (doc->intSubset == NULL) {
        fprintf(stderr,
	        "xmlAddDtdEntity: document without internal subset !\n");
	return;
    }
    table = (xmlEntitiesTablePtr) doc->intSubset->entities;
    if (table == NULL) {
        table = xmlCreateEntitiesTable();
	doc->intSubset->entities = table;
    }
    xmlAddEntity(table, name, type, ExternalID, SystemID, content);
}

/**
 * xmlGetDtdEntity:
 * @doc:  the document referencing the entity
 * @name:  the entity name
 *
 * Do an entity lookup in the Dtd entity hash table and
 * returns the corresponding entity, if found.
 * 
 * Returns A pointer to the entity structure or NULL if not found.
 */
xmlEntityPtr
xmlGetDtdEntity(xmlDocPtr doc, const CHAR *name) {
    int i;
    xmlEntityPtr cur;
    xmlEntitiesTablePtr table;

    if ((doc->extSubset != NULL) && (doc->extSubset->entities != NULL)) {
	table = (xmlEntitiesTablePtr) doc->extSubset->entities;
	for (i = 0;i < table->nb_entities;i++) {
	    cur = &table->table[i];
	    if (!xmlStrcmp(cur->name, name)) return(cur);
	}
    }
    return(NULL);
}

/**
 * xmlGetDocEntity:
 * @doc:  the document referencing the entity
 * @name:  the entity name
 *
 * Do an entity lookup in the document entity hash table and
 * returns the corrsponding entity, otherwise a lookup is done
 * in the predefined entities too.
 * 
 * Returns A pointer to the entity structure or NULL if not found.
 */
xmlEntityPtr
xmlGetDocEntity(xmlDocPtr doc, const CHAR *name) {
    int i;
    xmlEntityPtr cur;
    xmlEntitiesTablePtr table;

    if ((doc->intSubset != NULL) && (doc->intSubset->entities != NULL)) {
	table = (xmlEntitiesTablePtr) doc->intSubset->entities;
	for (i = 0;i < table->nb_entities;i++) {
	    cur = &table->table[i];
	    if (!xmlStrcmp(cur->name, name)) return(cur);
	}
    }
    if (xmlPredefinedEntities == NULL)
        xmlInitializePredefinedEntities();
    table = xmlPredefinedEntities;
    for (i = 0;i < table->nb_entities;i++) {
	cur = &table->table[i];
	if (!xmlStrcmp(cur->name, name)) return(cur);
    }

    return(NULL);
}

/*
 * [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD]
 *                  | [#x10000-#x10FFFF]
 * any Unicode character, excluding the surrogate blocks, FFFE, and FFFF.
 */
#define IS_CHAR(c)							\
    (((c) == 0x09) || ((c) == 0x0a) || ((c) == 0x0d) ||			\
     (((c) >= 0x20) && ((c) != 0xFFFE) && ((c) != 0xFFFF)))

/*
 * A buffer used for converting entities to their equivalent and back.
 *
 * TODO: remove this, this helps performances but forbid reentrancy in a 
 *       stupid way.
 */
static int buffer_size = 0;
static CHAR *buffer = NULL;

void growBuffer(void) {
    buffer_size *= 2;
    buffer = (CHAR *) realloc(buffer, buffer_size * sizeof(CHAR));
    if (buffer == NULL) {
        perror("realloc failed");
        exit(1);
    }
}


/**
 * xmlEncodeEntities:
 * @doc:  the document containing the string
 * @input:  A string to convert to XML.
 *
 * Do a global encoding of a string, replacing the predefined entities
 * and non ASCII values with their entities and CharRef counterparts.
 *
 * TODO !!!! Once moved to UTF-8 internal encoding, the encoding of non-ascii
 *           get erroneous.
 *
 * TODO This routine is not reentrant, the interface
 *      should not be modified though.
 *
 * People must migrate their code to xmlEncodeEntitiesReentrant !
 * 
 * Returns A newly allocated string with the substitution done.
 */
const CHAR *
xmlEncodeEntities(xmlDocPtr doc, const CHAR *input) {
    const CHAR *cur = input;
    CHAR *out = buffer;

    if (input == NULL) return(NULL);
    if (buffer == NULL) {
        buffer_size = 1000;
        buffer = (CHAR *) malloc(buffer_size * sizeof(CHAR));
	if (buffer == NULL) {
	    perror("malloc failed");
            exit(1);
	}
	out = buffer;
    }
    while (*cur != '\0') {
        if (out - buffer > buffer_size - 100) {
	    int index = out - buffer;

	    growBuffer();
	    out = &buffer[index];
	}

	/*
	 * By default one have to encode at least '<', '>', '"' and '&' !
	 */
	if (*cur == '<') {
	    *out++ = '&';
	    *out++ = 'l';
	    *out++ = 't';
	    *out++ = ';';
	} else if (*cur == '>') {
	    *out++ = '&';
	    *out++ = 'g';
	    *out++ = 't';
	    *out++ = ';';
	} else if (*cur == '&') {
	    *out++ = '&';
	    *out++ = 'a';
	    *out++ = 'm';
	    *out++ = 'p';
	    *out++ = ';';
	} else if (*cur == '"') {
	    *out++ = '&';
	    *out++ = 'q';
	    *out++ = 'u';
	    *out++ = 'o';
	    *out++ = 't';
	    *out++ = ';';
	} else if (*cur == '\'') {
	    *out++ = '&';
	    *out++ = 'a';
	    *out++ = 'p';
	    *out++ = 'o';
	    *out++ = 's';
	    *out++ = ';';
	} else if (((*cur >= 0x20) && (*cur < 0x80)) ||
	    (*cur == '\n') || (*cur == '\r') || (*cur == '\t')) {
	    /*
	     * default case, just copy !
	     */
	    *out++ = *cur;
#ifndef USE_UTF_8
	} else if ((sizeof(CHAR) == 1) && (*cur >= 0x80)) {
	    char buf[10], *ptr;
#ifdef HAVE_SNPRINTF
	    snprintf(buf, 9, "&#%d;", *cur);
#else
	    sprintf(buf, "&#%d;", *cur);
#endif
            ptr = buf;
	    while (*ptr != 0) *out++ = *ptr++;
#endif
	} else if (IS_CHAR(*cur)) {
	    char buf[10], *ptr;

#ifdef HAVE_SNPRINTF
	    snprintf(buf, 9, "&#%d;", *cur);
#else
	    sprintf(buf, "&#%d;", *cur);
#endif
            ptr = buf;
	    while (*ptr != 0) *out++ = *ptr++;
	}
#if 0
	else {
	    /*
	     * default case, this is not a valid char !
	     * Skip it...
	     */
	    fprintf(stderr, "xmlEncodeEntities: invalid char %d\n", (int) *cur);
	}
#endif
	cur++;
    }
    *out++ = 0;
    return(buffer);
}

/*
 * Macro used to grow the current buffer.
 */
#define growBufferReentrant() {						\
    buffer_size *= 2;							\
    buffer = (CHAR *) realloc(buffer, buffer_size * sizeof(CHAR));	\
    if (buffer == NULL) {						\
	perror("realloc failed");					\
	exit(1);							\
    }									\
}


/**
 * xmlEncodeEntitiesReentrant:
 * @doc:  the document containing the string
 * @input:  A string to convert to XML.
 *
 * Do a global encoding of a string, replacing the predefined entities
 * and non ASCII values with their entities and CharRef counterparts.
 * Contrary to xmlEncodeEntities, this routine is reentrant, and result
 * must be deallocated.
 *
 * TODO !!!! Once moved to UTF-8 internal encoding, the encoding of non-ascii
 *           get erroneous.
 *
 * Returns A newly allocated string with the substitution done.
 */
CHAR *
xmlEncodeEntitiesReentrant(xmlDocPtr doc, const CHAR *input) {
    const CHAR *cur = input;
    CHAR *buffer = NULL;
    CHAR *out = NULL;
    int buffer_size = 0;

    if (input == NULL) return(NULL);

    /*
     * allocate an translation buffer.
     */
    buffer_size = 1000;
    buffer = (CHAR *) malloc(buffer_size * sizeof(CHAR));
    if (buffer == NULL) {
	perror("malloc failed");
	exit(1);
    }
    out = buffer;

    while (*cur != '\0') {
        if (out - buffer > buffer_size - 100) {
	    int index = out - buffer;

	    growBufferReentrant();
	    out = &buffer[index];
	}

	/*
	 * By default one have to encode at least '<', '>', '"' and '&' !
	 */
	if (*cur == '<') {
	    *out++ = '&';
	    *out++ = 'l';
	    *out++ = 't';
	    *out++ = ';';
	} else if (*cur == '>') {
	    *out++ = '&';
	    *out++ = 'g';
	    *out++ = 't';
	    *out++ = ';';
	} else if (*cur == '&') {
	    *out++ = '&';
	    *out++ = 'a';
	    *out++ = 'm';
	    *out++ = 'p';
	    *out++ = ';';
	} else if (*cur == '"') {
	    *out++ = '&';
	    *out++ = 'q';
	    *out++ = 'u';
	    *out++ = 'o';
	    *out++ = 't';
	    *out++ = ';';
	} else if (*cur == '\'') {
	    *out++ = '&';
	    *out++ = 'a';
	    *out++ = 'p';
	    *out++ = 'o';
	    *out++ = 's';
	    *out++ = ';';
	} else if (((*cur >= 0x20) && (*cur < 0x80)) ||
	    (*cur == '\n') || (*cur == '\r') || (*cur == '\t')) {
	    /*
	     * default case, just copy !
	     */
	    *out++ = *cur;
#ifndef USE_UTF_8
	} else if ((sizeof(CHAR) == 1) && (*cur >= 0x80)) {
	    char buf[10], *ptr;
#ifdef HAVE_SNPRINTF
	    snprintf(buf, 9, "&#%d;", *cur);
#else
	    sprintf(buf, "&#%d;", *cur);
#endif
            ptr = buf;
	    while (*ptr != 0) *out++ = *ptr++;
#endif
	} else if (IS_CHAR(*cur)) {
	    char buf[10], *ptr;

#ifdef HAVE_SNPRINTF
	    snprintf(buf, 9, "&#%d;", *cur);
#else
	    sprintf(buf, "&#%d;", *cur);
#endif
            ptr = buf;
	    while (*ptr != 0) *out++ = *ptr++;
	}
#if 0
	else {
	    /*
	     * default case, this is not a valid char !
	     * Skip it...
	     */
	    fprintf(stderr, "xmlEncodeEntities: invalid char %d\n", (int) *cur);
	}
#endif
	cur++;
    }
    *out++ = 0;
    return(buffer);
}

/**
 * xmlCreateEntitiesTable:
 *
 * create and initialize an empty entities hash table.
 *
 * Returns the xmlEntitiesTablePtr just created or NULL in case of error.
 */
xmlEntitiesTablePtr
xmlCreateEntitiesTable(void) {
    xmlEntitiesTablePtr ret;

    ret = (xmlEntitiesTablePtr) 
         malloc(sizeof(xmlEntitiesTable));
    if (ret == NULL) {
        fprintf(stderr, "xmlCreateEntitiesTable : malloc(%ld) failed\n",
	        (long)sizeof(xmlEntitiesTable));
        return(NULL);
    }
    ret->max_entities = XML_MIN_ENTITIES_TABLE;
    ret->nb_entities = 0;
    ret->table = (xmlEntityPtr ) 
         malloc(ret->max_entities * sizeof(xmlEntity));
    if (ret == NULL) {
        fprintf(stderr, "xmlCreateEntitiesTable : malloc(%ld) failed\n",
	        ret->max_entities * (long)sizeof(xmlEntity));
	free(ret);
        return(NULL);
    }
    return(ret);
}

/**
 * xmlFreeEntitiesTable:
 * @table:  An entity table
 *
 * Deallocate the memory used by an entities hash table.
 */
void
xmlFreeEntitiesTable(xmlEntitiesTablePtr table) {
    int i;

    if (table == NULL) return;

    for (i = 0;i < table->nb_entities;i++) {
        xmlFreeEntity(&table->table[i]);
    }
    free(table->table);
    free(table);
}

/**
 * xmlCopyEntitiesTable:
 * @table:  An entity table
 *
 * Build a copy of an entity table.
 * 
 * Returns the new xmlEntitiesTablePtr or NULL in case of error.
 */
xmlEntitiesTablePtr
xmlCopyEntitiesTable(xmlEntitiesTablePtr table) {
    xmlEntitiesTablePtr ret;
    xmlEntityPtr cur, ent;
    int i;

    ret = (xmlEntitiesTablePtr) malloc(sizeof(xmlEntitiesTable));
    if (ret == NULL) {
        fprintf(stderr, "xmlCopyEntitiesTable: out of memory !\n");
	return(NULL);
    }
    ret->table = (xmlEntityPtr) malloc(table->max_entities *
                                         sizeof(xmlEntity));
    if (ret->table == NULL) {
        fprintf(stderr, "xmlCopyEntitiesTable: out of memory !\n");
	free(ret);
	return(NULL);
    }
    ret->max_entities = table->max_entities;
    ret->nb_entities = table->nb_entities;
    for (i = 0;i < ret->nb_entities;i++) {
	cur = &ret->table[i];
	ent = &table->table[i];
	cur->len = ent->len;
	cur->type = ent->type;
	if (ent->name != NULL)
	    cur->name = xmlStrdup(ent->name);
	else
	    cur->name = NULL;
	if (ent->ExternalID != NULL)
	    cur->ExternalID = xmlStrdup(ent->ExternalID);
	else
	    cur->ExternalID = NULL;
	if (ent->SystemID != NULL)
	    cur->SystemID = xmlStrdup(ent->SystemID);
	else
	    cur->SystemID = NULL;
	if (ent->content != NULL)
	    cur->content = xmlStrdup(ent->content);
	else
	    cur->content = NULL;
	if (ent->orig != NULL)
	    cur->orig = xmlStrdup(ent->orig);
	else
	    cur->orig = NULL;
    }
    return(ret);
}

/**
 * xmlDumpEntitiesTable:
 * @buf:  An XML buffer.
 * @table:  An entity table
 *
 * This will dump the content of the entity table as an XML DTD definition
 */
void
xmlDumpEntitiesTable(xmlBufferPtr buf, xmlEntitiesTablePtr table) {
    int i;
    xmlEntityPtr cur;

    if (table == NULL) return;

    for (i = 0;i < table->nb_entities;i++) {
        cur = &table->table[i];
        switch (cur->type) {
	    case XML_INTERNAL_GENERAL_ENTITY:
	        xmlBufferWriteChar(buf, "<!ENTITY ");
		xmlBufferWriteCHAR(buf, cur->name);
		xmlBufferWriteChar(buf, " ");
		if (cur->orig != NULL)
		    xmlBufferWriteQuotedString(buf, cur->orig);
		else
		    xmlBufferWriteQuotedString(buf, cur->content);
		xmlBufferWriteChar(buf, ">\n");
	        break;
	    case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
	        xmlBufferWriteChar(buf, "<!ENTITY ");
		xmlBufferWriteCHAR(buf, cur->name);
		if (cur->ExternalID != NULL) {
		     xmlBufferWriteChar(buf, " PUBLIC ");
		     xmlBufferWriteQuotedString(buf, cur->ExternalID);
		     xmlBufferWriteChar(buf, " ");
		     xmlBufferWriteQuotedString(buf, cur->SystemID);
		} else {
		     xmlBufferWriteChar(buf, " SYSTEM ");
		     xmlBufferWriteQuotedString(buf, cur->SystemID);
		}
		xmlBufferWriteChar(buf, ">\n");
	        break;
	    case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
	        xmlBufferWriteChar(buf, "<!ENTITY ");
		xmlBufferWriteCHAR(buf, cur->name);
		if (cur->ExternalID != NULL) {
		     xmlBufferWriteChar(buf, " PUBLIC ");
		     xmlBufferWriteQuotedString(buf, cur->ExternalID);
		     xmlBufferWriteChar(buf, " ");
		     xmlBufferWriteQuotedString(buf, cur->SystemID);
		} else {
		     xmlBufferWriteChar(buf, " SYSTEM ");
		     xmlBufferWriteQuotedString(buf, cur->SystemID);
		}
		if (cur->content != NULL) { /* Should be true ! */
		    xmlBufferWriteChar(buf, " NDATA ");
		    if (cur->orig != NULL)
			xmlBufferWriteCHAR(buf, cur->orig);
		    else
			xmlBufferWriteCHAR(buf, cur->content);
		}
		xmlBufferWriteChar(buf, ">\n");
	        break;
	    case XML_INTERNAL_PARAMETER_ENTITY:
	        xmlBufferWriteChar(buf, "<!ENTITY % ");
		xmlBufferWriteCHAR(buf, cur->name);
		xmlBufferWriteChar(buf, " ");
		if (cur->orig == NULL)
		    xmlBufferWriteQuotedString(buf, cur->content);
		else
		    xmlBufferWriteQuotedString(buf, cur->orig);
		xmlBufferWriteChar(buf, ">\n");
	        break;
	    case XML_EXTERNAL_PARAMETER_ENTITY:
	        xmlBufferWriteChar(buf, "<!ENTITY % ");
		xmlBufferWriteCHAR(buf, cur->name);
		if (cur->ExternalID != NULL) {
		     xmlBufferWriteChar(buf, " PUBLIC ");
		     xmlBufferWriteQuotedString(buf, cur->ExternalID);
		     xmlBufferWriteChar(buf, " ");
		     xmlBufferWriteQuotedString(buf, cur->SystemID);
		} else {
		     xmlBufferWriteChar(buf, " SYSTEM ");
		     xmlBufferWriteQuotedString(buf, cur->SystemID);
		}
		xmlBufferWriteChar(buf, ">\n");
	        break;
	    default:
	        fprintf(stderr,
		    "xmlDumpEntitiesTable: internal: unknown type %d\n",
		        cur->type);
	}
    }
}

Webmaster