/*
 * $Id: Resolver.java,v 1.1.1.1 2002/09/30 15:08:51 smartine Exp $
 * Copyright (C) 1999-2000 David Brownell
 * 
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */

package xml;

import java.io.*;

import java.util.Dictionary;
import java.util.Hashtable;

import org.xml.sax.*;


// $Id: Resolver.java,v 1.1.1.1 2002/09/30 15:08:51 smartine Exp $

/**
 * Utility implementation of a SAX resolver, which can be used to improve
 * network utilization of SAX based XML (especially XHTML) components.
 *
 * <p> A resolver instance maps PUBLIC identifiers to replacement URIs
 * (typically <em>file:</em> URIs naming local copies of standard DTD
 * components).  The class also bundles a few static helpers for turning
 * file names into URIs and for extracting the character encoding from a
 * MIME content type.
 *
 * @version $Date: 2002/09/30 15:08:51 $
 */
public class Resolver implements EntityResolver, Cloneable
{
    /**
     * Table pairing the SGML FPIs for XHTML 1.0 DTD components with the
     * filenames from any copy of that spec (so we can resolve to such a
     * local copy, and avoid issues of network access and latency).
     *
     * <p> NOTE -- this should eventually get supplanted by a catalog
     * facility of some kind.
     */
    private static final String xhtmlPubidMap [][] = {
        // NOTE:  These IDs are synced up with the XHTML 1.0 REC

        // usage profiles for the xhtml namespace
        { "-//W3C//DTD XHTML 1.0 Strict//EN", "xhtml1-strict.dtd" },
        { "-//W3C//DTD XHTML 1.0 Transitional//EN", "xhtml1-transitional.dtd" },
        { "-//W3C//DTD XHTML 1.0 Frameset//EN", "xhtml1-frameset.dtd" },

        // entities shared between all those usage profiles
        { "-//W3C//ENTITIES Latin 1 for XHTML//EN", "xhtml-lat1.ent" },
        { "-//W3C//ENTITIES Special for XHTML//EN", "xhtml-special.ent" },
        { "-//W3C//ENTITIES Symbols for XHTML//EN", "xhtml-symbol.ent" }
    };


    /**
     * Constructs a resolver that uses the specified local directory
     * to cache the XHTML 1.0 DTD components.  Components which are not
     * present in that directory are simply left unmapped.
     *
     * @param directoryPath pathname to a copy of those DTD components
     * @return a resolver mapping the XHTML 1.0 PUBLIC identifiers to the
     *	files found in that directory
     * @exception IOException if a file URL can't be constructed
     */
    public static Resolver createXhtmlResolver (String directoryPath)
    throws IOException
    {
        File        directory = new File (directoryPath);
        Hashtable   table = new Hashtable (xhtmlPubidMap.length * 2);

        addDirectoryMapping (table, xhtmlPubidMap, directory);
        return new Resolver (table);
    }


    /**
     * Updates a dictionary used to map PUBLIC identifiers to file names,
     * so that it uses the mappings in a specified directory.  Entries
     * whose file does not exist in that directory are skipped silently.
     *
     * @param table Dictionary to update; each existing file is stored
     *	under its PUBLIC identifier as a <em>file:</em> URL string.
     * @param mappings Array of string pairs, where the first member
     *	of each pair is a PUBLIC identifier and the second is the
     *	name of a file, relative to the specified directory.
     * @param directory File holding the specified files.
     * @exception IOException if a file vanishes between the existence
     *	check and the construction of its URL
     */
    public static void addDirectoryMapping (
        Dictionary  table,
        String      mappings [][],
        File        directory
    ) throws IOException
    {
        for (int i = 0; i < mappings.length; i++) {
            File    file = new File (directory, mappings [i][1]);

            if (!file.exists ())    // ?? log a warning ??
                continue;

            table.put (mappings [i][0], fileToURL (file));
        }
    }

    // XXX these *URL routines don't quite belong here, except
    // that they're all in the same spirit of making it easy to
    // use local filesystem URIs with XML parsers.

    /**
     * Provides the URL for a named file, without relying on the JDK 1.2
     * {@link java.io.File#toURL File.toURL}() utility method.
     *
     * @param filename the file name to convert.  Relative file names
     *	are resolved the way the JVM resolves them (relative to the
     *	process-global current working directory).
     * @return a <em>file:</em> URL string for the file
     *
     * @exception IOException if the file does not exist
     */
    public static String fileNameToURL (String filename)
    throws IOException
    {
        return fileToURL (new File (filename));
    }

    /**
     * Provides the URL for a file, without relying on the JDK 1.2
     * {@link java.io.File#toURL File.toURL}() utility method.
     *
     * <p> The result is "file:" followed by the absolute path, using
     * "/" as the separator, with a leading "/" and (for directories) a
     * trailing "/".  No character escaping is performed on the path.
     *
     * @param f the file to convert.  Relative file names
     *	are resolved the way the JVM resolves them (relative to the
     *	process-global current working directory).
     * @return a <em>file:</em> URL string for the file
     *
     * @exception IOException if the file does not exist
     */
    public static String fileToURL (File f)
    throws IOException
    {
        String  temp;

        if (!f.exists ())
            throw new IOException ("no such file: " + f.getName ());

        // XXX getAbsolutePath() seems buggy; I'm seeing components
        // like "/foo/../" which are clearly not "absolute"
        // and should have been resolved with the filesystem.

        // Substituting "/" would be wrong, "foo" may have been
        // symlinked ... the URL code will make that change
        // later, so that things can get _really_ broken!

        temp = f.getAbsolutePath ();

        // URLs always use '/', whatever the platform separator is
        if (File.separatorChar != '/')
            temp = temp.replace (File.separatorChar, '/');
        // e.g. a drive-letter path "C:/x" becomes "/C:/x"
        if (!temp.startsWith ("/"))
            temp = "/" + temp;
        if (!temp.endsWith ("/") && f.isDirectory ())
            temp = temp + "/";
        return "file:" + temp;
    }


    /**
     * Returns a URL string.  Note that if a malformed URL is provided, or
     * the parameter names a nonexistent file, the resulting URL may be
     * malformed.
     *
     * @param fileOrURL If this is the name of a file which exists,
     *	then its URL is returned.  Otherwise the argument is returned.
     * @return the file's URL, or the argument itself when it could not
     *	be converted
     */
    public static String getURL (String fileOrURL)
    {
        try {
            return fileNameToURL (fileOrURL);
        } catch (Exception e) {
            // Deliberately broad:  anything that can't be treated as an
            // existing file (including a null argument) is handed back
            // unchanged, on the assumption that it is already a URL.
            return fileOrURL;
        }
    }



    // note:  cloneable, this is just copied; unguarded against mods
    private Dictionary      pubidMapping;

    /**
     * Constructs a resolver which understands how to map PUBLIC identifiers
     * to other URIs, typically for local copies of standard DTD components.
     *
     * @param dict maps PUBLIC identifiers to URIs.  This is not
     *	copied; subsequent modifications will be reported through the
     *	resolution operations.
     */
    public Resolver (Dictionary dict)
    {
        pubidMapping = dict;
    }


    // XXX want notion of a "system default" resolver, presumably
    // loaded with all sorts of useful stuff.  At the same time need
    // a notion of resolver chaining (failure --> next) so that subsystems
    // can set up things that won't interfere with other ones.

    /**
     * This parses most MIME content type strings that have
     * <em>charset=...</em> encoding declarations, and returns the
     * specified encoding.  This conforms to RFC 2376, and is useful when
     * constructing InputSource objects from URLConnection objects or
     * other objects using MIME content typing.
     *
     * <p> The media type and the <em>charset</em> parameter name are
     * matched without regard to case.  When no usable encoding is
     * declared (no <em>charset</em> parameter, no "=", or an empty value)
     * the default for the media type is returned instead.
     *
     * @param contentType the MIME content type that will be parsed; must
     *	not be null.
     * @return the declared encoding; otherwise "ISO-8859-1" for
     *	<em>text/html</em>, "US-ASCII" for any other <em>text/*</em>
     *	type, and null for everything else.
     */
    static public String getEncoding (String contentType)
    {
        // currently a dumb parsing algorithm that works "mostly" and handles
        //	..anything...charset=ABC
        //	..anything...charset=ABC;otherAttr=DEF
        //	..anything...charset=ABC (comment);otherAttr=DEF
        //	..anything...charset= "ABC" (comment);otherAttr=DEF

        int     temp;
        String  encoding;
        String  defValue = null;

        // regionMatches (not toLowerCase) keeps indices valid for any input
        if (contentType.regionMatches (true, 0, "text/", 0, 5)) {
            defValue = contentType.regionMatches (true, 0, "text/html", 0, 9)
                    ? "ISO-8859-1" : "US-ASCII";
        }

        // Assumes 'charset' is only an attribute name, not part
        // of a value, comment, or other attribute name
        // ALSO assumes no escaped values like "\;" or "\)"
        temp = indexOfIgnoreCase (contentType, "charset");
        if (temp == -1)
            return defValue;

        // strip out everything up to '=' ...
        temp = contentType.indexOf ('=', temp);
        if (temp == -1)
            return defValue;
        encoding = contentType.substring (temp + 1);

        // ... and any subsequent attributes
        if ((temp = encoding.indexOf (';')) != -1)
            encoding = encoding.substring (0, temp);
        // ... and any comments after value
        if ((temp = encoding.indexOf ('(')) != -1)
            encoding = encoding.substring (0, temp);

        // ... then whitespace, and any (double) quotes
        encoding = encoding.trim ();
        if (encoding.startsWith ("\"")) {
            // tolerate a missing closing quote rather than chopping
            // off the last character of the name (or failing outright)
            temp = encoding.indexOf ('"', 1);
            encoding = (temp == -1)
                    ? encoding.substring (1)
                    : encoding.substring (1, temp);
            encoding = encoding.trim ();
        }

        // "charset=" with nothing after it declares nothing
        return (encoding.length () == 0) ? defValue : encoding;
    }

    /**
     * Finds the first occurrence of a token within some text, comparing
     * characters without regard to case.
     *
     * @return the index of the first match, or -1 if there is none
     */
    private static int indexOfIgnoreCase (String text, String token)
    {
        int     length = token.length ();
        int     last = text.length () - length;

        for (int i = 0; i <= last; i++) {
            if (text.regionMatches (true, i, token, 0, length))
                return i;
        }
        return -1;
    }


    /**
     * Uses a local dictionary of public identifiers to resolve URIs,
     * normally with the goal of minimizing network traffic or latencies.
     *
     * @param pubid the PUBLIC identifier of the entity, or null
     * @param sysid the SYSTEM identifier of the entity; not consulted
     * @return an input source for the mapped URI (with its public ID
     *	set), or null when the public identifier is null or unmapped,
     *	which leaves resolution of the system identifier to the parser
     */
    public InputSource resolveEntity (String pubid, String sysid)
    throws IOException, SAXException
    {
        InputSource retval = null;
        String      uri;

        if (pubid != null
                && ((uri = (String) pubidMapping.get (pubid)) != null)) {
            retval = new InputSource (uri);
            retval.setPublicId (pubid);
        }

        // Should do URN resolution here

        // URL resolution always done by parser though

        return retval;
    }
}
