"""

This module provides base classes to inherit from; they supply some
simple tools for writing input processors (parsers, de-serializers) or
output processors (generators, serializers).

Our model is that one object is created to manage each
parse/generation task.  If you want to parse three files, you have
three Parser objects (unless there's some continuity between them).

We assume the data is going into/out-of an RDF graph, but it doesn't
really have to be the case.

"""

import sys
import re
import urllib2
import cStringIO

import rdflib
import qname

#
# Stream Utils
#

# Matches a leading "scheme:" prefix; ensure_open_source() uses it to tell
# URLs from file names.  At least two scheme characters are required so
# that a Windows drive letter ("C:\data\file.rdf") or a bare leading colon
# is treated as a file name instead of being handed to urllib2.
uriSchemePattern = re.compile(r"""^([a-zA-Z_0-9]{2,}):""")

def ensure_open_source(source):
    """Return a readable, closeable stream for whatever `source` is.

    `source` is handled as the first of these that applies:

      * an object with both a read and a close attribute is returned
        unchanged;
      * a string containing at least one newline is taken to be the
        input data itself and is wrapped in a cStringIO stream;
      * a string matching uriSchemePattern is opened with
        urllib2.urlopen;
      * anything else is opened for reading as a file name.

    Telling data apart from file names and URLs by the presence of a
    newline is admittedly a hack, and may be removed some day.
    """
    is_stream = hasattr(source, "read") and hasattr(source, "close")
    if is_stream:
        return source

    looks_like_data = source.find("\n") != -1
    if looks_like_data:
        return cStringIO.StringIO(source)

    if uriSchemePattern.match(source) is None:
        return open(source, "r")
    return urllib2.urlopen(source)
    
def ensure_open_sink(sink):
    """Return an open, writable stream for `sink`.

    An object that already has both a write and a close attribute is
    returned unchanged; anything else is passed to open() as the name
    of a file to (over)write.  This lets people be more casual when
    calling us.  (No URL handling is done here.)
    """
    if not hasattr(sink, "write") or not hasattr(sink, "close"):
        return open(sink, "w")
    return sink

def default_importer(source):
    """Always fail: there is no default way to import `source`.

    `source` is not examined; it only appears in the error message.

    Raises NotImplementedError.  (This used to raise a name, `Error`,
    which is not defined anywhere in this module, so callers actually
    got a NameError.)
    """
    raise NotImplementedError("no default importer available for %r"
                              % (source,))

def _init(self, kwargs, r_or_w, default_stream):
    """ make sure self.graph, .map, and .stream are set up, from
    kwargs or defaults.
    """

    self.__dict__.update(kwargs)

    # Set up a _graph, in case they want one...
    try:
        self._graph = kwargs['graph']
    except KeyError:
        self._graph = rdflib.ConjunctiveGraph()

    # Set up a _map, in case they want one...
    try:
        self._map = kwargs['map']
    except KeyError:
        self._map = qname.Map(defaults=[qname.common])

    # Set up a _stream, in case they want one...
    try:
        self._stream = kwargs['stream']
    except KeyError:
        try:
            filename = kwargs['filename']
            if r_or_w == "r":
                self._stream = ensure_open_source(filename)
            elif r_or_w == "w":
                self._stream = ensure_open_sink(filename)
            else:
                raise Error, ('r_or_w not r or w, but ', r_or_w)
        except KeyError:
            self._stream = default_stream


class Parser (object) :    # DESERIALIZER
    """Base class for input processors.

    Construction hands the keyword arguments to _init() in read mode,
    with sys.stdin as the fallback stream.

    A subclass that defines its own __init__ should chain up:

    def __init__(self, **kwargs):
        super(YOURCLASS, self).__init__(**kwargs)
        # any code you want, here.

    """

    def __init__(self, **kwargs):
        _init(self, kwargs, r_or_w="r", default_stream=sys.stdin)
        

class Generator (object) :        # SERIALIZER
    """Base class for output processors.

    Construction hands the keyword arguments to _init() in write mode,
    with sys.stdout as the fallback stream.

    A subclass that defines its own __init__ should chain up:

    def __init__(self, **kwargs):
        super(YOURCLASS, self).__init__(**kwargs)
        # any code you want, here.

    """

    def __init__(self, **kwargs):
        _init(self, kwargs, r_or_w="w", default_stream=sys.stdout)

    def write(self, *obj):
        """Write str() of each argument to self._stream, joined by
        single spaces, with no trailing newline."""
        pieces = map(str, obj)
        self._stream.write(" ".join(pieces))





# here's a subclass which knows its data is a 'Model', not a graph.
#
#  class ModelGenerator ( Generator ) :
#  
#      def __init__(self, **kwargs):
#          super(ModelGenerator, self).__init__(**kwargs)
#          # print >>sys.stderr, "MoGEN running"
#          self.model = getattr(self, "model", objectmodel.ObjectModel())
#          
#      def writeModel(self, out, model):
#          self.stream = out
#          self.model = model
#          self.run()
#  
#  


class xmlStream:
    """Write indented XML, one piece at a time, through a generator.

    The generator must offer write(*objs) and a _map attribute whose
    qname(iri) method turns an element IRI into the name to print.
    Every piece of output starts on a fresh line, indented two spaces
    per currently open element.
    """

    def __init__(self, generator):
        self._generator = generator
        # IRIs of the elements opened by begin() and not yet closed
        self._stack = []

    def begin(self, elementIRI):   #  @@ attributes, too!
        """Write the open tag for elementIRI and remember it as open."""
        gen = self._generator
        #   need to do on-demand namespace additions here
        self.indent()
        gen.write("<" + gen._map.qname(elementIRI) + ">")
        self._stack.append(elementIRI)

    def end(self):
        """Write the close tag for the most recently opened element.

        Raises IndexError if no element is open.
        """
        gen = self._generator
        elementIRI = self._stack.pop()
        self.indent()
        gen.write("</" + gen._map.qname(elementIRI) + ">")

    def text(self, text):
        """Write text on its own indented line, with &, < and > escaped.

        NOT WHITESPACE PRESERVING: a newline and indentation are written
        in front of the text.
        """
        from xml.sax.saxutils import escape
        # Go through indent() so text lines up exactly like tags do;
        # passing the newline and the padding as two arguments to
        # write() let a space-joining write() slip in a stray blank.
        self.indent()
        if not hasattr(text, "replace"):
            # non-string values used to be stringified by write()
            text = str(text)
        self._generator.write(escape(text))

    def indent(self):
        """Start a new line, indented two spaces per open element."""
        gen = self._generator
        gen.write("\n"+("  "*(len(self._stack))))

# A very very very simple (yet sufficient!) minidom.  :-)

class XMLElement:
    """One node of the mini-DOM: a tag IRI, attributes and children.

    children holds nested XMLElements and/or text strings, in document
    order; attributes maps attribute names (IRIs, or literal 'xml:...'
    names) to their values.
    """

    def __init__(self):
        self.children = []
        self.attributes = { }
        self.tagIRI = None

    def serializeTo(self, stream, map, indent="", allowMultiLine=True,
                    isRoot=False):
        '''
        Write this element, and everything below it, to stream.

        map is used to turn the tag IRI and attribute IRIs into qnames
        (attribute names starting with "xml:" are written as they are).
        When isRoot is true, one xmlns declaration per (prefix, value)
        pair of map.iteritems() is written too, each on its own line.

        Line breaks are used only if allowMultiLine is true and
        self.multiline() says this element needs them.  In that case:

          - an attribute value longer than 40 characters starts on a
            new line,
          - each child element is preceded by a newline plus
            indent + 4 spaces (and may itself use line breaks),
          - the close tag is preceded by a newline plus indent,
          - an element without children writes a newline plus indent
            after its "/>".

        Otherwise everything goes on one line, "<foo>stuff</foo>", and
        child elements are told not to break lines either.  Nothing is
        written after the close tag of an element that has children.
        '''
        tagName = map.qname(self.tagIRI)
        breakLines = allowMultiLine and self.multiline()
        emit = stream.write

        emit('<' + tagName)
        for (attrKey, attrValue) in self.attributes.iteritems():
            # long values get a line to themselves
            if breakLines and len(attrValue) > 40:
                emit('\n      ' + indent)
            attrName = attrKey
            if not attrKey.startswith('xml:'):
                attrName = map.qname(attrKey)
            emit(' ' + attrName + '="')
            emit(xmlAttrEscaped(attrValue))
            emit('"')

        if isRoot:
            # define xmlns stuff; an empty prefix means the default namespace
            for (prefix, value) in map.iteritems():
                declName = "xmlns"
                if prefix:
                    declName = "xmlns:" + prefix
                emit('\n      ' + indent + declName + '="')
                emit(xmlAttrEscaped(value))
                emit('"')

        if self.children:
            emit('>')
            for node in self.children:
                if isinstance(node, basestring):
                    emit(xmlContentEscaped(node))
                    continue
                nestedIndent = indent + "    "
                if breakLines:
                    emit("\n" + nestedIndent)
                node.serializeTo(stream, map, nestedIndent, breakLines)
            if breakLines:
                emit('\n' + indent)
            emit('</' + tagName + '>')
        else:
            emit('/>')
            # NOTE(review): only this childless case writes anything
            # after the end of the element -- confirm that is intended.
            if breakLines:
                emit('\n' + indent)

    def multiline(self):
        '''Should the serialization of this element use line breaks?

        Never, if any direct child is text.  Otherwise yes when a child
        element is itself multiline, when some attribute value is
        longer than 40 characters, or when there are two or more child
        elements.
        '''
        sawMultilineChild = False
        singleLineChildren = 0
        for node in self.children:
            assert node != self
            if isinstance(node, basestring):
                # never safe to break elements with text in them
                return False
            if node.multiline():
                sawMultilineChild = True
            else:
                singleLineChildren += 1
        if sawMultilineChild:
            return True
        for attrValue in self.attributes.values():
            if len(attrValue) > 40:
                return True
        return singleLineChildren >= 2

def xmlAttrEscaped(s):
    """Return s made safe for use inside a double-quoted XML attribute.

    The characters &, <, > and " are replaced by &amp;, &lt;, &gt; and
    &quot;.  The surrounding quotes are NOT added.
    """
    from xml.sax.saxutils import escape
    return escape(s, {'"': '&quot;'})

def xmlContentEscaped(s):
    """Return s made safe for use as XML character data.

    The characters &, < and > are replaced by &amp;, &lt; and &gt;;
    quotes are left alone.
    """
    from xml.sax.saxutils import escape
    return escape(s)

class XMLTree:    # should be called XMLDocument?   XMLWriter?
    '''
    Build a tree of XMLElements with begin()/end() calls, then write it.

    >>> import langio2
    >>> from sys import stdout
    >>> xml = langio2.XMLTree()

    Output an empty tree:

    >>> xml.serializeTo(stdout)
    <?xml version='1.0' encoding='UTF-8'?>
    <!-- empty document -->

    SINGLE ELEMENT DOCUMENT

    >>> xml.begin('http://example.com/#Document')
    >>> xml.serializeTo(stdout)
    <?xml version='1.0' encoding='UTF-8'?>
    <ns1:Document
          xmlns:ns1="http://example.com/#"/>

    ADD A NAMESPACE MAP

    >>> map = qname.Map(defaults=[qname.common])
    >>> map.main = "http://example.com/#"
    >>> map.alt = "http://example.org/#"
    >>> xml.serializeTo(stdout, map=map)
    <?xml version='1.0' encoding='UTF-8'?>
    <main:Document
          xmlns:alt="http://example.org/#"
          xmlns:main="http://example.com/#"/>

    ADD A DEFAULT NAMESPACE

    >>> map.bind('', map.main)
    >>> xml.serializeTo(stdout, map=map)
    <?xml version='1.0' encoding='UTF-8'?>
    <Document
          xmlns="http://example.com/#"
          xmlns:alt="http://example.org/#"
          xmlns:main="http://example.com/#"/>

    >>> xml.begin(map.uri('alt:metadata'))
    >>> xml.begin(map.uri('alt:author'))
    >>> xml.addText('Jim White')
    >>> xml.end()
    >>> xml.begin(map.uri('alt:author'))
    >>> xml.begin(map.uri('alt:firstName'))
    >>> xml.addText('Bobby')
    >>> xml.end()
    >>> xml.begin(map.uri('alt:lastName'))
    >>> xml.addText('Blue')
    >>> xml.end()
    >>> xml.end()
    >>> xml.begin(map.uri('alt:author'))
    >>> xml.begin(map.uri('alt:impressive'))
    >>> xml.addText('Fred ')
    >>> xml.addText('Red')
    >>> xml.end()
    >>> xml.end()
    >>> xml.end()
    >>> xml.begin('http://example.com/#maindata')
    >>> xml.serializeTo(stdout, map=map)
    <?xml version='1.0' encoding='UTF-8'?>
    <Document
          xmlns="http://example.com/#"
          xmlns:alt="http://example.org/#"
          xmlns:main="http://example.com/#">
        <alt:metadata>
            <alt:author>Jim White</alt:author>
            <alt:author>
                <alt:firstName>Bobby</alt:firstName>
                <alt:lastName>Blue</alt:lastName>
            </alt:author>
            <alt:author><alt:impressive>Fred Red</alt:impressive></alt:author>
        </alt:metadata>
        <maindata/>
    </Document>



    '''

    def __init__(self):
        self._root = None      # the first element ever begun
        self._stack = []       # elements left open above the cursor
        self._cursor = None    # the element currently being filled in

    def begin(self, tagIRI):
        """Open a new element named tagIRI and make it the cursor.

        The first element begun becomes the root.  If there already is
        a cursor, the new element is appended to its children and the
        old cursor is pushed on the stack for end() to return to.
        """
        element = XMLElement()
        element.tagIRI = tagIRI
        if self._root is None:
            self._root = element
        parent = self._cursor
        if parent:
            parent.children.append(element)
            self._stack.append(parent)
        self._cursor = element

    def addAttribute(self, attributeIRI, value):
        """Set (or replace) an attribute on the cursor element."""
        attrs = self._cursor.attributes
        attrs[attributeIRI] = value

    def addText(self, text):
        """Append text to the children of the cursor element."""
        siblings = self._cursor.children
        siblings.append(text)

    def end(self):
        """Close the cursor element, returning to the one it was begun in.

        With nothing on the stack the cursor is left where it is and
        'stack underflow' is reported on sys.stderr.
        """
        if self._stack:
            self._cursor = self._stack.pop()
        else:
            sys.stderr.write('stack underflow\n')

    def serializeTo(self, stream, map=None):
        """Write the XML declaration and then the tree to stream.

        A false map (such as None) is replaced by a fresh
        qname.Map(defaults=[qname.common]).  With no root element, a
        comment saying the document is empty is written instead.
        """
        if not map:
            map = qname.Map(defaults=[qname.common])
        stream.write("<?xml version='1.0' encoding='UTF-8'?>\n")
        root = self._root
        if root is not None:
            root.serializeTo(stream, map, isRoot=True)
            return
        stream.write("<!-- empty document -->\n")

    def serializeInHTML(self, stream):
        """output the XML with line numbers, hyperlinks, color coding
        and such, for when you want to show people the XML on a web page

        Not implemented yet: does nothing.
        """
        return None

if __name__ == "__main__":
    # Run as a script: execute the doctests embedded in this module's
    # docstrings.  With no argument, testmod() tests the __main__ module,
    # which is this one.
    import doctest
    doctest.testmod()
