""" Read/Write instances in stripe-skipping XML

The idea of stripe-skipping is based on "fully-striped" XML, which is
RDF/XML where the only attribute is rdf:parseType="Collection".
(and maybe there are some questions about URI & data types)

Stripe-skipping means you omit some XML elements -- skipping directly
to their child elements -- when the element name gives us only redundant
information and is not needed for grouping.

see also http://www.w3.org/2000/10/swap/StripeSkippingParser.py
another previous attempt was xmli.py



http://docs.python.org/lib/dom-node-objects.html


"""

__version__ = "$Revision: 1.5 $"
# $Source: /sources/public/2007/asn/ssxml.py,v $

from sys import stderr
import sys
from xml.dom.minidom import parse

import rdflib

import qname
import opendata.loader

class Error(Exception):
    """Raised by this module when the XML being read cannot be handled
    (unexpected text, unsupported datatype, multi/list data values)."""

class IgnoreThisElement(Exception):
    """Raised by Parser.get_datavalue for whitespace-only text that does
    not stand for a data value."""

class Parser:
    """

    TOTALLY BROKEN -- IN PROCESS FROM FULLY-STRIPED TO STRIPE-SKIPPING....

    [ Yes, this is kind of like a validating XML parser... ]
    
    >>> import opendata
    >>> model = opendata.loader.load("test-data/poscond2.asn")
    >>> parser = opendata.ssxml.Parser(model)

    >>> stream = open("test-data/poscond-m3.xml", "r")
    >>> root = parser.load(stream)

    x>>> p.graph.serialize("out.rdf", format="pretty-xml")
    x>>> p.graph.serialize("out.nt", format="nt")

    """

    def __init__(self, model):
        """Set up a parser that reads instances guided by model.

        model is stored unchanged as self.model.  The parser starts with
        an empty rdflib ConjunctiveGraph in self.graph and a qname.Map in
        self.map whose defaults are [ qname.common ].
        """
        self.model = model
        self.indent = 0     # not read by any method of this class
        self.graph = rdflib.ConjunctiveGraph()
        prefixes = qname.Map()
        prefixes.defaults = [ qname.common ]
        self.map = prefixes

    def load(self, input):
        """Parse the XML in input and return the value of its root element.

        input is handed to xml.dom.minidom.parse.  The parsed document is
        kept in self.dom and its document element is converted with
        get_value(), whose result is returned.
        """
        document = parse(input)
        self.dom = document
        # self._printElement(document.documentElement)    # debugging aid
        return self.get_value(document.documentElement)

    def _printElement(self, e, prefix = ""):
        """Internal diagnostic: dump the DOM subtree rooted at e to stderr.

        Elements are printed as their local name, followed by the
        namespace URI in parentheses when there is one; children are
        printed with "|    " appended to prefix.  Text nodes are printed
        as the repr of their data unless they are only whitespace.  Any
        other kind of node is reported as "unknown node type".
        """
        kind = e.nodeType

        if kind == e.TEXT_NODE:
            if e.data.strip():
                print >>stderr,  prefix+repr(e.data)
            return

        if kind != e.ELEMENT_NODE:
            print >>stderr,  prefix+"unknown node type"
            return

        # trailing comma: the namespace (or bare newline) ends this line
        print >>stderr,  prefix+e.localName,
        if e.namespaceURI:
            print >>stderr,  "("+e.namespaceURI+")"
        else:
            print >>stderr

        deeper = prefix+"|    "
        for child in e.childNodes:
            self._printElement(child, deeper)

    def get_datavalue(self, e, property):
        """Convert the text node e into a data value for property.

        e         -- a DOM text node; its .data is the text to convert
        property  -- the property being filled in; its .to (range URI),
                     .multi and .list attributes are read.  May be None
                     when the caller has no property context.

        Returns the text unchanged when the range is xsd:string, or
        int(text) when it is xsd:int.

        Raises Error for multi/list properties, for any other XML Schema
        datatype, and for non-blank text where no data value is expected.
        Raises IgnoreThisElement for whitespace-only text where no data
        value is expected.
        """
        value = e.data

        if property is None:
            # No range to convert to: only ignorable whitespace is acceptable.
            if value.strip():
                raise Error("text '%s' where no data value is expected"
                            % (value,))
            raise IgnoreThisElement()

        (ns, local) = qname.uri_split(property.to)
        if property.multi or property.list:
            raise Error("multi/list data values not supported")

        if ns == "http://www.w3.org/2001/XMLSchema#":

            if local == "string":
                return value

            if local == "int":
                return int(value)

            raise Error("xsd:%s not implemented yet, sorry" % local)

        # The range is not an XML Schema datatype, so text here can only be
        # ignorable whitespace around child elements.
        if value.strip():
            # Both values must be passed to % as one tuple; the expected
            # type is the property's range.
            raise Error("text '%s' when expecting a %s" %
                        (value, self.map.qname(property.to)))
        else:
            raise IgnoreThisElement()

    def get_value(self, e, property=None):
        """Return the value denoted by DOM node e, adding triples to self.graph.

        e         -- a DOM text or element node
        property  -- optional property whose value is being read; only its
                     .to attribute is consulted here

        Text nodes are delegated to get_datavalue().  For an element, the
        type URI is its namespace URI concatenated with its local name.
        If property is given and that type equals property.to, the element
        is handed to self.get_instance().  Otherwise a node is created for
        the element -- a URIRef if a child supplies a URI (see givesURI),
        else a fresh BNode -- an rdf:type triple is added, every other
        child is read as a property/value pair with pv(), and the node is
        returned.

        Raises RuntimeError if e is neither a text node nor an element.
        """

        print >>stderr, "get_value, class:", self.map.qname(getattr(property, "to", "x#y"))

        if e.nodeType == e.TEXT_NODE:
            return self.get_datavalue(e, property)

        if e.nodeType != e.ELEMENT_NODE:
            raise RuntimeError('looking for value, got non-text, non-element')

        print >>stderr,  "   tag:", e.nodeName

        #if e.localName == "List":
        #    return self.listValue(e)

        # namespaceURI is None for an element in no namespace; treat that
        # as an empty prefix rather than failing on None + str.
        cls_uri = rdflib.URIRef((e.namespaceURI or "") + e.localName)

        if property and cls_uri == property.to:
            # NOTE(review): get_instance is not defined in this class as
            # visible in this file -- confirm where it is meant to come from.
            return self.get_instance(e, cls_uri)

        # One pass over the children: those that name this node are
        # consumed here (the last one wins), the rest are kept for pv().
        me = None
        property_children = []
        for child in e.childNodes:
            uri = self.givesURI(child)
            if uri:
                me = rdflib.URIRef(uri)
            else:
                property_children.append(child)
        if me is None:
            me = rdflib.BNode()

        self.graph.add((me, rdflib.RDF.type, cls_uri))
        for child in property_children:
            result = self.pv(child)
            if result:
                (prop, value) = result
                self.graph.add((me, prop, value))

        return me

    def givesURI(self, e):
        if e.nodeType == e.ELEMENT_NODE:
            if e.localName == "uri":     # @@ namespace?
                return self.children_as_string(e)
            if e.localName == "qname":
                q = self.children_as_string(e)
                raise RuntimeError('not implemented yet')
        
    def pv(self, e):
        """Read one property/value pair from node e, a child of an instance.

        Returns None for a whitespace-only text node.  For an element,
        returns a (prop, value) tuple where prop is the rdflib.URIRef made
        from the element's namespace URI and local name.  If all of the
        element's children carry text, value is an rdflib.Literal of that
        text; otherwise value is the first true result of get_value() over
        the children.

        Raises Error for non-blank text where a property element was
        expected, and RuntimeError when e is some other kind of node or
        when the element yields no usable value.
        """
        if e.nodeType == e.TEXT_NODE:
            if e.data.strip():
                raise Error("text '%s' where property element expected"
                            % e.data)
            # whitespace between elements carries no information
            return None

        print >>stderr, "looking for property at "+e.nodeName
        if e.nodeType != e.ELEMENT_NODE:
            raise RuntimeError('unexpect xml structure')

        # namespaceURI is None for an element in no namespace
        prop = rdflib.URIRef((e.namespaceURI or "") + e.localName)
        node = self.model.get_by_uri(prop)
        ## is this node the expected property, or did we skip
        ## something?....  Ummmm.

        try:
            text = self.children_as_string(e)
        except AttributeError:   # a child without "data": not pure text
            pass
        else:
            return (prop, rdflib.Literal(text))

        # there should only be one child -- a type --
        # unless the type is being skipped.
        ####assert(len(e.childNodes) == 1)

        for child in e.childNodes:
            try:
                # NOTE(review): this used to pass the builtin `property`;
                # the model node looked up above is passed instead, assuming
                # it carries the .to/.multi/.list attributes that get_value
                # and get_datavalue read -- confirm against the model API.
                value = self.get_value(child, node)
            except IgnoreThisElement:
                # ignorable whitespace around the real content
                continue
            print >>stderr, "  got value:  "+repr(value)
            if not value:
                continue
            return (prop, value)

        self._printElement(e)
        raise RuntimeError('property %s has no usable content'
                           % repr(e.localName))

    def children_as_string(self, e):
        # doesn't check types -- dom should tell us if we're wrong
        return "".join([child.data for child in e.childNodes])

    def listValue(self, e):
        """Build an rdf:List in self.graph from the element children of e.

        Each element child is converted with get_value(); every true
        result becomes the rdf:first of a fresh BNode cell, and the cells
        are chained with rdf:rest, the last one pointing at rdf:nil.

        Returns the first cell, or rdf:nil when no child yields a value
        (no triples are added in that case).

        Raises Error for non-blank text between the items.
        """
        first = rdflib.RDF.nil
        prev = None
        for child in e.childNodes:
            if child.nodeType != child.ELEMENT_NODE:
                # NOTE(review): this used to call the nonexistent
                # self.value(child, text_ok=False); read here as "text is
                # not allowed between list items" -- confirm the intent.
                if child.nodeType == child.TEXT_NODE and child.data.strip():
                    raise Error("text '%s' where list item element expected"
                                % child.data)
                continue
            value = self.get_value(child)
            if not value:
                continue
            here = rdflib.BNode()
            if prev is None:
                first = here
            else:
                self.graph.add((prev, rdflib.RDF.rest, here))
            self.graph.add((here, rdflib.RDF.first, value))
            prev = here
        if prev is not None:
            # terminate the chain; an empty list is just rdf:nil itself
            self.graph.add((prev, rdflib.RDF.rest, rdflib.RDF.nil))
        return first

class Generator:
    """

    # rather tedious setup for now....

    >>> import rdflib
    >>> graph = rdflib.ConjunctiveGraph()
    >>> dummy = graph.parse('test-data/books-eg.rdf')
    >>> root = u'http://example.net/my_stuff#bk1'

    >>> import opendata.loader
    >>> model = opendata.loader.load('test-data/books.asn')

    >>> import opendata.ssxml
    >>> g = Generator(graph, model)
    >>> g.map.bind('', 'http://www.w3.org/2007/01/ss-example#')

    # but.... maybe it'll work?  :-)

    >>> import sys

    x>>> g.serializeDocument(sys.stdout, root)

    x>>> test_poscond()
    
"""
    
    def __init__(self, graph, model):
        """Set up a generator that serializes data from graph guided by model.

        Both arguments are stored unchanged.  self.map starts as a
        qname.Map whose defaults are [ qname.common ].
        """
        prefixes = qname.Map()
        prefixes.defaults = [ qname.common ]
        self.graph = graph
        self.map = prefixes
        self.model = model

    def get_class(self, value):
        """Return the model class of value.

        value is in the rdflib form -- strings are understood to be
        URIs, use Literal() for real string values.  The result is the
        first entry of self.model.classes whose name is among the
        rdf:type objects that self.graph holds for value.

        Raises RuntimeError when none of the model's classes matches.
        """
        class_URIs = list(self.graph.objects(value, rdflib.RDF.type))

        for candidate in self.model.classes:
            if candidate.name not in class_URIs:
                continue
            return candidate

        raise RuntimeError('no ontology for '+value+', rdf:type '+repr(class_URIs))
        
    def out(self, text):
        self.stream.write(text)
        
    def serializeDocument(self, stream, root):
        """Serialize the value root onto stream.

        stream is remembered as self.stream, which out() writes to, and
        root is then passed to serializeValue() with default settings.
        """
        # issue namespace declarations?    do it later?
        self.stream = stream
        self.serializeValue(root)

    def serializeValue(self, value,
                       prefix="",
                       use_single_element=True,
                       required_type=None,
                       newline="\n"):
        """serialize the given value, as best we can, guided by the
        context conveyed by the flags.
        """

        if required_type == self.map.uri("xsd:string"):
            if newline: self.out(prefix)
            self.out(value+newline)
            return

        cls = self.get_class(value)
        # print >>stderr, "Class is "+repr(cls)

        one_pv = ( self.model.count_pv(cls) == 1 )
        
        if cls.name == required_type:
            if use_single_element:
                if one_pv:
                    skip = True
                else:
                    skip = False
            else:
                skip = True
        else:
            skip = False

        if skip:
            inner_prefix = prefix
            print >>stderr, "skipping class "+self.map.qname(cls.name)
        else:
            self.out(prefix+"<"+self.map.qname(cls.name)+">\n")
            inner_prefix = prefix+"  "

        for prop in self.model.propertiesForClassWithInheritance(cls):
            self.serialize_property(value, prop,
                use_single_element=use_single_element,
                prefix = inner_prefix,
                skip_to_value = one_pv)

        if not skip:
            self.out(prefix+"</"+self.map.qname(cls.name)+">\n")

    def serialize_property(self, subject, prop,
                           prefix = "",
                           use_single_element=False,  # for skip_to_value
                           skip_to_value = False) :
        """Write every value that subject has for prop.

        subject             -- the node whose values are written
        prop                -- model property; its .name is the predicate
                               URI and its .to the range URI
        prefix              -- indentation written before each line
        use_single_element  -- passed through to serializeValue() only
                               when the property element is skipped
        skip_to_value       -- when true, no element is written for the
                               property and the value is written directly

        A value that is an rdf:List is expanded, so that each list item is
        written as a separate value of the property; rdf:nil contributes
        no values.
        """

        print >> stderr, "output all values for "+prop.name

        uri = prop.name
        range_uri = prop.to

        # Writing xsd:string values inline (newline="") is disabled: it
        # does not know about skipped stripes.  We'll need some radical
        # cleverness, or to output a dom tree or something.
        newline = "\n"

        if skip_to_value:
            inner_prefix = prefix
            value_single_element = use_single_element
        else:
            inner_prefix = prefix+"  "
            # the property element written below is the single element,
            # so the value no longer has to supply one
            value_single_element = False

        for value in self.graph.objects(subject, uri):

            print >> stderr, "  got a value:", value, type(value)

            # If the value is an rdf:List, consider each element
            # in the list to be a value for this property.  This
            # may seem odd, but it works pretty well.
            firsts = list(self.graph.objects(value, rdflib.RDF.first))
            if firsts:
                items = list(self.graph.items(value))
                print >> stderr, "  it's a list:", items
            elif value == rdflib.RDF.nil:
                items = ()
            else:
                items = ( value, )

            for item in items:

                # check type of value against range_uri ?   :-)

                if skip_to_value:
                    print >>stderr, "skipping property "+self.map.qname(uri)
                else:
                    self.out(prefix+"<"+self.map.qname(uri)+">"+newline)

                self.serializeValue(item,
                                    prefix = inner_prefix,
                                    use_single_element=value_single_element,
                                    required_type=range_uri,
                                    newline=newline)

                if not skip_to_value:
                    if newline:
                        self.out(prefix)
                    self.out("</"+self.map.qname(uri)+">\n")


def test_poscond():
    """Serialize the poscond example to stdout, then print the prefix map.

    Loads test-data/poscond-eg.rdf into a fresh graph and
    test-data/poscond.asn as the model, binds the empty prefix, and
    serializes the instance rooted at http://example.net/and.
    """
    graph = rdflib.ConjunctiveGraph()
    graph.parse('test-data/poscond-eg.rdf')
    root = u'http://example.net/and'

    model = opendata.loader.load('test-data/poscond.asn')

    g = Generator(graph, model)
    # the namespace URI used to end in a stray double-quote character
    g.map.bind('', 'http://www.w3.org/2007/01/rif#')

    g.serializeDocument(sys.stdout, root)

    print(g.map)

if __name__ == "__main__":
    # Run the doctests embedded in this module's docstrings.
    import doctest
    import sys
    this_module = sys.modules[__name__]
    doctest.testmod(this_module)
