"""

This module implements a relatively-static mapping between RDF graphs
and Python objects.

It's kind of like a node-centric API for RDF, but the data is actually
stored in conventional Python objects (not even inheriting from
something special).   The connection is not live -- you have to
explicitly convert between the Python form and the RDF form of the
data.  

TODO:
   --- merge over to webdata_rdf.py
           * more in the autoconvert2 style
           * match based on class name, NOT properties
           * so no properties are "required".

           * need some kind of FLAG object.   Or Preload.
             convert('http://www.w3.org/2002/03owlt/testOntology#Full', "some-flag")
           
   - better error messages on missing properties...
     (if the type is present, or some properties are present)
   - handle loops of objects
   - have a way to make back-links
        pageset <--> page
        
   
See static_map_rdf_demo.py for an example.

Optionally turn on debugging:

   x>>> import debugtools
   x>>> debugtools.tags.add("toRDF")

Set up:

   >>> import rdflib
   >>> import person_demo
   >>> NS = rdflib.Namespace("http://example.com/ns#")
   >>> autoconvert(person_demo.Person, name=NS, age=NS, likesFood=(NS,list))
   >>> graph = rdflib.ConjunctiveGraph()
   >>> prmap = PythonRDFMap(graph)   # must be after autoconverts

Create an instance of a suitable object:
  
   >>> eric = person_demo.Person()
   >>> eric.name=u"Eric Lastname"
   >>> eric.age=31
   >>> eric.likesFood = [u"Cheese", u"Wine", u"Salad"]
   >>> eric
   Person({age: 31, likesFood: [u'Cheese', u'Wine', u'Salad'], name: u'Eric Lastname'})

Then map to RDF:

   >>> ericNode = prmap.toRDF(eric)

We can even print the graph here in doctest (if not indented):
    
>>> print graph.serialize(format="n3")       
<BLANKLINE>
@prefix _3: <http://example.com/ns#>.
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>.
<BLANKLINE>
 [ _3:age "31"^^<http://www.w3.org/2001/XMLSchema#int>;
         _3:likesFood ( "Cheese" "Wine" "Salad" );
         _3:name "Eric Lastname"].

And back again:
   >>> eric2 = prmap.fromRDF(ericNode)
   >>> eric == eric2
   True
   >>> eric2
   Person({age: 31, likesFood: [u'Cheese', u'Wine', u'Salad'], name: u'Eric Lastname'})


   

See "Sparta" for something like this.  Right now, I want to write it
myself.
http://en.wikipedia.org/wiki/RDFLib
"""
# Module revision string.  NOTE(review): the "$Revision: ... $" form looks
# like a CVS/RCS keyword expansion -- presumably maintained by the
# version-control system rather than by hand; confirm before editing.
__version__ = "$Revision: 1.7 $"

import sys
import rdflib
from debugtools import debug

import debugtools
# debugtools.tags.add("autoconvert")

# Module-wide registry of converter factories, in registration order.
# PythonRDFMap.__init__ calls each entry with the map being built, so
# anything registered after a map is created is not seen by that map.
globalConverterClasses = []


def registerConverterClass(c):
    """Add the converter factory `c` to the module-wide registry.

    `c` is anything callable with a single PythonRDFMap argument that
    returns a converter (typically a Plugin subclass).
    """
    message = "added converter %s" % repr(c)
    debug("toRDF", message)
    globalConverterClasses.append(c)

class NotConvertable(RuntimeError):
    """Raised by a converter that cannot handle the thing or node it was given.

    PythonRDFMap.toRDF and PythonRDFMap.fromRDF catch this and move on to
    the next converter.  The class object itself is also used as the
    "no default, the property is required" marker by decodeFlags and
    PythonRDFMap.getSoleObject.
    """
 
class Plugin:
    """Base class for converters used by a PythonRDFMap.

    The default implementation refuses every conversion in both
    directions; subclasses override toRDF and/or fromRDF.
    """

    def __init__(self, prmap):
        # The owning map; subclasses use it to reach the graph and to
        # convert nested values.
        self.prmap = prmap

    def toRDF(self, thing):
        """Return an RDF node for `thing`, or raise NotConvertable."""
        # Call form on purpose: the old "raise Class, value" form unpacks a
        # tuple value into several exception arguments and drops None.
        raise NotConvertable(thing)

    def fromRDF(self, node):
        """Return a Python value for `node`, or raise NotConvertable."""
        raise NotConvertable(node)

class NaturalLiteral(Plugin):
    """
    Converter between plain Python ints/strings and rdflib Literals.

    This version is just for demo.  It should really use rdflib's own
    stuff, like PythonTOXSD, .toPython(), etc.
    """

    def __init__(self, map):
        # Chain to the base class so that self.prmap is set, as it is for
        # every other Plugin.
        Plugin.__init__(self, map)

    def toRDF(self, thing):
        """Return an rdflib.Literal for an int or a string.

        Raises NotConvertable for anything else.
        """
        # Exact type test on purpose: isinstance(thing, int) would also
        # accept bool, which the type() comparison excludes.
        if type(thing) is int or isinstance(thing, basestring):
            return rdflib.Literal(thing)

        raise NotConvertable(thing)

    def fromRDF(self, node):
        """Return the Python value of a literal node.

        Untyped literals without a language tag come back as unicode and
        xsd:int literals as int.  Raises NotConvertable for nodes that do
        not have `language`/`datatype` attributes and for any other
        literal.
        """
        try:
            language = node.language
            datatype = node.datatype
        except AttributeError:
            raise NotConvertable("not like an rdflib.Literal")

        # The node itself serves as the value: unicode() and int() are
        # applied to it directly.
        if language is None and datatype is None:
            return unicode(node)   # should it really be unicode???

        if datatype == rdflib.URIRef('http://www.w3.org/2001/XMLSchema#int'):
            return int(node)

        raise NotConvertable("not a rdflib.Literal of a type we understand")


registerConverterClass(NaturalLiteral)


class PythonRDFMap:
    """Convert between Python objects and nodes of one RDF graph.

    The converters are instantiated from globalConverterClasses when the
    map is constructed, so converter classes must be registered before
    the map is created.
    """

    def __init__(self, graph):
        self.graph = graph
        self.converters = []
        for converterClass in globalConverterClasses:
            self.converters.append(converterClass(self))

    def getSoleObject(self, subject, predicate, default=NotConvertable):
        """Return the converted value of the only (subject, predicate) object.

        If there is no such object, return `default`, or raise
        NotConvertable when `default` is left as NotConvertable.  Raises
        RuntimeError if a further object is found after one has been
        converted.

        Note that None is used internally to mean "nothing found yet", so
        an object that converts to None is treated as absent.
        """
        result = None
        for obj in self.graph.objects(subject, predicate):
            if result is None:
                result = self.fromRDF(obj)
            else:
                raise RuntimeError("Extra values!")
        if result is not None:
            return result
        if default is NotConvertable:
            raise NotConvertable("Missing property %s" % repr(predicate))
        return default

    def fromRDF(self, node):
        """
        Look at the RDF graph around this node and figure out if we
        have a Python object which conveys the same information.

        There are all sort of scaling/efficiency things we might do
        some day -- this is a simple/naive implementation.

        In particular -- right now -- we do a "first-perfect-match"
        approach: the converters are tried in registration order and the
        result of the first one that does not raise NotConvertable is
        returned.  Raises NotConvertable if every converter refuses.

        UG:   toRDF(self, map)   vs   toRDF(self, thing)

        """
        debug("fromRDF(", "trying to convert %s" % repr(node))
        for (s, p, o) in self.graph.triples((node, None, None)):
            debug("fromRDF", "  %s" % repr(p))
            debug("fromRDF", "  = %s" % repr(o))
        for converter in self.converters:
            try:
                debug("fromRDF", "converter: %s" % repr(converter))
                result = converter.fromRDF(node)
            except NotConvertable:
                # Deliberate: this converter declined, try the next one.
                continue
            # Wrap in a 1-tuple so that a tuple-valued result is formatted
            # as a whole instead of being taken as the argument list.
            debug("fromRDF)", "succeeded, returned %s" % (result,))
            return result
        debug("fromRDF)", "failed")
        raise NotConvertable("No converter from RDF accepts %s" % repr(node))

    def toRDF(self, thing):
        """
        Return an RDF node for the thing, adding as necessary to the
        graph, so that the data "in" the thing is all encoded into the
        graph.

        This is (should be) extensible both by poking at the python
        object for how it might convert itself, and by having a
        registry of converters.

        The converters are tried in registration order; one that raises
        NotConvertable or returns None is skipped.  Raises NotConvertable
        if none of them produces a node.
        """
        debug("toRDF(", "trying to convert %s" % repr(thing))
        for converter in self.converters:
            try:
                debug("toRDF", "converter: %s" % repr(converter))
                result = converter.toRDF(thing)
            except NotConvertable:
                # Deliberate: this converter declined, try the next one.
                continue
            if result is None:
                continue
            debug("toRDF)", "succeeded, returned %s" % (result,))
            return result

        debug("toRDF)", "failed")
        raise NotConvertable("No converter to RDF accepts %s" % repr(thing))

def decodeFlags(propname, flags):
    """Expand the per-property `flags` given to autoconvert.

    `flags` is one of:
        where                      -- a single string-like value
        (where, proptype)
        (where, proptype, default)

        where    -- an rdflib.Namespace (indexed with propname), an
                    rdflib.URIRef (used as the property URI as is), or any
                    other string (propname is appended to it)
        proptype -- is None (for scalar) or list or set (the python types)
        default  -- is the default value to use if it's absent, or
                    NotConvertable if you want it to throw that error (ie
                    to not recognize the object -- ie to require this
                    property)

    When omitted, proptype is None and default is NotConvertable.

    Returns ( propuri, proptype, default ).

    Raises RuntimeError if `flags` has an unsupported length or `where` is
    of an unsupported type.
    """
    if isinstance(flags, basestring):
        p1 = flags
        proptype = None
        default = NotConvertable
    elif len(flags) == 2:
        (p1, proptype) = flags
        default = NotConvertable
    elif len(flags) == 3:
        (p1, proptype, default) = flags
    else:
        raise RuntimeError(
            "flags for property %r must be a string or a 2- or 3-tuple, "
            "got %r" % (propname, flags))

    # Namespace is tested before URIRef and the generic string case, the
    # same order as before, so the most specific handling wins.
    if isinstance(p1, rdflib.Namespace):
        propURI = p1[propname]
    elif isinstance(p1, rdflib.URIRef):
        propURI = p1
    elif isinstance(p1, basestring):
        propURI = rdflib.URIRef(p1+propname)
    else:
        raise RuntimeError(
            "cannot derive a property URI for %r from %r" % (propname, p1))

    return (propURI, proptype, default)


# other issues
#
#     - is None == absent from graph?  I think so
#     - recognition by type or properties?
    
class AutoConverter(Plugin):
    """Table-driven converter for instances of one Python class.

    `props` maps attribute names to flags in the form understood by
    decodeFlags; each attribute becomes one property of a blank node.
    """

    def __init__(self, map, cls, props):
        Plugin.__init__(self, map)
        self.cls = cls
        self.props = props

    def toRDF(self, obj):
        """Write `obj` into the graph and return the new blank node.

        Scalar properties become one triple, list properties an rdf:List,
        and set properties one triple per member.

        Raises NotConvertable if `obj` lacks one of the mapped attributes
        (so that PythonRDFMap.toRDF can try another converter), and
        NotImplementedError for a property type other than None, list or
        set.
        """
        # Read every attribute before touching the graph, so that an object
        # this converter cannot handle is refused without leaving any
        # triples behind.
        pending = []
        for (propname, flags) in self.props.items():
            (propuri, proptype, default) = decodeFlags(propname, flags)
            try:
                value = getattr(obj, propname)
            except AttributeError:
                raise NotConvertable(
                    "object has no attribute %s" % repr(propname))
            pending.append((propuri, proptype, value))

        graph = self.prmap.graph
        node = rdflib.BNode()
        # add class?
        for (propuri, proptype, value) in pending:
            if proptype is None:
                graph.add((node, propuri, self.prmap.toRDF(value)))
            elif proptype == list:
                # generate the bunch of rdf:List tuples, building from the
                # tail so each cell can point at the already-built rest
                rest = rdflib.RDF.nil
                for item in reversed(value):
                    listNode = rdflib.BNode()
                    valueNode = self.prmap.toRDF(item)
                    graph.add((listNode, rdflib.RDF.first, valueNode))
                    graph.add((listNode, rdflib.RDF.rest, rest))
                    rest = listNode
                graph.add((node, propuri, rest))
            elif proptype == set:
                # add a bunch of triples to the graph
                for item in value:
                    graph.add((node, propuri, self.prmap.toRDF(item)))
            else:
                # NotImplementedError is a RuntimeError, matching what
                # fromRDF raises for the same situation.
                raise NotImplementedError(
                    "property type %s not implemented" % repr(proptype))

        return node

    def fromRDF(self, node):
        """Build an instance of self.cls from the properties of `node`.

        If the module defining the class has a non-empty `webdata_ns`, the
        node must have rdf:type <webdata_ns + class name>; otherwise
        NotConvertable is raised.  Without a `webdata_ns` the match is
        made on properties alone.

        A missing required scalar property raises RuntimeError when the
        class matched by type, and NotConvertable otherwise.  Raises
        RuntimeError("not implemented") for a property type other than
        None, list or set.
        """
        result = self.cls()    # a huge waste if we fail
        debug("fromRDF(", "Made an instance: %s" % repr(result))
        if not isinstance(node, rdflib.URIRef):
            # NOTE(review): this also re-wraps nodes that are not URIs
            # (e.g. blank nodes); confirm that the rdflib version in use
            # still matches the wrapped node against the graph.
            node = rdflib.URIRef(node)
            debug("fromRDF", "wrapped node into URIRef: %s" % repr(node))

        graph = self.prmap.graph

        # hmmmm.   most of this code is written assuming we dispatch
        # to classes based on whatever works, rather than on the
        # RDF.type of the node.   But let's see if we can start trying
        # to use that type...
        for rdfcls in graph.objects(node, rdflib.RDF.type):
            debug("fromRDF", "is of class %s" % repr(rdfcls))

        # A module without webdata_ns simply opts out of type matching.
        module_ns = getattr(sys.modules[self.cls.__module__],
                            "webdata_ns", None)
        classMatch = False
        if module_ns:
            rdfcls = rdflib.URIRef(module_ns + self.cls.__name__)
            debug("fromRDF", "graph has %s ?"
                  % repr((node, rdflib.RDF.type, rdfcls)))
            if (node, rdflib.RDF.type, rdfcls) in graph:
                debug("fromRDF", "THIS IS THE CLASS MATCH")
                classMatch = True
            else:
                debug("fromRDF)", "NOT CLASS MATCH.  try another converter.")
                raise NotConvertable("Python class and RDF class don't match")

        for (propname, flags) in self.props.items():
            (propuri, proptype, default) = decodeFlags(propname, flags)
            debug("fromRDF", "looking for value for property: %s, %s"
                  % (propname, repr(propuri)))
            if proptype is None:
                debug("fromRDF", "decoding single-value")
                valueNode = graph.value(node, propuri)
                if valueNode is not None:
                    debug("fromRDF", "valueNode %s" % repr(valueNode))
                    value = self.prmap.fromRDF(valueNode)
                elif default is not NotConvertable:
                    debug("fromRDF", "it's missing, using default value")
                    value = default
                else:
                    debug("fromRDF", "it's missing but required")
                    debug("fromRDF)", "abandoning instance, %s" % repr(result))
                    if classMatch:
                        # The type says this is our class, so a missing
                        # property is bad data, not a reason to try
                        # another converter.
                        raise RuntimeError(
                            "Missing required property %s" % repr(propuri))
                    raise NotConvertable(
                        "Missing property %s" % repr(propuri))
            elif proptype == list:
                debug("fromRDF", "decoding rdf:List type")
                listNode = graph.value(node, propuri)
                value = []
                for item in graph.items(listNode):
                    value.append(self.prmap.fromRDF(item))
            elif proptype == set:
                debug("fromRDF", "decoding multivalues as set() type")
                value = set()
                for item in graph.objects(node, propuri):
                    value.add(self.prmap.fromRDF(item))
            else:
                debug("fromRDF)", "abandoning instance, %s" % repr(result))
                raise RuntimeError("not implemented")
            setattr(result, propname, value)
        debug("fromRDF)", "succeeded - returning instance, %s" % repr(result))
        return result


def autoconvert(cls, **kwargs):
    """Register an AutoConverter for `cls`.

    Each keyword names a slot of `cls`; its value is the information
    needed about that slot.  In the trivial case that is just the
    Namespace object to use.  Some day it may include cardinality, etc.
    """
    description = "called with %s, %s" % (cls.__name__, repr(kwargs))
    debug("autoconvert", description)
    # The registry expects something callable with the map as its only
    # argument, so bind cls and kwargs here.
    registerConverterClass(lambda prmap: AutoConverter(prmap, cls, kwargs))

def autoconvert2(cls):
    """Like autoconvert, but take the property names from a dummy instance.

    Whatever public attributes a freshly constructed `cls()` has are the
    properties that get mapped, using the `webdata_ns` of the module that
    defines the class as their namespace.  The initial value of each
    attribute selects its kind and serves as its default:

        None or NotConvertable -- scalar
        set()                  -- set
        []                     -- list

    Any other initial value raises RuntimeError.

    This isn't perfect.  There are various pythonic ways of having
    pseudo-properties, which this won't know about; it only sees what
    dir() reports for the instance.
    """
    prototype = cls()
    namespace = sys.modules[prototype.__module__].webdata_ns

    # Skip _foo, because that's too internal, and __foo__ because that's
    # python internal.
    publicNames = [name for name in dir(prototype)
                   if not name.startswith("_")]

    slots = {}
    for name in publicNames:
        initial = getattr(prototype, name)
        if initial is None or initial == NotConvertable:
            kind = None
        elif initial == set():
            kind = set
        elif initial == []:
            kind = list
        else:
            raise RuntimeError(
                "initial value %s for property %s not implemented"
                % (repr(initial), repr(name)))
        slots[name] = (namespace, kind, initial)

    autoconvert(cls, **slots)

def rdf_namespaced(cls, attr=None):
    """
    Return the URI of a python class or its attribute

    The namespace and local name come from namespaced(cls, attr).  A "#"
    is inserted between them only when the namespace ends in an
    alphanumeric character, i.e. when it does not already end in a
    separator such as "#" or "/".
    """
    (ns, name) = namespaced(cls, attr)
    # isalnum must be *called*; the bare method object is always true.
    # Slicing keeps an empty namespace from raising IndexError.
    if ns[-1:].isalnum():
        return ns + "#" + name
    return ns + name
   
def namespaced(cls, attr=None):
    """Find out the namespaced (global, URI) name for a Python class or
    an attribute of a Python class.

    If attr is given, it's the name of the attribute.  If it's omitted,
    then the namespaced name of the class itself (from cls.__name__) is
    returned.

    The namespaced name is determined on a module-by-module basis, using
    these optional attributes of the module that defines cls:
        webdata_ns     --- the default namespace for the module
        webdata_prefix --- a dict from prefixes to namespaces
        webdata_rename --- a dict from names to namespaced names
        webdata_special -- a callable to do the mapping

    They are consulted in the order special, rename, prefix, ns.  The
    results of webdata_special and webdata_rename are returned as is;
    otherwise the result is a (namespace, local name) pair, where the
    namespace is None if the module defines no webdata_ns.
    """
    module = sys.modules[cls.__module__]
    webdata_ns = getattr(module, "webdata_ns", None)
    webdata_prefix = getattr(module, "webdata_prefix", {})
    webdata_rename = getattr(module, "webdata_rename", {})
    webdata_special = getattr(module, "webdata_special", None)
    name = attr or cls.__name__

    if webdata_special:
        return webdata_special(name)

    try:
        return webdata_rename[name]
    except KeyError:
        pass

    if name.endswith("_"):   # a python convention for python keywords
        name = name[:-1]

    for (key, value) in webdata_prefix.items():
        if name.startswith(key):
            return (value, name[len(key):])

    return (webdata_ns, name)


   

def load(URI, publicURI=None):
    """Parse the RDF at `URI` and return the Python object for its root.

    For when you don't want to know/care about RDF.  The node that gets
    converted is `publicURI` if given, otherwise `URI` itself.
    """
    graph = rdflib.ConjunctiveGraph()
    graph.parse(URI, publicURI)
    tripleCount = len(graph)
    debug("web", "Loaded %d triples" % tripleCount)

    prmap = PythonRDFMap(graph)
    root = publicURI
    if root is None:
        root = URI
    return prmap.fromRDF(root)



   
if __name__ == "__main__":
    # Run the doctests in this module's docstrings when executed as a script.
    import doctest
    import sys
    thisModule = sys.modules[__name__]
    doctest.testmod(thisModule)
