"""

This module implements a relatively-static mapping between RDF graphs
and Python objects.

It's kind of like a node-centric API for RDF, but the data is actually
stored in conventional Python objects (not even inheriting from
something special).   The connection is not live -- you have to
explicitly convert between the Python form and the RDF form of the
data.  

TODO:
   --- merge over to webdata_rdf.py
           * more in the autoconvert2 style
           * match based on class name, NOT properties
           * so no properties are "required".

           * need some kind of FLAG object.   Or Preload.
             convert('http://www.w3.org/2002/03owlt/testOntology#Full', "some-flag")
           
   - better error messages on missing properties...
     (if the type is present, or some properties are present)
   - handle loops of objects
   - have a way to make back-links
        pageset <--> page
        
   
See static_map_rdf_demo.py for an example.

Optionally turn on debugging:

   x>>> import debugtools
   x>>> debugtools.tags.add("toRDF")

Set up:

   >>> import rdflib
   >>> import person_demo
   >>> NS = rdflib.Namespace("http://example.com/ns#")
   >>> autoconvert(person_demo.Person, name=NS, age=NS, likesFood=(NS,list))
   >>> graph = rdflib.ConjunctiveGraph()
   >>> prmap = PythonRDFMap(graph)   # must be after autoconverts

Create an instance of a suitable object:
  
   >>> eric = person_demo.Person()
   >>> eric.name=u"Eric Lastname"
   >>> eric.age=31
   >>> eric.likesFood = [u"Cheese", u"Wine", u"Salad"]
   >>> eric
   Person({age: 31, likesFood: [u'Cheese', u'Wine', u'Salad'], name: u'Eric Lastname'})

Then map to RDF:

   >>> ericNode = prmap.toRDF(eric)

We can even print the graph here in doctest (if not indented):
    
>>> print graph.serialize(format="n3")       
<BLANKLINE>
@prefix _3: <http://example.com/ns#>.
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>.
<BLANKLINE>
 [ _3:age "31"^^<http://www.w3.org/2001/XMLSchema#int>;
         _3:likesFood ( "Cheese" "Wine" "Salad" );
         _3:name "Eric Lastname"].

And back again:
   >>> eric2 = prmap.fromRDF(ericNode)
   >>> eric == eric2
   True
   >>> eric2
   Person({age: 31, likesFood: [u'Cheese', u'Wine', u'Salad'], name: u'Eric Lastname'})


   

See "Sparta" for something like this.  Right now, I want to write it
myself.
http://en.wikipedia.org/wiki/RDFLib
"""
__version__ = "$Revision: 1.5 $"

import sys
import rdflib
from debugtools import debug

import webdata

import debugtools
# Turn on the "autoconvert" debug tag as soon as this module is imported
# (the same mechanism the module docstring shows for the "toRDF" tag).
# NOTE(review): this is an import-time side effect on shared debugtools
# state -- confirm it is still wanted outside of development.
debugtools.tags.add("autoconvert")

# Registry of converter factories.  Each entry is called with a
# PythonRDFMap and returns the converter instance that map will use.
globalConverterClasses = []


def registerConverterClass(c):
    """Add the converter factory `c` to the global registry.

    PythonRDFMap instantiates every registered factory in its constructor,
    so a factory has to be registered before the map is created.
    """
    debug("toRDF", "added converter %s" % repr(c))
    globalConverterClasses.append(c)

class NotConvertable(RuntimeError):
    """Raised by a converter that cannot handle the value or node given.

    The class object itself is also used in this module as a sentinel
    meaning "no default": a property whose default is NotConvertable is
    required.
    """
 
class Plugin:
    """Base class for converters between Python values and RDF nodes.

    A converter is constructed with the PythonRDFMap it belongs to and
    overrides toRDF() and/or fromRDF().  The defaults below reject
    everything, so a subclass only needs to implement the direction(s) it
    supports.
    """

    def __init__(self, prmap):
        # The owning map; converters use it to reach the graph and to
        # convert nested values.
        self.prmap = prmap

    def toRDF(self, thing):
        """Return an RDF node for `thing`, or raise NotConvertable."""
        # Instantiate explicitly: the "raise Class, value" form would unpack
        # a tuple `thing` into several constructor arguments and drop None.
        raise NotConvertable(thing)

    def fromRDF(self, node):
        """Return a Python value for `node`, or raise NotConvertable."""
        raise NotConvertable(node)

class NaturalLiteral(Plugin):
    """
    Converter between plain Python ints/strings and rdflib Literals.

    this version is just for demo.

    should really use rdflib's own stuff, like PythonTOXSD,
    .toPython(), etc.

    """

    def __init__(self, map):
        # Keep the Plugin contract (self.prmap is set) even though this
        # converter never needs to consult the map itself.
        Plugin.__init__(self, map)

    def toRDF(self, thing):
        """Return an rdflib.Literal for an int or a string.

        Raises NotConvertable for anything else.  The int test is an exact
        type match, so subclasses of int (e.g. bool) are rejected.
        """
        if type(thing) is type(1) or isinstance(thing, basestring):
            return rdflib.Literal(thing)

        # Explicit instantiation so the rejected object is always carried
        # as the single exception argument (a tuple would otherwise be
        # unpacked into several arguments).
        raise NotConvertable(thing)

    def fromRDF(self, node):
        """Return the Python value of a plain or xsd:int literal.

        Raises NotConvertable if `node` has no language/datatype attributes
        (i.e. does not look like an rdflib.Literal) or has a language tag or
        datatype this demo converter does not handle.
        """
        try:
            language = node.language
            datatype = node.datatype
        except AttributeError:
            raise NotConvertable("not like an rdflib.Literal")

        # The node itself serves as the lexical value ("rdflib is too
        # clever for me" -- the original author's note).
        if language is None and datatype is None:
            return unicode(node)   # should it really be unicode???

        if datatype == rdflib.URIRef('http://www.w3.org/2001/XMLSchema#int'):
            return int(node)

        raise NotConvertable("not a rdflib.Literal of a type we understand")


registerConverterClass(NaturalLiteral)


class PythonRDFMap:
    """Converts between Python objects and the nodes of one rdflib graph.

    One converter is created from every registered converter factory when
    the map is constructed; factories registered later are not seen by an
    existing map.  Conversion asks each converter in registration order and
    uses the first one that does not raise NotConvertable.
    """

    def __init__(self, graph):
        self.graph = graph
        self.converters = []
        for converterClass in globalConverterClasses:
            self.converters.append(converterClass(self))

    def getSoleObject(self, subject, predicate, default=NotConvertable):
        """Return the converted single value of `predicate` on `subject`.

        Raises RuntimeError if a second value is found after one has been
        converted.  If nothing was found (or the value converted to None),
        returns `default`, or raises NotConvertable when `default` is the
        NotConvertable sentinel.
        """
        found = None
        for candidate in self.graph.objects(subject, predicate):
            if found is not None:
                raise RuntimeError("Extra values!")
            found = self.fromRDF(candidate)
        if found is not None:
            return found
        if default is NotConvertable:
            raise NotConvertable("Missing property %s" % repr(predicate))
        return default

    def fromRDF(self, node):
        """
        Look at the RDF graph around this node and figure out if we
        have a Python object which conveys the same information.

        There are all sort of scaling/efficiency things we might do
        some day -- this is a simple/naive implementation.

        In particular -- right now -- we do a "first-perfect-match"
        approach: the result of the first converter that does not raise
        NotConvertable is returned (a None result is returned as-is).

        Raises NotConvertable if every converter rejects the node.

        UG:   toRDF(self, map)   vs   toRDF(self, thing)

        """
        debug("fromRDF(", "trying to convert %s" % repr(node))
        for (s, p, o) in self.graph.triples((node, None, None)):
            debug("fromRDF", "  %s" % repr(p))
            debug("fromRDF", "  = %s" % repr(o))
        for converter in self.converters:
            try:
                debug("fromRDF", "converter: %s" % repr(converter))
                result = converter.fromRDF(node)
                # (result,) so that a tuple result is formatted as one
                # value instead of being spread over the format string.
                debug("fromRDF)", "succeeded, returned %s" % (result,))
                return result
            except NotConvertable:
                pass
        debug("fromRDF)", "failed")
        raise NotConvertable("No converter from RDF accepts %s" % repr(node))

    def toRDF(self, thing):
        """
        Return an RDF node for the thing, adding as necessary to the
        graph, so that the data "in" the thing is all encoded into the
        graph.

        This is (should be) extensible both by poking at the python
        object for how it might convert itself, and by having a
        registry of converters.

        A converter that returns None is skipped, just like one that raises
        NotConvertable.  Raises NotConvertable if no converter accepts.
        """
        debug("toRDF(", "trying to convert %s" % repr(thing))
        for converter in self.converters:
            try:
                debug("toRDF", "converter: %s" % repr(converter))
                result = converter.toRDF(thing)
                if result is None:
                    continue
                # (result,) keeps a tuple result from breaking the format.
                debug("toRDF)", "succeeded, returned %s" % (result,))
                return result
            except NotConvertable:
                pass

        debug("toRDF)", "failed")
        raise NotConvertable("No converter to RDF accepts %s" % repr(thing))

def decodeFlags(propname, flags):
    """Expand a property's conversion flags into (propURI, proptype, default).

    `flags` is one of:
        base                       -- scalar, required
        (base, proptype)           -- required
        (base, proptype, default)

    where
        base     -- an rdflib.Namespace (indexed with `propname`), an
                    rdflib.URIRef (used as the property URI as-is), or a
                    plain string (prepended to `propname`)
        proptype -- is None (for scalar) or list or set (the python types)
        default  -- is the default value to use if it's absent, or
                    NotConvertable if you want it to throw that error (ie to
                    not recognize the object -- ie to require this property)

    returns ( propuri, proptype, default )

    Raises RuntimeError if `flags` has any other length or `base` is of any
    other type.
    """
    if isinstance(flags, basestring):
        # A bare namespace/URI string: scalar and required.
        base = flags
        proptype = None
        default = NotConvertable
    elif len(flags) == 2:
        (base, proptype) = flags
        default = NotConvertable
    elif len(flags) == 3:
        (base, proptype, default) = flags
    else:
        raise RuntimeError(
            "flags for property %s must be a namespace/URI or a 2- or "
            "3-element sequence, got %s" % (repr(propname), repr(flags)))

    # Namespace is tested before URIRef and plain strings, preserving the
    # original precedence (presumably Namespace can itself be a string/URIRef
    # subclass, depending on the rdflib version).
    if isinstance(base, rdflib.Namespace):
        propURI = base[propname]
    elif isinstance(base, rdflib.URIRef):
        propURI = base
    elif isinstance(base, basestring):
        propURI = rdflib.URIRef(base + propname)
    else:
        raise RuntimeError(
            "cannot derive a URI for property %s from %s"
            % (repr(propname), repr(base)))

    return (propURI, proptype, default)


# other issues
#
#     - is None == absent from graph?  I think so
#     - recognition by type or properties?
    
class AutoConverter(Plugin):
    """Converter driven by a table of attribute-name -> flags entries.

    `cls` is the Python class to instantiate when reading from RDF, and
    `props` maps each attribute name to flags understood by decodeFlags().
    """

    def __init__(self, map, cls, props):
        Plugin.__init__(self, map)
        self.cls = cls
        self.props = props

    def toRDF(self, obj):
        """Write obj's mapped attributes to the graph under a new blank node.

        Scalars become one triple, lists become an rdf:List, sets become one
        triple per member.  Returns the new blank node.

        Raises NotConvertable if `obj` lacks one of the mapped attributes
        (so that another converter can be tried), and NotImplementedError
        for a property type other than None, list or set.

        NOTE(review): no rdf:type triple is written, while fromRDF() insists
        on one whenever the class's module defines a truthy webdata_ns --
        confirm how such objects are expected to round-trip.
        """
        # Read every attribute before touching the graph: an object that
        # belongs to some other converter is then rejected cleanly, without
        # an AttributeError escaping and without this converter leaving
        # partial triples behind.
        pending = []
        for (propname, flags) in self.props.iteritems():
            (propuri, proptype, default) = decodeFlags(propname, flags)
            try:
                value = getattr(obj, propname)
            except AttributeError:
                debug("toRDF", "object has no attribute %s" % repr(propname))
                raise NotConvertable("Missing attribute %s" % repr(propname))
            pending.append((propuri, proptype, value))

        graph = self.prmap.graph
        node = rdflib.BNode()
        # add class?
        for (propuri, proptype, value) in pending:
            if proptype is None:
                graph.add((node, propuri, self.prmap.toRDF(value)))
            elif proptype == list:
                # Build the rdf:List back to front so each cell can point
                # at the already-built remainder.
                rest = rdflib.RDF.nil
                for item in reversed(value):
                    listNode = rdflib.BNode()
                    valueNode = self.prmap.toRDF(item)
                    graph.add((listNode, rdflib.RDF.first, valueNode))
                    graph.add((listNode, rdflib.RDF.rest, rest))
                    rest = listNode
                graph.add((node, propuri, rest))
            elif proptype == set:
                # One triple per member.
                for item in value:
                    graph.add((node, propuri, self.prmap.toRDF(item)))
            else:
                # NotImplemented is a constant, not an exception class;
                # raising it only produced a confusing TypeError.
                raise NotImplementedError(
                    "property type %s is not supported" % repr(proptype))

        return node

    def fromRDF(self, node):
        """Build a self.cls instance from the properties found on `node`.

        If the module defining self.cls has a truthy `webdata_ns`, the node
        must carry the matching rdf:type, otherwise NotConvertable is raised;
        without one, recognition is purely by properties.

        Raises NotConvertable when a required scalar property is missing and
        the class was not matched by rdf:type, and RuntimeError when it is
        missing despite a class match or when a property type is not
        supported.
        """
        result = self.cls()    # a huge waste if we fail
        debug("fromRDF(", "Made an instance: %s" % repr(result))
        # Only wrap things that are not rdflib nodes yet (e.g. a plain URI
        # string).  Turning a BNode or Literal into a URIRef would change
        # the node's identity, so graph lookups could no longer find it.
        if not isinstance(node, (rdflib.URIRef, rdflib.BNode, rdflib.Literal)):
            node = rdflib.URIRef(node)
            debug("fromRDF", "wrapped node into URIRef: %s" % repr(node))

        graph = self.prmap.graph

        # hmmmm.   most of this code is written assuming we dispatch
        # to classes based on whatever works, rather than on the
        # RDF.type of the node.   But let's see if we can start trying
        # to use that type...
        for rdfcls in graph.objects(node, rdflib.RDF.type):
            debug("fromRDF", "is of class %s" % repr(rdfcls))
        # A module without webdata_ns falls back to property-based
        # matching instead of crashing with AttributeError.
        module_ns = getattr(sys.modules[self.cls.__module__],
                            "webdata_ns", None)
        classMatch = False
        if module_ns:
            rdfcls = rdflib.URIRef(module_ns + self.cls.__name__)
            debug("fromRDF", "graph has %s ?"
                  % repr((node, rdflib.RDF.type, rdfcls)))
            if (node, rdflib.RDF.type, rdfcls) in graph:
                debug("fromRDF", "THIS IS THE CLASS MATCH")
                classMatch = True
            else:
                debug("fromRDF)", "NOT CLASS MATCH.  try another converter.")
                raise NotConvertable("Python class and RDF class don't match")

        for (propname, flags) in self.props.iteritems():
            (propuri, proptype, default) = decodeFlags(propname, flags)
            debug("fromRDF", "looking for value for property: %s, %s"
                  % (propname, repr(propuri)))
            if proptype is None:
                debug("fromRDF", "decoding single-value")
                valueNode = graph.value(node, propuri)
                if valueNode is not None:
                    debug("fromRDF", "valueNode %s" % repr(valueNode))
                    value = self.prmap.fromRDF(valueNode)
                elif default is not NotConvertable:
                    debug("fromRDF", "it's missing, using default value")
                    value = default
                else:
                    debug("fromRDF", "it's missing but required")
                    debug("fromRDF)", "abandoning instance, %s" % repr(result))
                    # Once the rdf:type matched, a missing required property
                    # is a data error, not a reason to try other converters.
                    if classMatch:
                        raise RuntimeError(
                            "Missing required property %s" % repr(propuri))
                    raise NotConvertable(
                        "Missing property %s" % repr(propuri))
            elif proptype == list:
                debug("fromRDF", "decoding rdf:List type")
                listNode = graph.value(node, propuri)
                value = []
                for item in graph.items(listNode):
                    value.append(self.prmap.fromRDF(item))
            elif proptype == set:
                debug("fromRDF", "decoding multivalues as set() type")
                value = set()
                for item in graph.objects(node, propuri):
                    value.add(self.prmap.fromRDF(item))
            else:
                debug("fromRDF)", "abandoning instance, %s" % repr(result))
                raise RuntimeError("not implemented")
            setattr(result, propname, value)
        debug("fromRDF)", "succeeded - returning instance, %s" % repr(result))
        return result


def autoconvert(cls, **kwargs):
    """Register an AutoConverter for `cls`.

    kwargs maps from each slot (attribute name) to some info we need about
    the slot.  In the trivial case, it's just the Namespace object to use;
    see decodeFlags() for the accepted forms.  Some day it may include
    cardinality, etc.
    """
    debug("autoconvert", "called with %s, %s" % (cls.__name__, repr(kwargs)))
    # The registry holds factories that take the PythonRDFMap, so bind the
    # class and its property table into a one-argument callable.
    registerConverterClass(lambda map: AutoConverter(map, cls, kwargs))

def autoconvert2(cls):
    """like autoconvert, but it gets the property names from a dummy
    instance of the object.  IE whatever is set by __init__ is what
    we'll use.

    The namespace is the `webdata_ns` attribute of the module that defines
    the class.  Each public attribute's initial value selects its type:
    None or NotConvertable -> scalar, set() -> set, [] -> list.  The initial
    value is also recorded as the property's default (so NotConvertable
    marks the property as required).

    Callable attributes (methods) are skipped.  Names starting with "_" are
    skipped as internal.

    This isn't perfect.  There are various pythonic ways of having
    pseudo-properties, which this won't know about.  It just looks at
    what dir() reports for the instance.

    Raises RuntimeError for any other initial value.
    """
    dummy = cls()
    ns = sys.modules[dummy.__module__].webdata_ns

    kw = {}
    for p in dir(dummy):
        # skip _foo, because that's too internal, I think.
        # skip __foo__ because that's python internal
        if p.startswith("_"):
            continue
        value = getattr(dummy, p)
        if value is None:
            valueType = None
        elif value is NotConvertable:
            # Checked before the callable test: the NotConvertable class is
            # itself callable but is used here as the "required" marker.
            valueType = None
        elif callable(value):
            # dir() also lists methods; they are not data properties and
            # used to make every class with a public method fail here.
            continue
        elif value == set():
            valueType = set
        elif value == []:
            valueType = list
        else:
            raise RuntimeError(
                "initial value %s for property %s not implemented"
                % (repr(value), repr(p)))

        kw[p] = (ns, valueType, value)

    autoconvert(cls, **kw)

def namespaced(cls, attr=None):
    """
    Return the URI of a python class or its attribute

    The namespace and local name come from webdata._namespaced().  A "#" is
    inserted between them only when the namespace ends in an alphanumeric
    character, i.e. does not already end in a separator such as "#" or "/".
    """
    (ns, name) = webdata._namespaced(cls, attr)
    # isalnum must be *called*: the bare method object is always truthy, so
    # "#" used to be inserted unconditionally.  Slicing with [-1:] also
    # keeps an empty namespace from raising IndexError.
    if ns[-1:].isalnum():
        return ns + "#" + name
    return ns + name
   

def xxload(URI, publicURI=None):
    """For when you don't want to know/care about RDF

    Parse the document at `URI` into a fresh rdflib.ConjunctiveGraph (passing
    `publicURI` on to the parser), then return the Python object for the
    node named by `publicURI`, or by `URI` when no public URI is given.

    The URI is handed to PythonRDFMap.fromRDF() as given (it is not wrapped
    in an rdflib.URIRef here).  Raises NotConvertable if no registered
    converter accepts that node.

    (This function used to be defined twice, identically; the second
    definition simply replaced the first, so only one is kept.)
    """
    graph = rdflib.ConjunctiveGraph()
    graph.parse(URI, publicURI)
    debug("web", "Loaded %d triples" % len(graph))
    prmap = PythonRDFMap(graph)
    if publicURI is None:
        publicURI = URI
    return prmap.fromRDF(publicURI)


def load(view, resource, location):
    """Parse the document at `location` into the view's graph.

    `resource` is passed to the graph's parse() as its second positional
    argument.
    """
    target_graph = view.graph
    target_graph.parse(location, resource)


def toPython(view, node, autoload=True):
    """
    Look at the RDF graph around this node and figure out if we
    have a Python object which conveys the same information.

    Literals are decoded directly: plain literals and rdf:XMLLiteral become
    unicode, xsd:int becomes int; any other literal raises RuntimeError.

    Any other node must be a webdata.ID.  Its rdf:type values are looked up
    in view.classForID; exactly one most-specific Python class must result
    (RuntimeError otherwise).  That class is instantiated with no arguments
    and each public, non-callable attribute is replaced by the value found
    in the graph; attributes with no value in the graph keep their initial
    value.  `autoload` is passed through to the per-attribute handlers.
    """
    if isinstance(node, rdflib.Literal):
        debug("fromRDF", "trying to convert literal %s" % repr(node))
        if node.language is None and node.datatype is None:
            return unicode(node)
        if node.datatype == rdflib.URIRef('http://www.w3.org/2001/XMLSchema#int'):
            return int(node)
        if node.datatype == rdflib.URIRef('http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral'):
            s = unicode(node)
            debug("fromRDF", "... ended up with %s" % repr(s))
            return s
        raise RuntimeError("not implemented decoding type %s" % node.datatype)

    # Check the node type before node.rdfNode is first dereferenced, so a
    # wrong argument fails here rather than with an AttributeError below.
    assert isinstance(node, webdata.ID)

    debug("fromRDF(", "trying to convert %s" % repr(node))
    for (s, p, o) in view.graph.triples((node.rdfNode, None, None)):
        debug("fromRDF", "  %s" % repr(p))
        debug("fromRDF", "  = %s" % repr(o))
    debug("fromRDF", "--- end of properties")

    # Collect the most specific Python class(es) registered for the node's
    # rdf:type values.
    classes = []
    debug("x", "...foo")
    # NOTE(review): debug() is called with three arguments here and a few
    # lines below, unlike everywhere else -- confirm debugtools.debug
    # accepts that.
    debug("x", "node.rdfNode is", repr(node.rdfNode))
    for rdftype in view.graph.objects(node.rdfNode, rdflib.RDF.type):
        debug("x", "has rdf:type %s" % repr(rdftype))
        rdftypeID = webdata.normalizeID(rdftype)
        debug("x", " id %s" % repr(rdftypeID))
        try:
            pyclass = view.classForID[rdftypeID]
        except KeyError:
            debug("x", "No class for that id")
            continue
        add_subclass(pyclass, classes)

    debug("fromRDF", "classes: %s" % repr(classes))

    if len(classes) == 0:
        debug("x", "view.classForID:", view.classForID)
        # (An unreachable "classes = [ object ]" fallback used to follow
        # this raise; it has been dropped.)
        raise RuntimeError("No python class found to handle %s" % repr(node))
    if len(classes) > 1:
        raise RuntimeError("%d python classes found to handle %s"
                           % (len(classes), repr(node)))
    cls = classes[0]

    debug("fromRDF", "class: %s" % repr(cls))

    obj = cls()

    debug("fromRDF", "obj: %s" % repr(obj))

    try:
        attrs = obj.__slots__
    except AttributeError:
        attrs = dir(obj)   # includes some stuff we don't want... :-(

    for attr in attrs:

        # skip _foo, because that's too internal, I think.
        # skip __foo__ because that's python internal
        if attr.startswith("_"):
            continue

        init_value = getattr(obj, attr)
        if callable(init_value):
            continue

        debug("fromRDF(", "Looking in graph for values for %s" % repr(attr))
        # The attribute's initial value decides how it is read back
        # (scalar, link, list or set).
        value_type = determine_value_type(init_value)
        debug("fromRDF", "Value type: %s" % repr(value_type))
        propID = webdata.objID(cls, attr)
        debug("fromRDF", "Prop ID: %s" % repr(propID))
        try:
            new_value = value_type.get_value(view, node, propID, autoload)
        except KeyError:
            debug("fromRDF)", "not found; unchanged")
            continue   # just leave the value unchanged.

        debug("fromRDF)", "new value = %s" % repr(new_value))
        setattr(obj, attr, new_value)

    return obj

def add_subclass(new, all):
    """
    Add this new class to the given list of classes, except that
    whenever there's a subclass/superclass pair in the emerging list,
    discard the superclass.  We only want the most-specific class.

    The list `all` is modified in place.  If `new` is already in the list,
    or a subclass of `new` is, the list is left unchanged.  Otherwise
    *every* listed superclass of `new` is removed (there can be several
    with multiple inheritance) and `new` is appended.
    """
    for existing in all:
        if issubclass(existing, new):
            # Something at least as specific is already present.
            return
    # Slice-assign so the caller's list object is the one that changes.
    all[:] = [existing for existing in all if not issubclass(new, existing)]
    all.append(new)
   
class Scalar:
   """Handler for attributes which are not any sort of collection"""

   @staticmethod
   def get_value(view, subjID, propID, autoload):
      """Return the converted value of the property on the subject.

      The graph is asked for a value of (subjID.rdfNode, propID.rdfNode),
      which is then converted with view.toPython().  Raises KeyError when
      the graph has no value.
      """
      found = view.graph.value(subjID.rdfNode, propID.rdfNode)
      if found is not None:
         return view.toPython(found, autoload)
      raise KeyError

class Link:
   """for Content Link, I think...

   Unlike Scalar, the value node is not converted to a Python object; its
   string form is returned.
   """

   @staticmethod
   def get_value(view, subjID, propID, autoload):
      """Return str() of the property's value node.

      Raises KeyError when the graph has no value for the property.
      `autoload` is accepted for interface compatibility and not used.
      """
      target = view.graph.value(subjID.rdfNode, propID.rdfNode)
      if target is not None:
         return str(target)
      raise KeyError

class List:
   """Handler for attributes which are sequences of values

   If a value is present in the data, and it's a list, even an empty
   list, then this value will be set to an array of that list's values.

   If no value is present, the value will be untouched (left defaulted)

   If the value is present but not a list, it's an Exception.

   """
   @staticmethod
   def get_value(view, subjID, propID, autoload):
      """Return the rdf:List value of the property as a Python list.

      Each list entry is converted with view.toPython().  Raises KeyError
      when the property is absent (the caller then keeps the default) and
      RuntimeError when the value is neither rdf:nil nor a node with an
      rdf:first.
      """
      list_node = view.graph.value(subjID.rdfNode, propID.rdfNode)
      if list_node is None:
         raise KeyError
      debug("fromRDF", "Extracting List: %s" % repr(list_node))
      # rdf:nil is the empty list.  It has no rdf:first, so without this
      # check it was rejected below as "not a list".
      if list_node == rdflib.RDF.nil:
         return []
      first = view.graph.value(list_node, rdflib.RDF.first)
      if first is None:
         raise RuntimeError("list value expected, but not found (%s.%s)"
                            % (repr(subjID), repr(propID)))
      value = []
      for item in view.graph.items(list_node):
         debug("fromRDF", "Extracting List Entry: %s" % repr(item))
         value.append(view.toPython(item, autoload))
      return value

class Set:
   """Handler for attributes with multiple, unordered values

   Any values found are added to the set.
   """

   @staticmethod
   def get_value(view, subjID, propID, autoload):
      """Return the set of converted values of the property.

      Every object of (subjID.rdfNode, propID.rdfNode) in the graph is
      converted with view.toPython().  An absent property yields an empty
      set rather than an error.
      """
      members = view.graph.objects(subjID.rdfNode, propID.rdfNode)
      return set(view.toPython(member, autoload) for member in members)

def determine_value_type(init_value):
    """Pick the handler class for an attribute from its initial value.

    "http:" selects Link; None or any other string selects Scalar; a set
    selects Set; a list selects List.  Anything else raises RuntimeError.
    The tests are made in that order.
    """
    if init_value == "http:":
        return Link
    if init_value is None:
        return Scalar
    if isinstance(init_value, basestring):
        return Scalar
    if isinstance(init_value, set):
        return Set
    if isinstance(init_value, list):
        return List
    raise RuntimeError("initial value %s not implemented" % repr(init_value))

   
if __name__ == "__main__":
    # Run the doctests embedded in this module's docstrings when the file
    # is executed as a script.
    import doctest
    import sys
    this_module = sys.modules[__name__]
    doctest.testmod(this_module)
