"""Generate a random instance tree

>>> import randomdata
>>> import loader
>>> model = loader.load('test-data/poscond.asn')
>>> p = randomdata.Populator(model=model)
>>> root = p.run()

>>> p.graph.serialize("randomout.rdf", format="pretty-xml")

"""

# Revision and source path in "$Keyword: value $" form -- presumably
# RCS/CVS keyword strings filled in by the version-control system.
__version__ = "$Revision: 1.2 $"
# $Source: /sources/public/2007/asn/randomdata.py,v $

import sys
import time
import random

import rdflib

import langio
import objectmodel

# XML Schema datatype namespace.  XSD.string is the one property range
# that Populator.new_instance turns into a literal rather than looking
# it up among the model's classes.
XSD = rdflib.Namespace('http://www.w3.org/2001/XMLSchema#')

class Error(RuntimeError):
    """Raised when this module cannot generate data for a request
    (for instance, a property range that is not a class in the model)."""

class TooRecursive(RuntimeError):
    """Raised when instance generation nests too deeply.

    The usual cause is a model with a required property whose range
    is its own class, so no finite instance exists.
    """

def populate(**kwargs):  #  graph, model, seed, size, ...
    """Convenience wrapper: build a Populator and generate one tree.

    All keyword arguments are passed straight through to the
    Populator constructor (see Populator.__init__ for the ones it
    understands itself: model, seed, size, words).

    Returns whatever Populator.run() returns, i.e. the subject node of
    the root instance that was generated.
    """
    # The original called an undefined name ``Parser`` with the kwargs
    # dict as a single positional argument, which could only raise
    # NameError.  Populator is the class this module defines, and run()
    # is its entry point.
    populator = Populator(**kwargs)
    return populator.run()
             
class Populator(langio.Parser):
    """Parse random data from thin air.  :-)

    Rather than reading any input, this "parser" invents a random
    instance tree that follows the classes and properties of an object
    model, and records it as triples in self.graph.
    """

    def __init__(self, **kwargs):
        """Arguments: (inherited, plus....)

            model = the object model to generate instances of
                   (defaults to a new, empty objectmodel.ObjectModel)
            seed = (seed value for random number generator, defaults
                   to using the current time)
            size = a rough guess for how many leaves you want in the
                   generated tree.   I think the actual size depends
                   on the grammar,  but this should be some guide
            words = list of words available to xrandom_string

        """

        langio.Parser.__init__(self, **kwargs)

        self.model = kwargs.get("model", objectmodel.ObjectModel())
        self._seed = kwargs.get("seed", time.time())
        # A private generator, so that a given seed reproduces the same
        # sequence regardless of other users of the random module.
        self._random = random.Random()
        self._random.seed(self._seed)

        size = kwargs.get("size", 10)
        # Probability of expanding at depth 1; larger size -> closer to 1.
        self._growth_probability = (size-1.0)/size

        self.words = kwargs.get("words",
                                ["apple", "orange", "yellow",
                                 "blue", "banana", "dog", "cat",
                                 "lorem", "ipsum", ":-)"])

    def _trace(self, *parts):
        """Write the given values to stderr as one space-separated line.

        Produces the same text the former ``print >>sys.stderr, ...``
        statements did, but also works where print is a function.
        """
        sys.stderr.write(" ".join([str(part) for part in parts]) + "\n")

    def _should_expand(self, depth=1):
        """randomly returns True or False -- more likely to
        return True if growth_probability is higher (aka
        size is higher), and less likely the deeper we are:
        the threshold is growth_probability ** depth.

        depth defaults to 1 (threshold == growth_probability) so that
        callers with no notion of depth, such as xrandom_string, can
        call this without arguments.
        """
        this = self._random.random()
        limit = self._growth_probability ** float(depth)
        self._trace("random ", this, "<", limit, "?", (this < limit))
        return this < limit

    def run(self):   # allow __init__ args again?
        """Return a random graph object, and add triples to
        the graph in keeping with the grammar.

        The root is an instance of the first class in the model,
        generated at depth 1.
        """
        cls = self.model.classes[0]
        return self.new_instance_in_model(cls, 1)

    def _new_subject(self):
        """Return a fresh node to use as the subject of a new instance."""
        return rdflib.BNode()

    def _pick_leaf_subclass(self, cls):
        """Walk down from cls, choosing a random subclass at each
        level, until reaching a class with no subclasses; return it.
        Returns cls itself when it has no subclasses.
        """
        while True:
            subs = list(self.model.subclassesForClass(cls))
            if not subs:
                return cls
            cls = self._random.choice(subs)

    def new_instance_in_model(self, cls, depth):
        """Generate an instance of cls (or of a randomly chosen leaf
        subclass of it), add its triples to self.graph, and return
        the subject node.

        Optional properties are included only when _should_expand(depth)
        says so; multi-valued properties keep getting values while
        _should_expand(depth) keeps returning True.
        """
        ocls = cls
        cls = self._pick_leaf_subclass(cls)
        self._trace("New instance of ", ocls, cls, "depth", depth)

        subject = self._new_subject()
        self.graph.add((subject, rdflib.RDF.type, rdflib.URIRef(cls.name)))

        for p in self.model.propertiesForClassWithInheritance(cls):
            if p.optional and not self._should_expand(depth):
                continue
            while True:
                if p.list:
                    value = self.new_list(p.to, depth+1)
                else:
                    value = self.new_instance(p.to, depth+1)
                # Use the subject node unchanged.  Wrapping it in
                # URIRef() would attach the properties to a different
                # node than the blank node carrying the rdf:type
                # triple above.
                self.graph.add((subject,
                                rdflib.URIRef(p.name),
                                value))
                if not (p.multi and self._should_expand(depth)):
                    break
                # Each extra value bumps depth, lowering the odds of
                # further expansion for this property and for every
                # later property of this instance.
                depth += 1
        return subject

    def new_list(self, clsuri, depth):
        """Generate a random-length RDF list (rdf:first / rdf:rest
        cells ending in rdf:nil) whose items are instances of clsuri.
        Returns the head node, or rdf:nil for an empty list.
        """
        if not self._should_expand(depth):
            return rdflib.RDF.nil
        result = rdflib.BNode()
        # Item first, then the tail: this order fixes the sequence in
        # which random numbers are consumed.
        first = self.new_instance(clsuri, depth+1)
        rest = self.new_list(clsuri, depth+1)
        self.graph.add((result, rdflib.RDF.first, first))
        self.graph.add((result, rdflib.RDF.rest, rest))
        return result

    def new_instance(self, clsuri, depth):
        """Generate a value for a property whose range is clsuri.

        Returns a string literal for XSD.string, otherwise the subject
        of a new instance of the model class named clsuri.  Past depth
        30 a marker literal is returned instead of recursing further.

        Raises TooRecursive past depth 100, and Error when clsuri is
        neither XSD.string nor the name of a class in the model.
        """
        if depth > 100:
            raise TooRecursive("Either your random numbers are very bad or your model has no finite instances.  Try making something optional.")
        if depth > 30:
            # Cut the tree off with a deliberately invalid placeholder.
            return rdflib.Literal('***TRUNCATED:INVALID***')
        if clsuri == XSD.string:
            return rdflib.Literal(self.random_string())
        for cls in self.model.classes:
            if cls.name == clsuri:
                return self.new_instance_in_model(cls, depth)
        raise Error("What to do about external ranges?")

    def random_string(self):
        """Return the text used for string-valued properties
        (currently a fixed placeholder)."""
        return "---text-goes-here---"

    def xrandom_string(self):
        """Return a random number of words from self.words, joined by
        single spaces (possibly the empty string).

        >>> import randomdata
        >>> r = randomdata.Populator(seed=0, size=10, words=["a", "b", "c"])
        >>> r.xrandom_string()
        'c a b a b'
        >>> r = randomdata.Populator(seed=10, size=10, words=["a", "b", "c"])
        >>> r.xrandom_string()
        'b a c a a c'

        """
        result = []
        # No depth here: relies on _should_expand's default depth of 1,
        # i.e. keep adding words with probability growth_probability.
        while self._should_expand():
            result.append(self._random.choice(self.words))
        return " ".join(result)

if __name__ == "__main__":
    # Run as a script: execute every doctest in this module.  With no
    # argument, testmod() examines the __main__ module, which is this
    # module here.
    import doctest

    doctest.testmod()

    
