# -*- coding: utf-8 -*-
"""
Implementation of the Literal handling. Details of the algorithm are described on
U{RDFa Task Force's wiki page<http://www.w3.org/2006/07/SWD/wiki/RDFa/LiteralObject>}.

@summary: RDFa Literal generation
@requires: U{RDFLib package<http://rdflib.net>}
@organization: U{World Wide Web Consortium<http://www.w3.org>}
@author: U{Ivan Herman<http://www.w3.org/People/Ivan/>}
@license: This software is available for use under the
U{W3C® SOFTWARE NOTICE AND LICENSE<http://www.w3.org/Consortium/Legal/2002/copyright-software-20021231>}
"""

"""
$Id: Literal.py,v 1.19 2008-05-12 18:20:46 ivan Exp $
$Date: 2008-05-12 18:20:46 $
"""

import re
from rdflib.Literal		import Literal
from rdflib.RDF			import RDFNS  as ns_rdf
from rdflib.RDF     	import XMLLiteral
from rdflib.Namespace	import Namespace

# RDFLib Namespace bound to the XML Schema namespace URI.
# NOTE(review): not referenced in the visible part of this module -- confirm whether
# other code relies on it before removing.
_XSD_NS = Namespace(u'http://www.w3.org/2001/XMLSchema#')

#### The real meat...
def generate_literal(node, graph, subject, state) :
	"""Generate the literal object(s) for the C{@property} attribute of C{node}, taking into
	account C{@content}, C{@datatype} and the language in effect.
	Note: this method is called only if the C{@property} is indeed present, no need to check.

	The value is chosen as follows:
	  - if C{@content} is present, its value is used as is (with the datatype, if any);
	  - otherwise, if C{@datatype} is present (even if empty), the content is either an XML
	    Literal (when the datatype is explicitly C{rdf:XMLLiteral}) or the pure text content;
	  - otherwise an XML Literal is generated if the element has element children, and a
	    plain literal (with language) if it has not.

	This method is an encoding of the algorithm documented on the
	U{task force's wiki page<http://www.w3.org/2006/07/SWD/wiki/RDFa/LiteralObject>}.

	Side effect: when an XML Literal is generated, the element children of C{node} are
	decorated (in the DOM tree itself) with namespace and C{xml:lang} attributes.

	@param node: DOM element node
	@param graph: the (RDF) graph to add the properties to
	@param subject: the RDFLib URIRef serving as a subject for the generated triples
	@param state: the current state to be used for the CURIE-s
	@type state: L{State.ExecutionContext}
	@return: whether a triple has been added to the graph or not
	@rtype: Boolean
	"""
	def _collect_text(parent, chunks) :
		"""
		Append (recursively, in document order) the data of all text nodes below a DOM Node
		to a list.

		@param parent: DOM Node
		@param chunks: list the text fragments are appended to
		"""
		for child in parent.childNodes :
			if child.nodeType == child.TEXT_NODE :
				chunks.append(child.data)
			elif child.nodeType == child.ELEMENT_NODE :
				_collect_text(child, chunks)
	# end _collect_text

	def _get_literal(Pnode) :
		"""
		Get (recursively) the full text from a DOM Node.

		@param Pnode: DOM Node
		@return: string
		"""
		chunks = []
		_collect_text(Pnode, chunks)
		rc = "".join(chunks)

		# The decision of the group in February 2008 is not to normalize the result by default.
		# This is reflected in the default value of the option
		if state.options.space_preserve :
			return rc
		# Normalize only once, on the complete text: normalizing (and stripping) at each
		# level of the recursion would glue together words separated only by white space
		# that sits at the edge of a nested element.
		return re.sub(r'(\r| |\n|\t)+', " ", rc).strip()
	# end _get_literal

	def _get_XML_literal(Pnode) :
		"""
		Get the XML Literal content of a DOM Node. (Most of the processing is done
		via a C{node.toxml} call of the xml minidom implementation.)

		@param Pnode: DOM Node
		@return: string
		"""
		def collectPrefixes(prefixes, element) :
			"""
			Collect (recursively) the namespace prefixes used in the element and attribute
			names of a DOM element and its descendants.

			@param prefixes: list of prefixes found so far; extended in place
			@param element: DOM element node
			"""
			def addPf(prefx, name) :
				pf = name.split(':')[0]
				# 'xml' and 'xmlns' are reserved and never need a declaration
				if pf != name and pf not in ("xml", "xmlns") and pf not in prefx :
					prefx.append(pf)
			# end addPf

			# first the name of the element itself
			addPf(prefixes, element.tagName)
			# then its attributes. In the DOM, attributes are not part of childNodes,
			# they are only reachable through the 'attributes' map
			attrs = element.attributes
			if attrs is not None :
				for index in range(attrs.length) :
					addPf(prefixes, attrs.item(index).name)
			# finally the element children
			for child in element.childNodes :
				if child.nodeType == child.ELEMENT_NODE :
					collectPrefixes(prefixes, child)
		# end collectPrefixes

		prefixes = []
		for child in Pnode.childNodes :
			if child.nodeType == child.ELEMENT_NODE :
				collectPrefixes(prefixes, child)

		chunks = []
		for child in Pnode.childNodes :
			if child.nodeType == child.TEXT_NODE :
				# NOTE(review): the text is added without XML escaping, as before -- confirm
				# whether '&' or '<' in text nodes should be escaped for an XML Literal
				chunks.append(child.data)
			elif child.nodeType == child.ELEMENT_NODE :
				# Decorate the element with namespaces and lang values
				for prefix in prefixes :
					attr_name = "xmlns:%s" % prefix
					if prefix in state.ns and not child.hasAttribute(attr_name) :
						child.setAttribute(attr_name, "%s" % state.ns[prefix])
				# Set the default namespace, if not done (and is available). hasAttribute is
				# used so that an explicit, empty xmlns="" on the element is respected
				if state.defaultNS is not None and not child.hasAttribute("xmlns") :
					child.setAttribute("xmlns", state.defaultNS)
				# Set the lang, if necessary; again, an explicit xml:lang="" is kept
				if state.lang is not None and not child.hasAttribute("xml:lang") :
					child.setAttribute("xml:lang", state.lang)
				chunks.append(child.toxml())
		return "".join(chunks)
		# If XML Literals must be canonicalized for space, then this is the return line:
		#return re.sub(r'(\r| |\n|\t)+'," ","".join(chunks)).strip()
	# end _get_XML_literal

	def _has_markup(Pnode) :
		"""
		@param Pnode: DOM Node
		@return: whether the node has at least one element child
		"""
		for child in Pnode.childNodes :
			if child.nodeType == child.ELEMENT_NODE :
				return True
		return False
	# end _has_markup

	retval = False
	# Get the Property URI-s
	props = state.get_resources(node.getAttribute("property"), prop=True)

	# Get, if exists, the value of @datatype. An empty @datatype counts as "set", but
	# leaves the datatype itself empty
	datatype = ''
	dtset    = node.hasAttribute("datatype")
	if dtset :
		dt = node.getAttribute("datatype")
		if dt != "" :
			datatype = state.get_resource(dt)

	lang = state.lang
	if lang is None :
		lang = ''

	if node.hasAttribute("content") :
		# The simple case: separate @content attribute.
		# The value of datatype has been set, and the keyword parameters take care of the rest
		literal = Literal(node.getAttribute("content"), datatype=datatype, lang=lang)
	elif dtset :
		# There *is* a datatype (even if it is empty!). The Literal content is the pure text
		# part of the current element, unless the specified datatype is, in fact, an
		# explicit XML Literal
		if datatype == XMLLiteral :
			literal = Literal(_get_XML_literal(node), datatype=XMLLiteral)
		else :
			literal = Literal(_get_literal(node), datatype=datatype, lang=lang)
	elif _has_markup(node) :
		# no controlling @datatype, but there is markup in the contained element:
		# an XML Literal should be generated
		literal = Literal(_get_XML_literal(node), datatype=XMLLiteral)
	else :
		literal = Literal(_get_literal(node), lang=lang)

	# The object may be empty, for example in an ill-defined <meta> element...
	if literal != "" :
		for prop in props :
			graph.add((subject, prop, literal))
			retval = True

	# Report what really happened rather than an unconditional True
	return retval

