#!/usr/bin/python
# -*- coding: utf-8 -*-
""" $Id: grddl.py,v 1.47 2013-10-21 15:55:46 dom Exp $
"""

import cgi
import sys
import os
import urlparse


# Error records (one dict per reported message) accumulated by grddlhandler.
# serveRequest empties the list before the GRDDL pass and reports its content.
errors = []
# Set to True by rdfxmlhandler the first time the RDF/XML parser invokes it;
# serveRequest then retries the same input with the GRDDL parser.
notrdfxml = False

def rdfxmlhandler(code, level, facility, message, line, column, byte, file, uri):
    """Flag the current input as not being plain RDF/XML.

    serveRequest hands this callback to the "rdfxml" parser for its first
    parsing attempt.  Any invocation sets the module-level ``notrdfxml``
    flag, which makes serveRequest fall back to the GRDDL parser.

    The details of the reported message (all nine arguments) are
    deliberately ignored: the GRDDL pass starts from an empty error list
    anyway, so nothing recorded here would ever be shown.
    """
    global notrdfxml
    notrdfxml = True

def grddlhandler(code, level, facility, message, line, column, byte, file, uri):
    """Record one message reported during the GRDDL parsing pass.

    serveRequest hands this callback to the "grddl" parser.  Every call
    appends a dict holding all nine arguments, keyed by parameter name, to
    the module-level ``errors`` list.
    """
    record = dict(
        code=code,
        level=level,
        facility=facility,
        message=message,
        line=line,
        column=column,
        byte=byte,
        file=file,
        uri=uri,
    )
    # The list is only mutated, never rebound, so no ``global`` is required.
    errors.append(record)

def urifilter(uri):
    """Filter URIs that can be loaded from GRDDL operations.

    Returns True when ``check_url_safety`` accepts ``uri`` and False when it
    raises ``UnsupportedResourceError``; any other exception propagates.

    It would be nice to deal with Basic Auth too, but the current API is
    not flexible enough for that.
    """
    # cf http://dev.w3.org/cvsweb/2004/PythonLib-IH/checkremote.py
    from checkremote import check_url_safety, UnsupportedResourceError
    try:
        check_url_safety(uri)
    except UnsupportedResourceError:
        allowed = False
    else:
        allowed = True
    return allowed

# Opening part of every HTML page printed by serveRequest: doctype, <head>
# (styles and scripts) and the banner.  It leaves <body> open; Page2 supplies
# the forms, the footer and the closing </body></html>.
Page = """
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en-US">
<head><title>W3C GRDDL service</title>
    <style type="text/css" media="all">
    @import "http://validator.w3.org/style/base.css";
    #error_loop { list-style: disc;}
    #results { padding:1em;}
    </style>
    <script type="text/javascript" src="http://validator.w3.org/scripts/mootools.js"></script>

    <script type="text/javascript" src="grddl.js"></script>    
</head>
<body>
<div id="banner">
<h1 id="title">
<a href="http://www.w3.org/"><img alt="W3C" width="110" height="61" id="logo" src="http://validator.w3.org/images/w3c.png" /></a> <span>GRDDL Service</span></h1>
<p id="tagline">Extracting RDF from XHTML/XML using <a href="http://www.w3.org/TR/grddl/"><abbr title="Gleaning Resource Descriptions from Dialects of Languages">GRDDL</abbr></a></p>
</div>

"""
# Closing part of every HTML page: the two submission forms, the credits and
# the end of the document.  This is a %-format template with exactly two %s
# slots, in this order:
#   1. the value of the "docAddr" text input (inside a double-quoted attribute),
#   2. the content of the "fragment" textarea.
# Both slots receive text placed verbatim into markup, so callers must
# HTML-escape what they pass in.  The by-URI form submits its format choice
# as "output", the direct-input form submits it as "output2".
Page2 = """
<div id="frontforms">
	<ul id="tabset_tabs">
		<li><a href="#validate-by-uri"><span>Validate by</span> URI</a></li>

		<li><a href="#validate-by-input"><span>Validate by</span> Direct Input</a></li>
	</ul>
<div id="fields">
<fieldset id="validate-by-uri" class="tabset_content front"><legend class="tabset_label">Validate by URI</legend>
<form method="get" action="">
<p><label for="uri">Address of the document to extract RDF from:</label> <input name="docAddr" id="uri" value="%s" size="50"/></p>
<p><label for='output'>Output format:</label> <select name='output' id='output'><option selected='selected' value='rdfxml'>RDF/XML as application/rdf+xml</option><option value='textxml'>RDF/XML as text/xml</option><option value='turtle'>Turtle as application/x-turtle</option><option value='turtlen3'>Turtle as text/rdf+n3</option><option value='turtleplain'>Turtle as text/plain</option></select></p>
<p class="submit_button"><input type="submit" value="get GRDDL results"/></p>
</form>
</fieldset>

<fieldset id="validate-by-input"  class="tabset_content front"><legend class="tabset_label">Validate by direct input</legend>
                  <form method="post" enctype="multipart/form-data" action="">
<p class="instructions"><label title="Paste a complete (HTML) Document here" for="fragment">Enter the document markup to GRDDL</label>:<br />
<textarea id="fragment" name="fragment" rows="12" cols="80">%s</textarea>
                        </p>
                        <p><label for='output2'>Output format:</label> <select name='output2' id='output2'><option selected='selected' value='rdfxml'>RDF/XML as application/rdf+xml</option><option value='textxml'>RDF/XML as text/xml</option><option value='turtle'>Turtle as application/x-turtle</option><option value='turtlen3'>Turtle as text/rdf+n3</option><option value='turtleplain'>Turtle as text/plain</option></select></p>
<p class="submit_button">
                  							<input title="Submit for validation" type="submit" value="get GRDDL results" />
                  					</p>
                                                        </form>

                        </fieldset>
</div>
</div>
<hr />
<h2>Stuff used to build this service</h2>
<ul>
<li><a href="http://dev.w3.org/cvsweb/2007/grddl-cgi/">Python script</a> based on </li>
<li><a href="http://librdf.org/docs/python.html">python-librdf</a></li>
<li><a href="http://www.python.org/">python</a>, apache, etc.</li>
</ul>
<address>
script $Revision: 1.47 $ of $Date: 2013-10-21 15:55:46 $<br />
by <a href="http://www.w3.org/People/Dom/">Dominique Hazaël-Massieux</a><br />
but I didn't do the real work, i.e. writing redland
</address>
</body>
</html>
"""



# Output formats selectable through the "output" (by-URI form) and "output2"
# (direct-input form) fields: value -> (serialize as Turtle?, Content-Type).
# "rdfxml" is deliberately absent: like any unrecognised value it keeps the
# defaults (RDF/XML as application/rdf+xml, or whatever the legacy "turtle"
# flag asked for).
_OUTPUT_FORMATS = {
    'textxml': (False, 'text/xml'),
    'turtle': (True, 'application/x-turtle'),
    'turtlen3': (True, 'text/rdf+n3'),
    'turtleplain': (True, 'text/plain'),
}


def _escape(value, quote=False):
    """Format value as text and escape it for inclusion in HTML/XML output.

    With quote=True the result is also safe inside a single- or
    double-quoted attribute value.
    """
    escaped = cgi.escape("%s" % (value,), quote)
    if quote:
        # cgi.escape() only handles double quotes, but the error page uses
        # single-quoted attributes (href='...').
        escaped = escaped.replace("'", "&#39;")
    return escaped


def _single_line(value):
    """Format value as text with line breaks collapsed to single spaces."""
    return " ".join(("%s" % (value,)).splitlines())


def _select_output(fields):
    """Return (turtleOutput, contenttype) for the submitted form fields."""
    # NOTE(review): the legacy "turtle" flag alone yields Turtle served as
    # application/rdf+xml; kept as is for backward compatibility.
    turtleOutput = bool(fields.getfirst('turtle'))
    contenttype = 'application/rdf+xml'
    # The direct-input form names its selector "output2", the URI form "output".
    requested = fields.getfirst('output') or fields.getfirst('output2')
    if requested in _OUTPUT_FORMATS:
        turtleOutput, contenttype = _OUTPUT_FORMATS[requested]
    return turtleOutput, contenttype


def serveRequest():
    """Handle one CGI request and print the complete response to stdout.

    Without a "docAddr" or "fragment" field the empty submission form is
    served.  Otherwise the input is first parsed as RDF/XML; if that parser
    reports anything (see rdfxmlhandler) the input is parsed again with the
    GRDDL parser.  On success the resulting model is serialized as RDF/XML
    or Turtle, followed by a comment naming the source and listing the
    recorded errors.  On failure an HTML page lists the errors and shows
    the form again, pre-filled with the submitted value.

    Addresses rejected by urifilter get a "403" response, after which the
    process terminates through sys.exit(1).

    All user-supplied text is escaped before being written into HTML
    markup or into the trailing XML comment.
    """
    # Single-argument print(...) behaves identically as a Python 2 print
    # statement and as a Python 3 function call.
    global errors
    fields = cgi.FieldStorage()

    if 'docAddr' not in fields and 'fragment' not in fields:
        print("Content-Type: text/html;charset=utf-8")
        print("")
        print(Page)
        print(Page2 % ("", ""))
        return

    import RDF
    model = RDF.Model()
    rdfparser = RDF.Parser(name="rdfxml")
    addr = None
    markup = None
    res = None
    if 'docAddr' in fields:
        # getfirst() copes with a field that was submitted more than once.
        addr = fields.getfirst('docAddr')
        if not urifilter(addr):
            print("Status: 403")
            print("Content-Type: text/plain")
            print("")
            print("sorry, I decline to handle this type of addresses")
            sys.exit(1)
        # Trying to parse as RDF/XML first per http://www.w3.org/TR/2007/PR-grddl-tests-20070716/#rdfXMLDoc
        res = rdfparser.parse_into_model(model, addr, None, rdfxmlhandler)
    else:
        markup = fields.getfirst("fragment")
        # Trying to parse as RDF/XML first per http://www.w3.org/TR/2007/PR-grddl-tests-20070716/#rdfXMLDoc
        res = rdfparser.parse_string_into_model(model, markup, 'http://example.org/', rdfxmlhandler)

    if notrdfxml:
        # Start over with a fresh model and error list for the GRDDL pass.
        errors = []
        model = RDF.Model()
        grddlparser = RDF.Parser(name="grddl")
        # This requires a really recent version of python-librdf
        grddlparser.set_uri_filter(urifilter)
        if addr:
            res = grddlparser.parse_into_model(model, addr, None, grddlhandler)
        else:
            res = grddlparser.parse_string_into_model(model, markup, 'http://example.org/', grddlhandler)

    if res and (len(model) or not len(errors)):
        turtleOutput, contenttype = _select_output(fields)
        print("Content-Type: %s" % (contenttype))
        print("")
        if turtleOutput:
            serializer = RDF.Serializer(name="turtle")
        else:
            serializer = RDF.RDFXMLSerializer()
        print(serializer.serialize_model_to_string(model))
        if not turtleOutput:
            # Escaping keeps "-->" in user input from closing the comment early.
            if addr:
                shown = _escape(addr)
                print("<!-- Extracted from %s by http://www.w3.org/2007/08/grddl/ at http://www.w3.org/2007/08/grddl/?docAddr=%s -->\n" % (shown, shown))
            elif markup:
                print("<!-- Extracted from pasted markup by http://www.w3.org/2007/08/grddl/ -->")
            if len(errors):
                print("<!-- The following errors where encountered while processing the resource:")
                for err in errors:
                    print("-%s at line %d, column %d in %s\n" % (_escape(err["message"]), err["line"], err["column"], _escape(err["uri"])))
                print("-->")
        else:
            # A "#" comment ends at the line break, so every value is forced
            # onto a single line to keep the Turtle output valid.
            if addr:
                shown = _single_line(addr)
                print("# Extracted from %s by http://www.w3.org/2007/08/grddl/ at http://www.w3.org/2007/08/grddl/?docAddr=%s " % (shown, shown))
            elif markup:
                print("# Extracted from pasted markup by http://www.w3.org/2007/08/grddl/ ")
            if len(errors):
                print("# The following errors where encountered while processing the resource:")
                for err in errors:
                    print("# -%s at line %d, column %d in %s\n" % (_single_line(_escape(err["message"])), err["line"], err["column"], _single_line(err["uri"])))
    else:
        print("Content-Type: text/html;charset=utf-8")
        print("")
        print(Page)
        if addr:
            print("<div id='results'><p>The following errors were encountered when trying to parse <a href='%s'>%s</a>:</p><ol id='error_loop'>" % (_escape(addr, True), _escape(addr)))
        else:
            print("<div id='results'><p>The following errors were encountered:</p><ol id='error_loop'>")
        for err in errors:
            print("<li class='msg_err'><span class='msg'>%s</span> at <em>line %d, column %d</em> in %s</li>" % (_escape(err["message"]), err["line"], err["column"], _escape(err["uri"])))
        print("</ol></div>")
        if addr:
            print(Page2 % (_escape(addr, True), ""))
        else:
            print(Page2 % ("", cgi.escape(markup)))
            

# Serve a request only when executed as a script with SCRIPT_NAME present in
# the environment (presumably set by the web server for CGI -- confirm);
# importing the module or running it without that variable does nothing.
if __name__ == '__main__' and 'SCRIPT_NAME' in os.environ:
    serveRequest()
