"""
    safe HTML macro

    by Dominique Hazael-Massieux <dom@w3.org>

"""
#DESCRIPTION: Outputs the HTML code contained within the parentheses, e.g. [[HTML(<em>emphasized</em>)]], after removing markup that is not considered safe (e.g. scripting)

import sys
import xml.sax, xml.sax.saxutils

class SafeXhtmlCleaner(xml.sax.saxutils.XMLGenerator):
    """SAX handler that re-serializes only a whitelisted subset of XHTML.

    The content is parsed as XML; start/end tags of elements that are not
    whitelisted are dropped (their character data is still written), and
    attributes considered unsafe are removed from the elements that are kept.
    """

    def __init__(self,out=sys.stdout,encoding="utf-8"):
        """Set up the filter.

        out      -- file-like object the cleaned markup is written to
        encoding -- encoding handed to the underlying XMLGenerator
        """
        self._out = out
        # Elements that are written back out. Matching is case-sensitive, so
        # only the lower-case XHTML spelling is accepted.
        self._safeElements=["a", "abbr", "bdo", "blockquote", "br", "code", "dd", "del", "dl", "dt", "em", "ins", "li", "ol", "p", "pre", "select", "span", "strong", "table", "tbody", "td", "th", "tr", "ul"]
        # Attributes that are always removed (compared in lower case).
        self._unsafeAttributes=["onblur","onchange","onclick","ondblclick","onfocus","onkeydown","onkeypress","onkeyup","onload","onmousedown","onmousemove","onmouseover","onmouseout","onmouseup","onreset","onselect","onsubmit","onunload"]
        # Attributes whose value is a URL, and the URL schemes that can carry
        # script and must therefore never be emitted in them.
        self._urlAttributes=["href","src","cite","action","background","longdesc","xlink:href"]
        self._unsafeSchemes=["javascript","vbscript","data"]

        xml.sax.saxutils.XMLGenerator.__init__(self,out,encoding)

        # No validation and no external entities: the input is untrusted and
        # must not make the parser fetch external resources.
        self._parser = xml.sax.make_parser()
        self._parser.setFeature(xml.sax.handler.feature_validation,0)
        self._parser.setFeature(xml.sax.handler.feature_external_ges,0)
        self._parser.setFeature(xml.sax.handler.feature_external_pes,0)
        self._parser.setContentHandler(self)

    def startDocument(self):
        """Write nothing: the output is a fragment, so no XML declaration."""
        return

    def _hasUnsafeScheme(self,value):
        """Return True if the URL `value` starts with a script-capable scheme."""
        # Whitespace and control characters are removed before comparing so
        # that e.g. "java<TAB>script:" is recognised as well.
        compact = "".join([ch for ch in value if ch > " "]).lower()
        for scheme in self._unsafeSchemes:
            if compact.startswith(scheme + ":"):
                return True
        return False

    def _isSafeAttribute(self,attrname,attrvalue):
        """Return True if the attribute may be copied to the output."""
        lowered = attrname.lower()
        # HTML attribute names are case-insensitive for browsers, and every
        # "on*" attribute is an event handler, listed explicitly or not.
        if lowered in self._unsafeAttributes or lowered.startswith("on"):
            return False
        if lowered in self._urlAttributes and self._hasUnsafeScheme(attrvalue):
            return False
        return True

    def startElement(self,name,attrs):
        """Write the start tag of a whitelisted element with its safe attributes."""
        if name not in self._safeElements:
            return
        safeAttrs = {}
        for attrname,attrvalue in attrs.items():
            if self._isSafeAttribute(attrname,attrvalue):
                safeAttrs[attrname]=attrvalue
        xml.sax.saxutils.XMLGenerator.startElement(self,name,safeAttrs)

    def endElement(self,name):
        """Write the end tag of a whitelisted element; ignore any other."""
        if name not in self._safeElements:
            return
        xml.sax.saxutils.XMLGenerator.endElement(self,name)

    def clean(self,content):
        """Parse `content` and write its cleaned form to the output stream.

        Raises xml.sax.SAXParseException if `content` is not well-formed.
        Output produced before the error has already been written by then.
        """
        # cStringIO only exists on Python 2; fall back to io.StringIO elsewhere.
        try:
            from cStringIO import StringIO
        except ImportError:
            from io import StringIO
        # The <div> wrapper gives the fragment a single root element; "div" is
        # not whitelisted, so the wrapper itself never reaches the output.
        fp = StringIO("<div>" + content + "</div>")
        self._parser.parse(fp)
        


def execute(macro, args):
    """Macro entry point: return the cleaned form of `args` as raw HTML.

    macro -- macro object; only macro.formatter.rawHTML() is used
    args  -- the markup given to the macro

    The markup is run through SafeXhtmlCleaner. If it is not well-formed,
    an escaped error message followed by the escaped input is returned
    instead.
    """
    # cStringIO only exists on Python 2; fall back to io.StringIO elsewhere.
    try:
        from cStringIO import StringIO
    except ImportError:
        from io import StringIO
    # NOTE(review): args is presumably None when the macro is used without
    # parentheses -- confirm against the caller. Treat that as empty content
    # rather than rendering the literal text "None".
    if args is None:
        source = ""
    else:
        source = str(args)
    fp = StringIO()
    sxc = SafeXhtmlCleaner(fp)
    try:
        sxc.clean(source)
        ret = fp.getvalue()
    except xml.sax.SAXParseException as msg:
        # Escape both the parser message and the input: they are inserted
        # into raw HTML. saxutils.escape replaces &, < and >.
        escape = xml.sax.saxutils.escape
        ret = """<p><strong class="error">inserting non well-formed content failed</strong>: %s</p>
        <pre><code> %s </code></pre>""" %(escape(str(msg)), escape(source))
    return macro.formatter.rawHTML(ret)

import unittest

class Tests(unittest.TestCase):
    """Unit tests for SafeXhtmlCleaner."""

    def _testContent(self,content,result):
        """Clean `content` and assert that the output equals `result`."""
        # cStringIO only exists on Python 2; fall back to io.StringIO elsewhere.
        try:
            from cStringIO import StringIO
        except ImportError:
            from io import StringIO
        fp = StringIO()
        sxc = SafeXhtmlCleaner(fp)
        sxc.clean(content)
        self.assertEqual(fp.getvalue(),result)

    def testWithNoChange(self):
        # Whitelisted elements with harmless attributes pass through verbatim.
        safeHTML = """Here is some <a href="" id="foo">HTML</a> that should pass as is; I can include a <table><tr><td>table</td></tr></table> if I want to"""
        self._testContent(safeHTML,safeHTML)

    def testWithUnsafeElements(self):
        # The tags of a non-whitelisted element are dropped, its text is kept.
        unsafeHTML = "I'm inserting an evil <script>script</script>, will that go through?"
        madeSafe = "I'm inserting an evil script, will that go through?"
        self._testContent(unsafeHTML,madeSafe)

    def testWithUnsafeAttributes(self):
        # Event-handler attributes are removed from elements that are kept.
        unsafeHTML = """I'm inserting a <a onclick='alert("foo");'>link with an evil onclick</a> attribute"""
        madeSafe = """I'm inserting a <a>link with an evil onclick</a> attribute"""
        self._testContent(unsafeHTML,madeSafe)

    def testWithNonWellformedContent(self):
        # Content that is not well-formed XML makes clean() raise.
        nonwfHTML = """This is a non closed <a href="">link"""
        self.assertRaises(xml.sax.SAXParseException, self._testContent,nonwfHTML,"")

# Run the unit tests in this file when it is executed directly as a script.
if __name__ == '__main__':
    unittest.main()

