
# Named groups of attributes used to condense the report: when every attribute
# of a group occurs for an element (across all compared DTDs taken together),
# the group gets a single row instead of one entry per attribute.
_ATTRIBUTE_SETS = {
    'Common': set(['class', 'id', 'style', 'title', 'xml:lang']),
    'Events': set(['onclick', 'ondblclick', 'onkeydown', 'onkeypress',
                   'onkeyup', 'onmousedown', 'onmousemove', 'onmouseout',
                   'onmouseover', 'onmouseup']),
    'I18N': set(['dir', 'lang']),
    'XML': set(['xml:space', 'xmlns', 'xmlns:xsi']),
}


def _html_escape(text):
    """Escape text for HTML content or a quoted HTML attribute value."""
    from xml.sax.saxutils import escape
    return escape(text, {"'": "&#39;", '"': "&quot;"})


def _print_element_rows(uris, elem_name, dtds, attributes):
    """Print the table rows describing one element.

    uris       -- the compared DTD URIs, in column order
    elem_name  -- name of the element
    dtds       -- dict mapping each URI that declares the element to the set
                  of attribute names that DTD declares for it
    attributes -- union of the attribute names over all compared DTDs
    """
    set_names = [name for name in sorted(_ATTRIBUTE_SETS)
                 if _ATTRIBUTE_SETS[name] <= attributes]
    # Attributes not covered by any reported group are listed individually.
    # Computed for every element (possibly empty) so that nothing leaks over
    # from the previously rendered element.
    leftover = set(attributes)
    for name in set_names:
        leftover -= _ATTRIBUTE_SETS[name]
    leftover = sorted(leftover)

    # One row for the element itself, one per group, one for the leftovers.
    rowspan = 1 + len(set_names)
    if leftover:
        rowspan += 1
    rowspan_markup = ""
    if rowspan > 1:
        rowspan_markup = " rowspan='%d'" % rowspan

    print("<tr><th%s class='element'>%s</th><td class='element'></td>"
          % (rowspan_markup, elem_name))
    for uri in uris:
        if uri in dtds:
            print("<td class='yes element'>Element present</td>")
        else:
            # This cell spans all of the element's rows, which is why the
            # rows below emit nothing for DTDs lacking the element.
            print("<td class='no element' rowspan='%d'>Element not present</td>"
                  % rowspan)
    print("</tr>")

    for name in set_names:
        group = _ATTRIBUTE_SETS[name]
        setlink = "<a href='#%s' title='%s'>%s</a>" % (
            name, ", ".join(sorted(group)), name)
        print("<tr><td>%s</td>" % setlink)
        for uri in uris:
            if uri not in dtds:
                continue
            missing = group - dtds[uri]
            if missing == group:
                print("<td class='no'>%s absent</td>" % name)
            elif missing:
                print("<td>%s <span class='no'>without</span> %s</td>"
                      % (setlink, ", ".join(sorted(missing))))
            else:
                print("<td class='yes'>%s</td>" % setlink)
        print("</tr>")

    if leftover:
        print("<tr><td><ul><li><code>%s</code></li></ul></td>"
              % "</code></li><li><code>".join(leftover))
        for uri in uris:
            if uri not in dtds:
                continue
            print("<td><ul>")
            for att in leftover:
                if att in dtds[uri]:
                    print("<li class='yes'><code>%s</code></li>" % att)
                else:
                    print("<li class='no'><code>%s</code> absent</li>" % att)
            print("</ul></td>")
        print("</tr>")


def _print_legend():
    """Print the definition list naming the attributes of each group."""
    print("<dl>")
    for name in sorted(_ATTRIBUTE_SETS):
        print("<dt><a name='%s' id='%s'>%s</a></dt>" % (name, name, name))
        print("<dd>%s</dd>" % ", ".join(sorted(_ATTRIBUTE_SETS[name])))
    print("</dl>")


def compareDTD(uris, load_dtd=None):
    """Print an HTML report comparing the elements and attributes of DTDs.

    The report is written to standard output: a table with one column per
    DTD and one group of rows per element, followed by a legend of the
    attribute groups used in the table.

    uris     -- the URIs of the DTDs to compare, in column order
    load_dtd -- optional callable taking a URI and returning an object that
                offers get_elements() and get_elem(name).get_attr_list();
                defaults to xmlproc's xmldtd.load_dtd

    Every print below passes exactly one parenthesised argument, which the
    Python 2 print statement outputs unchanged.
    """
    if load_dtd is None:
        from xml.parsers.xmlproc import xmldtd
        load_dtd = xmldtd.load_dtd
    uris = list(uris)  # iterated once per table row, so it must be re-iterable

    per_dtd = {}   # element name -> {uri: set of attribute names}
    combined = {}  # element name -> union of attribute names over all DTDs

    print("<link rel='stylesheet' href='http://www.w3.org/2001/11/results.css' />")
    print("<table border='1'><thead><tr><th>Element</th><th>Attributes</th>")
    for uri in uris:
        # hackish way to get a shorter heading for DTDs
        # ideally, there would be a way to get a human readable name
        label = uri[uri.rfind('/') + 1:] or uri
        # Only the URI is escaped: element and attribute names are XML names,
        # which cannot contain HTML-special characters.
        print("<th><a href='%s'>%s</a></th>"
              % (_html_escape(uri), _html_escape(label)))
        dtd = load_dtd(uri)
        for elem_name in dtd.get_elements():
            attributes = set(dtd.get_elem(elem_name).get_attr_list())
            per_dtd.setdefault(elem_name, {})[uri] = attributes
            combined.setdefault(elem_name, set()).update(attributes)
    print("</tr></thead><tbody>")

    for elem_name in sorted(per_dtd):
        _print_element_rows(uris, elem_name,
                            per_dtd[elem_name], combined[elem_name])

    print("</tbody></table>")
    _print_legend()

if __name__ == '__main__':
    # Command-line entry point: the positional arguments are the URIs of the
    # DTDs to compare; they are handed to compareDTD unchanged.
    import sys
    from optparse import OptionParser

    usage = "usage: %prog uri1 uri2 ..."
    parser = OptionParser(usage)
    # Disabled option, kept for reference (it is the only user of sys here):
    #parser.add_option("-o", "--output",
    #                  action="store", type="string", dest="filename",
    #                  help="write output to FILE", metavar="FILE", default=sys.stdout)
    (options, args) = parser.parse_args()
    # A comparison needs at least two DTDs; parser.error prints the usage
    # line plus this message and exits.
    if len(args) < 2:
        parser.error("please specify the URIs of at least two DTDs to compare")
    compareDTD(args)
    
