import sys
# per https://bugs.launchpad.net/ubuntu/+source/heartbeat/+bug/306185
# Make the distribution's "oldxml" directory importable (presumably so that
# the xml.parsers.xmlproc import used by describeDTD can be resolved).
# The version is built from sys.version_info rather than sys.version[:3],
# because slicing the version string truncates two-digit minor versions
# ("3.10" would become "3.1").
sys.path.append('/usr/lib/python%d.%d/site-packages/oldxml' % sys.version_info[:2])

# Element names classified as inline / block.  Each entry is stored as a
# (name, "") tuple, i.e. an element name paired with an empty modifier, so
# the sets can be compared directly with the (name, modifier) items that
# describeContentList receives.
inline_elements = set((name, "") for name in (
    "br", "span", "em", "strong", "dfn", "code", "samp", "kbd", "var",
    "cite", "abbr", "acronym", "q", "tt", "i", "b", "big", "small", "sub",
    "sup", "a", "img", "object", "input", "select", "textarea", "label",
    "button",
))
block_elements = set((name, "") for name in (
    "h1", "h2", "h3", "h4", "h5", "h6", "ul", "ol", "dl", "p", "div", "pre",
    "blockquote", "address", "table", "fieldset", "hr",
))


def describeContent(elem,mod):
    """Print the HTML description of one content-model item.

    elem -- element name, or "#PCDATA" for character data (printed as "Text").
    mod  -- occurrence modifier of the item: "?", "+", "*" or "".

    The item is printed either as "Text " or as a link to the element's
    "#elem_<name>" anchor, followed by a note about the modifier and a
    trailing comma.  Output goes to stdout; nothing is returned.
    """
    if elem=="#PCDATA":
        print("Text ")
    else:
        print("<a href='#elem_%s'>%s</a> \n" % (elem,elem))
    if mod=="?":
        print(" (optional), ")
    elif mod=="+":
        print(" (at least one), ")
    elif mod=="*":
        # Previously "*" fell through to the plain "," branch, so an item
        # such as "col*" was described as if exactly one were required.
        print(" (any number), ")
    else:
        print(", ")

def describeTuple(t,elem_name):
    """Describe one content-model item, dispatching on the tuple's length.

    A 2-tuple is (element, modifier) and is handed to describeContent.
    A 3-tuple is a nested (separator, content, modifier) group and is handed
    to describeContentModel together with elem_name.
    Tuples of any other length are ignored.
    """
    arity = len(t)
    if arity == 2:
        name, modifier = t
        describeContent(name, modifier)
        return
    if arity == 3:
        separator, children, modifier = t
        describeContentModel(separator, children, modifier, elem_name)

def describeContentList(cont,elem_name):
    """Print the items of a content list, grouping well-known element sets.

    cont      -- list of content-model tuples (see describeTuple).
    elem_name -- name of the element being described; passed through to
                 describeTuple.

    When every item is hashable the list is treated as a set so that:
      * if it contains all of inline_elements, they are printed as one
        "Inline elements" group;
      * if it lacks fewer than three of them, the present ones are printed
        as an "Inline elements" group followed by the "except" list;
      * if it contains all of block_elements, they are printed as one
        "Block elements" group;
      * whatever is left is printed item by item.
    Otherwise every item is printed in list order without grouping.
    """
    try:
        remaining = set(cont)
    except TypeError:
        # Nested (separator, content, modifier) groups carry a list and are
        # therefore unhashable: fall back to a plain listing.  Only this
        # failure is caught; the previous bare "except:" around the whole
        # body also hid errors raised while printing and then printed the
        # complete list a second time.
        for t in cont:
            describeTuple(t,elem_name)
        return

    present_inline = remaining & inline_elements
    missing_inline = inline_elements - present_inline
    if not missing_inline:
        print("<span class='inline'><strong>Inline elements</strong> (")
        for t in inline_elements:
            describeTuple(t,elem_name)
        print(")</span><br />")
        remaining -= inline_elements
    elif len(missing_inline) < 3:
        print("<span class='inline'><strong>Inline elements</strong> (")
        for t in present_inline:
            describeTuple(t,elem_name)
        print(") <strong>except ")
        for t in missing_inline:
            describeTuple(t,elem_name)
        print("</strong></span><br />")
        remaining -= inline_elements

    if block_elements.issubset(remaining):
        print("<span class='block'><strong>Block elements</strong> (")
        remaining -= block_elements
        for t in block_elements:
            describeTuple(t,elem_name)
        print(")</span><br />")

    # Anything not covered by a group above.
    for t in remaining:
        describeTuple(t,elem_name)

        
def describeContentModel(sep,cont,mod,elem_name):
    """Print an HTML description of a (separator, content, modifier) model.

    sep       -- "|" for a choice, "," for a sequence, "" for no separator.
    cont      -- list of content-model tuples (see describeTuple).
    mod       -- occurrence modifier applying to the whole group.
    elem_name -- name of the element being described; passed through to the
                 helpers.

    Choices are printed through describeContentList, sequences as an ordered
    HTML list.  Combinations that are not handled are reported in the output
    rather than raising.
    """
    # Not generic, but should deal with what is actually used in XHTML
    if sep=="" and mod=="" and not cont:
        # "not cont" (instead of cont==[]) also covers other empty sequences,
        # which would otherwise reach cont[0] below and raise IndexError.
        print("Empty")
    elif sep=="|" or (sep=="" and mod!=""):
        if mod=="*":
            print("Any numbers of<br />")
        elif mod=="+":
            print("At least one of<br />")
        elif mod=="?":
            # Previously an optional group was printed without any hint.
            print("Optionally one of<br />")
        describeContentList(cont,elem_name)
    elif sep=="" and mod=="":
        if len(cont)>1:
            # Wrapped in a 1-tuple so that a tuple-valued cont is formatted
            # as a single value instead of being unpacked by "%".
            print("Unexpected list with more than one item @@@ %s" % (cont,))
        describeTuple(cont[0],elem_name)
    elif sep==",":
        if mod=="?":
            print("Optionally ")
        elif mod!="":
            print("unhandled mod: %s" % mod)
        print("<ol>")
        for t in cont:
            print("<li>")
            describeTuple(t,elem_name)
            print("</li>")
        print("</ol>")
    else:
        print("Unhandled separator @@@ ( %s , %s , %s )" % (sep,cont,mod))
        
def describeAttributesSet(attributes):
    """Print one link per attribute name, pointing at its "#attr_<name>" anchor.

    attributes -- iterable of attribute names.  Colons in a name are replaced
                  by underscores in the anchor (e.g. "xml:lang" links to
                  "#attr_xml_lang").

    Names are printed in sorted order so that the output does not depend on
    set iteration order.
    """
    for attr_name in sorted(attributes):
        print("<a href='#attr_%s'>%s</a> " % (attr_name.replace(":","_"),attr_name))
    
def _recordAttributeUsage(all_attributes,elem,elem_name,attr_names):
    """Index which elements declare each attribute, and with which declaration.

    all_attributes maps attribute name -> {(type, decl, default): [element
    names]} and is updated in place.  List-valued types are converted to
    tuples so that the key is hashable.
    """
    for attr_name in attr_names:
        attr = elem.get_attr(attr_name)
        attr_type = attr.get_type()
        if isinstance(attr_type,list):
            attr_type = tuple(attr_type)
        key = (attr_type,attr.get_decl(),attr.get_default())
        all_attributes.setdefault(attr_name,{}).setdefault(key,[]).append(elem_name)


def _describeAttributeType(attr_type):
    """Return the "Type" table cell (a <td> string) for an attribute type."""
    if attr_type=="CDATA":
        # http://www.w3.org/TR/2006/REC-xml11-20060816/#dt-chardata
        return "<td>Any characters (with &lt; and &amp; escaped)</td>"
    if attr_type=="IDREFS":
        # http://www.w3.org/TR/2006/REC-xml11-20060816/#idref
        return "<td>White-space separated list of existing ids</td>"
    if attr_type=="IDREF":
        return "<td>Name of an existing id</td>"
    if attr_type=="NMTOKENS":
        return "<td>White-space separated list of NMTOKEN</td>"
    if isinstance(attr_type,tuple):
        # Enumerated type: list every allowed value.
        return "<td>'<code>%s</code>'</td>" % ("</code>','<code>".join(attr_type))
    # "ID" (http://www.w3.org/TR/2006/REC-xml11-20060816/#dt-id) and any
    # other type name are shown verbatim.
    return "<td>%s</td>" % attr_type


def _describeRelatedElements(attr_elements,all_elements):
    """Print the elements carrying an attribute, as compactly as possible."""
    excluded = all_elements - attr_elements
    if attr_elements==all_elements:
        print("Any element")
    elif len(excluded) < 10:
        print("Any element but ")
        for el in sorted(excluded):
            print("<a href='#elem_%s'>%s</a> " % (el,el))
    else:
        for el in sorted(attr_elements):
            print("<a href='#elem_%s'>%s</a> " % (el,el))


def describeDTD(uri,file):
    """Print an HTML summary of the DTD found at uri.

    uri  -- location of the DTD, handed to xmlproc's xmldtd.load_dtd.
    file -- not used: everything is written with print, i.e. to sys.stdout.
            The parameter is kept so that existing callers keep working.

    The output consists of a <style> block, an index of element links, an
    "Elements" table (attributes and content model per element) and an
    "Attributes" table (related elements, type and default per attribute),
    the latter grouped by the attribute collections defined below.
    """
    attr_collections_order=["XML Attributes","Core Attributes","Style Attributes","Event Attributes","Other"]
    attr_collections={}
    attr_collections["XML Attributes"] = set(["xml:lang", "xmlns" ,"xmlns:xsi"])
    attr_collections["Core Attributes"] = set(["xml:space","class","id","title"])
    attr_collections["Style Attributes"] = set(["style"])
    attr_collections["Event Attributes"] = set(["onclick", "ondblclick", "onkeydown", "onkeypress", "onkeyup", "onmousedown", "onmousemove", "onmouseout", "onmouseover", "onmouseup"])
    all_attributes={}
    other_attributes =set()
    from xml.parsers.xmlproc import xmldtd
    dtd = xmldtd.load_dtd(uri)
    print("<style type='text/css'>.inline { background:#cfc; } .block {background:#ccf;} .other {background:#ccc;}</style>\n")
    elements = sorted(dtd.get_elements())
    all_elements = set(elements)
    print("<p>")
    for elem_name in elements :
        print("<a href='#elem_%s'>%s</a> |" % (elem_name,elem_name))
    print("</p>")

    # --- Elements table -------------------------------------------------
    print("<table border='1'><caption>Elements</caption><thead><tr><th scope='col'>Element</th><th scope='col'>Attributes</th><th scope='col'>Content model</th></tr></thead><tbody>\n")
    for elem_name in elements :
        elem = dtd.get_elem(elem_name)
        if (elem_name,"") in inline_elements:
            classname="inline"
        elif (elem_name,"") in block_elements:
            classname="block"
        else:
            classname="other"
        print("<tr><th scope='row' class='%s'><a name='elem_%s' id='elem_%s'>%s</a></th>" % (classname,elem_name,elem_name,elem_name))
        print("<td>")
        attributes = set(elem.get_attr_list())
        # Walk the collections in their declared order (not dict order) so
        # the cell content is laid out the same way for every element.
        for coll_name in attr_collections_order:
            coll = attr_collections.get(coll_name)
            if coll is None or not coll.issubset(attributes):
                # "Other" is not defined yet at this point; a collection is
                # only shown as a group when the element has all of it.
                continue
            print("<strong>%s</strong> (" % coll_name)
            describeAttributesSet(coll)
            _recordAttributeUsage(all_attributes,elem,elem_name,coll)
            print(")<br />")
            attributes -= coll
        if attributes:
            print("<strong>Other</strong>:")
            describeAttributesSet(attributes)
            _recordAttributeUsage(all_attributes,elem,elem_name,attributes)
            other_attributes.update(attributes)
        print("</td>")
        print("<td>")
        model = elem.get_content_model()
        if model is None:
            # NOTE(review): xmlproc documents ANY content models as None;
            # unpacking None would raise, so it is reported explicitly.
            print("Any")
        else:
            sep,cont,mod = model
            describeContentModel(sep,cont,mod,elem_name)
        print("</td>")
        print("</tr>\n")
    print("</tbody></table>\n")

    # --- Attributes table -----------------------------------------------
    print("<table border='1'><caption>Attributes</caption><thead><tr><th scope='col'>Attribute</th><th scope='col'>Related Elements</th><th scope='col'>Type</th><th scope='col'>Default</th></tr></thead><tbody>\n")
    # An attribute of a named collection ends up in other_attributes when an
    # element carries only part of that collection; list it under its own
    # collection only.
    for coll in attr_collections.values():
        other_attributes -= coll
    attr_collections["Other"] = other_attributes

    for coll_name  in attr_collections_order:
        print("<tr><th colspan='4'>%s</th></tr>" % coll_name)
        for attr_name in sorted(attr_collections[coll_name]):
            usages = all_attributes.get(attr_name)
            if not usages:
                # No element declares this attribute.  Previously a <tr> was
                # opened here and never filled or closed.
                continue
            if len(usages)>1:
                # One row per distinct declaration, sharing the header cell.
                rowspan="rowspan='%d'" % len(usages)
            else:
                rowspan=""
            anchor = attr_name.replace(":","_")
            print("<tr>")
            print("<th scope='row' %s><a id='attr_%s' name='attr_%s'>%s</a></th>" % (rowspan,anchor,anchor,attr_name))
            first_row = True
            for (attr_type,decl,default),attr_elements in usages.items():
                if not first_row:
                    print("<tr>")
                first_row = False
                print("<td>")
                _describeRelatedElements(set(attr_elements),all_elements)
                print("</td>")
                print(_describeAttributeType(attr_type))
                print("<td>")
                if decl=="#FIXED":
                    print("Fixed value: ")
                elif decl=="#REQUIRED":
                    print("Required")
                elif decl=="#DEFAULT":
                    print("Default value:")
                if default:
                    print("'<code>%s</code>'" % default)
                print("</td>")
                print("</tr>")
    print("</tbody></table>\n")
    
    

if __name__ == '__main__':
    # Command-line entry point: describe the DTD whose URI is the single
    # positional argument, writing the HTML to stdout or to the file given
    # with -o/--output.
    from optparse import OptionParser
    usage = "usage: %prog [options] arg"
    parser = OptionParser(usage)
    parser.add_option("-o", "--output",
                      action="store", type="string", dest="filename",
                      help="write output to FILE", metavar="FILE", default=None)
    (options, args) = parser.parse_args()
    if len(args) != 1:
        parser.error("please specify the URI of the DTD you want to parse")
    if options.filename is None:
        describeDTD(args[0],sys.stdout)
    else:
        # describeDTD and its helpers write with print, so the only way to
        # honour --output (which used to be parsed and then ignored) is to
        # point sys.stdout at the file for the duration of the call.
        output = open(options.filename,"w")
        saved_stdout = sys.stdout
        sys.stdout = output
        try:
            describeDTD(args[0],output)
        finally:
            sys.stdout = saved_stdout
            output.close()
    
