import sys # per https://bugs.launchpad.net/ubuntu/+source/heartbeat/+bug/306185 sys.path.append('/usr/lib/python%s/site-packages/oldxml' % sys.version[:3]) inline_elements = set([("br",""),("span",""),("em",""),("strong",""),("dfn",""),("code",""),("samp",""),("kbd",""),("var",""),("cite",""),("abbr",""),("acronym",""),("q",""),("tt",""),("i",""),("b",""),("big",""),("small",""),("sub",""),("sup",""),("a",""),("img",""),("object",""),("input",""),("select",""),("textarea",""),("label",""),("button","")]) block_elements = set([("h1",""),("h2",""),("h3",""),("h4",""),("h5",""),("h6",""),("ul",""),("ol",""),("dl",""),("p",""),("div",""),("pre",""),("blockquote",""),("address",""),("table",""),("fieldset",""),("hr","")]) def describeContent(elem,mod): if elem=="#PCDATA": print "Text " else: print "%s \n" % (elem,elem) if mod=="?": print " (optional), " elif mod=="+": print " (at least one), " else: print ", " def describeTuple(t,elem_name): if len(t)==3: sub_sep,sub_cont,sub_mod=t describeContentModel(sub_sep,sub_cont,sub_mod,elem_name) elif len(t)==2: sub_el,sub_mod=t describeContent(sub_el,sub_mod) def describeContentList(cont,elem_name): global inline_elements,block_elements try: c = set(cont) if inline_elements.issubset(c): print "Inline elements (" c -= inline_elements for t in inline_elements: describeTuple(t,elem_name) print ")
" elif len(inline_elements - (c & inline_elements)) < 3: print "Inline elements (" for t in c & inline_elements: describeTuple(t,elem_name) print ") except " for t in inline_elements - (c & inline_elements): describeTuple(t,elem_name) print "
" c -= inline_elements if block_elements.issubset(c): print "Block elements (" c -= block_elements for t in block_elements: describeTuple(t,elem_name) print ")
" for t in c: describeTuple(t,elem_name) except: for t in cont: describeTuple(t,elem_name) def describeContentModel(sep,cont,mod,elem_name): # Not generic, but should deal with what is actually used in XHTML if sep=="" and cont==[] and mod=="": print "Empty" elif sep=="|" or (sep=="" and mod!=""): if mod=="*": print "Any numbers of
" elif mod=="+": print "At least one of
" describeContentList(cont,elem_name) elif sep=="" and mod=="": if len(cont)>1: print "Unexpected list with more than one item @@@ %s" % cont describeTuple(cont[0],elem_name) elif sep==",": if mod=="?": print "Optionally " elif mod!="": print "unhandled mod: %s" % mod print "

" describeTuple(t,elem_name) print "

" else: print "Unhandled separator @@@ ( %s , %s , %s )" % (sep,cont,mod) def describeAttributesSet(attributes): for attr_name in attributes: print "%s " % (attr_name.replace(":","_"),attr_name) def describeDTD(uri,file): global inline_elements global block_elements attr_collections_order=["XML Attributes","Core Attributes","Style Attributes","Event Attributes","Other"] attr_collections={} attr_collections["XML Attributes"] = set(["xml:lang", "xmlns" ,"xmlns:xsi"]) attr_collections["Core Attributes"] = set(["xml:space","class","id","title"]) attr_collections["Style Attributes"] = set(["style"]) attr_collections["Event Attributes"] = set(["onclick", "ondblclick", "onkeydown", "onkeypress", "onkeyup", "onmousedown", "onmousemove", "onmouseout", "onmouseover", "onmouseup"]) all_attributes={} other_attributes =set() from xml.parsers.xmlproc import xmldtd dtd = xmldtd.load_dtd(uri) print "\n" elements = dtd.get_elements() all_elements = set(elements) elements.sort() print "

" for elem_name in elements : print "%s |" % (elem_name,elem_name) print "

" print("\n") for elem_name in elements : elem = dtd.get_elem(elem_name) if (elem_name,"") in inline_elements: classname="inline" elif (elem_name,"") in block_elements: classname="block" else: classname="other" print("" % (classname,elem_name,elem_name,elem_name)) print("") print("") print("\n") print("

Elements
Element	Attributes	Content model
%s	") attributes = set(elem.get_attr_list()) for coll_name,coll in attr_collections.iteritems(): if coll.issubset(attributes): print "%s (" % coll_name describeAttributesSet(coll) for a in coll: if not all_attributes.has_key(a): all_attributes[a]={} attr = elem.get_attr(a) t = attr.get_type() if isinstance(t,str): pass elif isinstance(t,list): t = tuple(t) key = tuple([t,attr.get_decl(),attr.get_default()]) if not all_attributes[a].has_key(key): all_attributes[a][key]=[] all_attributes[a][key].append(elem_name) print ") " attributes -= coll if len(attributes): print "Other:" describeAttributesSet(attributes) for a in attributes: if not all_attributes.has_key(a): all_attributes[a]={} attr = elem.get_attr(a) t = attr.get_type() if isinstance(t,str): pass elif isinstance(t,list): t = tuple(t) key = tuple([t,attr.get_decl(),attr.get_default()]) if not all_attributes[a].has_key(key): all_attributes[a][key]=[] all_attributes[a][key].append(elem_name) other_attributes.update(attributes) print("	") sep,cont,mod = elem.get_content_model() describeContentModel(sep,cont,mod,elem_name) print("

\n") print("\n") attr_collections["Other"] = other_attributes for coll_name in attr_collections_order: coll = attr_collections[coll_name] if coll_name!="Other": attr_collections["Other"] -= coll coll = list(coll) coll.sort() print "" %coll_name for attr_name in coll: print "" rowspan="" if all_attributes.has_key(attr_name): if len(all_attributes[attr_name])>1: rowspan="rowspan='%d'" % (len(all_attributes[attr_name])) print "" % (rowspan,attr_name.replace(":","_"),attr_name.replace(":","_"),attr_name) morethanone= False for t,decl,default in all_attributes[attr_name].keys(): if morethanone: print "" print "" if t=="CDATA": # http://www.w3.org/TR/2006/REC-xml11-20060816/#dt-chardata print "" elif t=="ID": # http://www.w3.org/TR/2006/REC-xml11-20060816/#dt-id print "" % t elif t=="IDREFS": # http://www.w3.org/TR/2006/REC-xml11-20060816/#idref print "" elif t=="IDREF": print "" elif t=="NMTOKENS": print "" elif isinstance(t,tuple): print "" % ("','

".join(t))
                    else:
                        print "

" % t
                    print "

" 
                    print "

"
                    morethanone=True
    print("

Attributes
Attribute	Related Elements	Type	Default
%s
%s
" attr_elements = set(all_attributes[attr_name][tuple([t,decl,default])]) if attr_elements==all_elements: print "Any element" elif len(all_elements - attr_elements) < 10: print "Any element but " for el in all_elements - attr_elements: print "%s " % (el,el) else: for el in attr_elements: print "%s " % (el,el) print "	Any characters (with < and & escaped)	%s	White-space separated list of existing ids	Name of an existing id	White-space separated list of NMTOKEN	'`%s`'	%s	" if decl=="#FIXED": print "Fixed value: " elif decl=="#REQUIRED": print "Required" elif decl=="#DEFAULT": print "Default value:" if default: print "'`%s`'" % default print "

\n")
    
    

if __name__ == '__main__':
    # Command-line entry point: describe the DTD whose URI is given as the
    # single positional argument, writing to standard output or, when
    # -o/--output is given, to the named file.
    from optparse import OptionParser
    usage = "usage: %prog [options] arg"
    parser = OptionParser(usage)
    import sys
    # The option is a string (a file name); default to None rather than the
    # sys.stdout file object so "no -o given" is unambiguous.
    parser.add_option("-o", "--output",
                      action="store", type="string", dest="filename",
                      help="write output to FILE", metavar="FILE", default=None)
    (options, args) = parser.parse_args()
    if len(args) != 1:
        parser.error("please specify the URI of the DTD you want to parse")

    if options.filename is None:
        describeDTD(args[0], sys.stdout)
    else:
        # describeDTD emits its output with print, so honour -o by pointing
        # sys.stdout at the requested file for the duration of the call and
        # restoring it afterwards, even if the DTD fails to load.
        output = open(options.filename, "w")
        saved_stdout = sys.stdout
        sys.stdout = output
        try:
            describeDTD(args[0], output)
        finally:
            sys.stdout = saved_stdout
            output.close()