"""Load a schema document and dump its element types in DTD syntax.

The document is parsed with PyXML; each top-level declaration (archetype,
elementType, modelGroup, ...) is handed to the class of the same name.  The
archetype refinement hierarchy is then analysed (descendants, heights, roots)
and can be displayed with `Graph` or written out via `schema.dumpToDTD`.

NOTE(review): the copy of this file that was reformatted here had lost every
stretch of text lying between a '<' and the following '>'.  Affected code is
marked with NOTE(review) comments below.  In particular the classes
`sequence` and `mgr` (and any other model-group classes dispatched by label
from `ChooseGroup`) are referenced but NOT defined in this file any more;
they must be restored from the original source.
"""

from PyXML import *
from graph import Graph
from Tkinter import Tk
import sys
import types

# Stream written to by pas(); temporarily redirected by schema.dumpToDTD.
outfile = sys.stdout


def _isItem(node):
    """Return true if `node` is a parsed element (as opposed to text data)."""
    return type(node) == ItemType


def _handlerFor(label):
    """Return the module-level callable named `label`.

    Replaces eval() on labels read from the schema document: only names
    bound in this module's globals are reachable, never arbitrary
    expressions.  Raises NameError (as eval did) when nothing callable
    with that name exists.
    """
    handler = globals().get(label)
    if not callable(handler):
        raise NameError("name '%s' is not defined" % (label,))
    return handler


def _askFilename(tkCommand, extension, description):
    """Run a Tk file dialog (`tkCommand`) and return the chosen filename."""
    tk = Tk()
    try:
        return tk.tk.call(tkCommand, '-defaultextension', extension,
                          '-filetypes', ((description, ('*' + extension,)),
                                         ('All files', ('*.*', '*'))))
    finally:
        tk.destroy()


class schema:
    """Tables of everything declared in one schema document."""

    def __init__(self):
        self.archetypeTable = {}            # archetype name -> archetype
        self.archetypes = []                # archetypes in declaration order
        self.roots = []                     # archetypes with no ancestors
        self.archetypeForwardRefs = []      # (elementType name, archetype name)
        self.elementTypeForwardRefs = []    # elementType names not yet defined
        self.leaves = []                    # archetypes with no descendants
        self.elementTypeTable = {}          # elementType name -> elementType
        self.flex = 'closed'                # default "model" attribute value
        self.modelGroupTable = {}           # modelGroup name -> modelGroup
        self.attrGroupTable = {}

    def fromFile(self, filename=None, graph=1):
        """Parse `filename`, analyse the archetype hierarchy, optionally graph it.

        filename -- schema file; when empty a Tk open-file dialog is shown,
                    and nothing happens if that dialog is cancelled.
        graph    -- when true, build the display and store it in self.graph.

        Diagnostics about undefined references are printed, not raised.
        """
        if not filename:
            filename = _askFilename('tk_getOpenFile', '.xsd', 'Schema Files')
        if not filename:
            return
        # A drive-letter path ("C:...") is turned into a file URL.  Slicing
        # rather than indexing keeps one-character names from raising.
        if filename[1:2] == ':':
            filename = 'file:///' + filename
        doc = Open(filename, NSL_read + NSL_read_validate)
        try:
            root = ItemParse(doc, GetNextBit(doc).item)
            self.processRoot(root)
            for elt in root.data:
                self.processDecl(elt)
        finally:
            # Close the input even if a declaration handler raises.
            Close(doc)

        # Resolve elementType -> archetype forward references BEFORE the
        # descendant gathering below, which is the only reader of
        # namesAsElement.  (The original appended to a non-existent
        # `.elements` attribute, and only after the gathering was done.)
        for (name, arn) in self.archetypeForwardRefs:
            if arn in self.archetypeTable:
                self.archetypeTable[arn].namesAsElement.append(name)
            else:
                print("Undefined archetype %s referenced in definition of %s elementType" % (arn, name))

        for a in self.archetypes:
            a.addDescendants()
        for e in self.elementTypeTable.values():
            e.gatherDescendants(self)

        maxh = 0
        for a in self.archetypes:
            maxh = max(maxh, a.height())
            if not a.descendantNames:
                self.leaves.append(a)
            if a.ancestorNames:
                for an in a.ancestorNames:
                    if an not in self.archetypeTable:
                        print("Undefined archetype %s referenced as ancestor of %s archetype" % (an, a.name))
        self.maxHeight = maxh

        for leaf in self.leaves:
            leaf.rootNames()
        for name in self.elementTypeForwardRefs:
            if name not in self.elementTypeTable:
                print("Undefined elementType %s" % (name,))

        # Same ordering as sorting with archetype.rootCompare.
        self.leaves.sort(key=archetype._rootSortKey)
        print([a.name for a in self.leaves])
        if graph:
            self.graph = self.showGraph()

    def processRoot(self, root):
        """Pick up the schema-wide default from the root's "model" attribute."""
        flex = GetAttrVal(root, "model")
        if flex:
            self.flex = flex

    def processDecl(self, elt):
        """Dispatch a top-level declaration to the class named by its label."""
        if _isItem(elt):
            _handlerFor(elt.label)(self, elt)

    def showGraph(self):
        """Display archetypes column by column, leaves first, by height."""
        column = 0
        g = Graph(self, font='-adobe-helvetica-medium-r-normal-*-14-*-*-*-*-*-*-*')
        nextColumn = self.leaves
        while nextColumn:
            g.showColumn(column, nextColumn)
            column = column + 1
            nextColumn = [a for a in self.archetypes if a.hgt == column]
        return g

    def clear(self):
        """Drop references so that circular structures can be reclaimed."""
        # cut all circularities
        for a in self.archetypes:
            a.clear()
        self.archetypes = self.leaves = self.roots = None
        self.archetypeTable = None

    def mapArchetypes(self, fn, keys, *args):
        """Call fn(archetype, *args) for each archetype named in `keys`.

        Names missing from the archetype table are reported and skipped.
        `keys` may be None or empty, in which case nothing happens.
        """
        if not keys:
            return
        for key in keys:
            if key not in self.archetypeTable:
                print('Error, reference to undefined archetype %s' % (key,))
                continue
            fn(self.archetypeTable[key], *args)

    def checkEltRef(self, name):
        """Remember `name` as a forward reference unless already known."""
        if (name not in self.elementTypeTable
                and name not in self.elementTypeForwardRefs):
            self.elementTypeForwardRefs.insert(0, name)

    def dumpToDTD(self, filename=None):
        """Write every elementType, sorted by name, to `filename` as a DTD.

        When `filename` is empty a Tk save-file dialog is shown; cancelling
        it makes this a no-op.  The module-level `outfile` is redirected for
        the duration and always restored to sys.stdout afterwards.
        """
        global outfile
        if not filename:
            filename = _askFilename('tk_getSaveFile', '.dtd', 'DTD Files')
        if not filename:
            return
        outfile = open(filename, 'w')
        try:
            for en in sorted(self.elementTypeTable.keys()):
                self.elementTypeTable[en].dumpForDTD()
        finally:
            outfile.close()
            outfile = sys.stdout


class archetype:
    """One archetype declaration and its place in the refinement hierarchy."""

    def __init__(self, schema, elt, isElt=0, embedding=None):
        """Build from element `elt`.

        isElt     -- true when created implicitly by an elementType, in which
                     case the archetype's own name counts as an element name.
        embedding -- when given, the name becomes the pair (embedding, name)
                     and the archetype is not registered in the schema.

        A duplicate name is reported and leaves the instance uninitialised.
        """
        name = GetAttrVal(elt, "name")
        self.schema = schema
        if embedding:
            name = (embedding, name)
        if name in schema.archetypeTable:
            print("%s multiply defined as archetype" % (name,))
            return
        if not embedding:
            schema.archetypeTable[name] = self
            # NOTE(review): the collapsed source does not show whether this
            # append was inside the `if`; it is kept inside because height()
            # looks descendants up by name in archetypeTable.
            schema.archetypes.append(self)
        self.name = name
        self.ancestorNames = None       # names listed under <refines>, if any
        self.descendantNames = []       # filled in by newDescendant()
        self.vertex = None
        self.hgt = None                 # cached result of height()
        self.rtNames = None             # cached result of rootNames()
        self.attrDecls = {}             # attribute name -> attrDecl
        self.model = None
        self.effectiveModel = None      # cached by effectiveContentModel()
        self.flex = GetAttrVal(elt, "model") or self.schema.flex
        if isElt:
            self.namesAsElement = [name]
        else:
            self.namesAsElement = []
        for d in elt.data:
            if not _isItem(d):
                continue
            if d.label == 'refines':
                self.ancestorNames = []
                for ar in d.data:
                    if not _isItem(ar):
                        continue
                    # should check for schemaName
                    aname = GetAttrVal(ar, "name")
                    if aname in self.ancestorNames:
                        print("%s refines %s twice" % (name, aname))
                    else:
                        self.ancestorNames.append(aname)
            elif d.label == 'attrDecl':
                ad = attrDecl(self, d)
                self.attrDecls[ad.name] = ad
            elif d.label == 'attrGroupRef':
                pass
            else:
                # Any other child element is taken as the content model.
                self.model = model(self, self.flex, d)
        if not self.ancestorNames:
            schema.roots.append(self)

    def clear(self):
        """Release the model objects held by this archetype."""
        self.effectiveModel = self.attrDecls = self.model = None

    def addDescendants(self):
        """Register this archetype as a descendant of each of its ancestors."""
        self.schema.mapArchetypes(archetype.newDescendant,
                                  self.ancestorNames, self.name)

    def newDescendant(self, dName):
        """Record `dName` as refining this archetype, unless it is closed."""
        if self.flex == 'closed':
            print('Error, %s refines %s, which is not open or refinable' % (dName, self.name))
            return
        self.descendantNames.append(dName)

    def rootNames(self):
        """Return (and cache) the sorted names of this archetype's roots.

        Undefined ancestors are skipped.  An ancestor that itself reports no
        roots is a root.  An archetype without ancestors yields [].
        """
        if not self.rtNames:
            rts = []
            if self.ancestorNames:
                for an in self.ancestorNames:
                    if an not in self.schema.archetypeTable:
                        continue
                    arns = self.schema.archetypeTable[an].rootNames()
                    if arns:
                        for rt in arns:
                            if rt not in rts:
                                rts.insert(0, rt)
                    elif an not in rts:
                        # it's a REAL root, so it's one of my roots for sure
                        # (guarded so a root reached two ways is listed once)
                        rts.insert(0, an)
                rts.sort()
            self.rtNames = rts
        return self.rtNames

    def height(self):
        """Return (and cache) the longest path down to a leaf; leaves are 0."""
        if self.hgt is None:
            if self.descendantNames:
                mh = 0
                for dn in self.descendantNames:
                    mh = max(mh, self.schema.archetypeTable[dn].height())
                self.hgt = mh + 1
            else:
                self.hgt = 0
        return self.hgt

    def _rootSortKey(self):
        """Sort key equivalent to the ordering defined by rootCompare."""
        rts = self.rtNames or []
        return (len(rts), rts, self.ancestorNames or [], self.name)

    def rootCompare(self, other):
        """Three-way compare: root count, root names, ancestor names, name.

        NOTE(review): the comparison operators of this method were stripped
        from the source; the order of the four criteria is what survived.
        The surviving text also mentioned `other.highestHeight` right after
        the name comparison - possibly a further criterion or a separate
        comparison method - which could not be recovered and is omitted.
        """
        mine = self._rootSortKey()
        theirs = other._rootSortKey()
        return (mine > theirs) - (mine < theirs)

    def effectiveContentModel(self):
        """Return (and cache) this archetype's model merged with its ancestors'.

        Starts from a fresh copy of the declared model (or a default model),
        then merges in the effective models of all defined ancestors.
        """
        if self.effectiveModel is not None:
            return self.effectiveModel
        if self.model is not None:
            self.effectiveModel = model(self, self.model.flex, self.model.elt)
            self.effectiveModel.plugInModelGroups()
        else:
            self.effectiveModel = model(self, self.schema.flex)
        if self.ancestorNames:
            table = self.schema.archetypeTable
            self.effectiveModel.merge(
                [table[an].effectiveContentModel()
                 for an in self.ancestorNames if an in table])
        return self.effectiveModel

    def dumpForDTD(self):
        """Write the effective content model in DTD syntax."""
        if self.effectiveContentModel():
            self.effectiveModel.dumpForDTD()


class elementType:
    """One elementType declaration, tied to an archetype by name."""

    def __init__(self, schema, elt):
        """Register the element type; a duplicate name is reported and ignored.

        If the first child element is an archetypeRef the named archetype is
        used (queued as a forward reference when not yet defined); otherwise
        an archetype of the same name is created from `elt` itself.
        """
        name = GetAttrVal(elt, "name")
        self.name = name
        self.schema = schema
        self.eventualDescendants = None     # cached by gatherDescendants()
        if name in schema.elementTypeTable:
            print("%s multiply defined as elementType" % (name,))
            return
        schema.elementTypeTable[name] = self
        dd = [e for e in elt.data if _isItem(e)]
        if dd and dd[0].label == 'archetypeRef':
            # should check for schemaName
            arn = GetAttrVal(dd[0], "name")
            self.archetypeName = arn
            if arn in schema.archetypeTable:
                schema.archetypeTable[arn].namesAsElement.append(name)
            else:
                schema.archetypeForwardRefs.append((name, arn))
        else:
            self.archetypeName = name
            archetype(schema, elt, 1)

    def gatherDescendants(self, schema):
        """Return (and cache) the sorted element names this one may stand for.

        Without refining archetypes that is just [self.name]; otherwise it is
        the union of the results for every element of every descendant
        archetype.  Returns [] (uncached) if the archetype is undefined.
        """
        if self.eventualDescendants:
            return self.eventualDescendants
        if self.archetypeName not in schema.archetypeTable:
            return []
        a = schema.archetypeTable[self.archetypeName]
        # try to avoid virtual elements -- this is wrong in principle, but works for XHTML
        if a.descendantNames:
            ed = []
            for dn in a.descendantNames:
                da = schema.archetypeTable[dn]
                for men in da.namesAsElement:
                    for en in schema.elementTypeTable[men].gatherDescendants(schema):
                        if en not in ed:
                            ed.insert(0, en)
            ed.sort()
        else:
            ed = [self.name]
        self.eventualDescendants = ed
        return ed

    def dumpForDTD(self):
        """Write this element's declaration; refinable archetypes are skipped."""
        arch = self.schema.archetypeTable.get(self.archetypeName)
        if arch is None:
            # Undefined archetype: already reported by schema.fromFile.
            return
        if arch.flex == 'refinable':
            # too simple, but nearly always right so far
            return
        # NOTE(review): only `pas("\n")` survived in the source; the text
        # between '<' and '>' was stripped.  The three lines below are the
        # reconstruction that reproduces exactly that residue - confirm
        # against the original.
        pas("<!ELEMENT %s " % self.name)
        arch.dumpForDTD()
        pas(">\n")


class attrDecl:
    """An attribute declaration; only its name is recorded."""

    def __init__(self, archetype, elt):
        self.archetype = archetype
        self.name = GetAttrVal(elt, "name")


class model:
    """Content model of an archetype.

    `type` is one of 'empty', 'any', 'data', 'mixed', 'model' or 'default'
    (the last when no model element was given).
    """

    def __init__(self, archetype, flex, elt=None):
        self.archetype = archetype
        self.elt = elt
        self.flex = flex
        self.datatype = None    # datatype name, for type 'data'
        self.restr = None       # children of the datatypeRef, for type 'data'
        self.allowed = None     # eltMatchers, for type 'mixed'
        self.group = None       # model group, for type 'model'
        if elt:
            if elt.label in ('empty', 'any'):
                self.type = elt.label
            elif elt.label == 'datatypeRef':
                self.type = 'data'
                self.datatype = GetAttrVal(elt, "name")
                self.restr = elt.data
            elif elt.label == 'mixed':
                self.type = 'mixed'
                # min/max ignored
                self.allowed = [eltMatcher(archetype.schema, e, archetype.name)
                                for e in elt.data if _isItem(e)]
            else:
                self.type = 'model'
                self.group = ChooseGroup(archetype.schema, elt, archetype.name)
        else:
            # default is empty mixed?
            self.type = 'default'

    def merge(self, others):
        """Fold the ancestors' effective models `others` into this one.

        A default model with exactly one non-default ancestor simply adopts
        that ancestor's model.  Otherwise all non-default ancestors must
        share this model's type; mismatches are reported and left unmerged.
        """
        nonDef = [m for m in others if m.type != 'default']
        if not nonDef:
            return
        if self.type == 'default' and len(nonDef) == 1:
            other = nonDef[0]
            self.type = other.type
            self.datatype = other.datatype
            self.restr = other.restr
            self.allowed = other.allowed
            self.group = other.group
            return
        otypes = []
        for m in nonDef:
            if m.type not in otypes:
                otypes.insert(0, m.type)
        if len(otypes) > 1:
            print("Error, can't merge when ancestors (%s,%s,...) are not all of same type" % (otypes[0], otypes[1]))
            return
        if self.type == otypes[0]:
            if self.type == 'mixed':
                mynames = [em.name for em in self.allowed]
                for other in nonDef:
                    for m in other.allowed:
                        if m.name not in mynames:
                            self.allowed.append(m)
                            mynames.insert(0, m.name)
            elif self.type == 'model':
                # no checking for illegal duplications
                # could simplify in the case of all sequences
                # The original put the `others` list and this model itself
                # into `components`, which cannot be dumped (and makes the
                # model contain itself).  Use the ancestors' groups followed
                # by our own, and pass the schema - ChooseGroup shows group
                # constructors take a schema first.
                # NOTE(review): `sequence` is one of the lost classes.
                ng = sequence(self.archetype.schema, [], '1', '1')
                ng.components = [g for g in
                                 [m.group for m in nonDef] + [self.group]
                                 if g is not None]
                self.group = ng
            elif self.type == 'data':
                print('Sorry, datatype refinement not yet supported')
            # any and empty are cool as such
        else:
            print("Error, can't merge ancestors of different type (%s) from me (%s)" % (otypes[0], self.type))

    def plugInModelGroups(self):
        """Let the group (if any) replace itself via its plugInModelGroups()."""
        if self.type == 'model' and self.group:
            self.group = self.group.plugInModelGroups()

    def dumpForDTD(self):
        """Write this content model in DTD syntax."""
        if self.type in ('default', 'data'):
            pas('(#PCDATA)')
        elif self.type == 'empty':
            pas('EMPTY')
        elif self.type == 'any':
            pas('ANY')
        elif self.type == 'mixed':
            pas('(#PCDATA')
            for name in UniqueNames(self.allowed):
                pas('|%s' % name)
            pas(')')
            if self.allowed:
                pas('*')
        elif self.type == 'model':
            self.group.dumpForDTD(1)
        else:
            # NOTE(review): `error` is not defined in this file; presumably
            # supplied by `from PyXML import *` - confirm.
            error("shouldn't happen")


def UniqueNames(ems):
    """Return the sorted, duplicate-free union of each matcher's dump names."""
    names = []
    for em in ems:
        for name in em.namesForDump():
            if name not in names:
                names.insert(0, name)
    names.sort()
    return names


def ChooseGroup(schema, elt, context=None):
    """Build the model-group object for content-model element `elt`.

    Returns None for non-element data.  modelGroupRef becomes an `mgr`;
    elementTypeRef / elementType become an intModelGroup holding a single
    eltMatcher; any other label is dispatched to the class of that name.
    """
    if not _isItem(elt):
        return
    if elt.label == 'modelGroupRef':
        name = GetAttrVal(elt, "name")
        # the alternative is too tedious given the possibility of forward
        # references from deep inside content models
        # NOTE(review): `mgr` is one of the lost classes.
        return mgr(schema, name,
                   GetAttrVal(elt, "minOccur"),
                   GetAttrVal(elt, "maxOccur"))
    elif elt.label in ('elementTypeRef', 'elementType'):
        m = intModelGroup(schema, [],
                          GetAttrVal(elt, "minOccur") or (elt.label == 'elementType' and '1'),
                          GetAttrVal(elt, "maxOccur"), context)
        m.components = [eltMatcher(schema, elt, context)]
        return m
    else:
        return _handlerFor(elt.label)(schema, elt,
                                      GetAttrVal(elt, "minOccur"),
                                      GetAttrVal(elt, "maxOccur"), context)


class intModelGroup:
    """A group of content-model components with occurrence bounds."""

    def __init__(self, schema, subElts, min, max, context=None):
        self.schema = schema
        self.min = min
        self.max = max
        groups = [ChooseGroup(schema, e, context) for e in subElts]
        self.components = [g for g in groups if g]
        self.context = context

    def match(self):
        # only caught here for elementType(Ref) modelElts
        pass

    def dumpForDTD(self, top=0):
        """Write the group; parenthesised only when it is the top level."""
        # only caught here for elementType(Ref) modelElts
        self.dumpComponents('', top)

    def dumpComponents(self, separator, wrap=1):
        """Write the components joined by `separator`, then the exponent."""
        if wrap:
            pas('(')
        self.components[0].dumpForDTD()
        for c in self.components[1:]:
            pas(separator)
            c.dumpForDTD()
        if wrap:
            pas(')')
        pas(self.exponent())

    def exponent(self):
        """Map (min, max) to the DTD occurrence indicator '?', '*', '' or '+'."""
        if self.min == '0':
            if self.max == '1':
                return '?'
            else:
                return '*'
        if self.min == '1' and (self.max == '1' or not self.max):
            return ''
        else:
            return '+'

    def namesForDump(self):
        """Names to list for this single-element group in a mixed model."""
        # an elt, forward if simple exponent
        exp = self.exponent()
        if exp:
            # not right in complex cases
            return ['%s%s' % (self.components[0].name, exp)]
        else:
            return self.components[0].namesForDump()

    def plugInModelGroups(self):
        """Replace each component by the result of its plugInModelGroups().

        NOTE(review): only `n=len(self.components) i=0 while i` survived in
        the source; the loop body and the return value are reconstructed
        from how model.plugInModelGroups uses the result - confirm.
        """
        n = len(self.components)
        i = 0
        while i < n:
            self.components[i] = self.components[i].plugInModelGroups()
            i = i + 1
        return self


# NOTE(review): everything that stood between intModelGroup.plugInModelGroups
# and the tail of eltMatcher.dumpForDTD is missing from the source this file
# was restored from.  That includes the classes `sequence` and `mgr` used
# above, any other group classes named after content-model element labels,
# and the original header and __init__ of eltMatcher.


class eltMatcher:
    """A reference to, or inline definition of, an element type in a model."""

    def __init__(self, schema, elt, context=None):
        """NOTE(review): reconstructed, minimal - the original was lost.

        Only the attributes read by the surviving methods are set.  The
        value 'def' for inline elementType elements is attested by
        namesForDump; 'ref' for the other case is an assumption.  The
        original presumably also called schema.checkEltRef() for references
        and built an embedded archetype for definitions - confirm.
        """
        self.schema = schema
        self.context = context
        self.name = GetAttrVal(elt, "name")
        if elt.label == 'elementType':
            self.type = 'def'
        else:
            self.type = 'ref'

    def dumpForDTD(self):
        """Write the name, or a parenthesised choice when there are several.

        NOTE(review): the source survives only from `>1:` onwards; taking
        `names` from self.namesForDump() is an assumption.
        """
        names = self.namesForDump()
        if len(names) > 1:
            pas('(%s' % names[0])
            for name in names[1:]:
                pas('|%s' % name)
            pas(')')
        else:
            pas(names[0])

    def namesForDump(self):
        """Return the element names this matcher stands for in a DTD."""
        if self.type == 'def':
            # signal that this is a weird case
            return ['%s!' % self.name]
        else:
            # allow for impact of refinement
            if self.name in self.schema.elementTypeTable:
                # Fall back to the plain name when no descendants were
                # gathered (None or []), so callers always get a name.
                return (self.schema.elementTypeTable[self.name].eventualDescendants
                        or [self.name])
            else:
                return [self.name]

    def plugInModelGroups(self):
        return self


class datatype:
    """Placeholder: datatype declarations are accepted and ignored."""

    def __init__(self, schema, elt):
        pass


class include:
    """Placeholder: include declarations are accepted and ignored."""

    def __init__(self, schema, elt):
        pass


class attrGroup:
    """Placeholder: attrGroup declarations are accepted and ignored."""

    def __init__(self, schema, elt):
        pass


class modelGroup:
    """A named model group; `group` is built from its first child element."""

    def __init__(self, schema, elt):
        name = GetAttrVal(elt, "name")
        self.group = None
        # The original tested the literal string "name" here, so duplicate
        # definitions were never detected.
        if name in schema.modelGroupTable:
            print('Error: %s already defined as a model group' % (name,))
            return
        schema.modelGroupTable[name] = self
        for d in elt.data:
            if _isItem(d):
                self.group = ChooseGroup(schema, d, name)
                return


class notation:
    """Placeholder: notation declarations are accepted and ignored."""

    def __init__(self, schema, elt):
        pass


def pas(string):
    """Write `string` to the current module-level `outfile`."""
    outfile.write(string)

# simple types
# give textonly complex types a coreType to carry their datatype part
#
# Revision 1.52 1999/11/25 13:13:46 ht
# merge in branch which switched to separate classes for ab initio types
#
# Revision 1.48.1.2 1999/11/25 10:21:24 aqw
# convert to classes for primitive types, use them as effectiveType for
# all simpleTypes
#
# Revision 1.48.1.1 1999/11/22 16:03:10 aqw
# classes for ab initio types
#