from PyXML import *
from graph import Graph
from Tkinter import Tk
import sys
import types

# Stream that pas() writes to.  dumpToDTD rebinds it to the DTD file while
# dumping and sets it back to sys.stdout afterwards.
outfile=sys.stdout

class schema:
  def __init__(self):
    self.archetypeTable={}
    self.archetypes=[]
    self.roots=[]
    self.archetypeForwardRefs=[]
    self.elementTypeForwardRefs=[]
    self.leaves=[]
    self.elementTypeTable={}
    self.flex='closed'
    self.modelGroupTable={}
    self.attrGroupTable={}

  def fromFile(self,filename=None,graph=1):
    if not filename:
      tk=Tk()
      filename=tk.tk.call('tk_getOpenFile','-defaultextension','.xsd',
			  '-filetypes',(('Schema Files',('*.xsd',)),
					('All files',('*.*','*'))))
      tk.destroy()
      if not filename:
	return
    if filename[1]==':':
	filename='file:///'+filename
    file=Open(filename,NSL_read+NSL_read_validate)
    root=ItemParse(file,GetNextBit(file).item)
    self.processRoot(root)
    for elt in root.data:
      self.processDecl(elt)
    Close(file)
    maxh=0
    map(archetype.addDescendants,self.archetypes)
    for e in self.elementTypeTable.values():
      e.gatherDescendants(self)
    for a in self.archetypes:
      maxh=max(maxh,a.height())
      if not a.descendantNames:
	self.leaves.append(a)
      if a.ancestorNames:
	for an in a.ancestorNames:
	  if not self.archetypeTable.has_key(an):
	    print "Undefined archetype %s referenced as ancestor of %s archetype"%(an,a.name)
    self.maxHeight=maxh
    map(archetype.rootNames,self.leaves)
    for (name,arn) in self.archetypeForwardRefs:
      if self.archetypeTable.has_key(arn):
	self.archetypeTable[arn].elements.append(name)
      else:
	print "Undefined archetype %s referenced in definition of %s elementType"%(arn,name)
    for name in self.elementTypeForwardRefs:
      if not self.elementTypeTable.has_key(name):
	print "Undefined elementType %s"%name
    self.leaves.sort(archetype.rootCompare)
    print map(lambda a:a.name,self.leaves)
    if graph:
      self.graph=self.showGraph()

  def processRoot(self,root):
    flex=GetAttrVal(root,"model")
    if flex:
      self.flex=flex

  def processDecl(self,elt):
    if type(elt)==ItemType:
      eval("%s(self,elt)"%elt.label)

  def showGraph(self):
    column=0
    g=Graph(self,font='-adobe-helvetica-medium-r-normal-*-14-*-*-*-*-*-*-*')
    nextColumn=self.leaves
    while nextColumn:
      g.showColumn(column,nextColumn)
      column=column+1
      nextColumn=filter(lambda a,c=column:a.hgt==c,self.archetypes)
    return g

  def clear(self):
    # cut all circularities
    for a in self.archetypes:
      a.clear()
    self.archetypes=self.leaves=self.roots=None
    self.archetypeTable=None

  def mapArchetypes(self,fn,keys,*args):
    if keys:
      if args:
	allArgs=[0]
	allArgs[1:1]=list(args)
	for key in keys:
	  if not self.archetypeTable.has_key(key):
	    print 'Error, reference to undefined archetype %s'%key
	    continue
	  allArgs[0]=self.archetypeTable[key]
	  apply(fn,tuple(allArgs))
      else:
	for key in keys:
	  if not self.archetypeTable.has_key(key):
	    print 'Error, reference to undefined archetype %s'%key
	    continue
	  fn(self.archetypeTable[key])
      
  def checkEltRef(self,name):
    if ((not self.elementTypeTable.has_key(name)) and
	(name not in self.elementTypeForwardRefs)):
      self.elementTypeForwardRefs[0:0]=[name]
      
  def dumpToDTD(self,filename=None):
    global outfile
    if not filename:
      tk=Tk()
      filename=tk.tk.call('tk_getSaveFile','-defaultextension','.dtd',
			  '-filetypes',(('DTD Files',('*.dtd',)),
					('All files',('*.*','*'))))
      tk.destroy()
    outfile=open(filename,'w')
    ens=self.elementTypeTable.keys()
    ens.sort()
    for en in ens:
      self.elementTypeTable[en].dumpForDTD()
    outfile.close()
    outfile=sys.stdout

class archetype:
  def __init__(self,schema,elt,isElt=0,embedding=None):
    name=GetAttrVal(elt,"name")
    self.schema=schema
    if embedding:
      name=(embedding,name)
    if schema.archetypeTable.has_key(name):
      print "%s multiply defined as archetype"%name
      return
    if not embedding:
      schema.archetypeTable[name]=self
      schema.archetypes.append(self)
    self.name=name
    self.ancestorNames=None
    self.descendantNames=[]
    self.vertex=None
    self.hgt=None
    self.rtNames=None
    self.attrDecls={}
    self.model=None
    self.effectiveModel=None
    self.flex=GetAttrVal(elt,"model") or self.schema.flex
    if isElt:
      self.namesAsElement=[name]
    else:
      self.namesAsElement=[]
    for d in elt.data:
      if type(d)!=ItemType:
        continue
      if d.label=='refines':
	self.ancestorNames=[]
	for ar in d.data:
          if type(ar)!=ItemType:
            continue
	  # should check for schemaName
	  aname=GetAttrVal(ar,"name")
	  if aname in self.ancestorNames:
	    print "%s refines %s twice"%(name,aname )
	  else:
	    self.ancestorNames.append(aname)
      elif d.label=='attrDecl':
	ad=attrDecl(self,d)
	self.attrDecls[ad.name]=ad
      elif d.label=='attrGroupRef':
        pass
      else:
	self.model=model(self,self.flex,d)
    if not self.ancestorNames:
      schema.roots.append(self)

  def clear(self):
    self.effectiveModel=self.attrDecls=self.model=None

  def addDescendants(self):
    self.schema.mapArchetypes(archetype.newDescendant,
			      self.ancestorNames,self.name)

  def newDescendant(self,dName):
    if self.flex=='closed':
      print 'Error, %s refines %s, which is not open or refinable'%(dName,self.name)
      return
    self.descendantNames.append(dName)

  def rootNames(self):
    if not self.rtNames:
      rts=[]
      if self.ancestorNames:
	for an in self.ancestorNames:
	  if not self.schema.archetypeTable.has_key(an):
	    continue
	  arns=self.schema.archetypeTable[an].rootNames()
	  if arns:
	    for rt in arns:
	      if not rt in rts:
		rts[0:0]=[rt]
	  else:
	    # it's a REAL root, so it's one of my roots for sure
	    rts[0:0]=[an]
	rts.sort()
      self.rtNames=rts
    return self.rtNames

  def height(self):
    if self.hgt==None:
      if self.descendantNames:
	mh=0
	for dn in self.descendantNames:
	  d=self.schema.archetypeTable[dn]
	  mh=max(mh,d.height())
	self.hgt=mh+1
      else:
	self.hgt=0
    return self.hgt

  def rootCompare(self,other):
    l1=len(self.rtNames)
    l2=len(other.rtNames)
    if l1<l2:
      return -1
    elif l1>l2:
      return 1
    elif self.rtNames<other.rtNames:
      return -1
    elif self.rtNames>other.rtNames:
      return 1
    elif self.ancestorNames<other.ancestorNames:
      return -1
    elif self.ancestorNames>other.ancestorNames:
      return 1
    elif self.name<other.name:
      return -1
    else:
      return 1

  def higherHighest(self,other):
    if self.highestHeight<other.highestHeight:
      return -1
    elif self.highestHeight>other.highestHeight:
      return 1
    else:
      return 0

  def effectiveContentModel(self):
    if self.effectiveModel:
      return self.effectiveModel
    if self.model:
      self.effectiveModel=model(self,self.model.flex,self.model.elt)
      self.effectiveModel.plugInModelGroups()
    else:
      self.effectiveModel=model(self,self.schema.flex)
    if self.ancestorNames:
      self.effectiveModel.merge(map(lambda an,s=self:s.schema.archetypeTable[an].effectiveContentModel(),
				    filter(lambda an,tbl=self.schema.archetypeTable:tbl.has_key(an),
					   self.ancestorNames)))
    return self.effectiveModel
    
  def dumpForDTD(self):
    if self.effectiveContentModel():
      self.effectiveModel.dumpForDTD()

class elementType:
  def __init__(self,schema,elt):
    name=GetAttrVal(elt,"name")
    self.name=name
    self.schema=schema
    self.eventualDescendants=None
    if schema.elementTypeTable.has_key(name):
      print "%s multiply defined as elementType"%name
      return
    schema.elementTypeTable[name]=self
    dd=filter(lambda e:type(e)==ItemType,elt.data)
    if dd and dd[0].label=='archetypeRef':
      # should check for schemaName
      arn=GetAttrVal(dd[0],"name")
      self.archetypeName=arn
      if schema.archetypeTable.has_key(arn):
        schema.archetypeTable[arn].namesAsElement.append(name)
      else:
	schema.archetypeForwardRefs.append((name,arn))
    else:
      self.archetypeName=name
      archetype(schema,elt,1)

  def gatherDescendants(self,schema):
    if self.eventualDescendants:
      return self.eventualDescendants
    if not schema.archetypeTable.has_key(self.archetypeName):
      return []
    a=schema.archetypeTable[self.archetypeName]
    # try to avoid virtual elements -- this is wrong in principle, but works for XHTML
    if a.descendantNames:
      ed=[]
      for dn in a.descendantNames:
        da=schema.archetypeTable[dn]
        for men in da.namesAsElement:
          for en in schema.elementTypeTable[men].gatherDescendants(schema):
            if en not in ed:
              ed[0:0]=[en]
      ed.sort()
    else:
      ed=[self.name]
    self.eventualDescendants=ed
    return ed

  def dumpForDTD(self):
    arch=self.schema.archetypeTable[self.archetypeName]
    if arch.flex=='refinable':
      # too simple, but nearly always right so far
      return
    pas("<!ELEMENT %s "%self.name)
    arch.dumpForDTD()
    pas(">\n")

class attrDecl:
  """An attribute declaration: its owning archetype and its name."""

  def __init__(self,archetype,elt):
    """Read the "name" attribute of elt and remember the owner."""
    self.name=GetAttrVal(elt,"name")
    self.archetype=archetype

class model:
  """Content model attached to an archetype.

  type is 'empty', 'any', 'data', 'mixed' or 'model', or 'default' when
  no content item was supplied.  Depending on type the details are in
  datatype/restr ('data'), allowed (list of eltMatcher, 'mixed') or
  group (a model group, 'model').
  """

  class _noChildren:
    # stands in for an item without content when merge() has to build a
    # sequence group by hand
    data=[]

  def __init__(self,archetype,flex,elt=None):
    """Classify elt, the content item of archetype, by its label."""
    self.archetype=archetype
    self.elt=elt
    self.flex=flex
    self.datatype=None
    self.restr=None
    self.allowed=None
    self.group=None
    if elt:
      if elt.label in ('empty','any'):
        self.type=elt.label
      elif elt.label=='datatypeRef':
        self.type='data'
        self.datatype=GetAttrVal(elt,"name")
        self.restr=elt.data
      elif elt.label=='mixed':
        self.type='mixed'
        allowed=[]
        for e in elt.data:    # min/max ignored
          if type(e)==ItemType:
            allowed.append(eltMatcher(archetype.schema,e,archetype.name))
        self.allowed=allowed
      else:
        self.type='model'
        self.group=ChooseGroup(archetype.schema,elt,archetype.name)
    else:
      # default is empty mixed?
      self.type='default'

  def merge(self,others):
    """Fold the models of the ancestors (others) into this one.

    'default' ancestors are ignored.  A 'default' model with exactly one
    other ancestor simply takes over that ancestor's content.  Otherwise
    all ancestors must have this model's type: 'mixed' models gain the
    ancestors' allowed elements, 'model' models become a sequence of the
    ancestors' groups followed by their own group.  Conflicts are
    printed and leave the model unchanged.
    """
    nonDef=filter(lambda m:m.type!='default',others)
    if not nonDef:
      return
    if self.type=='default' and len(nonDef)==1:
      other=nonDef[0]
      self.type=other.type
      self.datatype=other.datatype
      self.restr=other.restr
      self.allowed=other.allowed
      self.group=other.group
      return
    otypes=[]
    for m in nonDef:
      if m.type not in otypes:
        otypes[0:0]=[m.type]
    if len(otypes)>1:
      print "Error, can't merge when ancestors (%s,%s,...) are not all of same type"%(otypes[0],otypes[1])
      return
    if self.type==otypes[0]:
      if self.type=='mixed':
        mynames=map(lambda em:em.name,self.allowed)
        for other in others:
          if other.type!='mixed':
            continue
          for m in other.allowed:
            if m.name not in mynames:
              self.allowed.append(m)
              mynames[0:0]=[m.name]
      elif self.type=='model':
        # no checking for illegal duplications
        # could simplify in the case of all sequences
        parts=[]
        for other in nonDef:
          if other.group:
            parts.append(other.group)
        if self.group:
          parts.append(self.group)
        # the components are groups, never this model itself, so dumping
        # the merged group cannot loop back into this object
        ng=sequence(self.archetype.schema,model._noChildren(),'1','1',
                    self.archetype.name)
        ng.components=parts
        self.group=ng
      elif self.type=='data':
        print 'Sorry, datatype refinement not yet supported'
      # any and empty are cool as such
    else:
      print "Error, can't merge ancestors of different type (%s) from me (%s)"%(otypes[0],self.type)

  def plugInModelGroups(self):
    """Replace model group references inside a 'model' group by their targets."""
    if self.type=='model' and self.group:
      self.group=self.group.plugInModelGroups()

  def dumpForDTD(self):
    """Write the DTD content specification for this model through pas()."""
    if self.type in ('default','data'):
      pas('(#PCDATA)')
    elif self.type=='empty':
      pas('EMPTY')
    elif self.type=='any':
      pas('ANY')
    elif self.type=='mixed':
      pas('(#PCDATA')
      for name in UniqueNames(self.allowed):
        pas('|%s'%name)
      pas(')')
      if self.allowed:
        pas('*')
    elif self.type=='model':
      self.group.dumpForDTD(1)
    else:
      # NOTE(review): 'error' is not defined in this file; unless the
      # PyXML import supplies it this line raises NameError.
      error("shouldn't happen")

def UniqueNames(ems):
  """Return the sorted, duplicate-free union of namesForDump() over ems."""
  seen=[]
  for matcher in ems:
    for candidate in matcher.namesForDump():
      if candidate in seen:
        continue
      seen.insert(0,candidate)
  seen.sort()
  return seen

def ChooseGroup(schema,elt,context=None):
  """Build the group object for one item of a content model.

  Returns None for anything that is not an item.  A 'modelGroupRef'
  becomes an mgr, an 'elementTypeRef' or 'elementType' becomes an
  intModelGroup around a single eltMatcher, and any other label is
  looked up as the name of a group class and instantiated.
  """
  if type(elt)!=ItemType:
    return
  label=elt.label
  if label=='modelGroupRef':
    # the alternative is too tedious given the possibility of forward
    # references from deep inside content models
    return mgr(schema,GetAttrVal(elt,"name"),
               GetAttrVal(elt,"minOccur"),
               GetAttrVal(elt,"maxOccur"))
  if label in ('elementTypeRef','elementType'):
    least=GetAttrVal(elt,"minOccur")
    if not least:
      # an inline elementType without minOccur counts as exactly '1'
      least=(label=='elementType' and '1')
    wrapper=intModelGroup(schema,[],least,
                          GetAttrVal(elt,"maxOccur"),context)
    wrapper.components=[eltMatcher(schema,elt,context)]
    return wrapper
  # NOTE(review): eval() on a label taken from the document; only names of
  # callables accepting (schema,elt,min,max,context) work here.
  maker=eval(label)
  return maker(schema,elt,
               GetAttrVal(elt,"minOccur"),
               GetAttrVal(elt,"maxOccur"),context)

class intModelGroup:
  """Base for model groups: components plus min/max occurrence strings.

  Used directly as the wrapper around a single element matcher; the
  subclasses choose the separator written between components.
  """

  def __init__(self,schema,subElts,min,max,context=None):
    """Turn each of subElts into a component via ChooseGroup, dropping
    whatever comes back false."""
    self.schema=schema
    self.min=min
    self.max=max
    kept=[]
    for sub in subElts:
      grp=ChooseGroup(schema,sub,context)
      if grp:
        kept.append(grp)
    self.components=kept
    self.context=context

  def match(self):
    """Do nothing.  Only reached for elementType(Ref) model elements."""
    return None

  def dumpForDTD(self,top=0):
    """Write the components, in parentheses only when top is true.

    Only reached for elementType(Ref) model elements.
    """
    self.dumpComponents('',top)

  def dumpComponents(self,separator,wrap=1):
    """Write all components with separator between them, optionally in
    parentheses, followed by the occurrence indicator."""
    if wrap:
      pas('(')
    parts=self.components
    parts[0].dumpForDTD()
    i=1
    while i<len(parts):
      pas(separator)
      parts[i].dumpForDTD()
      i=i+1
    if wrap:
      pas(')')
    pas(self.exponent())

  def exponent(self):
    """Return the DTD occurrence indicator implied by min and max."""
    if self.min=='0':
      if self.max=='1':
        return '?'
      return '*'
    if self.min=='1' and (self.max=='1' or not self.max):
      return ''
    return '+'

  def namesForDump(self):
    """Return the names to list for this wrapper inside a choice.

    With an occurrence indicator the first component's name is returned
    with the indicator appended; without one the first component
    answers for itself.
    """
    # an elt, forward if simple exponent
    suffix=self.exponent()
    only=self.components[0]
    if not suffix:
      return only.namesForDump()
    return ['%s%s'%(only.name,suffix)]  # not right in complex cases

  def plugInModelGroups(self):
    """Replace every component by its plugged-in form; return self."""
    for i in range(len(self.components)):
      self.components[i]=self.components[i].plugInModelGroups()
    return self

class all(intModelGroup):
  """Group built from an 'all' item; dumped with '&' between components."""

  def __init__(self,schema,elt,min,max,context):
    """Use the children of elt as the sub-elements of the group."""
    children=elt.data
    intModelGroup.__init__(self,schema,children,min,max,context)

  def dumpForDTD(self,top=0):
    """Write the parenthesised components; top is not consulted."""
    self.dumpComponents('&',1)

  def namesForDump(self):
    """Return the group itself, for dumping nested inside a choice."""
    # nested case
    return [self]
    
class mgr:
  """Reference, by name, to a model group that may be defined later."""

  def __init__(self,schema,name,min,max):
    """Keep the schema, the referenced name and the occurrence bounds."""
    self.schema=schema
    self.name=name
    self.min=min
    self.max=max

  def real(self):
    """Return the modelGroup registered under self.name.

    When none is registered an error is printed and None is returned.
    """
    table=self.schema.modelGroupTable
    if not table.has_key(self.name):
      print 'Error: %s never defined as a model group'%self.name
      return None
    return table[self.name]

  def plugInModelGroups(self):
    """Return the referenced group with its own references plugged in,
    or None when the group is undefined."""
    target=self.real()
    if not target:
      return None
    return target.group.plugInModelGroups()

class choice(intModelGroup):
  """Group built from a 'choice' item; dumped with '|' between alternatives."""

  def __init__(self,schema,elt,min,max,context):
    """Use the children of elt as the sub-elements of the group."""
    children=elt.data
    intModelGroup.__init__(self,schema,children,min,max,context)

  def dumpForDTD(self,top=0):
    """Write the sorted, duplicate-free alternatives in parentheses.

    Each component contributes what its namesForDump() returns: strings
    are written as they are, anything else dumps itself.
    """
    pas('(')
    alternatives=[]
    for member in self.components:
      for item in member.namesForDump():
        if item not in alternatives:
          alternatives.insert(0,item)
    alternatives.sort()   # groups to the ?end?start?
    self._dumpAlternative(alternatives[0])
    for item in alternatives[1:]:
      pas('|')
      self._dumpAlternative(item)
    pas(')')
    pas(self.exponent())

  def _dumpAlternative(self,item):
    # a plain name is written directly, a nested group writes itself
    if type(item)==types.StringType:
      pas(item)
    else:
      item.dumpForDTD()

  def namesForDump(self):
    """Return the group itself, for dumping nested inside a choice."""
    # nested case
    return [self]
    
class sequence(intModelGroup):
  """Group built from a 'sequence' item; dumped with ',' between components."""

  def __init__(self,schema,elt,min,max,context):
    """Use the children of elt as the sub-elements of the group."""
    children=elt.data
    intModelGroup.__init__(self,schema,children,min,max,context)

  def dumpForDTD(self,top=0):
    """Write the parenthesised components; top is not consulted."""
    self.dumpComponents(',',1)

  def namesForDump(self):
    """Return the group itself, for dumping nested inside a choice."""
    # nested case
    return [self]
    
class eltMatcher:
  """One element inside a content model.

  type is 'ref' for an elementTypeRef and 'def' for an elementType
  defined in place.  archetype and archName stay None on the paths
  that do not resolve them.
  """

  def __init__(self,schema,elt,context):
    """Build the matcher from item elt found inside context.

    A reference is noted with schema.checkEltRef.  An in-place
    definition is linked to its archetype: a named one when the first
    child item is an 'archetypeRef' (queued as a forward reference if
    not defined yet), otherwise a new embedded archetype.
    """
    self.schema=schema
    self.name=GetAttrVal(elt,"name")
    # present on every path so later code can test them
    self.archetype=None
    self.archName=None
    if elt.label=='elementTypeRef':
      self.type='ref'
      schema.checkEltRef(self.name)
    else:
      self.type='def'
      dd=filter(lambda e:type(e)==ItemType,elt.data)
      if dd and dd[0].label=='archetypeRef':
        # should check for schemaName
        self.archName=GetAttrVal(dd[0],"name")
        if schema.archetypeTable.has_key(self.archName):
          self.archetype=schema.archetypeTable[self.archName]
          # relies on the archetype carrying an 'elements' list
          self.archetype.elements.append((context,self.name))
        else:
          schema.archetypeForwardRefs.append(((context,self.name),self.archName))
      else:
        self.archetype=archetype(schema,elt,1,context)

  def dumpForDTD(self,top=0):
    """Write the name, or a parenthesised '|' list when there are several."""
    # come here if NOT within a CHOICE
    names=UniqueNames([self])
    if len(names)>1:
      pas('(%s'%names[0])
      for name in names[1:]:
        pas('|%s'%name)
      pas(')')
    else:
      pas(names[0])

  def namesForDump(self):
    """Return the list of names to write for this element.

    An in-place definition yields its name followed by '!'.  A reference
    yields the eventualDescendants of the element type it names; when
    that element type is unknown, or has no descendants recorded (None),
    the reference's own name is used so callers always get a list.
    """
    if self.type=='def':
      return ['%s!'%self.name]   # signal that this is a weird case
    # allow for impact of refinement
    table=self.schema.elementTypeTable
    if table.has_key(self.name):
      names=table[self.name].eventualDescendants
      if names is not None:
        return names
    return [self.name]

  def plugInModelGroups(self):
    """Return self; a single element contains nothing to plug in."""
    return self

class datatype:
  """Placeholder for 'datatype' declarations: accepted, nothing recorded."""

  def __init__(self,schema,elt):
    """Ignore both the schema and the declaration item."""

class include:
  """Placeholder for 'include' declarations: accepted, nothing recorded."""

  def __init__(self,schema,elt):
    """Ignore both the schema and the declaration item."""

class attrGroup:
  """Placeholder for 'attrGroup' declarations: accepted, nothing recorded."""

  def __init__(self,schema,elt):
    """Ignore both the schema and the declaration item."""

class modelGroup:
  """A named model group; group holds the parsed content."""

  def __init__(self,schema,elt):
    """Register the group under its "name" attribute and parse its content.

    The first child item of elt becomes self.group.  A name that is
    already registered is reported and the existing entry is kept.
    """
    name=GetAttrVal(elt,"name")
    # stays None when the declaration is a duplicate or has no child item
    self.group=None
    # look the actual name up, not the literal string "name"
    if schema.modelGroupTable.has_key(name):
      print 'Error: %s already defined as a model group'%name
      return
    schema.modelGroupTable[name]=self
    for d in elt.data:
      if type(d)==ItemType:
        self.group=ChooseGroup(schema,d,name)
        return

class notation:
  """Placeholder for 'notation' declarations: accepted, nothing recorded."""

  def __init__(self,schema,elt):
    """Ignore both the schema and the declaration item."""

def pas(string):
  """Write string, unchanged, to whatever the module-level outfile is."""
  sink=outfile
  sink.write(string)
# simple types
# give textonly complex types a coreType to carry their datatype part
#
# Revision 1.52  1999/11/25 13:13:46  ht
# merge in branch which switched to separate classes for ab initio types
#
# Revision 1.48.1.2  1999/11/25 10:21:24  aqw
# convert to classes for primitive types, use them as effectiveType for
# all simpleTypes
#
# Revision 1.48.1.1  1999/11/22 16:03:10  aqw
# classes for ab initio types
#