# actually apply a schema to an instance
# $Id: applyschema.py,v 1.37 2000/04/24 09:41:43 ht Exp $
from PyXML import *
from XML import *
from schema import *
import layer
import sys
import re
import xpath
import types
# Matches text made up solely of spaces, tabs, carriage returns and newlines
# (the empty string included).  validateElementModel uses it to decide
# whether character data found in non-mixed content may be ignored.
whitespace = re.compile("^[ \t\r\n]*$")
# Namespace URI of the instance attributes this module recognises
# (xsi:type and xsi:schemaLocation).
xsi = "http://www.w3.org/1999/XMLSchema-instance"
def readXML(url):
    """Parse the document at *url* and return the Element built from it.

    The input is opened with namespace processing and attribute defaulting
    switched on (the NSL_read* flags).  It is closed again even when
    building the element raises, so a failed parse no longer leaks the
    open input.  Errors raised by Open or Element are not caught here;
    they propagate to the caller.
    """
    source = Open(url, NSL_read|NSL_read_namespaces|NSL_read_defaulted_attributes)
    try:
        return Element(source, 1)
    finally:
        # Runs on both the normal and the exceptional path.
        Close(source)
def validate(element, typedef, schema):
    """Validate *element* against *typedef*; return the number of errors.

    The error counter kept on schema.factory is reset to zero first, so the
    value returned counts only the errors reported during this call.
    """
    schema.factory.errors = 0
    validateElement(element, typedef, schema)
    errorCount = schema.factory.errors
    return errorCount
def validateElement(element, type, schema,eltDecl=None):
    """Validate *element* against *type*, honouring any xsi:type override.

    element -- the instance element to check
    type    -- the declared type, or a false value when none is known (for
               example when called for an element matched by a wildcard)
    schema  -- the schema whose tables are used for type and element lookup
    eltDecl -- the element declaration, if the caller already has it;
               otherwise it is looked up by name in schema.vElementTable

    Problems are reported through verror/vwarn rather than raised.  For
    simple types the result of validateElementSimple is returned; otherwise
    the function returns None.
    """
    global vel, vtype
    # Module-level record of the element and type most recently validated.
    vel = element
    vtype = type
    xsiTypeKey = (xsi, "type")
    if element.nsattrs.has_key(xsiTypeKey):
        t = element.nsattrs[xsiTypeKey].value
        try:
            qt = QName(t, element.nsdict)
        except SchemaError:
            verror(element,"namespace not found for xsi:type %s" % t,schema)
            return
        if schema.vComplexTypeTable.has_key(qt):
            xsitype = schema.vComplexTypeTable[qt]
        elif schema.vSimpleTypeTable.has_key(qt):
            xsitype = schema.vSimpleTypeTable[qt]
        else:
            verror(element,"xsi:type %s undefined" % qt,schema)
            return
        if not xsitype.isSubtype(type):
            verror(element,"xsi:type %s is not a subtype of the declared type %s" % (qt, type.name),schema)
            return
        vwarn(element,"using xsi:type %s instead of original %s" % (qt, type.name))
        type = xsitype
    if type:
        # type may be empty when called recursively for a wildcard match
        if isinstance(type, AbInitio):
            return validateElementSimple(element, type, schema)
        if isinstance(type, simpleType):
            return validateElementSimple(element, type.primitiveType, schema)
        assignAttributeTypes(element, type.attributeDeclarations,
                             type.prohibitedSubstitutions, schema)
        validateAttributeTypes(element, element.attrTable,
                               type.attributeDeclarations, schema)
        assignChildTypes(element.children, type.elementTable,
                         type.prohibitedSubstitutions, schema)
        # The content model must be walked before the children's types are
        # checked, because the walk records which children were matched by
        # a wildcard.
        validateContentModel(element, type, schema)
        validateChildTypes(element.children, schema)
    eqn = QName(None,element.local,element.uri)
    # Use the schema we were given, not the module global that only runit
    # sets; otherwise callers that go through validate() directly fail.
    if not eltDecl and schema.vElementTable.has_key(eqn):
        eltDecl = schema.vElementTable[eqn]
    if eltDecl:
        validateKeys(eltDecl,element)
def validateElementSimple(element, type, schema):
    """Validate an element whose type is simple.

    Such an element may not carry attributes; if it does, an error is
    reported and None is returned.  Otherwise the element's text content
    is checked by validateTextModel and that function's result is returned.
    """
    tag = element.name
    if not element.attrs:
        return validateTextModel(element, type, schema)
    verror(element,"element %s has attributes %s but has type %s" % (tag, element.attrs, type),schema)
    return None
def validateText(text, type, schema):
    """Check the string *text* against *type*.

    Returns whatever the type's checkString returns; callers in this module
    treat a true result as a description of the failure.  The ur-type
    accepts anything, so None is returned for it.  *schema* is not used.
    """
    if not isinstance(type,simpleType):
        # Not a simpleType: ask the type itself.
        return type.checkString(text)
    if type==urType:
        return None
    return type.primitiveType.checkString(text)
def assignAttributeTypes(element, attrdefs, extendable, schema):
    """Record, on each attribute of *element*, the declaration it matches.

    A fresh element.attrTable is built, mapping each attribute's qualified
    name to the attribute.  Attributes in the xsi namespace are not typed:
    xsi:type and xsi:schemaLocation are accepted silently, any other is an
    error.  Every other attribute gets a.type set to its entry in
    *attrdefs*, or to None when it is undeclared -- with a warning if
    *attrdefs* has a "#any" entry, an error otherwise.
    *extendable* is accepted but not consulted.
    """
    element.attrTable = table = {}
    for attr in element.attrs.values():
        qn = QName(None,attr.local,attr.uri)
        table[qn] = attr
        if attr.uri == xsi:
            # xsi:type and xsi:schemaLocation are dealt with elsewhere
            if attr.local not in ("type", "schemaLocation"):
                verror(element,"unknown xsi attribute %s" % qn,schema)
            continue
        if attrdefs.has_key(qn):
            attr.type = attrdefs[qn]
            continue
        if attrdefs.has_key("#any"):
            # XXX check the namespaces
            vwarn(element,"allowing undeclared attribute %s because anyAttribute(%s)" % (qn, attrdefs["#any"]))
        else:
            verror(element,"undeclared attribute %s" % qn,schema)
        attr.type = None
    return
def validateAttributeTypes(element,attrs, attrdefs, schema):
    """Check required attributes are present and typed values are valid.

    element  -- element the attributes belong to (used for error location)
    attrs    -- table from qualified name to attribute, as built by
                assignAttributeTypes
    attrdefs -- table from qualified name to attribute declaration

    An error is reported for every declaration with minOccurs equal to 1
    that has no matching attribute, and for every non-xsi attribute with an
    assigned type whose value fails validateText.
    """
    for declName, decl in attrdefs.items():
        if decl.minOccurs != 1:
            continue
        if not attrs.has_key(declName):
            verror(element,"required attribute %s not present"%declName,schema)
    for attrName, attr in attrs.items():
        # xsi attributes and attributes without an assigned type are skipped
        if attrName.uri == xsi or not attr.type:
            continue
        failure = validateText(attr.value,
                               attr.type.attributeDeclaration.typeDefinition,
                               schema)
        if failure:
            verror(element,
                   "attribute type check failed for %s: %s%s"%(attrName,
                                                                attr.value,
                                                                failure),
                   schema)
def assignChildTypes(children, elementTable, extendable, schema):
    """Set child.type on every Element in *children*; always returns 1.

    The type is taken from item [1] of the child's entry in *elementTable*,
    or is None when the child's qualified name has no entry.  A missing
    entry is not reported here: whether it is an error is only known once
    the content model has been matched.  *extendable* and *schema* are not
    consulted.
    """
    for node in children:
        if node.__class__ != Element:
            continue
        key = QName(None,node.local,node.uri)
        declared = None
        if elementTable.has_key(key):
            declared = elementTable[key][1]
        node.type = declared
    return 1
def validateContentModel(element, type, schema):
    """Check the children of *element* against the content model of *type*.

    Dispatches on type.contentType: "empty" and "textOnly" have dedicated
    checkers; anything else is matched against the type's finite-state
    machine, with text permitted only when the content type is "mixed".
    """
    kind = type.contentType
    if kind == "empty":
        validateEmptyModel(element, type, schema)
        return
    if kind == "textOnly":
        validateTextModel(element, type.model, schema)
        return
    validateElementModel(element, type.fsm, kind == "mixed", schema)
def validateEmptyModel(element, type, schema):
    """Report an error if *element* has any children at all."""
    if len(element.children) == 0:
        return
    verror(element,"element %s must be empty but is not" % element.name,schema)
def validateTextModel(element, type, schema):
    """Check that *element* holds at most one text node valid for *type*.

    An error is reported when there is more than one child, when the only
    child is not Pcdata, or when the text (the empty string if there are no
    children) fails validateText.
    """
    name = element.name
    kids = element.children
    count = len(kids)
    if count > 1:
        verror(element,"element %s has %s (> 1) children but has type %s" % (name, count, type),schema)
        return
    if count == 0:
        text = ""
    elif kids[0].__class__ != Pcdata:
        verror(element,"element %s has non-text children but has type %s" % (name, type),schema)
        return
    else:
        text = kids[0].value
    res = validateText(text, type, schema)
    if res:
        verror(element,"element content failed type check: %s%s"%(text,res),
               schema)
def validateElementModel(element, fsm, mixed, schema):
    """Walk the children of *element* through the state machine *fsm*.

    Text is accepted if *mixed* is true or the text is all whitespace;
    otherwise an error is reported and the walk stops.  For each child
    element, an edge labelled with its qualified name is preferred.
    Failing that, the last wildcard (AnyWrap) edge seen is taken and the
    wildcard label is stored as the child's type.  A child with no usable
    edge is reported and the walk carries on from the same state.  An
    error is also reported if the walk finishes in a state that is not an
    end node.
    """
    state = fsm.startNode
    for child in element.children:
        kind = child.__class__
        if kind == Pcdata:
            if mixed or whitespace.match(child.value):
                continue
            verror(child,"text not allowed in element %s: |%s|" % (element.name,child.value),schema)
            return
        if kind != Element:
            continue
        qname = QName(None, child.local, child.uri)
        exact = None
        wildDest = None
        wildLabel = None
        for edge in state.edges:
            if edge.label == qname:
                exact = edge.dest
                break
            if isinstance(edge.label, AnyWrap):
                # XXX check the namespaces
                wildDest = edge.dest
                wildLabel = edge.label
        if exact:
            state = exact
        elif wildDest:
            state = wildDest
            # remember the wildcard so validateChildTypes can act on it
            child.type = wildLabel
        else:
            verror(child,"element %s not allowed here in element %s" % (qname, QName(None,element.local,element.uri)),schema)
            fsm.printme(sys.stderr)
    if not state.isEndNode:
        verror(element,"content of %s is not allowed to end here" % element.name,
               schema,1)
        fsm.printme(sys.stderr)
    return
def validateChildTypes(children, schema):
    """Validate every Element in *children* against the type assigned to it.

    A child with no type is reported as undeclared.  A child whose type is
    an AnyWrap was matched by a wildcard: it is allowed with a warning and,
    unless the wildcard's processContents is 'skip', validated against a
    declaration from schema.vElementTable if a schema for its namespace is
    loaded.  If no declaration is found and processContents is 'strict',
    that is an error.  Any other child is validated against its type.
    """
    # NOTE(review): two messages below read as if a word is missing
    # ("matched " / "for -matching") -- confirm against the upstream text.
    for child in children:
        if child.__class__ != Element:
            continue
        if not child.type:
            verror(child,
                   "undeclared element %s" % QName(None,child.local,child.uri),
                   schema)
            continue
        if child.type.__class__ != AnyWrap:
            validateElement(child, child.type, schema)
            continue
        q = QName(None,child.local,child.uri)
        vwarn(child,"allowing %s because it matched " % q)
        mode = child.type.any.processContents
        if mode == 'skip':
            continue
        if not schema.factory.schemas.has_key(child.uri):
            # only try if we might win -- needs work
            continue
        try:
            e = schema.vElementTable[q]
        except KeyError:
            e = None
        if e:
            vwarn(None,"validating it against %s" % e)
            validateElement(child, e.typeDefinition, schema)
        elif mode == 'strict':
            verror(child,"can't find a type for -matching element %s" % q,schema)
def validateKeys(decl,elt):
    """Check the identity constraints of declaration *decl* on *elt*.

    elt.keyTabs is reset, then filled by the key and unique checks (keys
    must have all their fields, uniques need not), and finally consulted
    by the keyref check.
    """
    elt.keyTabs = {}
    for constraints, required in ((decl.keys, 1), (decl.uniques, 0)):
        validateKeys1(elt, constraints, required)
    validateKeyRefs(elt, decl.keyrefs)
def validateKeys1(elt,kds,reqd):
    """Build and check the value table of each constraint in *kds*.

    elt  -- element the constraints are scoped to; its keyTabs dictionary
            receives one table per constraint, indexed by constraint name
    kds  -- the key or unique constraint definitions
    reqd -- true for keys (every selected node must supply all fields),
            false for uniques (nodes lacking a field are simply skipped)

    A duplicate value is reported, followed by the location of its first
    appearance.  For a key, the first selected node with a missing field is
    reported and the rest of that key's candidates are not examined.
    """
    for key in kds:
        tab = {}
        candidates = xpath.XPath(key.selector).find(elt)
        if candidates:
            # A real list, so the field paths can be reused per candidate.
            fps = [xpath.XPath(f) for f in key.field]
            for s in candidates:
                keyKey = buildKey(s,fps)
                if not keyKey:
                    # buildKey gives None or an empty list when fields are
                    # absent; neither can be indexed below.
                    if reqd:
                        # Report against the key's own schema: the bare name
                        # "schema" is not a parameter of this function.
                        verror(s,
                               "missing one or more fields %s from key %s"%(key.field,
                                                                            key.name),
                               key.schema)
                        break
                    continue
                if len(keyKey)>1:
                    keyKey = tuple(keyKey)
                else:
                    keyKey = keyKey[0]
                if tab.has_key(keyKey):
                    verror(s,"duplicate key %s, first appearance was"%str(keyKey),
                           key.schema)
                    where(tab[keyKey].where)
                else:
                    tab[keyKey] = s
        elt.keyTabs[key.name] = tab
def buildKey(s,fps):
    """Collect the field values that the paths *fps* select from node *s*.

    Returns a list of values, or None as soon as some path selects nothing.
    Only the first hit of each path is used (extra hits cause a warning).
    An element hit contributes the value of its first child when that is
    Pcdata; a string hit contributes itself; any other hit is warned
    about and contributes nothing.
    """
    values = []
    for fp in fps:
        hits = fp.find(s)
        if not hits:
            return None
        if len(hits)>1:
            vwarn(s,"oops, multiple field hits for %s at %s: %s"%(fp.str,s,hits))
        first = hits[0]
        if isinstance(first,Element):
            if (len(first.children)>0 and
                isinstance(first.children[0],Pcdata)):
                values.append(first.children[0].value)
            # NOTE(review): XPath gives such an element the empty string as
            # its value, but nothing is appended here, so the result can be
            # shorter than fps -- confirm whether that is intended.
        elif type(first)==types.StringType:
            values.append(first)
        else:
            vwarn(s,"oops, key value %s:%s"%(type(first),first))
    return values
def validateKeyRefs(elt,krds):
    """Check each keyref in *krds* against the tables in elt.keyTabs.

    A keyref naming a constraint with no table is reported once; the name
    is then marked 'bogus' so later keyrefs to the same name are skipped
    quietly.  Either way the remaining keyrefs are still checked (the
    previous code stopped at the first bad one).  Selected nodes that do
    not supply every field are skipped; every other node whose value is
    absent from the referenced table is reported.
    """
    for ref in krds:
        if not elt.keyTabs.has_key(ref.refer):
            elt.keyTabs[ref.refer] = 'bogus'
            verror(ref.elt,
                   "No key or unique constraint named %s declared, refed by keyref %s"%(ref.refer,ref.name),
                   ref.schema)
            continue
        keyTab = elt.keyTabs[ref.refer]
        if keyTab=='bogus':
            # already reported for an earlier keyref
            continue
        candidates = xpath.XPath(ref.selector).find(elt)
        if not candidates:
            continue
        # A real list, so the field paths can be reused per candidate.
        fps = [xpath.XPath(f) for f in ref.field]
        for s in candidates:
            keyKey = buildKey(s,fps)
            if not keyKey:
                # incomplete reference: nothing to look up for this node
                continue
            if len(keyKey)>1:
                keyKey = tuple(keyKey)
            else:
                keyKey = keyKey[0]
            if not keyTab.has_key(keyKey):
                verror(s,"no key in %s for %s"%(ref.refer,str(keyKey)),ref.schema)
def findSchemaLocs(element):
    """Return the (namespace, location) pairs from xsi:schemaLocation.

    The attribute value is a whitespace-separated list read two tokens at a
    time.  *element* and all its descendant elements are searched; the
    element's own pairs come first, followed by those of its children in
    document order.  A value with an odd number of tokens draws a warning
    and its unpaired last token is ignored, rather than raising IndexError.
    """
    pairs = []
    for a in element.attrs.values():
        if a.uri == xsi and a.local == "schemaLocation":
            # the string method avoids relying on a "string" module that
            # this file never imports by name
            tokens = a.value.split()
            if len(tokens) % 2:
                vwarn(element,
                      "odd number of entries in xsi:schemaLocation, ignoring %s" % tokens[-1])
                tokens = tokens[:-1]
            for i in range(0, len(tokens), 2):
                pairs.append((tokens[i], tokens[i+1]))
    for c in element.children:
        if isinstance(c, Element):
            pairs.extend(findSchemaLocs(c))
    return pairs
def runit(en,rns=[],k=0):
    """Schema-validate the instance document *en*, reporting on stderr.

    en  -- name/URL of the instance document
    rns -- names/URLs of schema documents to load first (only read, never
           modified, so the shared default list is harmless)
    k   -- if true, carry on validating even when the schemas have errors

    Schemas named by xsi:schemaLocation in the instance are loaded too,
    and if none is known for the root element's namespace the namespace
    URI itself is tried as a location.

    Returns 1 if validation found errors, 0 if it found none, and None when
    validation was not attempted (schema errors without *k*, or no type
    found for the root element).  The module globals s, e and t are left
    holding the schema, the instance root and the root's type.
    """
    global s,e,t
    s = None
    sys.stderr.write("schema-validating %s using schemas %s\n"%(en,rns))
    f = newFactory()
    base = f.fileNames[0]
    ren = resolveURL(base,en)
    if rns:
        s = fromFile(resolveURL(base,rns[0]),f)
        for rn in rns[1:]:
            # loaded into the factory; the returned schema is not needed
            fromFile(resolveURL(base,rn),f)
    else:
        s = schema(f,None)
        s.targetNS = '##dummy'
    e = readXML(en)    # error return?
    schemaLocs = findSchemaLocs(e)
    sys.stderr.write("schemaLocations from instance: %s\n" % schemaLocs)
    for (ns, sl) in schemaLocs:
        checkinSchema(s, ns, sl,e,ren)
    if (e.uri and
        (e.uri not in ('http://www.w3.org/XML/1998/namespace',
                       'http://www.w3.org/1999/XMLSchema-instance')) and
        not f.schemas.has_key(e.uri)):
        attempt = ("no schema yet for %s, trying namespace URI itself. . ."%
                   e.uri)
        try:
            checkinSchema(s,e.uri,e.uri,e,ren)
        except XMLinter.error:
            outcome = "failed.\n"
        else:
            outcome = "ok.\n"
        # As before, the message is written only after the attempt.
        sys.stderr.write(attempt)
        sys.stderr.write(outcome)
    ecount = prepare(f)
    if ecount:
        if k:
            km = "continuing"
        else:
            km = "stopping without validating instance"
        em = "%d errors in schemas, %s"%(ecount,km)
        if not k:
            sys.stderr.write("%s\n"%em)
            return
    else:
        em = "Schema(s) OK"
    sys.stderr.write("%s\n"%em)
    # Split the prefix off the root's name.  The old call,
    # string.find(':',e.name), had its arguments the wrong way round and so
    # looked for the name inside ':' and never found a prefix.
    cl = e.name.find(':')
    if cl>-1:
        prefix = e.name[0:cl]
    else:
        prefix = ''
    eltname = QName(prefix,e.local,e.uri)
    if not s:
        # any one will do
        s = f.sfors
    t = None
    if s and s.vElementTable.has_key(eltname):
        t = s.vElementTable[eltname].typeDefinition
    if not t:
        sys.stderr.write("can't validate, because can't find type for %s\n" % eltname)
        return
    if e and s:
        if t.name:
            sys.stderr.write("validating with type %s\n" % t.name)
        else:
            sys.stderr.write("validating with anonymous type\n")
        validate(e, t, s)
        if s.factory.errors:
            sys.stderr.write("%d validation errors\n" % s.factory.errors)
            return 1
        else:
            sys.stderr.write("No errors\n")
            return 0
def verror(elt,message,schema,two=0):
    """Write a validation error to stderr and count it.

    The location printed is elt.where2 when *two* is true, elt.where
    otherwise.  The error counter on schema.factory is increased by one.
    """
    out = sys.stderr
    out.write("Validation error: ")
    if two:
        location = elt.where2
    else:
        location = elt.where
    where(location)
    out.write(" ")
    out.write(message)
    out.write("\n")
    schema.factory.errors += 1
def vwarn(elt,message):
    """Write a validation warning to stderr.

    The location of *elt* is printed before the message unless *elt* is
    false.  Warnings are not counted as errors.
    """
    out = sys.stderr
    out.write("Validation warning: ")
    if elt:
        where(elt.where)
    out.write(message)
    out.write("\n")
if __name__=='__main__':
    # Command line: [-k ...] [instance [schema ...]]
    # Leading -k flags turn on "keep going despite schema errors".  With no
    # instance named, the bundled tiny.xml/tiny.xsd pair is used.
    argl = sys.argv[1:]
    k = 0
    while argl and argl[0]=='-k':
        k = 1
        argl = argl[1:]
    if not argl:
        runit("tiny.xml",["tiny.xsd"],k)
    else:
        runit(argl[0],argl[1:],k)
# $Log: applyschema.py,v $
# Revision 1.37 2000/04/24 09:41:43 ht
# clean up invocation some more, add k arg't to runit
#
# add version info to message
#
# Revision 1.38 2000/04/24 10:02:39 ht
# change invocation message
#
# Revision 1.37 2000/04/24 09:41:43 ht
# clean up invocation some more, add k arg't to runit
#
# Revision 1.36 2000/04/21 09:32:21 ht
# another dose of resolveURL
# use tiny only if run from command line
#
# Revision 1.35 2000/04/20 22:12:43 ht
# use resolveURL on input, schemaLocs
#
# Revision 1.34 2000/04/20 15:45:08 ht
# better handling of use of ns uri for loc
#
# Revision 1.33 2000/04/20 14:26:59 ht
# merge in private and comp branches
#
# Revision 1.32.2.5 2000/04/20 14:25:54 ht
# merge in comp branch
#
# Revision 1.32.2.4.2.9 2000/04/20 14:22:39 ht
# manage document validation schema creation and search better
#
# Revision 1.32.2.4.2.8 2000/04/20 12:03:21 ht
# Remove a few lingering effectiveTypes
# Allow better for absent types etc.
#
# Revision 1.32.2.4.2.7 2000/04/14 21:18:27 ht
# minor attr names/path changes to track schema
#
# Revision 1.32.2.4.2.6 2000/04/13 23:04:39 ht
# allow for urType as simple type (?)
# track Any->AnyWrap change
#
# Revision 1.32.2.4.2.5 2000/04/12 17:29:37 ht
# begin work on model merger,
#
# Revision 1.32.2.4.2.4 2000/04/11 18:13:17 ht
# interpolate attributeUse between complexType and attributeDeclaration,
# parallel to particle
#
# Revision 1.32.2.4.2.3 2000/04/10 15:48:46 ht
# put modest attribute validation in place
#
# Revision 1.32.2.4.2.2 2000/04/09 16:13:26 ht
# working on complex type, attribute;
# back out component.qname
#
# Revision 1.32.2.4.2.1 2000/04/05 12:12:36 ht
# accommodate changes in schema.py
#
# Revision 1.32.2.4 2000/04/01 18:01:25 ht
# various minor compatibility fixes
#
# Revision 1.32.2.3 2000/03/25 12:12:27 ht
# restructure error handling/reporting;
# allow for switching 208 on and off
#
# Revision 1.32.2.2 2000/03/21 15:57:23 ht
# fix bug in skip,
# allow 208 override
#
# Revision 1.32.2.1 2000/03/20 17:22:52 ht
# better coverage of <any>, including beginning of processcontents
#
# Revision 1.33 2000/03/20 17:20:53 ht
# better coverage of <any>, including beginning of processcontents
#
# Revision 1.32 2000/03/08 15:28:46 ht
# merge private branches back into public after 20000225 release
#
# Revision 1.31.2.3 2000/02/24 23:40:32 ht
# fix any bug
#
# Revision 1.31.2.2 2000/02/21 09:18:13 ht
# bug in handling
#
# Revision 1.31.2.1 2000/02/08 21:43:39 ht
# fork private branch to track internal drafts
# change calling sequence of checkinSchema
#
# Revision 1.31.1.1 2000/02/08 13:54:25 ht
# fork branch for non-public changes
# calling sequence to checkinSchema changed
#
# Revision 1.31 2000/01/13 16:55:42 richard
# Finally do something with xsi:type
#
# Revision 1.30 2000/01/10 17:36:34 richard
# changes for xsi:schemaLocation
#
# Revision 1.29 2000/01/08 23:33:50 ht
# towards support for xsi:schemaLocation
#
# Revision 1.28 2000/01/08 12:07:38 ht
# Change command-line arg sequence in preparation for use of schemaLocation!!!!!
# Add debug printout for schemaLocation for now
#
# Revision 1.27 2000/01/07 17:08:26 richard
# start on xsi:type
#
# Revision 1.26 2000/01/06 14:59:38 ht
# fix command line bug, display args on entry
#
# Revision 1.25 2000/01/06 14:38:56 ht
# detect cross-scope keyref and signal error
#
# Revision 1.24 2000/01/03 17:02:37 ht
# Include result of sub-ordinate key checking in overall result
# Accommodate new calling sequence for xpath.find
# add Log and Id
#
#