# Copyright (C) 2000 LTG -- See accompanying COPYRIGHT and COPYING files
# actually apply a schema to an instance
# $Id: applyschema.py,v 1.74.2.11 2000/12/06 09:21:05 ht Exp $
# TODO: enforce datatype constraints on xsi:type,
# null, (noNamespace)schemaLocation
from PyLTXML import *
import XML
import XMLInfoset
import LTXMLInfoset
import PSVInfoset
import os
import XMLSchema
import layer
import sys
import re
import xpath
import types
import string
from urlparse import urljoin
import tempfile
import traceback
import asInfoset
import time
# Recognizes character data made up solely of blanks, tabs, CRs and newlines
# (the empty string included).
whitespace = re.compile("^[ \t\r\n]*$")

# Namespace name used to recognize the xsi: instance attributes.
xsi = "http://www.w3.org/2000/10/XMLSchema-instance"

# RCS keyword expansions; the individual fields are picked out by position.
vsraw = "$Revision: 1.74.2.11 $ of $Date: 2000/12/06 09:21:05 $"
vss = vsraw.split()

# Version banner: first word of XMLSchema.versionString, then this file's
# revision number, date and time.
vs = "XSV %s/%s of %s %s" % (XMLSchema.versionString.split()[0],
                             vss[1], vss[5], vss[6])

# When true, vwarn() returns without recording anything.
dontWarn = 1
def readXML(url):
    """Load the instance document and keep it in the module-global ``doc``.

    url -- location of the document; when false the document is read from
           standard input instead.

    Returns a (document element, character set) pair, or (None, None) when
    LTXMLinter.error is raised while reading.
    """
    global doc
    try:
        if url:
            doc = LTXMLInfoset.documentFromURI(url)
        else:
            source = FOpen(sys.stdin,
                           NSL_read+NSL_read_all_bits+NSL_read_namespaces+
                           NSL_read_no_consume_prolog)
            try:
                doc = LTXMLInfoset.documentFromFile(source)
            finally:
                # release the parser handle even when parsing fails
                Close(source)
        return (doc.documentElement, doc.documentEntity.charset)
    except LTXMLinter.error:
        return (None, None)
def assess(element):
    """Store the assessment outcome on element.

    Sets element.validationAttempted to 'full', 'none' or 'partial' and
    element.validity to 'valid', 'invalid' or 'unknown', using the element's
    own validatedType and errorCode together with the properties already
    stored on its element children.  When the element has a validatedType,
    element.typeDefinition is set to a PSVInfoset.ComplexTypeDefinition
    built from it.
    """
    def own(item, name):
        # value of an attribute held in the instance's own __dict__, else false
        return item.__dict__.has_key(name) and getattr(item, name)

    kids = []
    for item in element.chunkedChildren:
        if isinstance(item, XMLInfoset.Element):
            kids.append(item)

    # --- validationAttempted -------------------------------------------
    every_full = 1
    every_none = 1
    for kid in kids:
        attempted = own(kid, "validationAttempted")
        if attempted != 'full':
            every_full = 0
        if attempted and attempted != 'none':
            every_none = 0

    if not kids:
        # a leaf is judged on its own type alone
        if element.validatedType:
            outcome = 'full'
        else:
            outcome = 'none'
    elif every_full and element.validatedType:
        outcome = 'full'
    elif every_none and not element.validatedType:
        outcome = 'none'
    else:
        outcome = 'partial'
    element.validationAttempted = outcome

    # --- validity ------------------------------------------------------
    if element.errorCode:
        element.validity = 'invalid'
    else:
        failed = 0       # an invalid child, or a strict child with no type
        doubtful = 0     # a typed child that is not known to be valid
        for kid in kids:
            kid_strict = own(kid, "strict")
            kid_type = own(kid, "validatedType")
            kid_validity = own(kid, "validity")
            if kid_validity == 'invalid':
                failed = 1
            if kid_strict and not kid_type:
                failed = 1
            if kid_type and kid_validity != 'valid':
                doubtful = 1
        if failed:
            element.validity = 'invalid'
        elif doubtful:
            element.validity = 'unknown'
        else:
            element.validity = 'valid'

    if element.validatedType:
        element.typeDefinition = PSVInfoset.ComplexTypeDefinition(
            element, element.validatedType)
def validate(element, typedef, schema, eltDecl):
    """Validate element against typedef and return the running error count.

    Makes sure schema.factory has an ``errors`` counter (starting it at 0
    when absent), hands the work to validateElement, and returns the
    counter's value afterwards.
    """
    factory = schema.factory
    if not hasattr(factory, 'errors'):
        factory.errors = 0
    validateElement(element, typedef, schema, eltDecl)
    return schema.factory.errors
def validateElement(element, type, schema, eltDecl=None):
    """Assess one element (and, recursively, its children) against type.

    element -- the element information item to assess
    type    -- the type to validate against, or a false value for lax
               assessment; an xsi:type attribute on the element replaces it
    schema  -- schema whose tables are consulted and whose factory counts
               the errors
    eltDecl -- the governing element declaration, if the caller knows it;
               otherwise it is looked up by name in schema.vElementTable

    Every exit path calls assess(element); problems are reported through
    verror.  Nothing useful is returned.
    """
    global vel, vtype
    # remember the element/type being processed in module globals
    vel = element
    vtype = type
    element.validatedType = type
    if not eltDecl:
        eqn = XMLSchema.QName(None, element.localName,
                              element.namespaceName or None)
        # use the schema we were handed rather than the module-global one
        if schema.vElementTable.has_key(eqn):
            eltDecl = schema.vElementTable[eqn]
    nullable = eltDecl and eltDecl.nullable # TODO: is this right if no eltDecl?
    nulled = 0
    if element.attributes.has_key((xsi, "null")):
        if not nullable:
            verror(element,
                   "xsi:null specified on non-nullable element %s" % element.originalName,
                   schema, "cvc-elt.1.1")
            print("bad nullable: eltDecl = %s" % eltDecl)
            assess(element)
            return
        nulled = (element.attributes[(xsi, "null")].normalizedValue == "true")
    if element.attributes.has_key((xsi, "type")):
        t = element.attributes[(xsi, "type")].normalizedValue
        res = XMLSchema.QNameST.checkString(None, t, element)
        if res:
            verror(element, "xsi:type %s is not a valid qname: %s " % (t, res),
                   schema, "cvc-elt.2.1")
            # The value cannot be resolved: indexing inScopeNamespaces with
            # an undeclared prefix below would raise, so stop here like the
            # other xsi:type failures do.
            assess(element)
            return
        (tp, tl) = XMLSchema.splitQName(t)
        qt = XMLSchema.QName(tp, tl, element.inScopeNamespaces[tp])
        if schema.vTypeTable.has_key(qt):
            xsitype = schema.vTypeTable[qt]
        else:
            verror(element, "xsi:type %s undefined" % qt, schema, "cvc-elt.2.2")
            assess(element)
            return
        if type and not xsitype.isSubtype(type):
            verror(element,
                   "xsi:type %s is not a subtype of the declared type %s" % (qt,
                                                                            type.name),
                   schema, "cvc-elt.2.3")
            assess(element)
            return
        if type:
            vwarn(element,
                  "using xsi:type %s instead of original %s" % (qt, type.name))
        else:
            vwarn(element, "using xsi:type %s" % qt)
        type = xsitype
        element.validatedType = type
    # no type at all means lax assessment (recursive wildcard call or top level)
    lax = not type
    if nulled:
        validateElementNull(element, type, schema)
    if type:
        # TODO: check type is not abstract
        if ((not type == XMLSchema.urType) and
            (isinstance(type, XMLSchema.AbInitio) or
             isinstance(type, XMLSchema.SimpleType))):
            # simple type: only the text content needs checking
            if not nulled:
                validateElementSimple(element, type, schema)
            if eltDecl:
                validateKeys(eltDecl, element)
            assess(element)
            return
        # a complexType
        ad = type.attributeDeclarations
        ps = type.prohibitedSubstitutions
        et = type.elementTable
    else:
        ps = []
        ad = {}
        et = {}
    assignAttributeTypes(element, ad, ps, schema, lax)
    validateAttributeTypes(element, element.attrTable, ad, schema)
    if not nulled:
        assignChildTypes(element.chunkedChildren, et, ps, schema, lax)
        # the content model has to be walked before the children are
        # validated, because that walk records what each child matched
        if type:
            validateContentModel(element, type, schema)
        validateChildTypes(element.chunkedChildren, schema, lax)
    if eltDecl:
        validateKeys(eltDecl, element)
    assess(element)
def validateElementNull(element, type, schema):
    """Report cvc-elt.1.2.1 when a nulled element has any chunked children.

    The type argument is accepted but not consulted.
    """
    if len(element.chunkedChildren) == 0:
        return
    verror(element,
           "element %s is nulled but is not empty" % element.originalName,
           schema, "cvc-elt.1.2.1")
    # TODO: should check for fixed value constraint
def validateElementSimple(element, type, schema):
    """Validate an element whose type is simple.

    The element may carry xsi: attributes only; the first attribute from any
    other namespace is reported (cvc-elt.4.1.1) and checking stops.  With no
    such attribute the text content is checked by validateTextModel, whose
    result is returned.
    """
    attrs = element.attributes
    if attrs:
        for attr in attrs.values():
            if attr.namespaceName == xsi:
                continue
            verror(element,
                   "element {%s}%s with simple type not allowed attributes"%
                   (element.namespaceName, element.localName),
                   schema, "cvc-elt.4.1.1")
            return
    return validateTextModel(element, type, schema)
def assignAttributeTypes(element, attrdefs, extendable, schema, lax):
    """Index the element's attributes by QName and give each one a type.

    element.attrTable is rebuilt as a QName -> attribute table.  For each
    attribute outside the xsi namespace, attr.type becomes (in order of
    preference) the declaration found in attrdefs, when lax the global
    declaration from schema.vAttributeTable (namespaced attributes only,
    otherwise None), or the "#any" wildcard from attrdefs if it allows the
    attribute's namespace.  Anything else is reported as undeclared and gets
    a type of None.  xsi: attributes are only checked against the list of
    known names.  The extendable argument is not consulted.
    """
    table = {}
    element.attrTable = table
    for attr in element.attributes.values():
        an = XMLSchema.QName(None, attr.localName, attr.namespaceName or None)
        table[an] = attr

        if attr.namespaceName == xsi:
            if attr.localName not in ('type','null','schemaLocation','noNamespaceSchemaLocation'):
                verror(element, "unknown xsi attribute %s" % an, schema,
                       "cvc-complex-type.1.3")
            continue

        if attrdefs.has_key(an):
            attr.type = attrdefs[an].attributeDeclaration
            continue

        if lax:
            if attr.namespaceName and schema.vAttributeTable.has_key(an):
                attr.type = schema.vAttributeTable[an]
            else:
                attr.type = None
            continue

        if (attrdefs.has_key("#any") and
            attrdefs["#any"].attributeDeclaration.allows(attr.namespaceName)):
            attr.type = attrdefs["#any"].attributeDeclaration
            continue

        verror(element, "undeclared attribute %s" % an, schema,
               "cvc-complex-type.1.3")
        attr.type = None
    return
def validateAttributeTypes(element,attrs, attrdefs, schema):
# Check the attributes of element against the types assigned to them.
#   element  -- element the attributes belong to; errors are reported on it
#   attrs    -- attribute items keyed by QName (validateElement passes
#               element.attrTable)
#   attrdefs -- table of attribute uses; each value's minOccurs is consulted
#   schema   -- handed on to verror and to the type validators
# NOTE(review): indentation is missing in this copy of the file, so it cannot
# be determined here whether the final `else:` below pairs with `if res:` or
# with the `if an.uri==xsi / elif a.type` chain -- confirm against a properly
# indented copy before changing this function.
# check that each attribute matches its type
# check that all required attributes are present
# TODO: add defaulted attributes (shouldn't need to check their types)
# checked fixed values
# first loop: an attribute use with minOccurs==1 must have an entry in attrs
for (adq,ad) in attrdefs.items():
if ad.minOccurs==1 and not attrs.has_key(adq):
verror(element,"required attribute %s not present"%adq,schema,
'cvc-complex-type.1.4')
# second loop: validate the value of every attribute actually present
for (an,a) in attrs.items():
if an.uri==xsi:
# xsi: attributes are not type-checked; the normalized value is copied as is
# TODO: whitespace!!!
a.schemaNormalizedValue=a.normalizedValue
elif a.type:
if isinstance(a.type,XMLSchema.Wildcard):
# the attribute was admitted by a wildcard: let the wildcard validate it
res=a.type.validate(a,schema,'attribute',element)
else:
if a.type.typeDefinition:
res=a.type.typeDefinition.validateText(a.normalizedValue,element, schema)
# a 'fixed' value constraint is compared with the normalized value as a string
if a.type.valueConstraint and a.type.valueConstraint[0]=='fixed':
if a.normalizedValue!=a.type.valueConstraint[1]:
verror(element,"fixed value did not match for attribute %s: %s!=%s"%(an,a.normalizedValue,a.type.valueConstraint[1]),schema,"cvc-attribute.1.3")
else:
res=None
# a true res is the validator's failure message; the error is recorded on the
# attribute item (last argument) rather than on the element
if res:
verror(element,"attribute type check failed for %s: %s%s"%(an,
a.normalizedValue,
res),
schema,'cvc-attribute.1.2',0,None,a)
else:
# TODO: whitespace!!!
a.schemaNormalizedValue=a.normalizedValue
def assignChildTypes(children, elementTable, extendable, schema, lax):
    """Attach a type and declaration to every element among children.

    A child declared in elementTable gets that declaration's typeDefinition;
    when lax, a namespaced child declared in schema.vElementTable gets the
    global declaration's; any other child gets type None and eltDecl None.
    Not finding a declaration is not reported here, since whether it is an
    error depends on what the child matches in the content model.  The
    extendable argument is not consulted.  Always returns 1.
    """
    for kid in children:
        if not isinstance(kid, XMLInfoset.Element):
            continue
        qname = XMLSchema.QName(None, kid.localName, kid.namespaceName or None)
        if elementTable.has_key(qname):
            decl = elementTable[qname]
            try:
                kid.type = decl.typeDefinition
            except:
                # debugging aid: show the declaration whose type lookup failed
                print(decl)
            kid.eltDecl = decl
        elif lax and kid.namespaceName and schema.vElementTable.has_key(qname):
            decl = schema.vElementTable[qname]
            kid.type = decl.typeDefinition
            kid.eltDecl = decl
        else:
            kid.type = None
            kid.eltDecl = None
    return 1
def validateContentModel(element, type, schema):
    """Check the element's content against the content model of type.

    Dispatches on type.contentType: "empty" goes to validateEmptyModel,
    "textOnly" to validateTextModel (with type.model as the simple type),
    and anything else to validateElementModel with type.fsm, telling it
    whether the content is "mixed".
    """
    kind = type.contentType
    if kind == "empty":
        validateEmptyModel(element, type, schema)
        return
    if kind == "textOnly":
        validateTextModel(element, type.model, schema)
        return
    validateElementModel(element, type.fsm, kind == "mixed", schema)
def validateEmptyModel(element, type, schema):
    """Report cvc-complex-type.1.2 when element has any chunked children.

    The type argument is accepted but not consulted.
    """
    if len(element.chunkedChildren) == 0:
        return
    verror(element,
           "element %s must be empty but is not" % element.originalName,
           schema,
           "cvc-complex-type.1.2")
def validateTextModel(element, type, schema):
    """Validate text-only content against a simple type.

    An element child is reported (cvc-complex-type.1.2.2) and ends the
    check.  Otherwise the text -- the characters of the first chunked child
    when any character chunk was seen, else the empty string -- is passed to
    type.validateText.  A failure message from it is reported; on success
    the text is stored as element.schemaNormalizedValue.
    """
    kids = element.chunkedChildren
    sawText = 0
    for kid in kids:
        if isinstance(kid, XMLInfoset.Characters):
            sawText = 1
        elif isinstance(kid, XMLInfoset.Element):
            verror(element,
                   "element {%s}%s with simple type not allowed element children"%
                   (element.namespaceName, element.localName),
                   schema, "cvc-complex-type.1.2.2")
            # TODO: mark this (and any others) as not validated
            return

    if sawText:
        text = kids[0].characters
    else:
        text = ""
    problem = type.validateText(text, element, schema)
    # TODO: whitespace
    if problem:
        verror(element,
               "element content failed type check: %s%s" % (text, problem),
               schema, "cvc-complex-type.1.2.2")
    else:
        element.schemaNormalizedValue = text
def validateElementModel(element, fsm, mixed, schema):
    """Walk the element's children through the content-model automaton.

    element -- element whose chunked children are checked
    fsm     -- automaton offering startNode and asXML(); nodes offer edges
               and isEndNode, edges offer label and dest
    mixed   -- true when non-whitespace text is allowed between children
    schema  -- passed on to verror

    A child whose QName equals an edge label follows that edge and is marked
    strict.  Failing that, the last wildcard edge allowing its namespace is
    followed; the child's strict flag then mirrors the wildcard's
    processContents and the wildcard becomes the child's type.  A child
    matching no edge is reported and the walk carries on from the same node.
    Disallowed text is reported and ends the walk at once.  Finishing on a
    node that is not an end node is reported as well.
    """
    state = fsm.startNode
    for item in element.chunkedChildren:
        if isinstance(item, XMLInfoset.Characters):
            if mixed or whitespace.match(item.characters):
                continue
            verror(item,
                   "text not allowed in element %s: |%s|" %
                   (element.originalName, item.characters),
                   schema, "cvc-complex-type.1.2.3")
            return
        if not isinstance(item, XMLInfoset.Element):
            continue

        qname = XMLSchema.QName(None, item.localName, item.namespaceName or None)
        exact = None
        viaWildcard = None
        for edge in state.edges:
            label = edge.label
            if label == qname:
                exact = edge.dest
                item.strict = 1
                break
            if isinstance(label, XMLSchema.Wildcard):
                if label.allows(item.namespaceName):
                    viaWildcard = edge.dest
                    wildcard = label

        if exact:
            state = exact
        elif viaWildcard:
            state = viaWildcard
            item.strict = (wildcard.processContents == 'strict')
            # a type assigned earlier is simply replaced by the wildcard
            item.type = wildcard
        else:
            verror(item,
                   "element %s not allowed here in element %s:\n"%
                   (qname, XMLSchema.QName(None, element.localName,
                                           element.namespaceName or None)),
                   schema, "cvc-complex-type.1.2.4", 0, fsm.asXML())

    if not state.isEndNode:
        verror(element,
               "content of %s is not allowed to end here:\n"%
               element.originalName,
               schema, "cvc-complex-type.1.2.4", 1, fsm.asXML())
    return
def validateChildTypes(children, schema, lax):
    """Validate each element among children against the type assigned to it.

    A typed child with a declaration is validated by validateElement; a
    typed child without one has a wildcard as its type, and the wildcard's
    own validate method is used.  An untyped child is validated laxly when
    lax is true and reported as undeclared (src-resolve) otherwise.
    """
    for kid in children:
        if not isinstance(kid, XMLInfoset.Element):
            continue
        kidType = kid.type
        if kidType:
            if kid.eltDecl:
                validateElement(kid, kidType, schema, kid.eltDecl)
            else:
                # kidType is actually a wildcard
                kidType.validate(kid, schema, 'element', kid)
        elif lax:
            # TODO: check that this branch ever happens at all
            # TODO: record impact of missing type in PSVI
            validateElement(kid, None, schema) # will be lax because no type
        else:
            verror(kid,
                   "undeclared element %s"%
                   XMLSchema.QName(None, kid.localName, kid.namespaceName or None),
                   schema, "src-resolve")
def validateKeys(decl,elt):
    """Check the identity constraints that decl places on elt.

    Starts elt.keyTabs as an empty table, then checks decl.keys (fields
    required), decl.uniques (fields optional) and finally decl.keyrefs.
    """
    elt.keyTabs = {}
    keys = decl.keys
    validateKeys1(elt, keys, 1)
    uniques = decl.uniques
    validateKeys1(elt, uniques, 0)
    keyrefs = decl.keyrefs
    validateKeyRefs(elt, keyrefs)
def validateKeys1(elt,kds,reqd):
    """Build and check the value table of each key/unique constraint.

    elt  -- element the constraints are scoped to; each finished table is
            stored in elt.keyTabs under the constraint's name
    kds  -- constraint definitions (selector, fields, name, schema)
    reqd -- true for key constraints: a candidate lacking a field value is
            then reported.  In either case such a candidate ends the scan of
            the remaining candidates for that constraint.

    A repeated value is reported with the location of its first holder.
    """
    if reqd:
        dupCode = "cvc-identity-constraint.2.2.3"
    else:
        dupCode = "cvc-identity-constraint.2.1.2"

    for key in kds:
        seen = {}
        candidates = xpath.XPath(key.selector).find(elt)
        if candidates:
            fieldPaths = [xpath.XPath(field) for field in key.fields]
            for node in candidates:
                values = buildKey(node, fieldPaths)
                if not values:
                    if reqd:
                        verror(node,
                               "missing one or more fields %s from key %s"%(key.fields,
                                                                            key.name),
                               key.schema, "cvc-identity-constraint.2.2.2")
                    break
                # a single field is stored bare, several as a tuple
                if len(values) > 1:
                    entry = tuple(values)
                else:
                    entry = values[0]
                if seen.has_key(entry):
                    verror(node, "duplicate key %s, first appearance was %s"%
                           (str(entry),
                            XMLSchema.whereString(seen[entry].where)),
                           key.schema, dupCode)
                else:
                    seen[entry] = node
        elt.keyTabs[key.name] = seen
def buildKey(s,fps):
    """Collect the field values of candidate node s.

    s   -- node selected by an identity constraint's selector
    fps -- compiled field paths (objects with find() and str)

    Returns None as soon as a field path selects nothing.  Otherwise returns
    the list of values collected from the first hit of each path: the
    leading character chunk of an element hit, or the hit itself when
    XML.somestring accepts its type.
    """
    values = []
    for fp in fps:
        hits = fp.find(s)
        if not hits:
            return None
        if len(hits) > 1:
            # TODO error or shouldnt?
            vwarn(s, "oops, multiple field hits for %s at %s: %s"%(fp.str, s, hits))
        first = hits[0]
        if isinstance(first, XMLInfoset.Element):
            kids = first.chunkedChildren
            if len(kids) > 0 and isinstance(kids[0], XMLInfoset.Characters):
                values.append(kids[0].characters)
            # NOTE(review): XPath gives an element without leading text the
            # empty string as its value, yet nothing is appended here, so the
            # list ends up shorter than fps -- confirm this is intended.
        elif XML.somestring(type(first)):
            values.append(first)
        else:
            # TODO error or shouldnt?
            vwarn(s, "oops, key value %s:%s"%(type(first), first))
    return values
def validateKeyRefs(elt,krds):
    """Check that every keyref value occurs in the table it refers to.

    elt  -- element whose keyTabs (built by validateKeys1) are consulted
    krds -- keyref definitions (refer, name, selector, fields, schema)

    A keyref naming an unknown table is reported, the name is marked
    'bogus' in elt.keyTabs, and processing of all keyrefs stops; meeting a
    table already marked 'bogus' stops it too.  For a usable table each
    candidate's value is looked up and a miss is reported; a candidate
    without a complete value ends the scan for that keyref.
    """
    for ref in krds:
        if not elt.keyTabs.has_key(ref.refer):
            elt.keyTabs[ref.refer] = 'bogus'
            verror(elt,
                   "No key or unique constraint named %s declared, refed by keyref %s"%(ref.refer,ref.name),
                   ref.schema, "cvc-identity-constraint.2.3.2")
            break
        keyTab = elt.keyTabs[ref.refer]
        if keyTab == 'bogus':
            break

        candidates = xpath.XPath(ref.selector).find(elt)
        if not candidates:
            continue
        fieldPaths = [xpath.XPath(field) for field in ref.fields]
        for node in candidates:
            values = buildKey(node, fieldPaths)
            if not values:
                break
            if len(values) > 1:
                wanted = tuple(values)
            else:
                wanted = values[0]
            if not keyTab.has_key(wanted):
                verror(node, "no key in %s for %s"%(ref.refer, str(wanted)),
                       ref.schema,
                       "cvc-identity-constraint.2.3.2")
def findSchemaLocs(element,schema):
    """Gather schema location hints from element and all its descendants.

    Returns a list of (namespace, location) pairs: one per pair of tokens in
    each xsi:schemaLocation value, and (None, location) for each
    xsi:noNamespaceSchemaLocation.  A schemaLocation value with an odd
    number of tokens is reported through verror for its unpaired last token.
    Pairs found on the element itself precede those found on descendants.
    """
    found = []
    for attr in element.attributes.values():
        if attr.namespaceName != xsi:
            continue
        if attr.localName == "schemaLocation":
            tokens = string.split(attr.normalizedValue)
            for i in range(0, len(tokens), 2):
                if i + 1 < len(tokens):
                    found.append((tokens[i], tokens[i + 1]))
                else:
                    verror(element,"xsi:schemaLocation must be a list with an even number of members: %s"%tokens,schema,"???")
        elif attr.localName == "noNamespaceSchemaLocation":
            found.append((None, attr.normalizedValue))

    for kid in element.chunkedChildren:
        if isinstance(kid, XMLInfoset.Element):
            found.extend(findSchemaLocs(kid, schema))
    return found
def runitAndShow(en,rns=[],k=0,style=None,enInfo=None,outfile=None,dw=1,
                 timing=0):
    """Run the validator and write the resulting report as an XML document.

    en      -- instance document location (false: standard input)
    rns     -- schema document locations, handed to runit
    k       -- handed to runit (assess the instance despite schema errors)
    style   -- if given, href of a stylesheet named in an xml-stylesheet
               processing instruction at the top of the report
    enInfo  -- optional table of extra attributes to put on the report root
    outfile -- file to write the report to; stderr when absent or unopenable
    dw      -- stored in the module-global dontWarn (true: no warnings)
    timing  -- when true, elapsed times are written to stderr

    Returns the concatenated text of the tracebacks runit collected, or
    None when there were none.
    """
    global dontWarn
    dontWarn = dw
    if timing:
        # from here on timing holds the start time
        timing = time.time()
    (res, encoding, errs) = runit(en, rns, k, timing)
    if timing:
        sys.stderr.write("Finished: %6.2f\n"%(time.time()-timing))
    if not encoding:
        encoding = 'UTF-8'
    if outfile:
        try:
            outf = open(outfile, "w")
        except (IOError, OSError):
            sys.stderr.write("couldn't open %s for output, falling back to stderr"%
                             outfile)
            outf = sys.stderr
    else:
        outf = sys.stderr
    errout = OpenStream(outf,
                        CharacterEncodingNames[encoding],
                        NSL_write+NSL_write_plain)
    if encoding != 'UTF-8':
        es = " encoding='%s'" % encoding
    else:
        es = ""
    # XML declaration, with the encoding spelled out when it is not UTF-8
    PrintTextLiteral(errout, "<?xml version='1.0'%s?>\n" % es)
    if style:
        PrintTextLiteral(errout,
                         "<?xml-stylesheet type='text/xsl' href='%s'?>\n" % style)
    if enInfo:
        for (name, value) in enInfo.items():
            res.addAttr(name, value)
    if errs:
        res.addAttr("crash", "true")
    res.printme(errout)
    PrintTextLiteral(errout, "\n")
    Close(errout)
    if errs:
        # each entry of errs is a list of traceback lines
        return string.join(map(lambda tb: string.join(tb, ''), errs), '')
    else:
        return
class SchemaValidationError(Exception):
    """Raised by the command-line driver with the text runitAndShow returned.

    Takes exactly one argument, which is passed on unchanged to Exception.
    """

    def __init__(self, arg):
        Exception.__init__(self, arg)
def _recordCrash(res, btlist, message):
    """Add a <bug> element saying message to res and append the formatted
    traceback of the exception being handled to btlist."""
    pfe = XML.Element("bug")
    pfe.children = [XML.Pcdata(message)]
    res.children.append(pfe)
    (excType, excValue, excTraceback) = sys.exc_info()
    btlist.append(traceback.format_exception(excType, excValue, excTraceback))

def _endCapture(redirect, savedstderr, res):
    """Copy what was captured from file descriptor 2 into res, point the
    descriptor back at the real stderr and close the spare duplicate."""
    sys.stderr.flush()
    registerRawErrors(redirect, res)
    # put stderr back
    os.dup2(savedstderr, 2)
    os.close(savedstderr)

def runit(en,rns=[],k=0,timing=0):
    """Read the instance and its schemas, validate, and build the report.

    en     -- instance document location, resolved against the factory's
              first file name; false means standard input
    rns    -- schema document locations to load before those the instance
              itself points at
    k      -- when true, assess the instance even though the schemas had
              errors
    timing -- false, or a start time in seconds; when set, elapsed times are
              written to stderr after each stage

    Returns (res, encoding, btlist): the <xsv> report element, the
    instance's character set (None when the instance could not be read) and
    a list of formatted tracebacks, one per crash caught along the way.
    The schema, document element, root type, factory, report element,
    root declaration and traceback list are also left in the module globals
    s, e, t, f, res, ed and btlist.
    """
    global s,e,t,f,res,ed,btlist
    btlist = []
    ss = s = None
    f = XMLSchema.newFactory()
    f.errors = 0
    base = f.fileNames[0]
    if en:
        ren = urljoin(base, en)
    else:
        ren = None
    res = XML.Element("xsv")
    f.resElt = res
    res.addAttr("xmlns", "http://www.w3.org/2000/05/xsv")
    res.addAttr("version", vs)
    res.addAttr("target", ren or "[stdin]")
    if rns:
        res.addAttr("schemaDocs", string.join(rns, ' '))

    # Capture everything written to file descriptor 2 while documents are
    # read.  The scratch file is anonymous, so it goes away when closed.
    redirect = tempfile.TemporaryFile(mode="w+", suffix="xsverrs")
    savedstderr = os.dup(2) # save stderr
    os.dup2(redirect.fileno(), 2)

    try:
        (e, encoding) = readXML(ren)
        if timing:
            os.write(savedstderr, "target read: %6.2f\n"%(time.time()-timing))
    except LTXMLinter.error:
        pfe = XML.Element("bug")
        pfe.children = [XML.Pcdata("validator crash during target reading")]
        res.children.append(pfe)
        e = None
        encoding = None
    if not e:
        res.addAttr('outcome', "validation not attempted")
        _endCapture(redirect, savedstderr, res)
        return (res, None, btlist)

    # TODO: check each schema doc against schema for schemas, if possible,
    # unless caller explicitly opts out (?)
    if rns:
        try:
            s = XMLSchema.fromFile(urljoin(base, rns[0]), f)
            if timing:
                os.write(savedstderr, "schema read: %6.2f\n"%(time.time()-timing))
        except:
            _recordCrash(res, btlist, "validator crash during schema reading")
        for rn in rns[1:]:
            try:
                ffr = XMLSchema.fromFile(urljoin(base, rn), f)
                if timing:
                    os.write(savedstderr, "schema read: %6.2f\n"%(time.time()-timing))
                ss = ss or ffr
            except:
                _recordCrash(res, btlist, "validator crash during schema reading")
    if not s:
        if ss:
            s = ss
        else:
            # nothing usable was loaded: carry on with a placeholder schema
            s = XMLSchema.Schema(f, None)
            s.targetNS = '##dummy'

    schemaLocs = findSchemaLocs(e, s)
    res.addAttr('schemaLocs', string.join(map(lambda p:"%s -> %s"%(p[0] or 'None',p[1]),
                                              schemaLocs),
                                          '; '))
    for (ns, sl) in schemaLocs:
        try:
            XMLSchema.checkinSchema(f, ns, sl, e, ren or "[stdin]")
            if timing:
                os.write(savedstderr, "schema read: %6.2f\n"%(time.time()-timing))
        except:
            _recordCrash(res, btlist, "validator crash during schema reading")

    res.addAttr('docElt', "{%s}%s"%(e.namespaceName, e.localName))
    if (e.namespaceName and
        (e.namespaceName not in ('http://www.w3.org/XML/1998/namespace',xsi)) and
        not f.schemas.has_key(e.namespaceName)):
        # no schema yet for the root's namespace: try the namespace URI itself
        try:
            XMLSchema.checkinSchema(f, e.namespaceName, e.namespaceName, e,
                                    ren or "[stdin]")
            if timing:
                os.write(savedstderr, "schema read: %6.2f\n"%(time.time()-timing))
            res.addAttr('nsURIDeref', 'success')
        except:
            _recordCrash(res, btlist, "validator crash during schema reading")
            res.addAttr('nsURIDeref', 'failure')

    _endCapture(redirect, savedstderr, res)

    try:
        ecount = XMLSchema.prepare(f)
        if timing:
            sys.stderr.write("schemas prepared: %6.2f\n"%(time.time()-timing))
    except:
        ecount = -1
        _recordCrash(res, btlist, "validator crash during factory preparation")

    # keep going unless preparation crashed, or found errors and -k is off
    kg = 1
    if ecount:
        if ecount < 0 or not k:
            kg = 0
    if kg:
        kgm = "true"
    else:
        kgm = "false"
    res.addAttr('instanceAssessed', kgm)
    if not kg:
        if ecount < 0:
            ecount = 0
        for sch in f.schemas.values():
            ecount = ecount + sch.errors
        res.addAttr('schemaErrors', str(ecount))
        return (res, encoding, btlist)

    # prefix of the root element's name as written, '' when it has none
    cl = string.find(e.originalName, ':')
    if cl > -1:
        prefix = e.originalName[0:cl]
    else:
        prefix = ''
    eltname = XMLSchema.QName(prefix, e.localName, e.namespaceName or None)
    if not s:
        # any one will do
        s = f.sfors
    t = None
    ed = None
    if s and s.vElementTable.has_key(eltname):
        ed = s.vElementTable[eltname]
        t = ed.typeDefinition
    if t:
        if t.name:
            if hasattr(t, 'qname'):
                tn = t.qname.string()
            else:
                tn = t.name
        else:
            tn = '[Anonymous]'
        res.addAttr('rootType', tn)
        res.addAttr('validation', 'strict')
    else:
        res.addAttr('validation', 'lax')

    if e and s:
        try:
            validate(e, t, s, ed)
            e.schemaInformation = []
            # XXX get all the namespaces and schemas
            nsi = PSVInfoset.NamespaceSchemaInformation(e, 'dummy:namespace')
            e.schemaInformation.append(nsi)
        except:
            _recordCrash(res, btlist, "validator crash during validation")

    res.addAttr('instanceErrors', str(s.factory.errors))
    ec = 0
    for sch in f.schemas.values():
        ec = ec + sch.errors
    res.addAttr('schemaErrors', str(ec))
    return (res, encoding, btlist)
def registerRawErrors(redirect,res):
    """Move text captured in the redirect file into the report, then close it.

    When the file position is non-zero the whole file is re-read from the
    start and added to res as an <XMLMessages> element whose text is a
    newline followed by the file's content.  The file is closed either way.
    """
    position = redirect.tell()
    if position:
        redirect.seek(0)
        captured = "\n%s" % redirect.read()
        messages = XML.Element("XMLMessages")
        messages.children = [XML.Pcdata(captured)]
        res.children.append(messages)
    redirect.close()
def verror(elt,message,schema,code=None,two=0,daughter=None,iitem=None):
    """Record a validation error in the report and on the offending item.

    elt      -- item whose location is attached to the report entry
    message  -- text of the <invalid> element
    schema   -- schema whose factory error counter is incremented
    code     -- identifier of the violated constraint, stored as the entry's
                "code" attribute and on the item's errorCode list
    two      -- when true use elt.where2 instead of elt.where as location
    daughter -- optional extra child appended to the <invalid> element
    iitem    -- item that receives the error code; defaults to elt

    The entry is appended to the module-global report element ``res``.
    """
    report = XML.Element("invalid")
    report.children = [XML.Pcdata(message)]
    if code:
        report.addAttr("code", code)
    if two:
        location = elt.where2
    else:
        location = elt.where
    XMLSchema.where(report, location)
    if daughter:
        report.children.append(daughter)
    res.children.append(report)
    schema.factory.errors = schema.factory.errors + 1

    target = iitem or elt
    if target.errorCode:
        # NOTE(review): with code left at its default of None this
        # concatenation raises TypeError; every call in this file passes one.
        target.errorCode.append(" " + code)
    else:
        target.errorCode = [code]
def vwarn(elt,message):
    """Add a <warning> element to the report unless warnings are turned off.

    elt     -- item whose location is attached to the warning; may be false,
               in which case no location is attached
    message -- text of the warning

    Does nothing while the module-global dontWarn is true; otherwise the
    warning is appended to the module-global report element ``res``.
    """
    if not dontWarn:
        note = XML.Element("warning")
        note.children = [XML.Pcdata(message)]
        if elt:
            XMLSchema.where(note, elt.where)
        res.children.append(note)
# validation methods for schema components
def av(self,child,schema,kind,elt):
# Validate an item that was admitted by this wildcard; installed below as
# XMLSchema.Wildcard.validate.
#   self   -- the wildcard (its allowed and processContents are consulted)
#   child  -- the element or attribute item that matched
#   schema -- schema whose factory and global tables are consulted
#   kind   -- 'element' or 'attribute' (anything but 'element' is looked up
#             in the attribute table)
#   elt    -- item used to locate warnings and errors
# For an attribute with a declared type the result of that type's
# validateText is returned; no other return statement is present.
# NOTE(review): indentation is missing in this copy of the file, so it cannot
# be determined here whether the `if e and e.typeDefinition:` chain sits
# inside the `schemas.has_key(...)` test; `e` is only assigned inside it --
# confirm against a properly indented copy before changing this function.
q = XMLSchema.QName(None,child.localName,child.namespaceName or None)
vwarn(elt,"allowing %s because it matched wildcard(%s)" %
(q,self.allowed))
# processContents 'skip' means no validation of the matched item at all
if self.processContents!='skip':
# NOTE(review): this print and the one further down write to stdout and look
# like debugging output -- confirm before removing
print "looking for decl for %s" % child.originalName
if schema.factory.schemas.has_key(child.namespaceName):
# only try if we might win -- needs work
try:
if kind=='element':
e = schema.vElementTable[q]
else:
e = schema.vAttributeTable[q]
except KeyError:
e=None
print "decl for %s is %s" % (child.originalName, e)
if e and e.typeDefinition:
vwarn(None,"validating it against %s" %
(e.typeDefinition.name or 'anonymous type'))
if kind=='element':
validateElement(child, e.typeDefinition, schema)
else:
return e.typeDefinition.validateText(child.normalizedValue,
elt, schema)
# strict processing with no usable declaration is an error, unless the item
# is an element carrying its own xsi:type
elif (self.processContents=='strict' and
not (kind=='element' and child.attributes.has_key((xsi, "type")))):
# TODO check this against actual def'n of missing component
verror(elt,
"can't find a type for wildcard-matching %s %s" %(kind, q),
schema,
"src-resolve")
child.validatedType = None
elif kind=='element':
vwarn(None,"validating it laxly")
child.validatedType = None
validateElement(child,None,schema)
# install av as the validate method of wildcards
XMLSchema.Wildcard.validate=av
def tv(self,child,schema,kind,elt):
    """Validate child against this type by calling validateElement.

    The kind and elt arguments are accepted so the signature matches the
    wildcard's validate method; they are not used.
    """
    validateElement(child, self, schema)

# install tv as the validate method of both kinds of type
XMLSchema.Type.validate = tv
XMLSchema.AbInitio.validate = tv
def validateText(self, text, context, schema):
    """Check text against this simple type.

    Returns a false value when the text is acceptable and a message
    fragment describing the problem otherwise.  The ur-type accepts
    anything.  An atomic type defers to its primitive type's checkString, a
    list type checks each whitespace-separated token against its item type,
    and a union type accepts the text as soon as one member type does.
    """
    if self==XMLSchema.urType:
        return

    variety = self.variety
    if variety == 'atomic':
        # ref may have failed
        return (self.primitiveType and
                self.primitiveType.checkString(text, context))

    if variety == 'list':
        itemType = self.itemType
        # TODO: what about post-list facets?
        if not itemType:
            return
        for token in string.split(text):
            problem = itemType.validateText(token, context, schema)
            if problem:
                return problem+' in list'
        return

    if variety == 'union':
        failures = []
        # TODO: what about post-union facets?
        for member in self.memberTypes:
            if not member:
                continue
            problem = member.validateText(text, context, schema)
            if not problem:
                # bingo
                return
            failures.append(problem)
        # no subtypes won, we lose
        return " no members of union succeeded: %s"%failures

    XMLSchema.shouldnt('vv '+str(self.variety))

XMLSchema.SimpleType.validateText = validateText
def validateText(self, text, context, schema):
    """Check text by handing it to this object's own checkString method.

    The schema argument is accepted but not used.  Returns whatever
    checkString returns.
    """
    verdict = self.checkString(text, context)
    return verdict
# install the validateText defined immediately above as the AbInitio method
XMLSchema.AbInitio.validateText=validateText
# checkString methods
def checkString(self,str,context):
    """Check str against this type's enumeration facet, if it has one.

    Returns None when 'enumeration' is not among self.allowedFacets, when
    self.enumeration is None, or when str equals one of the enumerated
    values; otherwise returns a message fragment listing the enumeration.
    The context argument is not used.
    """
    # TODO: rethink allowedFacets: not efficient
    if 'enumeration' not in self.allowedFacets:
        return
    if self.enumeration == None:
        return
    for candidate in self.enumeration:
        if candidate == str:
            return
    return " not in enumeration %s" % self.enumeration
# install the enumeration check defined immediately above as the AbInitio method
XMLSchema.AbInitio.checkString = checkString
def checkString(self,str,context):
    """Check that str is a number within this type's range facets.

    str is read as a float when it contains '.' or 'E' and as an integer
    otherwise.  Returns " does not represent a number" when that fails, or
    a fragment naming the violated bound ("<N", "<=N", ">N", ">=N") for
    minInclusive, minExclusive, maxInclusive and maxExclusive respectively;
    a bound of None is not checked.  When every range check passes the
    result of the AbInitio checkString is returned.
    """
    try:
        if ('.' in str) or ('E' in str):
            val = float(str)
        else:
            val = int(str)
    except ValueError:
        return " does not represent a number"
    if self.minInclusive is not None and val < self.minInclusive:
        return "<%d" % self.minInclusive
    if self.minExclusive is not None and val <= self.minExclusive:
        return "<=%d" % self.minExclusive
    if self.maxInclusive is not None and val > self.maxInclusive:
        return ">%d" % self.maxInclusive
    if self.maxExclusive is not None and val >= self.maxExclusive:
        return ">=%d" % self.maxExclusive
    return XMLSchema.AbInitio.checkString(self, str, context)
# install the numeric check defined immediately above as the DecimalST method
XMLSchema.DecimalST.checkString = checkString
def checkString(self,str,context):
    """Make a shallow check that str could be a qualified name.

    Returns a message fragment when str contains more than one colon, or
    when it has a prefix that is not a key of context.inScopeNamespaces;
    otherwise returns the result of the AbInitio checkString.
    """
    # not complete by any means
    pieces = str.split(':')
    if len(pieces) > 2:
        return " has more than one colon"
    if len(pieces) == 2:
        prefix = pieces[0]
        if not context.inScopeNamespaces.has_key(prefix):
            return " has undeclared prefix: %s" % prefix
    return XMLSchema.AbInitio.checkString(self, str, context)
# install the qualified-name check defined immediately above as the QNameST method
XMLSchema.QNameST.checkString = checkString
def dumpInfoset(filename):
  """Write a reflected copy of the current document to filename.

  Uses the module-level doc (set by readXML): reflects it, adds a "psv"
  entry bound to PSVInfoset.infosetSchemaNamespace to the in-scope
  namespaces of the reflected document element, indents the result and
  prints it to the file.  The file is opened for writing, so any existing
  content is replaced.
  """
  ff = open(filename, "w")
  try:
    r = doc.reflect()
    r.documentElement.inScopeNamespaces["psv"]=XMLInfoset.Namespace("psv",
                                         PSVInfoset.infosetSchemaNamespace)
    r.indent()
    r.printme(ff)
  finally:
    # close the file even if reflecting or printing raises
    ff.close()
# run at import if top
if __name__=='__main__':
argl=sys.argv[1:]
k=0
dw=1
timing=0
style=None
outfile=None
while argl:
if argl[0]=='-k':
k=1
elif argl[0]=='-s':
style=argl[1]
argl=argl[1:]
elif argl[0]=='-o':
outfile=argl[1]
argl=argl[1:]
elif argl[0]=='-w':
dw=0
elif argl[0]=='-t':
timing=1
elif argl[0][0]=='-':
sys.stderr.write("Usage: [-ktw] [-s stylesheet] [-o outputFile] file [schema1 schema2 . . .]\n")
sys.exit(-1)
else:
break
argl=argl[1:]
if argl:
res=runitAndShow(argl[0],argl[1:],k,style,None,outfile,dw,timing)
else:
res=runitAndShow(None,[],k,style,None,outfile,dw,timing)
if res:
raise SchemaValidationError,res
# $Log: applyschema.py,v $
# Revision 1.74.2.11 2000/12/06 09:21:05 ht
# add psv infoset namespace URI to reflected doc
#
# Revision 1.74.2.10 2000/12/04 22:31:03 ht
# stubs for schemaNormalizedValue in place
#
# Revision 1.74.2.9 2000/12/04 22:09:00 ht
# remove convert,
# accommodate change to importing XML,
# put attribute verror on right item
#
# Revision 1.74.2.8 2000/12/04 13:30:42 ht
# merge in main line fixes thru 1.82
#
# Revision 1.74.2.7 2000/10/13 12:48:42 richard
# more infoset contributions
#
# Revision 1.74.2.6 2000/10/02 13:33:28 richard
# update values for validity property
#
# Revision 1.74.2.5 2000/09/29 17:18:09 richard
# More towards PSV infoset
#
# Revision 1.74.2.4 2000/09/29 16:45:27 richard
# correct errorCode setting
#
# Revision 1.74.2.3 2000/09/29 16:04:24 richard
# More towards PSV infoset
#
# Revision 1.74.2.2 2000/09/29 14:16:15 ht
# towards PSVI contributions
#
# Revision 1.74.2.1 2000/09/27 17:21:20 richard
# Changes for infoset-based
#
# Revision 1.77 2000/09/28 15:54:50 ht
# schema error count includes all errors, not just those found at prep
# time
#
# Revision 1.76 2000/09/28 15:09:14 ht
# try catching and returning any crashes
#
# Revision 1.75 2000/09/28 08:41:57 ht
# add usage message
# add -o outfile cmd line arg
#
# Revision 1.82 2000/10/31 16:30:47 ht
# validate subordinate elements with eltdecl if available
# return schema error count if not attempting instance validation
#
# Revision 1.81 2000/10/27 15:33:30 ht
# Output timing info if -t on command line
#
# Revision 1.80 2000/10/18 15:54:58 ht
# make effort to check 'fixed' attribute values
#
# Revision 1.79 2000/10/17 13:35:41 ht
# put switch on warnings, default is don't
#
# Revision 1.78 2000/10/17 12:45:15 ht
# try to catch and log all crashes
# replace stale reference to atribute.characters
#
# Revision 1.77 2000/09/28 15:54:50 ht
# schema error count includes all errors, not just those found at prep
# time
#
# Revision 1.76 2000/09/28 15:09:14 ht
# try catching and returning any crashes
#
# Revision 1.75 2000/09/28 08:41:57 ht
# add usage message
# add -o outfile cmd line arg
#
# Revision 1.74 2000/09/27 13:48:47 richard
# Use infoset-like names for slots (now provided in XML.py) to reduce
# differences with infoset-based version.
#
# Revision 1.73 2000/09/27 12:22:22 richard
# correct element.name to element.local in an error message
#
# Revision 1.72 2000/09/26 14:29:36 richard
# Oops, didn't change AbInitio to XMLSchema.AbInitio when moving methods
#
# Revision 1.71 2000/09/26 14:05:28 richard
# Move checkString methods from XMLSchema.py, because they may need to look
# at *instance* in-scope namespaces
#
# Revision 1.70 2000/09/26 13:38:49 ht
# protect against undefined list itemType/union memberType
#
# Revision 1.69 2000/09/23 11:17:31 ht
# merge in CR branch
#
# Revision 1.68 2000/09/23 11:14:26 ht
# towards merge in CR branch
#
# Revision 1.66.2.3 2000/09/21 09:14:33 ht
# property name change
#
# Revision 1.66.2.2 2000/09/11 12:23:27 ht
# Move to branch: more debug in vv crash
#
# Revision 1.68 2000/09/03 15:57:23 ht
# more debug in vv crash
# Revision 1.67 2000/09/11 12:59:09 ht
# allow stdin,
# fix stupid bug missing third schema on command line
# Revision 1.67 2000/08/31 11:48:41 ht
# Direct support for validating lists and unions
# Revision 1.66 2000/08/22 13:11:30 ht
# handle type w/o qname as document validation type
# remove special treatment for AbInitio simple types on elements,
# thereby fixing list validation bug
# Revision 1.66.2.3 2000/09/21 09:14:33 ht
# property name change
#
# Revision 1.66.2.2 2000/09/11 12:23:27 ht
# Move to branch: more debug in vv crash
#
# Revision 1.68 2000/09/03 15:57:23 ht
# more debug in vv crash
#
# Revision 1.67 2000/08/31 11:48:41 ht
# Direct support for validating lists and unions
#
# Revision 1.66 2000/08/22 13:11:30 ht
# handle type w/o qname as document validation type
# remove special treatment for AbInitio simple types on elements,
# thereby fixing list validation bug
#
# Revision 1.65 2000/07/12 09:31:58 ht
# try harder to always have a schema
#
# Revision 1.64 2000/07/10 14:39:02 ht
# prepare for fileinfo to runit
#
# Revision 1.63 2000/07/05 09:05:37 ht
# change name to PyLTXML
#
# Revision 1.62 2000/07/03 09:37:38 ht
# bail out if textonly has elt daughter(s)
# add missing import
#
# Revision 1.61 2000/06/27 09:25:51 ht
# attempt to handle interaction between xsi:type and
#
# Revision 1.60 2000/06/24 11:17:07 ht
# fix bug in unqualified xsi:type
#
# Revision 1.59 2000/06/22 10:31:33 ht
# Bug in unique processing -- broke on missing field
#
# Revision 1.58 2000/06/20 08:07:42 ht
# merge xmlout branches back in to main line
#
# Revision 1.57 2000/05/18 08:01:25 ht
# fix bug in handling of xsi:type
#
# Revision 1.56 2000/05/14 12:19:34 ht
# add context to checkString calls
#
# Revision 1.55 2000/05/11 11:55:57 ht
# just better handling of lax validation from other branch
#
# Revision 1.54.2.16 2000/06/15 16:03:20 ht
# cover several missing definition cases
#
# Revision 1.54.2.15 2000/06/03 16:29:30 ht
# oops, removing debugging comment
#
# Revision 1.54.2.14 2000/06/03 13:45:55 ht
# catch arity bug in xsi:schemaLoc
#
# Revision 1.54.2.13 2000/05/30 09:35:43 ht
# fix encoding bug when things break down altogether
#
# Revision 1.54.2.12 2000/05/29 08:46:53 ht
# strong enforcement of nullable
# add error codes to all errors
# remove remaining __class__ tests
# change error reporting wrt disallowed content
#
# Revision 1.54.2.11 2000/05/24 20:46:47 ht
# make validateText a method, split across SimpleType and AbInitio
#
# Revision 1.54.2.10 2000/05/24 12:03:28 ht
# modest effort to validate list types
# fix bug in noNamespaceSchemaLocation handling at validation time
#
# Revision 1.54.2.9 2000/05/22 16:11:52 ht
# use OpenStream, take more control of encoding
#
# Revision 1.54.2.8 2000/05/18 17:37:40 ht
# parameterise stylesheet,
# remove formatting from xsv:xsv attributes,
# add namespace decl
#
# Revision 1.54.2.7 2000/05/18 07:59:48 ht
# fix xsi:type validation bug
#
# Revision 1.54.2.6 2000/05/16 16:31:11 ht
# fix bug handling un-typed element declarations == urType validation
#
# Revision 1.54.2.5 2000/05/14 12:29:59 ht
# merge QName checking from main branch
#
# Revision 1.54.2.4 2000/05/12 15:15:01 ht
# process keys even if type is simple,
# add a few codes to get started
#
# Revision 1.54.2.3 2000/05/11 13:59:11 ht
# convert verror/vwarn to produce elements
# eliminate a few special error outputs in favour of special
# sub-elements
#
# Revision 1.54.2.2 2000/05/11 11:14:00 ht
# more error protection
# handle lax recursively and at the start
#
# Revision 1.54.2.1 2000/05/10 11:36:47 ht
# begin converting to XML output
#
# Revision 1.56 2000/05/14 12:19:34 ht
# add context to checkString calls
#
# Revision 1.55 2000/05/11 11:55:57 ht
# just better handling of lax validation from other branch
#
# Revision 1.54 2000/05/09 14:52:52 ht
# Check for strings in a way that works with or without 16-bit support
#
# Revision 1.53 2000/05/09 12:27:58 ht
# replace our hack with python's url parsing stuff
# make f global for debugging
#
# Revision 1.52 2000/05/05 15:15:45 richard
# wrong (?) elt arg to verror in validateKeyRefs
#
# Revision 1.51 2000/05/04 07:56:35 ht
# Fix typo in opportunistic attribute validation
#
# Revision 1.50 2000/05/01 15:07:00 richard
# bug fix schema -> key.schema
#
# Revision 1.49 2000/05/01 10:05:43 ht
# catch various missing file errors more gracefully
#
# Revision 1.48 2000/04/28 15:40:01 richard
# Implement xsi:null (still don't check nullable)
#
# Revision 1.47 2000/04/28 15:11:23 richard
# allow xsi: attributes on simple type
# moved eltDecl code up validateElement ready for implementing xsi:null
#
# Revision 1.46 2000/04/27 09:41:18 ht
# remove raw types from error messages
#
# Revision 1.45 2000/04/27 09:30:21 ht
# check that inputs are actually schemas,
# remove schema arg to doImport, checkInSchema
#
# Revision 1.44 2000/04/26 13:00:40 ht
# add copyright
#
# Revision 1.43 2000/04/24 20:46:40 ht
# cleanup residual bugs with massive rename,
# rename Any to Wildcard,
# replace AnyAttribute with Wildcard,
# get validation of Wildcard working in both element and attribute contexts
#
# Revision 1.42 2000/04/24 15:08:34 ht
# minor glitches, tiny.xml works again
#
# Revision 1.41 2000/04/24 15:00:09 ht
# wholesale name changes -- init. caps for all classes,
# schema.py -> XMLSchema.py
#
# Revision 1.40 2000/04/24 11:09:17 ht
# make version string universally available
#
# Revision 1.39 2000/04/24 10:06:59 ht
# add version info to message
#
# Revision 1.38 2000/04/24 10:02:39 ht
# change invocation message
#
# Revision 1.37 2000/04/24 09:41:43 ht
# clean up invocation some more, add k arg't to runit
#
# Revision 1.36 2000/04/21 09:32:21 ht
# another dose of resolveURL
# use tiny only if run from command line
#
# Revision 1.35 2000/04/20 22:12:43 ht
# use resolveURL on input, schemaLocs
#
# Revision 1.34 2000/04/20 15:45:08 ht
# better handling of use of ns uri for loc
#
# Revision 1.33 2000/04/20 14:26:59 ht
# merge in private and comp branches
#
# Revision 1.32.2.5 2000/04/20 14:25:54 ht
# merge in comp branch
#
# Revision 1.32.2.4.2.9 2000/04/20 14:22:39 ht
# manage document validation schema creation and search better
#
# Revision 1.32.2.4.2.8 2000/04/20 12:03:21 ht
# Remove a few lingering effectiveTypes
# Allow better for absent types etc.
#
# Revision 1.32.2.4.2.7 2000/04/14 21:18:27 ht
# minor attr names/path changes to track schema
#
# Revision 1.32.2.4.2.6 2000/04/13 23:04:39 ht
# allow for urType as simple type (?)
# track Any->AnyWrap change
#
# Revision 1.32.2.4.2.5 2000/04/12 17:29:37 ht
# begin work on model merger,
#
# Revision 1.32.2.4.2.4 2000/04/11 18:13:17 ht
# interpolate attributeUse between complexType and attributeDeclaration,
# parallel to particle
#
# Revision 1.32.2.4.2.3 2000/04/10 15:48:46 ht
# put modest attribute validation in place
#
# Revision 1.32.2.4.2.2 2000/04/09 16:13:26 ht
# working on complex type, attribute;
# back out component.qname
#
# Revision 1.32.2.4.2.1 2000/04/05 12:12:36 ht
# accommodate changes in schema.py
#
# Revision 1.32.2.4 2000/04/01 18:01:25 ht
# various minor compatibility fixes
#
# Revision 1.32.2.3 2000/03/25 12:12:27 ht
# restructure error handling/reporting;
# allow for switching 208 on and off
#
# Revision 1.32.2.2 2000/03/21 15:57:23 ht
# fix bug in skip,
# allow 208 override
#
# Revision 1.32.2.1 2000/03/20 17:22:52 ht
# better coverage of <any>, including beginning of processcontents
#
# Revision 1.33 2000/03/20 17:20:53 ht
# better coverage of <any>, including beginning of processcontents
#
# Revision 1.32 2000/03/08 15:28:46 ht
# merge private branches back into public after 20000225 release
#
# Revision 1.31.2.3 2000/02/24 23:40:32 ht
# fix any bug
#
# Revision 1.31.2.2 2000/02/21 09:18:13 ht
# bug in handling
#
# Revision 1.31.2.1 2000/02/08 21:43:39 ht
# fork private branch to track internal drafts
# change calling sequence of checkinSchema
#
# Revision 1.31.1.1 2000/02/08 13:54:25 ht
# fork branch for non-public changes
# calling sequence to checkinSchema changed
#
# Revision 1.31 2000/01/13 16:55:42 richard
# Finally do something with xsi:type
#
# Revision 1.30 2000/01/10 17:36:34 richard
# changes for xsi:schemaLocation
#
# Revision 1.29 2000/01/08 23:33:50 ht
# towards support for xsi:schemaLocation
#
# Revision 1.28 2000/01/08 12:07:38 ht
# Change command-line arg sequence in preparation for use of schemaLocation!!!!!
# Add debug printout for schemaLocation for now
#
# Revision 1.27 2000/01/07 17:08:26 richard
# start on xsi:type
#
# Revision 1.26 2000/01/06 14:59:38 ht
# fix command line bug, display args on entry
#
# Revision 1.25 2000/01/06 14:38:56 ht
# detect cross-scope keyref and signal error
#
# Revision 1.24 2000/01/03 17:02:37 ht
# Include result of sub-ordinate key checking in overall result
# Accommodate new calling sequence for xpath.find
# add Log and Id
#
#