# Copyright (C) 2000 LTG -- See accompanying COPYRIGHT and COPYING files # actually apply a schema to an instance # $Id: applyschema.py,v 1.74.2.11 2000/12/06 09:21:05 ht Exp $ # TODO: enforce datatype constraints on xsi:type, # null, (noNamespace)schemaLocation from PyLTXML import * import XML import XMLInfoset import LTXMLInfoset import PSVInfoset import os import XMLSchema import layer import sys import re import xpath import types import string from urlparse import urljoin import tempfile import traceback import asInfoset import time whitespace = re.compile("^[ \t\r\n]*$") xsi = "http://www.w3.org/2000/10/XMLSchema-instance" vsraw="$Revision: 1.74.2.11 $ of $Date: 2000/12/06 09:21:05 $" vss=string.split(vsraw) vs="XSV %s/%s of %s %s"%(string.split(XMLSchema.versionString)[0], vss[1],vss[5],vss[6]) dontWarn=1 def readXML(url): global doc try: if url: doc = LTXMLInfoset.documentFromURI(url) else: file = FOpen(sys.stdin, NSL_read+NSL_read_all_bits+NSL_read_namespaces+ NSL_read_no_consume_prolog) doc = LTXMLInfoset.documentFromFile(file) Close(file) return (doc.documentElement, doc.documentEntity.charset) except LTXMLinter.error: return (None, None) def assess(element): allfull = 1 allnone = 1 nochildren = 1 for c in element.chunkedChildren: if isinstance(c, XMLInfoset.Element): nochildren = 0 validationAttempted = c.__dict__.has_key("validationAttempted") and c.validationAttempted if validationAttempted != 'full': allfull = 0 if validationAttempted and c.validationAttempted != 'none': allnone = 0 if nochildren: if element.validatedType: element.validationAttempted = 'full' else: element.validationAttempted = 'none' else: if allfull and element.validatedType: element.validationAttempted = 'full' elif allnone and not element.validatedType: element.validationAttempted = 'none' else: element.validationAttempted = 'partial' if element.errorCode: element.validity = 'invalid' else: has_losing_child = 0 has_untyped_strict_child = 0 has_non_winning_typed_child = 0 for c in 
element.chunkedChildren: if not isinstance(c, XMLInfoset.Element): continue strict = c.__dict__.has_key("strict") and c.strict validatedType = c.__dict__.has_key("validatedType") and c.validatedType validity = c.__dict__.has_key("validity") and c.validity if validity == 'invalid': has_losing_child = 1 if strict and not validatedType: has_untyped_strict_child = 1 if validatedType and validity != 'valid': has_non_winning_typed_child = 1 if has_losing_child or has_untyped_strict_child: element.validity = 'invalid' elif has_non_winning_typed_child: element.validity = 'unknown' else: element.validity = 'valid' if element.validatedType: element.typeDefinition=PSVInfoset.ComplexTypeDefinition(element, element.validatedType) def validate(element, typedef, schema, eltDecl): if not hasattr(schema.factory,'errors'): schema.factory.errors=0 validateElement(element, typedef, schema, eltDecl) return schema.factory.errors def validateElement(element, type, schema, eltDecl=None): global vel, vtype vel = element vtype = type # print "validating element %s against %s" % (element.originalName, ['type:',type]) element.validatedType = type if not eltDecl: eqn=XMLSchema.QName(None,element.localName,element.namespaceName or None) if s.vElementTable.has_key(eqn): eltDecl=s.vElementTable[eqn] nullable = eltDecl and eltDecl.nullable # TODO: is this right if no eltDecl? 
nulled = 0 if element.attributes.has_key((xsi, "null")): if not nullable: verror(element, "xsi:null specified on non-nullable element %s" % element.originalName, schema,"cvc-elt.1.1") print "bad nullable: eltDecl = %s" % eltDecl assess(element) return nulled = (element.attributes[(xsi, "null")].normalizedValue == "true") if element.attributes.has_key((xsi, "type")): t = element.attributes[(xsi, "type")].normalizedValue; res = XMLSchema.QNameST.checkString(None, t, element) if res: verror(element, "xsi:type %s is not a valid qname: %s " % (t,res), schema, "cvc-elt.2.1") (tp,tl) = XMLSchema.splitQName(t) qt = XMLSchema.QName(tp, tl, element.inScopeNamespaces[tp]) if schema.vTypeTable.has_key(qt): xsitype=schema.vTypeTable[qt] else: verror(element,"xsi:type %s undefined" % qt,schema,"cvc-elt.2.2") assess(element) return if type and not xsitype.isSubtype(type): verror(element, "xsi:type %s is not a subtype of the declared type %s"%(qt, type.name), schema,"cvc-elt.2.3") assess(element) return if type: vwarn(element, "using xsi:type %s instead of original %s" % (qt, type.name)) else: vwarn(element,"using xsi:type %s" % qt) type = xsitype element.validatedType = type lax = not type # might have none in case of recursive call inside , or at top level if nulled: validateElementNull(element, type, schema) if type: # TODO: check type is not abstract if ((not type==XMLSchema.urType) and (isinstance(type, XMLSchema.AbInitio) or isinstance(type, XMLSchema.SimpleType))): if not nulled: validateElementSimple(element, type, schema) if eltDecl: validateKeys(eltDecl,element) assess(element) return # a complexType ad=type.attributeDeclarations ps=type.prohibitedSubstitutions et=type.elementTable else: ps=[] ad={} et={} assignAttributeTypes(element, ad, ps, schema, lax) validateAttributeTypes(element, element.attrTable, ad, schema) # print "assigning types for %s" % element.originalName if not nulled: assignChildTypes(element.chunkedChildren, et, ps, schema, lax) # we must look at the 
content model before checking the types, so that # we know which children matched if type: validateContentModel(element, type, schema) validateChildTypes(element.chunkedChildren, schema, lax) if eltDecl: validateKeys(eltDecl,element) assess(element) def validateElementNull(element, type, schema): if len(element.chunkedChildren) != 0: verror(element,"element %s is nulled but is not empty" % element.originalName, schema,"cvc-elt.1.2.1") # TODO: should check for fixed value constraint def validateElementSimple(element, type, schema): # check that: # it has no attributes (except xsi: ones) # it has one pcdata child, and if so # the text of the pcdata matches the type if element.attributes: for a in element.attributes.values(): if a.namespaceName != xsi: verror(element, "element {%s}%s with simple type not allowed attributes"% (element.namespaceName, element.localName), schema,"cvc-elt.4.1.1") return return validateTextModel(element, type, schema) def assignAttributeTypes(element, attrdefs, extendable, schema, lax): # look up each attribute in attrdefs and assign its type # error if attr declaration is not found and type is not extendable # print "assigning attrs for %s {%s}%s" % (element.originalName, element.namespaceName, element.localName) # print "declared attrs are:" # for zz in attrdefs.keys(): # if isinstance(zz, XMLSchema.QName): # print "{%s}%s " % (zz.uri, zz.local) # else: # print zz element.attrTable={} for a in element.attributes.values(): # print "assigning attr %s {%s}%s" % (a.originalName, a.namespaceName, a.localName) an=XMLSchema.QName(None,a.localName,a.namespaceName or None) element.attrTable[an]=a if a.namespaceName == xsi: if a.localName not in ('type','null','schemaLocation','noNamespaceSchemaLocation'): verror(element,"unknown xsi attribute %s" % an,schema, "cvc-complex-type.1.3") elif attrdefs.has_key(an): a.type = attrdefs[an].attributeDeclaration elif lax: if a.namespaceName and schema.vAttributeTable.has_key(an): 
a.type=schema.vAttributeTable[an] else: a.type=None elif (attrdefs.has_key("#any") and attrdefs["#any"].attributeDeclaration.allows(a.namespaceName)): a.type = attrdefs["#any"].attributeDeclaration else: verror(element,"undeclared attribute %s" % an,schema, "cvc-complex-type.1.3") a.type = None return def validateAttributeTypes(element,attrs, attrdefs, schema): # check that each attribute matches its type # check that all required attributes are present # TODO: add defaulted attributes (shouldn't need to check their types) # checked fixed values for (adq,ad) in attrdefs.items(): if ad.minOccurs==1 and not attrs.has_key(adq): verror(element,"required attribute %s not present"%adq,schema, 'cvc-complex-type.1.4') for (an,a) in attrs.items(): if an.uri==xsi: # TODO: whitespace!!! a.schemaNormalizedValue=a.normalizedValue elif a.type: if isinstance(a.type,XMLSchema.Wildcard): res=a.type.validate(a,schema,'attribute',element) else: if a.type.typeDefinition: res=a.type.typeDefinition.validateText(a.normalizedValue,element, schema) if a.type.valueConstraint and a.type.valueConstraint[0]=='fixed': if a.normalizedValue!=a.type.valueConstraint[1]: verror(element,"fixed value did not match for attribute %s: %s!=%s"%(an,a.normalizedValue,a.type.valueConstraint[1]),schema,"cvc-attribute.1.3") else: res=None if res: verror(element,"attribute type check failed for %s: %s%s"%(an, a.normalizedValue, res), schema,'cvc-attribute.1.2',0,None,a) else: # TODO: whitespace!!! 
a.schemaNormalizedValue=a.normalizedValue def assignChildTypes(children, elementTable, extendable, schema, lax): # look up each child tag and record the type # (it may not be an error if it is not declared; we don't know that # until we see what it matches in the content model) for child in children: if isinstance(child,XMLInfoset.Element): qname = XMLSchema.QName(None,child.localName,child.namespaceName or None) if elementTable.has_key(qname): decl=elementTable[qname] try: child.type = decl.typeDefinition except: print decl child.eltDecl = decl elif lax and child.namespaceName and schema.vElementTable.has_key(qname): decl=schema.vElementTable[qname] child.type=decl.typeDefinition child.eltDecl=decl else: child.type = None child.eltDecl=None return 1 def validateContentModel(element, type, schema): # trace a path through the content model # if a child matches an we need to indicate # that that child should be validated with its xsd:type if it has one # if a child matches some other kind of we need to indicate # that it's not an error if we can't find its type # print "validating model for %s content type %s" % (element.originalName, type.contentType) if type.contentType == "empty": validateEmptyModel(element, type, schema) elif type.contentType == "textOnly": validateTextModel(element, type.model, schema) else: validateElementModel(element, type.fsm, type.contentType == "mixed", schema) def validateEmptyModel(element, type, schema): if len(element.chunkedChildren) != 0: verror(element,"element %s must be empty but is not" % element.originalName,schema, "cvc-complex-type.1.2") def validateTextModel(element, type, schema): # check that: # it has one pcdata child, and if so # the text of the pcdata matches the type name = element.localName n=0 for child in element.chunkedChildren: if isinstance(child,XMLInfoset.Characters): n=1 elif isinstance(child,XMLInfoset.Element): verror(element, "element {%s}%s with simple type not allowed element children"% 
(element.namespaceName,name),schema,"cvc-complex-type.1.2.2") # TODO: mark this (and any others) as not validated return else: if n == 0: text = "" else: text = element.chunkedChildren[0].characters res=type.validateText(text, element, schema) # TODO: whitespace if res: verror(element,"element content failed type check: %s%s"%(text,res), schema,"cvc-complex-type.1.2.2") else: element.schemaNormalizedValue=text def validateElementModel(element, fsm, mixed, schema): # print "validating element model for %s" % element.originalName n = fsm.startNode for c in element.chunkedChildren: if isinstance(c,XMLInfoset.Characters): if (not mixed) and (not whitespace.match(c.characters)): verror(c, "text not allowed in element %s: |%s|" % (element.originalName,c.characters), schema,"cvc-complex-type.1.2.3") return elif isinstance(c,XMLInfoset.Element): qname = XMLSchema.QName(None, c.localName, c.namespaceName or None) next = None anynext = None for e in n.edges: if e.label == qname: next = e.dest c.strict = 1 break if isinstance(e.label, XMLSchema.Wildcard): if e.label.allows(c.namespaceName): anynext = e.dest anylab = e.label if not next: if anynext: n = anynext c.strict = (anylab.processContents == 'strict') # this is no longer an error, but something more complicated is XXX # if c.type: # where(child.where) # print "element matched but had a type assigned" # v = 0 # else: # c.type = "" c.type = anylab else: verror(c, "element %s not allowed here in element %s:\n"% (qname, XMLSchema.QName(None,element.localName,element.namespaceName or None)), schema,"cvc-complex-type.1.2.4",0,fsm.asXML()) else: n = next if not n.isEndNode: verror(element, "content of %s is not allowed to end here:\n"% element.originalName, schema,"cvc-complex-type.1.2.4",1,fsm.asXML()) return def validateChildTypes(children, schema, lax): # validate each child element against its type, if we know it # report an error if we don't know it and it's not in v = 1 for child in children: if 
isinstance(child,XMLInfoset.Element): if child.type: if child.eltDecl: validateElement(child,child.type,schema,child.eltDecl) else: # child.type is actually a wildcard child.type.validate(child,schema,'element',child) elif lax: # TODO: check that this branch ever happens at all # TODO: record impact of missing type in PSVI validateElement(child,None,schema) # will be lax because no type else: verror(child, "undeclared element %s"% XMLSchema.QName(None,child.localName,child.namespaceName or None), schema,"src-resolve") def validateKeys(decl,elt): elt.keyTabs={} validateKeys1(elt,decl.keys,1) validateKeys1(elt,decl.uniques,0) validateKeyRefs(elt,decl.keyrefs) def validateKeys1(elt,kds,reqd): for key in kds: tab={} sp=xpath.XPath(key.selector) candidates=sp.find(elt) if candidates: fps=map(lambda f:xpath.XPath(f),key.fields) for s in candidates: keyKey=buildKey(s,fps) if keyKey: if len(keyKey)>1: keyKey=tuple(keyKey) else: keyKey=keyKey[0] else: if reqd: verror(s, "missing one or more fields %s from key %s"%(key.fields, key.name), key.schema,"cvc-identity-constraint.2.2.2") break if tab.has_key(keyKey): if reqd: code="cvc-identity-constraint.2.2.3" else: code="cvc-identity-constraint.2.1.2" verror(s,"duplicate key %s, first appearance was %s"% (str(keyKey), XMLSchema.whereString(tab[keyKey].where)), key.schema,code) else: tab[keyKey]=s elt.keyTabs[key.name]=tab def buildKey(s,fps): keyKey=[] for fp in fps: kv=fp.find(s) if kv: if len(kv)>1: # TODO error or shouldnt? vwarn(s,"oops, multiple field hits for %s at %s: %s"%(fp.str,s,kv)) if isinstance(kv[0],XMLInfoset.Element): if (len(kv[0].chunkedChildren)>0 and isinstance(kv[0].chunkedChildren[0],XMLInfoset.Characters)): keyKey.append(kv[0].chunkedChildren[0].characters) else: # XPath says in this case value is the empty string pass elif XML.somestring(type(kv[0])): keyKey.append(kv[0]) else: # TODO error or shouldnt? 
vwarn(s,"oops, key value %s:%s"%(type(kv[0]),kv[0])) else: return None return keyKey def validateKeyRefs(elt,krds): res=1 for ref in krds: if elt.keyTabs.has_key(ref.refer): keyTab=elt.keyTabs[ref.refer] if keyTab=='bogus': break else: elt.keyTabs[ref.refer]='bogus' verror(elt, "No key or unique constraint named %s declared, refed by keyref %s"%(ref.refer,ref.name), ref.schema,"cvc-identity-constraint.2.3.2") break sp=xpath.XPath(ref.selector) candidates=sp.find(elt) if candidates: fps=map(lambda f:xpath.XPath(f),ref.fields) for s in candidates: keyKey=buildKey(s,fps) if not keyKey: break if len(keyKey)>1: keyKey=tuple(keyKey) else: keyKey=keyKey[0] if not keyTab.has_key(keyKey): verror(s,"no key in %s for %s"%(ref.refer,str(keyKey)),ref.schema, "cvc-identity-constraint.2.3.2") def findSchemaLocs(element,schema): pairs = [] for a in element.attributes.values(): if a.namespaceName == xsi: if a.localName == "schemaLocation": scls=string.split(a.normalizedValue) while scls: if len(scls)>1: pairs.append((scls[0], scls[1])) else: verror(element,"xsi:schemaLocation must be a list with an even number of members: %s"%string.split(a.normalizedValue),schema,"???") scls=scls[2:] elif a.localName == "noNamespaceSchemaLocation": pairs.append((None,a.normalizedValue)) for c in element.chunkedChildren: if isinstance(c, XMLInfoset.Element): scl=findSchemaLocs(c,schema) if scl: pairs = pairs + scl return pairs def runitAndShow(en,rns=[],k=0,style=None,enInfo=None,outfile=None,dw=1, timing=0): global dontWarn dontWarn=dw if timing: timing=time.time() (res,encoding,errs)=runit(en,rns,k,timing) if timing: sys.stderr.write("Finished: %6.2f\n"%(time.time()-timing)) if not encoding: encoding='UTF-8' if outfile: try: outf=open(outfile,"w") except: sys.stderr.write("couldn't open %s for output, falling back to stderr"% outfile) outf=sys.stderr else: outf=sys.stderr errout=OpenStream(outf, CharacterEncodingNames[encoding], NSL_write+NSL_write_plain) if encoding!='UTF-8': es=" 
encoding='%s'"%encoding else: es="" PrintTextLiteral(errout,"\n"%es) if style: PrintTextLiteral(errout, "\n"%style) if enInfo: for (k,v) in enInfo.items(): res.addAttr(k,v) if errs: res.addAttr("crash","true") res.printme(errout) PrintTextLiteral(errout,"\n") Close(errout) if errs: return string.join(map(lambda es:string.join(es,''),errs),'') else: return class SchemaValidationError(Exception): def __init__(self,arg): Exception.__init__(self,arg) def runit(en,rns=[],k=0,timing=0): global s,e,t,f,res,ed,btlist btlist=[] ss = s = None f=XMLSchema.newFactory() f.errors=0 base=f.fileNames[0] if en: ren=urljoin(base,en) else: ren=None res=XML.Element("xsv") f.resElt=res res.addAttr("xmlns","http://www.w3.org/2000/05/xsv") res.addAttr("version",vs) res.addAttr("target",ren or "[stdin]") if rns: res.addAttr("schemaDocs",string.join(rns,' ')) rdn=tempfile.mktemp("xsverrs") redirect=open(rdn,"w+") savedstderr=os.dup(2) # save stderr os.dup2(redirect.fileno(),2) try: (e,encoding)=readXML(ren) if timing: os.write(savedstderr,"target read: %6.2f\n"%(time.time()-timing)) except LTXMLinter.error: pfe=XML.Element("bug") pfe.children=[XML.Pcdata("validator crash during target reading")] res.children.append(pfe) e=None encoding=None if not e: res.addAttr('outcome',"validation not attempted") sys.stderr.flush() registerRawErrors(redirect,res) # put stderr back os.dup2(savedstderr,2) return (res,None,btlist) # TODO: check each schema doc against schema for schemas, if possible, # unless caller explicitly opts out (?) 
if rns: try: s = XMLSchema.fromFile(urljoin(base,rns[0]),f) if timing: os.write(savedstderr,"schema read: %6.2f\n"%(time.time()-timing)) except: pfe=XML.Element("bug") pfe.children=[XML.Pcdata("validator crash during schema reading")] res.children.append(pfe) btlist.append(traceback.format_exception(sys.exc_type, sys.exc_value, sys.exc_traceback)) for rn in rns[1:]: try: ffr=XMLSchema.fromFile(urljoin(base,rn),f) if timing: os.write(savedstderr,"schema read: %6.2f\n"%(time.time()-timing)) ss=ss or ffr except: pfe=XML.Element("bug") pfe.children=[XML.Pcdata("validator crash during schema reading")] res.children.append(pfe) btlist.append(traceback.format_exception(sys.exc_type, sys.exc_value, sys.exc_traceback)) if not s: if ss: s=ss else: s = XMLSchema.Schema(f,None) s.targetNS='##dummy' schemaLocs = findSchemaLocs(e,s) res.addAttr('schemaLocs',string.join(map(lambda p:"%s -> %s"%(p[0] or 'None',p[1]), schemaLocs), '; ')) for (ns, sl) in schemaLocs: try: XMLSchema.checkinSchema(f, ns, sl,e,ren or "[stdin]") if timing: os.write(savedstderr,"schema read: %6.2f\n"%(time.time()-timing)) except: pfe=XML.Element("bug") pfe.children=[XML.Pcdata("validator crash during schema reading")] res.children.append(pfe) btlist.append(traceback.format_exception(sys.exc_type, sys.exc_value, sys.exc_traceback)) res.addAttr('docElt',"{%s}%s"%(e.namespaceName,e.localName)) if (e.namespaceName and (e.namespaceName not in ('http://www.w3.org/XML/1998/namespace',xsi)) and not f.schemas.has_key(e.namespaceName)): try: XMLSchema.checkinSchema(f,e.namespaceName,e.namespaceName,e,ren or "[stdin]") if timing: os.write(savedstderr,"schema read: %6.2f\n"%(time.time()-timing)) res.addAttr('nsURIDeref','success') except: pfe=XML.Element("bug") pfe.children=[XML.Pcdata("validator crash during schema reading")] res.children.append(pfe) btlist.append(traceback.format_exception(sys.exc_type, sys.exc_value, sys.exc_traceback)) res.addAttr('nsURIDeref','failure') sys.stderr.flush() 
registerRawErrors(redirect,res) # put stderr back os.dup2(savedstderr,2) try: ecount=XMLSchema.prepare(f) if timing: sys.stderr.write("schemas prepared: %6.2f\n"%(time.time()-timing)) except: ecount=-1 btlist.append(traceback.format_exception(sys.exc_type, sys.exc_value, sys.exc_traceback)) pfe=XML.Element("bug") pfe.children=[XML.Pcdata("validator crash during factory preparation")] res.children.append(pfe) kgm="true" kg=1 if ecount: if ecount<0: kg=0 else: if not k: kg=0 if not kg: kgm="false" res.addAttr('instanceAssessed',kgm) if not kg: if ecount<0: ecount=0 for sch in f.schemas.values(): ecount=ecount+sch.errors res.addAttr('schemaErrors',str(ecount)) return (res,encoding,btlist) cl=string.find(':',e.originalName) if cl>-1: prefix=e.originalName[0:cl] else: prefix='' eltname = XMLSchema.QName(prefix,e.localName,e.namespaceName or None) if not s: # any one will do s = f.sfors t=None ed=None if s and s.vElementTable.has_key(eltname): ed=s.vElementTable[eltname] t=ed.typeDefinition if t: if t.name: if hasattr(t,'qname'): tn=t.qname.string() else: tn=t.name else: tn='[Anonymous]' res.addAttr('rootType',tn) res.addAttr('validation','strict') else: res.addAttr('validation','lax') if e and s: try: validate(e, t, s, ed) e.schemaInformation = [] # XXX get all the namespaces and schemas nsi = PSVInfoset.NamespaceSchemaInformation(e, 'dummy:namespace') e.schemaInformation.append(nsi) except: btlist.append(traceback.format_exception(sys.exc_type, sys.exc_value, sys.exc_traceback)) pfe=XML.Element("bug") pfe.children=[XML.Pcdata("validator crash during validation")] res.children.append(pfe) res.addAttr('instanceErrors',str(s.factory.errors)) ec=0 for sch in f.schemas.values(): ec=ec+sch.errors res.addAttr('schemaErrors',str(ec)) return (res,encoding,btlist) def registerRawErrors(redirect,res): if redirect.tell(): redirect.seek(0) ro=XML.Element("XMLMessages") o="\n%s"%redirect.read() ro.children=[XML.Pcdata(o)] res.children.append(ro) redirect.close() def 
verror(elt,message,schema,code=None,two=0,daughter=None,iitem=None): # code argument identifies CVC ve=XML.Element("invalid") ve.children=[XML.Pcdata(message)] if code: ve.addAttr("code",code) if two: XMLSchema.where(ve,elt.where2) else: XMLSchema.where(ve,elt.where) if daughter: ve.children.append(daughter) res.children.append(ve) schema.factory.errors=schema.factory.errors+1 if not iitem: iitem=elt if iitem.errorCode: iitem.errorCode.append(" "+code) else: iitem.errorCode=[code] def vwarn(elt,message): if dontWarn: return ve=XML.Element("warning") ve.children=[XML.Pcdata(message)] if elt: XMLSchema.where(ve,elt.where) res.children.append(ve) # validation methods for schema components def av(self,child,schema,kind,elt): q = XMLSchema.QName(None,child.localName,child.namespaceName or None) vwarn(elt,"allowing %s because it matched wildcard(%s)" % (q,self.allowed)) if self.processContents!='skip': print "looking for decl for %s" % child.originalName if schema.factory.schemas.has_key(child.namespaceName): # only try if we might win -- needs work try: if kind=='element': e = schema.vElementTable[q] else: e = schema.vAttributeTable[q] except KeyError: e=None print "decl for %s is %s" % (child.originalName, e) if e and e.typeDefinition: vwarn(None,"validating it against %s" % (e.typeDefinition.name or 'anonymous type')) if kind=='element': validateElement(child, e.typeDefinition, schema) else: return e.typeDefinition.validateText(child.normalizedValue, elt, schema) elif (self.processContents=='strict' and not (kind=='element' and child.attributes.has_key((xsi, "type")))): # TODO check this against actual def'n of missing component verror(elt, "can't find a type for wildcard-matching %s %s" %(kind, q), schema, "src-resolve") child.validatedType = None elif kind=='element': vwarn(None,"validating it laxly") child.validatedType = None validateElement(child,None,schema) XMLSchema.Wildcard.validate=av def tv(self,child,schema,kind,elt): validateElement(child, self, schema) 
XMLSchema.Type.validate = XMLSchema.AbInitio.validate = tv


def validateText(self, text, context, schema):
    """Check `text` against simple type `self` (SimpleType.validateText).

    Returns a false value when the text is acceptable, otherwise a string
    describing the failure.  Atomic types defer to their primitive type's
    checkString, list types check each whitespace-separated token against
    the item type, and union types accept the text if any member type does.
    """
    if self == XMLSchema.urType:
        return
    if self.variety == 'atomic':
        # ref may have failed
        return (self.primitiveType and
                self.primitiveType.checkString(text, context))
    elif self.variety == 'list':
        it = self.itemType
        # TODO: what about post-list facets?
        if not it:
            return
        for substr in string.split(text):
            res = it.validateText(substr, context, schema)
            if res:
                return res + ' in list'
        return
    elif self.variety == 'union':
        mts = self.memberTypes
        subres = []
        # TODO: what about post-union facets?
        for mt in mts:
            if mt:
                res = mt.validateText(text, context, schema)
                if res:
                    subres.append(res)
                else:
                    # bingo
                    return
        # no subtypes won, we lose
        return " no members of union succeeded: %s" % subres
    else:
        XMLSchema.shouldnt('vv ' + str(self.variety))


XMLSchema.SimpleType.validateText = validateText


def validateText(self, text, context, schema):
    """Check `text` via self.checkString (AbInitio.validateText)."""
    return self.checkString(text, context)


XMLSchema.AbInitio.validateText = validateText


# checkString methods

def checkString(self, str, context):
    """Enumeration check (AbInitio.checkString).

    Returns None if the type has no enumeration or `str` is one of its
    values, otherwise a message listing the enumeration.
    """
    # TODO: rethink allowedFacets: not efficient
    if 'enumeration' in self.allowedFacets and self.enumeration is not None:
        for val in self.enumeration:
            if val == str:
                return
        return " not in enumeration %s" % self.enumeration


XMLSchema.AbInitio.checkString = checkString


def checkString(self, str, context):
    """Numeric and range check (DecimalST.checkString).

    Parses `str` as a float if it contains '.' or 'E', else as an integer,
    compares it with the min/max Inclusive/Exclusive facets that are set,
    and finally applies the AbInitio enumeration check.  Returns a message
    string on failure.
    """
    try:
        if ('.' in str) or ('E' in str):
            val = string.atof(str)
        else:
            val = string.atoi(str)
    except ValueError:
        return " does not represent a number"
    # NOTE(review): the two lower-bound tests were lost from the text under
    # review (only the tail of the maxInclusive test survived); they are
    # restored by symmetry with the upper-bound tests -- confirm against the
    # upstream source.
    if self.minInclusive is not None and val < self.minInclusive:
        return "<%d" % self.minInclusive
    if self.minExclusive is not None and val <= self.minExclusive:
        return "<=%d" % self.minExclusive
    if self.maxInclusive is not None and val > self.maxInclusive:
        return ">%d" % self.maxInclusive
    if self.maxExclusive is not None and val >= self.maxExclusive:
        return ">=%d" % self.maxExclusive
    return XMLSchema.AbInitio.checkString(self, str, context)


XMLSchema.DecimalST.checkString = checkString


def checkString(self, str, context):
    """Lexical and prefix check for QNames (QNameST.checkString).

    Rejects more than one colon and prefixes not in
    context.inScopeNamespaces, then applies the AbInitio enumeration check.
    """
    # not complete by any means
    parts = string.split(str, ':')
    if len(parts) > 2:
        return " has more than one colon"
    if len(parts) == 2 and not context.inScopeNamespaces.has_key(parts[0]):
        return " has undeclared prefix: %s" % parts[0]
    return XMLSchema.AbInitio.checkString(self, str, context)


XMLSchema.QNameST.checkString = checkString


def dumpInfoset(filename):
    """Write the reflected infoset of the last document read to `filename`.

    Uses the module global `doc` set by readXML, and adds a "psv" prefix
    for PSVInfoset.infosetSchemaNamespace to the reflected document
    element's in-scope namespaces.
    """
    ff = open(filename, "w")
    try:
        r = doc.reflect()
        r.documentElement.inScopeNamespaces["psv"] = XMLInfoset.Namespace(
            "psv", PSVInfoset.infosetSchemaNamespace)
        r.indent()
        r.printme(ff)
    finally:
        ff.close()


# run at import if top

if __name__ == '__main__':
    usage = ("Usage: [-ktw] [-s stylesheet] [-o outputFile] "
             "file [schema1 schema2 . . .]\n")
    argl = sys.argv[1:]
    k = 0
    dw = 1
    timing = 0
    style = None
    outfile = None
    while argl:
        arg = argl[0]
        if arg == '-k':
            k = 1
        elif arg in ('-s', '-o'):
            # Both options consume the following argument.
            if len(argl) < 2:
                sys.stderr.write(usage)
                sys.exit(-1)
            if arg == '-s':
                style = argl[1]
            else:
                outfile = argl[1]
            argl = argl[1:]
        elif arg == '-w':
            dw = 0
        elif arg == '-t':
            timing = 1
        elif arg[:1] == '-':
            # Slicing keeps an empty-string argument from raising IndexError.
            sys.stderr.write(usage)
            sys.exit(-1)
        else:
            break
        argl = argl[1:]

    if argl:
        res = runitAndShow(argl[0], argl[1:], k, style, None, outfile, dw,
                           timing)
    else:
        res = runitAndShow(None, [], k, style, None, outfile, dw, timing)

    if res:
        raise SchemaValidationError(res)

# $Log: applyschema.py,v $
# Revision 1.74.2.11 2000/12/06 09:21:05 ht
# add psv infoset namespace URI to reflected docapplyschema.py
#
# Revision 1.74.2.10 2000/12/04 22:31:03 ht
# stubs for schemaNormalizedValue in place
#
# Revision 1.74.2.9 2000/12/04 22:09:00 ht
# remove convert,
# accommodate change to importing XML,
# put attribute verror on right item
#
# Revision 1.74.2.8 2000/12/04 13:30:42 ht
# merge in main line fixes thru 1.82
#
# Revision 1.74.2.7 2000/10/13 12:48:42 richard
# more infoset contributions
#
# Revision 1.74.2.6 2000/10/02 13:33:28 richard
# update values for validity property
#
# Revision 1.74.2.5 2000/09/29 17:18:09 richard
# More towards PSV infoset
#
# Revision 1.74.2.4 2000/09/29 16:45:27 richard
# correct errorCode setting
#
# Revision 1.74.2.3 2000/09/29 16:04:24 richard
# More towards PSV infoset
#
# Revision 1.74.2.2 2000/09/29 14:16:15 ht
# towards PSVI contributions
#
# Revision 1.74.2.1 2000/09/27 17:21:20 richard
# Changes for infoset-based
#
# Revision 1.77 2000/09/28 15:54:50 ht
# schema error count includes all errors, not just those found at prep
# time
#
# Revision 1.76 2000/09/28 15:09:14 ht
# try catching and returning any crashes
#
# Revision 1.75 2000/09/28 08:41:57 ht
# add usage message
# add -o outfile cmd line arg
#
# Revision 1.82 2000/10/31 16:30:47 ht
# validate subordinate elements with eltdecl if available
# return schema error count if not attempting instance validation
#
# Revision 1.81 2000/10/27 15:33:30 ht
# Output timing info if -t on command line
#
# Revision 1.80 2000/10/18 15:54:58 ht
# make effort to check 'fixed' attribute values
#
# Revision 1.79 2000/10/17 13:35:41 ht
# put switch on warnings, default is don't
#
# Revision 1.78 2000/10/17 12:45:15 ht
# try to catch and log
all crashes # replace stale reference to atribute.characters # # Revision 1.77 2000/09/28 15:54:50 ht # schema error count includes all errors, not just those found at prep # time # # Revision 1.76 2000/09/28 15:09:14 ht # try catching and returning any crashes # # Revision 1.75 2000/09/28 08:41:57 ht # add usage message # add -o outfile cmd line arg # # Revision 1.74 2000/09/27 13:48:47 richard # Use infoset-like names for slots (now provided in XML.py) to reduce # differences with infoset-based version. # # Revision 1.73 2000/09/27 12:22:22 richard # correct element.name to element.local in an error message # # Revision 1.72 2000/09/26 14:29:36 richard # Oops, didn't change AbInitio to XMLSchema.AbInitio when moving methods # # Revision 1.71 2000/09/26 14:05:28 richard # Move checkString methods from XMLSchema.py, because they may need to look # at *instance* in-scope namespaces # # Revision 1.70 2000/09/26 13:38:49 ht # protect against undefined list itemType/union memberType # # Revision 1.69 2000/09/23 11:17:31 ht # merge in CR branch # # Revision 1.68 2000/09/23 11:14:26 ht # towards merge in CR branch # # Revision 1.66.2.3 2000/09/21 09:14:33 ht # property name change # # Revision 1.66.2.2 2000/09/11 12:23:27 ht # Move to branch: more debug in vv crash # # Revision 1.68 2000/09/03 15:57:23 ht # more debug in vv crash # Revision 1.67 2000/09/11 12:59:09 ht # allow stdin, # fix stupid bug missing third schema on command line # Revision 1.67 2000/08/31 11:48:41 ht # Direct support for validating lists and unions # Revision 1.66 2000/08/22 13:11:30 ht # handle type w/o qname as document validation type # remove special treatment for AbInitio simple types on elements, # thereby fixing list validation bug # Revision 1.66.2.3 2000/09/21 09:14:33 ht # property name change # # Revision 1.66.2.2 2000/09/11 12:23:27 ht # Move to branch: more debug in vv crash # # Revision 1.68 2000/09/03 15:57:23 ht # more debug in vv crash # # Revision 1.67 2000/08/31 11:48:41 ht # 
Direct support for validating lists and unions # # Revision 1.66 2000/08/22 13:11:30 ht # handle type w/o qname as document validation type # remove special treatment for AbInitio simple types on elements, # thereby fixing list validation bug # # Revision 1.65 2000/07/12 09:31:58 ht # try harder to always have a schema # # Revision 1.64 2000/07/10 14:39:02 ht # prepare for fileinfo to runit # # Revision 1.63 2000/07/05 09:05:37 ht # change name to PyLTXML # # Revision 1.62 2000/07/03 09:37:38 ht # bail out if textonly has elt daughter(s) # add missing import # # Revision 1.61 2000/06/27 09:25:51 ht # attempt to handle interaction between xsi:type and # # Revision 1.60 2000/06/24 11:17:07 ht # fix bug in unqualified xsi:type # # Revision 1.59 2000/06/22 10:31:33 ht # Bug in unique processing -- broke on missing field # # Revision 1.58 2000/06/20 08:07:42 ht # merge xmlout branches back in to main line # # Revision 1.57 2000/05/18 08:01:25 ht # fix bug in handling of xsi:type # # Revision 1.56 2000/05/14 12:19:34 ht # add context to checkSting calls # # Revision 1.55 2000/05/11 11:55:57 ht # just better handling of lax validation from other branch # # Revision 1.54.2.16 2000/06/15 16:03:20 ht # cover several missing definition cases # # Revision 1.54.2.15 2000/06/03 16:29:30 ht # oops, removing debugging comment # # Revision 1.54.2.14 2000/06/03 13:45:55 ht # catch arity bug in xsi:schemaLoc # # Revision 1.54.2.13 2000/05/30 09:35:43 ht # fix encoding bug when things break down altogether # # Revision 1.54.2.12 2000/05/29 08:46:53 ht # strong enforcement of nullable # add error codes to all errors # remove remaining __class__ tests # change error reporting wrt disallowed content # # Revision 1.54.2.11 2000/05/24 20:46:47 ht # make validateText a method, split across SimpleType and AbInitio # # Revision 1.54.2.10 2000/05/24 12:03:28 ht # modest effort to validate list types # fix bug in noNamespaceSchemaLocation handling at validation time # # Revision 1.54.2.9 
2000/05/22 16:11:52 ht # use OpenStream, take more control of encoding # # Revision 1.54.2.8 2000/05/18 17:37:40 ht # parameterise stylesheet, # remove formatting from xsv:xsv attributes, # add namespace decl # # Revision 1.54.2.7 2000/05/18 07:59:48 ht # fix xsi:type validation bug # # Revision 1.54.2.6 2000/05/16 16:31:11 ht # fix bug handling un-typed element declarations == urType validation # # Revision 1.54.2.5 2000/05/14 12:29:59 ht # merge QName checking from main branch # # Revision 1.54.2.4 2000/05/12 15:15:01 ht # process keys even if type is simple, # add a few codes to get started # # Revision 1.54.2.3 2000/05/11 13:59:11 ht # convert verror/vwarn to produce elements # eliminate a few special error outputs in favour of special # sub-elements # # Revision 1.54.2.2 2000/05/11 11:14:00 ht # more error protection # handle lax recursively and at the start # # Revision 1.54.2.1 2000/05/10 11:36:47 ht # begin converting to XML output # # Revision 1.56 2000/05/14 12:19:34 ht # add context to checkString calls # # Revision 1.55 2000/05/11 11:55:57 ht # just better handling of lax validation from other branch # # Revision 1.54 2000/05/09 14:52:52 ht # Check for strings in a way that works with or without 16-bit support # # Revision 1.53 2000/05/09 12:27:58 ht # replace our hack with python's url parsing stuff # make f global for debugging # # Revision 1.52 2000/05/05 15:15:45 richard # wrong (?)
elt arg to verror in validateKeyRefs # # Revision 1.51 2000/05/04 07:56:35 ht # Fix typo in opportunistic attribute validation # # Revision 1.50 2000/05/01 15:07:00 richard # bug fix schema -> key.schema # # Revision 1.49 2000/05/01 10:05:43 ht # catch various missing file errors more gracefully # # Revision 1.48 2000/04/28 15:40:01 richard # Implement xsi:null (still don't check nullable) # # Revision 1.47 2000/04/28 15:11:23 richard # allow xsi: attributes on simple type # moved eltDecl code up validateElement ready for implementing xsi:null # # Revision 1.46 2000/04/27 09:41:18 ht # remove raw types from error messages # # Revision 1.45 2000/04/27 09:30:21 ht # check that inputs are actually schemas, # remove schema arg to doImport, checkInSchema # # Revision 1.44 2000/04/26 13:00:40 ht # add copyright # # Revision 1.43 2000/04/24 20:46:40 ht # cleanup residual bugs with massive rename, # rename Any to Wildcard, # replace AnyAttribute with Wildcard, # get validation of Wildcard working in both element and attribute contexts # # Revision 1.42 2000/04/24 15:08:34 ht # minor glitches, tiny.xml works again # # Revision 1.41 2000/04/24 15:00:09 ht # wholesale name changes -- init. 
caps for all classes, # schema.py -> XMLSchema.py # # Revision 1.40 2000/04/24 11:09:17 ht # make version string universally available # # Revision 1.39 2000/04/24 10:06:59 ht # add version info to message # # Revision 1.38 2000/04/24 10:02:39 ht # change invocation message # # Revision 1.37 2000/04/24 09:41:43 ht # clean up invocation some more, add k arg't to runit # # Revision 1.36 2000/04/21 09:32:21 ht # another dose of resolveURL # use tiny only if run from command line # # Revision 1.35 2000/04/20 22:12:43 ht # use resolveURL on input, schemaLocs # # Revision 1.34 2000/04/20 15:45:08 ht # better handling of use of ns uri for loc # # Revision 1.33 2000/04/20 14:26:59 ht # merge in private and comp branches # # Revision 1.32.2.5 2000/04/20 14:25:54 ht # merge in comp branch # # Revision 1.32.2.4.2.9 2000/04/20 14:22:39 ht # manage document validation schema creation and search better # # Revision 1.32.2.4.2.8 2000/04/20 12:03:21 ht # Remove a few lingering effectiveTypes # Allow better for absent types etc. # # Revision 1.32.2.4.2.7 2000/04/14 21:18:27 ht # minor attr names/path changes to track schema # # Revision 1.32.2.4.2.6 2000/04/13 23:04:39 ht # allow for urType as simple type (?) 
# track Any->AnyWrap change # # Revision 1.32.2.4.2.5 2000/04/12 17:29:37 ht # begin work on model merger, # # Revision 1.32.2.4.2.4 2000/04/11 18:13:17 ht # interpolate attributeUse between complexType and attributeDeclaration, # parallel to particle # # Revision 1.32.2.4.2.3 2000/04/10 15:48:46 ht # put modest attribute validation in place # # Revision 1.32.2.4.2.2 2000/04/09 16:13:26 ht # working on complex type, attribute; # back out component.qname # # Revision 1.32.2.4.2.1 2000/04/05 12:12:36 ht # accommodate changes in schema.py # # Revision 1.32.2.4 2000/04/01 18:01:25 ht # various minor compatibility fixes # # Revision 1.32.2.3 2000/03/25 12:12:27 ht # restructure error handling/reporting; # allow for switching 208 on and off # # Revision 1.32.2.2 2000/03/21 15:57:23 ht # fix bug in skip, # allow 208 override # # Revision 1.32.2.1 2000/03/20 17:22:52 ht # better coverage of , including beginning of processcontents # # Revision 1.33 2000/03/20 17:20:53 ht # better coverage of , including beginning of processcontents # # Revision 1.32 2000/03/08 15:28:46 ht # merge private branches back into public after 20000225 release # # Revision 1.31.2.3 2000/02/24 23:40:32 ht # fix any bug # # Revision 1.31.2.2 2000/02/21 09:18:13 ht # bug in handling # # Revision 1.31.2.1 2000/02/08 21:43:39 ht # fork private branch to track internal drafts # change calling sequence of checkinSchema # # Revision 1.31.1.1 2000/02/08 13:54:25 ht # fork branch for non-public changes # calling sequence to checkinSchema changed # # Revision 1.31 2000/01/13 16:55:42 richard # Finally do something with xsi:type # # Revision 1.30 2000/01/10 17:36:34 richard # changes for xsi:schemaLocation # # Revision 1.29 2000/01/08 23:33:50 ht # towards support for xsi:schemaLocation # # Revision 1.28 2000/01/08 12:07:38 ht # Change command-line arg sequence in preparation for use of schemaLocation!!!!! 
# Add debug printout for schemaLocation for now # # Revision 1.27 2000/01/07 17:08:26 richard # start on xsi:type # # Revision 1.26 2000/01/06 14:59:38 ht # fix command line bug, display args on entry # # Revision 1.25 2000/01/06 14:38:56 ht # detect cross-scope keyref and signal error # # Revision 1.24 2000/01/03 17:02:37 ht # Include result of sub-ordinate key checking in overall result # Accommodate new calling sequence for xpath.find # add Log and Id # #