# actually apply a schema to an instance
# $Id: applyschema.py,v 1.37 2000/04/24 09:41:43 ht Exp $

from PyXML import *
from XML import *
from schema import *
import layer
import sys
import re
import string
import xpath
import types

# matches character data that consists of white space only
whitespace = re.compile("^[ \t\r\n]*$")
xsi = "http://www.w3.org/1999/XMLSchema-instance"


def readXML(url):
    """Open url, build the Element tree for the document and return it.

    The input is closed even if building the tree raises.
    NOTE(review): there is still no defined error return for a document
    that cannot be read.
    """
    source = Open(url,
                  NSL_read|NSL_read_namespaces|NSL_read_defaulted_attributes)
    try:
        elem = Element(source, 1)
    finally:
        Close(source)
    return elem


def validate(element, typedef, schema):
    """Validate element against typedef; return the number of errors.

    The error counter on schema.factory is reset before validation starts.
    """
    schema.factory.errors = 0
    validateElement(element, typedef, schema)
    return schema.factory.errors


def validateElement(element, type, schema, eltDecl=None):
    """Validate one element against type, honouring any xsi:type attribute.

    Errors are reported through verror (which counts them on
    schema.factory).  If an element declaration is supplied, or one can be
    found in schema.vElementTable under the element's name, its
    key/unique/keyref constraints are checked as well.
    """
    # kept as module globals so the last element/type can be inspected
    global vel, vtype
    vel = element
    vtype = type
    if element.nsattrs.has_key((xsi, "type")):
        t = element.nsattrs[(xsi, "type")].value
        try:
            qt = QName(t, element.nsdict)
        except SchemaError:
            verror(element, "namespace not found for xsi:type %s" % t, schema)
            return
        if schema.vComplexTypeTable.has_key(qt):
            xsitype = schema.vComplexTypeTable[qt]
        elif schema.vSimpleTypeTable.has_key(qt):
            xsitype = schema.vSimpleTypeTable[qt]
        else:
            verror(element, "xsi:type %s undefined" % qt, schema)
            return
        if not xsitype.isSubtype(type):
            verror(element,
                   "xsi:type %s is not a subtype of the declared type %s" % (qt, type.name),
                   schema)
            return
        vwarn(element,
              "using xsi:type %s instead of original %s" % (qt, type.name))
        type = xsitype
    if type:
        # might have none in case of a recursive call for a wildcard match
        if isinstance(type, AbInitio):
            return validateElementSimple(element, type, schema)
        if isinstance(type, simpleType):
            return validateElementSimple(element, type.primitiveType, schema)
        assignAttributeTypes(element, type.attributeDeclarations,
                             type.prohibitedSubstitutions, schema)
        validateAttributeTypes(element, element.attrTable,
                               type.attributeDeclarations, schema)
        assignChildTypes(element.children, type.elementTable,
                         type.prohibitedSubstitutions, schema)
        # we must look at the content model before checking the types, so
        # that we know which children matched a wildcard
        validateContentModel(element, type, schema)
        validateChildTypes(element.children, schema)
    eqn = QName(None, element.local, element.uri)
    # use the schema we were given rather than the module global set up by
    # runit, so that validate() also works when called directly
    if not eltDecl and schema.vElementTable.has_key(eqn):
        eltDecl = schema.vElementTable[eqn]
    if eltDecl:
        validateKeys(eltDecl, element)


def validateElementSimple(element, type, schema):
    """Validate an element whose type is simple.

    Checks that it has no attributes, and then (via validateTextModel) that
    it has at most one pcdata child whose text matches the type.
    """
    if element.attrs:
        verror(element,
               "element %s has attributes %s but has type %s" % (element.name,
                                                                 element.attrs,
                                                                 type),
               schema)
        return
    return validateTextModel(element, type, schema)


def validateText(text, type, schema):
    """Check text against type and return whatever checkString returns.

    Callers treat a true result as a description of the failure.  Nothing
    is checked (None is returned) when type is urType.
    """
    if isinstance(type, simpleType):
        if type == urType:
            return
        else:
            return type.primitiveType.checkString(text)
    else:
        return type.checkString(text)


def assignAttributeTypes(element, attrdefs, extendable, schema):
    """Look up each attribute of element in attrdefs and record its type.

    Builds element.attrTable (QName -> attribute).  An attribute with no
    declaration is an error unless attrdefs has a "#any" entry.
    The extendable argument is accepted but not consulted here.
    """
    element.attrTable = {}
    for a in element.attrs.values():
        an = QName(None, a.local, a.uri)
        element.attrTable[an] = a
        if a.uri == xsi:
            if a.local == "type":
                # we've already handled it
                pass
            elif a.local == "schemaLocation":
                # we've already handled it
                pass
            else:
                verror(element, "unknown xsi attribute %s" % an, schema)
        elif attrdefs.has_key(an):
            a.type = attrdefs[an]
        elif attrdefs.has_key("#any"):
            # XXX check the namespaces
            vwarn(element,
                  "allowing undeclared attribute %s because anyAttribute(%s)" % (an, attrdefs["#any"]))
            a.type = None
        else:
            verror(element, "undeclared attribute %s" % an, schema)
            a.type = None
    return


def validateAttributeTypes(element, attrs, attrdefs, schema):
    """Check required attributes are present and typed attributes are valid.

    attrs is the table built by assignAttributeTypes; attributes in the xsi
    namespace and attributes without an assigned type are not checked.
    """
    # add defaulted attributes (shouldn't need to check their types)
    for (adq, ad) in attrdefs.items():
        if ad.minOccurs == 1 and not attrs.has_key(adq):
            verror(element, "required attribute %s not present"%adq, schema)
    for (an, a) in attrs.items():
        if an.uri != xsi and a.type:
            res = validateText(a.value,
                               a.type.attributeDeclaration.typeDefinition,
                               schema)
            if res:
                verror(element,
                       "attribute type check failed for %s: %s%s"%(an,
                                                                    a.value,
                                                                    res),
                       schema)


def assignChildTypes(children, elementTable, extendable, schema):
    """Record on each child element the type found for its name, or None.

    It may not be an error if a child is not declared; we don't know that
    until we see what it matches in the content model.  Always returns 1.
    """
    for child in children:
        if child.__class__ == Element:
            qname = QName(None, child.local, child.uri)
            if elementTable.has_key(qname):
                child.type = elementTable[qname][1]
            else:
                child.type = None
    return 1


def validateContentModel(element, type, schema):
    """Dispatch on type.contentType to the matching content-model check.

    "empty" and "textOnly" have dedicated checks; anything else is traced
    through the type's finite-state machine, allowing text when the
    content type is "mixed".
    """
    if type.contentType == "empty":
        validateEmptyModel(element, type, schema)
    elif type.contentType == "textOnly":
        validateTextModel(element, type.model, schema)
    else:
        validateElementModel(element, type.fsm,
                             type.contentType == "mixed", schema)


def validateEmptyModel(element, type, schema):
    """Report an error if element has any children at all."""
    if len(element.children) != 0:
        verror(element,
               "element %s must be empty but is not" % element.name, schema)


def validateTextModel(element, type, schema):
    """Check that element holds at most one pcdata child matching type.

    An element with no children is checked as the empty string.
    """
    name = element.name
    n = len(element.children)
    if n > 1:
        verror(element,
               "element %s has %s (> 1) children but has type %s" % (name, n, type),
               schema)
        return
    elif n > 0 and element.children[0].__class__ != Pcdata:
        verror(element,
               "element %s has non-text children but has type %s" % (name, type),
               schema)
        return
    else:
        if n == 0:
            text = ""
        else:
            text = element.children[0].value
        res = validateText(text, type, schema)
        if res:
            verror(element,
                   "element content failed type check: %s%s"%(text, res),
                   schema)


def validateElementModel(element, fsm, mixed, schema):
    """Trace element's children through fsm, reporting disallowed content.

    A child that only matches a wildcard edge (an AnyWrap label) has that
    label stored as its type so validateChildTypes can treat it specially.
    Non-whitespace text is an error unless mixed is true, and stops the
    check for this element.
    """
    n = fsm.startNode
    for c in element.children:
        if c.__class__ == Pcdata:
            if (not mixed) and (not whitespace.match(c.value)):
                verror(c,
                       "text not allowed in element %s: |%s|" % (element.name, c.value),
                       schema)
                return
        elif c.__class__ == Element:
            qname = QName(None, c.local, c.uri)
            next = None
            anynext = None
            for e in n.edges:
                if e.label == qname:
                    next = e.dest
                    break
                if isinstance(e.label, AnyWrap):
                    # XXX check the namespaces
                    anynext = e.dest
                    anylab = e.label
            if not next:
                if anynext:
                    n = anynext
                    # XXX a child that matched the wildcard but already had
                    # a type assigned is no longer an error, but something
                    # more complicated is needed here
                    c.type = anylab
                else:
                    verror(c,
                           "element %s not allowed here in element %s" % (qname, QName(None,element.local,element.uri)),
                           schema)
                    fsm.printme(sys.stderr)
            else:
                n = next
    if not n.isEndNode:
        verror(element,
               "content of %s is not allowed to end here" % element.name,
               schema, 1)
        fsm.printme(sys.stderr)
        return


def validateChildTypes(children, schema):
    """Validate each child element against the type recorded on it.

    A child with no type is reported as undeclared.  A child whose type is
    an AnyWrap (it matched a wildcard) is validated against a global
    element declaration when one can be found, unless processContents is
    'skip'; failing to find one is an error only for 'strict'.
    """
    for child in children:
        if child.__class__ == Element:
            if child.type:
                if child.type.__class__ == AnyWrap:
                    q = QName(None, child.local, child.uri)
                    vwarn(child, "allowing %s because it matched " % q)
                    if child.type.any.processContents != 'skip':
                        if schema.factory.schemas.has_key(child.uri):
                            # only try if we might win -- needs work
                            try:
                                e = schema.vElementTable[q]
                            except KeyError:
                                e = None
                            if e:
                                vwarn(None, "validating it against %s" % e)
                                validateElement(child, e.typeDefinition,
                                                schema)
                            elif child.type.any.processContents == 'strict':
                                verror(child,
                                       "can't find a type for -matching element %s" % q,
                                       schema)
                else:
                    validateElement(child, child.type, schema)
            else:
                verror(child,
                       "undeclared element %s" % QName(None,child.local,child.uri),
                       schema)


def validateKeys(decl, elt):
    """Check the keys, uniques and keyrefs declared on decl against elt.

    The key tables built along the way are stored in elt.keyTabs.
    """
    elt.keyTabs = {}
    validateKeys1(elt, decl.keys, 1)
    validateKeys1(elt, decl.uniques, 0)
    validateKeyRefs(elt, decl.keyrefs)


def validateKeys1(elt, kds, reqd):
    """Build a value table for each constraint in kds and report duplicates.

    reqd is true for keys, where a candidate with no key value is an error
    (and ends the scan for that constraint); for uniques such a candidate
    is skipped.  Each table is stored in elt.keyTabs under the constraint
    name.
    """
    for key in kds:
        tab = {}
        sp = xpath.XPath(key.selector)
        candidates = sp.find(elt)
        if candidates:
            fps = map(lambda f:xpath.XPath(f), key.field)
            for cand in candidates:
                keyKey = buildKey(cand, fps)
                if not keyKey:
                    if reqd:
                        # report against the constraint's own schema; the
                        # bare name 'schema' is not bound to one here
                        verror(cand,
                               "missing one or more fields %s from key %s"%(key.field,
                                                                            key.name),
                               key.schema)
                        break
                    # nothing to index for this candidate; skip it rather
                    # than fail on len(None) or [][0] below
                    continue
                if len(keyKey) > 1:
                    keyKey = tuple(keyKey)
                else:
                    keyKey = keyKey[0]
                if tab.has_key(keyKey):
                    verror(cand,
                           "duplicate key %s, first appearance was"%str(keyKey),
                           key.schema)
                    where(tab[keyKey].where)
                else:
                    tab[keyKey] = cand
        elt.keyTabs[key.name] = tab


def buildKey(s, fps):
    """Return the list of field values found from s, or None.

    None is returned as soon as one field path finds nothing.  Only the
    first hit of each path is used; an element hit contributes the value
    of its first child if that is pcdata, and nothing otherwise.
    """
    keyKey = []
    for fp in fps:
        kv = fp.find(s)
        if kv:
            if len(kv) > 1:
                vwarn(s,
                      "oops, multiple field hits for %s at %s: %s"%(fp.str,s,kv))
            if isinstance(kv[0], Element):
                if (len(kv[0].children) > 0 and
                    isinstance(kv[0].children[0], Pcdata)):
                    keyKey.append(kv[0].children[0].value)
                else:
                    # XPath says in this case value is the empty string
                    pass
            elif type(kv[0]) == types.StringType:
                keyKey.append(kv[0])
            else:
                vwarn(s, "oops, key value %s:%s"%(type(kv[0]),kv[0]))
        else:
            return None
    return keyKey


def validateKeyRefs(elt, krds):
    """Check that every keyref value under elt appears in the referenced table.

    A reference to a table that was not built for elt is reported once;
    the name is then marked 'bogus' in elt.keyTabs.  Processing of the
    remaining keyrefs stops at the first such reference.
    """
    for ref in krds:
        if elt.keyTabs.has_key(ref.refer):
            keyTab = elt.keyTabs[ref.refer]
            if keyTab == 'bogus':
                break
        else:
            elt.keyTabs[ref.refer] = 'bogus'
            verror(ref.elt,
                   "No key or unique constraint named %s declared, refed by keyref %s"%(ref.refer,ref.name),
                   ref.schema)
            break
        sp = xpath.XPath(ref.selector)
        candidates = sp.find(elt)
        if candidates:
            fps = map(lambda f:xpath.XPath(f), ref.field)
            for cand in candidates:
                keyKey = buildKey(cand, fps)
                if not keyKey:
                    break
                if len(keyKey) > 1:
                    keyKey = tuple(keyKey)
                else:
                    keyKey = keyKey[0]
                if not keyTab.has_key(keyKey):
                    verror(cand,
                           "no key in %s for %s"%(ref.refer,str(keyKey)),
                           ref.schema)


def findSchemaLocs(element):
    """Collect (namespace, location) pairs from xsi:schemaLocation attributes.

    The element and all its descendant elements are searched.  A trailing
    token without a partner is reported on stderr and ignored.
    """
    pairs = []
    for a in element.attrs.values():
        if a.uri == xsi and a.local == "schemaLocation":
            scls = string.split(a.value)
            while len(scls) >= 2:
                pairs.append((scls[0], scls[1]))
                scls = scls[2:]
            if scls:
                sys.stderr.write("ignoring unpaired xsi:schemaLocation entry %s\n" % scls[0])
    for c in element.children:
        if isinstance(c, Element):
            pairs = pairs + findSchemaLocs(c)
    return pairs


def runit(en, rns=[], k=0):
    """Schema-validate the instance document en, reporting on stderr.

    rns lists schema documents to load first (it is only read, never
    modified); with none given a dummy schema is created.  Schemas named
    by xsi:schemaLocation in the instance are checked in too, and the
    root element's namespace URI is tried as a schema location if nothing
    has been loaded for it.  If the schemas have errors, validation is
    skipped unless k is true.

    Returns 1 if validation found errors, 0 if it found none, and None
    when the instance was not validated.  The schema, root element and
    root type are left in the module globals s, e and t.
    """
    global s, e, t
    s = None
    sys.stderr.write("schema-validating %s using schemas %s\n"%(en,rns))
    f = newFactory()
    base = f.fileNames[0]
    ren = resolveURL(base, en)
    if rns:
        s = fromFile(resolveURL(base, rns[0]), f)
        for rn in rns[1:]:
            ss = fromFile(resolveURL(base, rn), f)
    else:
        s = schema(f, None)
        s.targetNS = '##dummy'
    # NOTE(review): reads en, not the resolved ren -- confirm intended
    e = readXML(en)  # error return?
    schemaLocs = findSchemaLocs(e)
    sys.stderr.write("schemaLocations from instance: %s\n" % schemaLocs)
    for (ns, sl) in schemaLocs:
        checkinSchema(s, ns, sl, e, ren)
    if (e.uri and
        (e.uri not in ('http://www.w3.org/XML/1998/namespace',
                       'http://www.w3.org/1999/XMLSchema-instance')) and
        not f.schemas.has_key(e.uri)):
        try:
            checkinSchema(s, e.uri, e.uri, e, ren)
            sys.stderr.write("no schema yet for %s, trying namespace URI itself. . ."% e.uri)
            sys.stderr.write("ok.\n")
        except XMLinter.error:
            sys.stderr.write("no schema yet for %s, trying namespace URI itself. . ."% e.uri)
            sys.stderr.write("failed.\n")
    ecount = prepare(f)
    if ecount:
        if k:
            km = "continuing"
        else:
            km = "stopping without validating instance"
        em = "%d errors in schemas, %s"%(ecount,km)
        if not k:
            sys.stderr.write("%s\n"%em)
            return
    else:
        em = "Schema(s) OK"
    sys.stderr.write("%s\n"%em)
    # string.find takes the string to search first, then the substring
    cl = string.find(e.name, ':')
    if cl > -1:
        prefix = e.name[0:cl]
    else:
        prefix = ''
    eltname = QName(prefix, e.local, e.uri)
    if not s:
        # any one will do
        s = f.sfors
    t = None
    if s and s.vElementTable.has_key(eltname):
        t = s.vElementTable[eltname].typeDefinition
    if not t:
        sys.stderr.write("can't validate, because can't find type for %s\n" % eltname)
        return
    if e and s:
        if t.name:
            sys.stderr.write("validating with type %s\n" % t.name)
        else:
            sys.stderr.write("validating with anonymous type\n")
        validate(e, t, s)
        if s.factory.errors:
            sys.stderr.write("%d validation errors\n" % s.factory.errors)
            return 1
        else:
            sys.stderr.write("No errors\n")
            return 0


def verror(elt, message, schema, two=0):
    """Write a validation error for elt to stderr and count it.

    The location printed is elt.where2 when two is true, else elt.where.
    """
    sys.stderr.write("Validation error: ")
    if two:
        where(elt.where2)
    else:
        where(elt.where)
    sys.stderr.write(" ")
    sys.stderr.write(message)
    sys.stderr.write("\n")
    schema.factory.errors = schema.factory.errors+1


def vwarn(elt, message):
    """Write a validation warning to stderr; elt may be None (no location)."""
    sys.stderr.write("Validation warning: ")
    if elt:
        where(elt.where)
    sys.stderr.write(message)
    sys.stderr.write("\n")


if __name__ == '__main__':
    # usage: [-k] [instance [schema ...]]; -k keeps going on schema errors
    argl = sys.argv[1:]
    k = 0
    while argl:
        if argl[0] == '-k':
            k = 1
        else:
            break
        argl = argl[1:]
    if argl:
        runit(argl[0], argl[1:], k)
    else:
        runit("tiny.xml", ["tiny.xsd"], k)

# $Log: applyschema.py,v $
# Revision 1.37 2000/04/24 09:41:43 ht
# clean up invocation some more, add k arg't to runit
#
# add version info to message
#
# Revision 1.38 2000/04/24 10:02:39 ht
# change invocation message
#
# Revision 1.37 2000/04/24 09:41:43 ht
# clean up invocation some more, add k arg't to runit
#
# Revision 1.36 2000/04/21 09:32:21 ht
# another dose of resolveURL
# use tiny only if run from command line
#
# Revision 1.35 2000/04/20 22:12:43 ht
# use resolveURL on input, schemaLocs
#
# Revision 1.34 2000/04/20 15:45:08 ht
# better handling of use of ns uri for loc
#
# Revision 1.33 2000/04/20 14:26:59 ht
# merge in private and comp branches
#
# Revision 1.32.2.5 2000/04/20 14:25:54 ht
# merge in comp branch
#
# Revision 1.32.2.4.2.9 2000/04/20 14:22:39 ht
# manage document validation schema creation and search better
#
# Revision 1.32.2.4.2.8 2000/04/20 12:03:21 ht
# Remove a few lingering effectiveTypes
# Allow better for absent types etc.
#
# Revision 1.32.2.4.2.7 2000/04/14 21:18:27 ht
# minor attr names/path changes to track schema
#
# Revision 1.32.2.4.2.6 2000/04/13 23:04:39 ht
# allow for urType as simple type (?)
# track Any->AnyWrap change
#
# Revision 1.32.2.4.2.5 2000/04/12 17:29:37 ht
# begin work on model merger,
#
# Revision 1.32.2.4.2.4 2000/04/11 18:13:17 ht
# interpolate attributeUse between complexType and attributeDeclaration,
# parallel to particle
#
# Revision 1.32.2.4.2.3 2000/04/10 15:48:46 ht
# put modest attribute validation in place
#
# Revision 1.32.2.4.2.2 2000/04/09 16:13:26 ht
# working on complex type, attribute;
# back out component.qname
#
# Revision 1.32.2.4.2.1 2000/04/05 12:12:36 ht
# accommodate changes in schema.py
#
# Revision 1.32.2.4 2000/04/01 18:01:25 ht
# various minor compatibility fixes
#
# Revision 1.32.2.3 2000/03/25 12:12:27 ht
# restructure error handling/reporting;
# allow for switching 208 on and off
#
# Revision 1.32.2.2 2000/03/21 15:57:23 ht
# fix bug in skip,
# allow 208 override
#
# Revision 1.32.2.1 2000/03/20 17:22:52 ht
# better coverage of , including beginning of processcontents
#
# Revision 1.33 2000/03/20 17:20:53 ht
# better coverage of , including beginning of processcontents
#
# Revision 1.32 2000/03/08 15:28:46 ht
# merge private branches back into public after 20000225 release
#
# Revision 1.31.2.3 2000/02/24 23:40:32 ht
# fix any bug
#
# Revision 1.31.2.2 2000/02/21 09:18:13 ht
# bug in handling
#
# Revision 1.31.2.1 2000/02/08 21:43:39 ht
# fork private branch to track internal drafts
# change calling sequence of checkinSchema
#
# Revision 1.31.1.1 2000/02/08 13:54:25 ht
# fork branch for non-public changes
# calling sequence to checkinSchema changed
#
# Revision 1.31 2000/01/13 16:55:42 richard
# Finally do something with xsi:type
#
# Revision 1.30 2000/01/10 17:36:34 richard
# changes for xsi:schemaLocation
#
# Revision 1.29 2000/01/08 23:33:50 ht
# towards support for xsi:schemaLocation
#
# Revision 1.28 2000/01/08 12:07:38 ht
# Change command-line arg sequence in preparation for use of schemaLocation!!!!!
# Add debug printout for schemaLocation for now
#
# Revision 1.27 2000/01/07 17:08:26 richard
# start on xsi:type
#
# Revision 1.26 2000/01/06 14:59:38 ht
# fix command line bug, display args on entry
#
# Revision 1.25 2000/01/06 14:38:56 ht
# detect cross-scope keyref and signal error
#
# Revision 1.24 2000/01/03 17:02:37 ht
# Include result of sub-ordinate key checking in overall result
# Accommodate new calling sequence for xpath.find
# add Log and Id
#
#