from PyXML import *
from XML import *
from schema import *
import sys
import re

# Matches text that consists solely of whitespace (or is empty).
whitespace = re.compile("^[ \t\r\n]*$")


def where(w):
    """Print a location header for the next diagnostic, if a location is known.

    w is interpolated directly into a four-slot format string, so it is
    expected to be a 4-tuple; a false value prints nothing.
    """
    if w:
        print("In %s at line %d char %d of %s:" % w)


def readXML(url):
    """Open url, build an Element from it and return that element.

    The input handle is closed even when building the element fails.
    """
    source = Open(url, NSL_read|NSL_read_namespaces|NSL_read_defaulted_attributes)
    try:
        # item = GetNextQueryItem(input, ParseQuery(input.doctype, "."))
        # elem = Element(item, input.doctype)
        elem = Element(source, 1)
    finally:
        Close(source)
    return elem


def validate(element, type, schema):
    """Validate element against type; thin wrapper around validateElement."""
    return validateElement(element, type, schema)


def validateElement(element, type, schema):
    """Validate one element (attributes, content model, children) against type.

    Returns a true value when every check passed, a false value otherwise.
    Diagnostics are printed by the individual checks.
    """
    # The element/type currently being validated are recorded in module
    # globals; nothing in this file reads them back.
    global vel, vtype
    vel = element
    vtype = type
    # print "validating element %s against %s" % (element.name, type)
    if isinstance(type, AbInitio):
        return validateElementSimple(element, type, schema)
    v1 = assignAttributeTypes(element, type.attrTable, type.extendable, schema)
    if v1:
        v1 = validateAttributeTypes(element.attrs, type.attrTable, schema)
    # print "assigning types for %s" % element.name
    v2 = assignChildTypes(element.children, type.elementTable(),
                          type.extendable, schema)
    # we must look at the content model before checking the types, so that
    # we know which children matched
    v3 = validateContentModel(element, type, schema)
    if v2:
        v2 = validateChildTypes(element.children, schema)
    return v1 and v2 and v3


def validateElementSimple(element, type, schema):
    """Validate an element whose type is an AbInitio (simple) type.

    check that:
      it has no attributes
      it has one pcdata child, and if so
        the text of the pcdata matches the type
    """
    name = element.name
    if element.attrs:
        where(element.where)
        print("element %s has attributes %s but has type %s" %
              (name, element.attrs, type))
        return 0
    return validateTextModel(element, type, schema)


def validateText(text, type, schema):
    """Check text against a simple type.  Stub: currently accepts everything."""
    return 1


def assignAttributeTypes(element, attrdefs, extendable, schema):
    """Look up each attribute of element in attrdefs and record its type.

    An attribute without a declaration is reported, gets type None and makes
    the result 0.  The extendable argument is not consulted at present, so
    undeclared attributes are errors even for extendable types.
    """
    # print "assigning attrs for %s {%s}%s" % (element.name, element.uri, element.local)
    v = 1
    for a in element.attrs.values():
        # print "assigning attr %s {%s}%s" % (a.name, a.uri, a.local)
        an = QName(None, a.local, a.uri)
        if attrdefs.has_key(an):
            a.type = attrdefs[an].effectiveType
        else:
            where(element.where)
            print("undeclared attribute %s" % an)
            a.type = None
            v = 0
    return v


def validateAttributeTypes(attrs, attrdefs, schema):
    """Stub: always succeeds.

    Intended (not yet implemented) checks:
      check that each attribute matches its type
      check that all required attributes are present
      add defaulted attributes (shouldn't need to check their types)
    """
    return 1


def assignChildTypes(children, elementTable, extendable, schema):
    """Record on each child element the type declared for its tag, or None.

    It may not be an error if a child is not declared; we don't know that
    until we see what it matches in the content model.  Always returns 1.
    """
    for child in children:
        if child.__class__ == Element:
            qname = QName(None, child.local, child.uri)
            if elementTable.has_key(qname):
                child.type = elementTable[qname][1]
            else:
                child.type = None
    return 1


def validateContentModel(element, type, schema):
    """Dispatch on type.content to the matching content-model check.

    "empty" and "textOnly" have dedicated checks; anything else is traced
    through the type's finite-state machine, with text permitted only when
    the content is "mixed".
    """
    # NOTE(review): the original comment here appears to have lost markup
    # (presumably <any>); the apparent intent: a child matched by a wildcard
    # is validated with its own type if one can be found, and for some
    # wildcards a missing type is not an error.
    # print "validating model for %s content %s" % (element.name, type.content)
    if type.content == "empty":
        return validateEmptyModel(element, type, schema)
    elif type.content == "textOnly":
        return validateTextModel(element, type, schema)
    return validateElementModel(element, type.fsm,
                                type.content == "mixed", schema)


def validateEmptyModel(element, type, schema):
    """Return 1 if element has no children; otherwise report and return 0."""
    if len(element.children) != 0:
        where(element.where)
        print("element %s must be empty but is not" % element.name)
        return 0
    return 1


def validateTextModel(element, type, schema):
    """Check that element holds at most one child, that it is Pcdata, and
    that its text (or "" when there is no child) is accepted by validateText.
    """
    name = element.name
    n = len(element.children)
    if n > 1:
        where(element.where)
        print("element %s has %s (> 1) children but has type %s" %
              (name, n, type))
        return 0
    if n > 0 and element.children[0].__class__ != Pcdata:
        where(element.where)
        print("element %s has non-text children but has type %s" %
              (name, type))
        return 0
    if n == 0:
        text = ""
    else:
        text = element.children[0].value
    # Propagate the text check instead of discarding it (validateText is a
    # stub that returns 1 today, so the result is unchanged for now).
    return validateText(text, type, schema)


def validateElementModel(element, fsm, mixed, schema):
    """Walk element's children through fsm, starting at fsm.startNode.

    Non-whitespace text is rejected unless mixed is true.  A child element
    follows the edge labelled with its QName; failing that, the last edge
    seen whose label is a Gensym (a wildcard edge).  Returns 0 on the first
    disallowed child or when the walk does not stop on an end node, else 1
    (or 0 if a wildcard-matched child already carried a type).
    """
    # print "validating element model for %s" % element.name
    v = 1
    n = fsm.startNode
    for c in element.children:
        if c.__class__ == Pcdata:
            if not mixed and not whitespace.match(c.value):
                where(c.where)
                print("text not allowed in element %s" % element.name)
                return 0
        elif c.__class__ == Element:
            qname = QName(None, c.local, c.uri)
            matched = None
            anynext = None
            for e in n.edges:
                if e.label == qname:
                    matched = e.dest
                    break
                if isinstance(e.label, Gensym):
                    anynext = e.dest
            if matched:
                n = matched
            elif anynext:
                n = anynext
                if c.type:
                    # was where(child.where): 'child' is undefined here
                    where(c.where)
                    print("element matched but had a type assigned")
                    v = 0
                else:
                    # "" marks a wildcard-matched child for
                    # validateChildTypes.  NOTE(review): presumably this
                    # sentinel was "<any>" before markup was stripped.
                    c.type = ""
            else:
                where(c.where)
                print("element %s not allowed here in element %s" %
                      (qname, QName(None, element.local, element.uri)))
                fsm.printme()
                return 0
    if not n.isEndNode:
        # NOTE(review): where2 is presumably the element's end position.
        where(element.where2)
        print("content of %s is not allowed to end here" % element.name)
        fsm.printme()
        return 0
    return v


def validateChildTypes(children, schema):
    """Validate each child element against the type recorded on it.

    A child typed "" (wildcard match) is looked up in schema.vElementTable
    and validated against that declaration; a child with a real type is
    validated against it; a child with no type is reported as undeclared.
    Returns 1 only if every child element validated.
    """
    v = 1
    for child in children:
        if child.__class__ != Element:
            continue
        if child.type == "":
            # 'prefix' is the module-level prefix set by the driver below.
            q = QName(prefix, child.local, child.uri)
            print("allowing %s because it matched " % q)
            decl = schema.vElementTable[q]
            if decl:
                print("validating it against %s" % decl)
                if not validateElement(child, decl.effectiveType, schema):
                    v = 0
            else:
                where(child.where)
                print("can't for a type for -matching element %s" % q)
                v = 0
        elif child.type:
            if not validateElement(child, child.type, schema):
                v = 0
        else:
            where(child.where)
            print("undeclared element %s" %
                  QName(None, child.local, child.uri))
            v = 0
    return v


# ---- driver -------------------------------------------------------------
# usage: <script> schema.xsd instance.xml [more-schemas...]
# With no arguments the built-in triv.xsd / triv.xml pair is used.
t = None
# Default so validateChildTypes never hits an unbound global.
# NOTE(review): '' is a guess for the no-argument case; confirm.
prefix = ''
if len(sys.argv) > 1:
    s = fromFile(sys.argv[1])
    e = readXML(sys.argv[2])
    # was string.find(':', e.name): arguments reversed, so no prefix was
    # ever found (and 'string' is not imported by this file).
    cl = e.name.find(':')
    if cl > -1:
        prefix = e.name[0:cl]
    else:
        prefix = ''
    # Load any further schema files into the same factory, last one first.
    n = len(sys.argv) - 3
    while n > 0:
        fromFile(sys.argv[n + 2], s.factory)
        n = n - 1
    prepare(s.factory)
    t = s.vElementTable[QName(prefix, e.local, e.uri)].effectiveType
else:
    s = fromFile("triv.xsd")
    e = readXML("triv.xml")
    rt = QName('t', 'toy', 'http://foo')
    prepare(s.factory)
    t = s.vComplexTypeTable[rt].effectiveType
print("validate returns %s" % validate(e, t, s))

# fix command line bug, display args on entry
#
# Revision 1.25 2000/01/06 14:38:56 ht
# detect cross-scope keyref and signal error
#
# Revision 1.24 2000/01/03 17:02:37 ht
# Include result of sub-ordinate key checking in overall result
# Accommodate new calling sequence for xpath.find
# add Log and Id
#
#