Annotation of xmlschema/applyschema.py, revision 1.31
1.24 ht 1: # actually apply a schema to an instance
1.31 ! richard 2: # $Id: applyschema.py,v 1.30 2000/01/10 17:36:34 richard Exp $
1.24 ht 3:
1.2 ht 4: from PyXML import *
5: from XML import *
6: from schema import *
1.28 ht 7: import layer
1.3 aqw 8: import sys
1.4 richard 9: import re
1.18 ht 10: import xpath
1.20 ht 11: import types
1.4 richard 12:
# Matches a string consisting solely of spaces, tabs, carriage returns and
# newlines (the empty string included).  validateElementModel uses it to let
# whitespace-only text through in element-only content.
whitespace = re.compile("^[ \t\r\n]*$")
# Namespace URI that attribute URIs are compared against to recognise the
# xsi:type and xsi:schemaLocation attributes.
xsi = "http://www.w3.org/1999/XMLSchema/instance"
1.2 ht 15:
def where(w):
    """Print a location header line for the diagnostic that follows.

    w -- either a false value (nothing is printed) or a 4-item tuple that
         is interpolated into "In %s at line %d char %d of %s:".
    """
    if not w:
        return
    print("In %s at line %d char %d of %s:" % w)
1.5 richard 19:
def readXML(url):
    """Open url, build an Element from it and return that element.

    The source is opened with the NSL_read, NSL_read_namespaces and
    NSL_read_defaulted_attributes flags.  The handle is closed even when
    building the element raises, so a failed read does not leak it.
    """
    source = Open(url, NSL_read|NSL_read_namespaces|NSL_read_defaulted_attributes)
    try:
        # NOTE(review): the second argument (1) presumably asks for the whole
        # subtree to be read -- confirm against the PyXML Element constructor.
        return Element(source, 1)
    finally:
        Close(source)
27:
def validate(element, typedef, schema):
    """Validate element against typedef, then check schema-level keys.

    Returns the result of validateElement, and-ed with the result of
    validateKeys when the schema itself declares keys.
    """
    ok = validateElement(element, typedef, schema)
    if not schema.keys:
        return ok
    # hack: wrap the document element in a synthetic "#root" element so the
    # schema-level key declarations have a parent element to be applied to
    root = Element("#root")
    root.children.append(element)
    return validateKeys(schema, root) and ok
1.2 ht 36:
def validateElement(element, type, schema, eltDecl=None):
    """Validate one element, and recursively its children, against a type.

    element -- the element to validate
    type    -- the declared type; replaced by the effective type of an
               xsi:type attribute when the element carries one
    schema  -- the schema whose type and element tables are consulted
    eltDecl -- the element's declaration, if the caller already has it;
               otherwise it is looked up in schema.vElementTable

    Returns 0 when xsi:type cannot be resolved or is not a subtype of the
    declared type.  For an AbInitio type the result of
    validateElementSimple is returned.  Otherwise the result is the
    conjunction of the attribute, child-type, content-model and key checks.
    """
    global vel, vtype
    # Record the element and declared type at module level; nothing in this
    # function reads them back (presumably a debugging aid).
    vel = element
    vtype = type
    xsiTypeKey = (xsi, "type")
    if element.nsattrs.has_key(xsiTypeKey):
        t = element.nsattrs[xsiTypeKey].value
        try:
            qt = QName(t, element.nsdict)
        except SchemaError:
            print("namespace not found for xsi:type %s" % t)
            return 0
        if schema.vComplexTypeTable.has_key(qt):
            xsitype = schema.vComplexTypeTable[qt]
        elif schema.vSimpleTypeTable.has_key(qt):
            xsitype = schema.vSimpleTypeTable[qt]
        else:
            print("xsi:type %s undefined" % qt)
            return 0
        if not xsitype.isSubtype(type):
            print("xsi:type %s is not a subtype of the declared type %s" % (qt, type.name))
            return 0
        print("using xsi:type %s instead of original %s" % (qt, type.name))
        type = xsitype.effectiveType
    if isinstance(type, AbInitio):
        return validateElementSimple(element, type, schema)
    v1 = assignAttributeTypes(element, type.attrTable, type.extendable, schema)
    if v1:
        v1 = validateAttributeTypes(element.attrs, type.attrTable, schema)
    v2 = assignChildTypes(element.children, type.elementTable(), type.extendable, schema)
    # we must look at the content model before checking the types, so that
    # we know which children matched <any>
    v3 = validateContentModel(element, type, schema)
    if v2:
        v2 = validateChildTypes(element.children, schema)
    eqn = QName(None, element.local, element.uri)
    # Look the declaration up in the schema that was passed in, rather than
    # in the module global that only runit() happens to set.
    if not eltDecl and schema.vElementTable.has_key(eqn):
        eltDecl = schema.vElementTable[eqn]
    if eltDecl:
        v4 = validateKeys(eltDecl, element)
    else:
        v4 = 1
    return v1 and v2 and v3 and v4
1.2 ht 81:
def validateElementSimple(element, type, schema):
    """Validate an element whose type is a simple (AbInitio) type.

    Such an element may not carry attributes; if it has any, a diagnostic
    is printed and 0 is returned.  Otherwise its content is checked with
    validateTextModel and that result is returned.
    """
    name = element.name
    if not element.attrs:
        return validateTextModel(element, type, schema)
    where(element.where)
    print("element %s has attributes %s but has type %s" % (name, element.attrs, type))
    return 0
93:
def validateText(text, type, schema):
    """Placeholder for simple-type text checking: accepts anything.

    None of the arguments is inspected; the result is always 1.
    """
    return 1
96:
def assignAttributeTypes(element, attrdefs, extendable, schema):
    """Record on each attribute of element the type it is declared with.

    attrdefs is keyed by attribute QName; a "#any" entry stands for an
    anyAttribute wildcard.  Attributes in the xsi namespace are not given
    a type here.  extendable and schema are accepted but not used.

    Returns 1 if every attribute is accounted for, 0 if an unknown xsi
    attribute or an undeclared attribute was found.
    """
    ok = 1
    for attr in element.attrs.values():
        qname = QName(None, attr.local, attr.uri)
        if attr.uri == xsi:
            # xsi:type and xsi:schemaLocation have already been handled
            # elsewhere; any other xsi attribute is an error
            if attr.local not in ("type", "schemaLocation"):
                print("unknown xsi attribute %s" % qname)
                ok = 0
            continue
        if attrdefs.has_key(qname):
            attr.type = attrdefs[qname].effectiveType
            continue
        if attrdefs.has_key("#any"):
            # XXX check the namespaces
            print("allowing undeclared attribute %s because anyAttribute(%s)" % (qname, attrdefs["#any"]))
            attr.type = None
            continue
        where(element.where)
        print("undeclared attribute %s" % qname)
        attr.type = None
        ok = 0
    return ok
1.2 ht 127:
def validateAttributeTypes(attrs, attrdefs, schema):
    """Placeholder for attribute value checking: always returns 1.

    Not yet implemented (per the original notes):
      - check that each attribute matches its type
      - check that all required attributes are present
      - add defaulted attributes (shouldn't need to check their types)
    """
    return 1
133:
def assignChildTypes(children, elementTable, extendable, schema):
    """Record on each child element the type its tag is declared with.

    A child whose QName is not in elementTable gets type None.  That is
    not treated as an error here: whether it is one depends on what the
    child matches in the content model.  Non-Element children are
    skipped.  extendable and schema are unused.  Always returns 1.
    """
    for node in children:
        if node.__class__ != Element:
            continue
        key = QName(None, node.local, node.uri)
        if elementTable.has_key(key):
            # table entries are sequences; item 1 is taken as the type
            node.type = elementTable[key][1]
        else:
            node.type = None
    return 1
146:
def validateContentModel(element, type, schema):
    """Check the children of element against the content model of type.

    Dispatches on type.content: "empty" and "textOnly" have dedicated
    checkers; anything else is traced through the type's FSM, with text
    permitted only when the content is "mixed".

    Design notes carried over from the original:
      - if a child matches an <any tag=... type=...> it should be
        validated with its xsd:type if it has one
      - if a child matches some other kind of <any>, it is not an error
        if its type cannot be found
    """
    content = type.content
    if content == "empty":
        return validateEmptyModel(element, type, schema)
    if content == "textOnly":
        return validateTextModel(element, type, schema)
    allowText = content == "mixed"
    return validateElementModel(element, type.fsm, allowText, schema)
1.2 ht 162:
def validateEmptyModel(element, type, schema):
    """Return 1 if element has no children; otherwise report and return 0.

    type and schema are unused.
    """
    if len(element.children) == 0:
        return 1
    where(element.where)
    print("element %s must be empty but is not" % element.name)
    return 0
169:
def validateTextModel(element, type, schema):
    """Check that element holds at most one child, which must be text.

    Returns 0, after printing a diagnostic, if the element has more than
    one child or if its only child is not Pcdata.  Otherwise the text (""
    when there are no children) is handed to validateText and that result
    is returned, rather than being discarded in favour of a constant 1.
    """
    name = element.name
    n = len(element.children)
    if n > 1:
        where(element.where)
        print("element %s has %s (> 1) children but has type %s" % (name, n, type))
        return 0
    if n > 0 and element.children[0].__class__ != Pcdata:
        where(element.where)
        print("element %s has non-text children but has type %s" % (name, type))
        return 0
    if n == 0:
        text = ""
    else:
        text = element.children[0].value
    return validateText(text, type, schema)
192:
def validateElementModel(element, fsm, mixed, schema):
    """Trace the children of element through the content-model FSM.

    element -- the element whose children are checked
    fsm     -- the state machine; walking starts at fsm.startNode
    mixed   -- true if non-whitespace text is permitted between children
    schema  -- unused

    Each child element follows the edge labelled with its QName.  If
    there is none but the current node has an edge labelled with a
    Gensym (an <any> wildcard), that edge is followed and the child's
    type is set to "<any>".  Returns 0, after printing a diagnostic, on
    disallowed text, on an element with no usable edge, or if the walk
    does not finish on an end node; otherwise returns 1.
    """
    state = fsm.startNode
    for child in element.children:
        kind = child.__class__
        if kind == Pcdata:
            if (not mixed) and (not whitespace.match(child.value)):
                where(child.where)
                print("text not allowed in element %s: |%s|" % (element.name, child.value))
                return 0
        elif kind == Element:
            qname = QName(None, child.local, child.uri)
            exact = None
            wildcard = None
            for edge in state.edges:
                if edge.label == qname:
                    exact = edge.dest
                    break
                if isinstance(edge.label, Gensym):
                    # XXX check the namespaces
                    wildcard = edge.dest
            if exact:
                state = exact
            elif wildcard:
                state = wildcard
                # Matching <any> while already having a type assigned is no
                # longer an error, but something more complicated is XXX;
                # for now the type is simply overwritten.
                child.type = "<any>"
            else:
                where(child.where)
                print("element %s not allowed here in element %s" % (qname, QName(None,element.local,element.uri)))
                fsm.printme()
                return 0
    if not state.isEndNode:
        where(element.where2)
        print("content of %s is not allowed to end here" % element.name)
        fsm.printme()
        return 0
    return 1
1.2 ht 238:
def validateChildTypes(children, schema):
    """Validate each child element against the type recorded on it.

    A child whose type is "<any>" (it matched a wildcard) is validated
    against the global element declaration of the same name, but only
    when schema.factory.schemas has an entry for the child's namespace.
    A child with any other true type is validated against that type.
    A child with no type is reported as undeclared.

    Returns 1 if every child validated, 0 otherwise.
    """
    v = 1
    for child in children:
        if child.__class__ != Element:
            continue
        if child.type == "<any>":
            # No prefix is in scope here; build the name with None as
            # everywhere else in this file (the original referred to an
            # undefined 'prefix' variable).
            q = QName(None, child.local, child.uri)
            print("allowing %s because it matched <any>" % q)
            if schema.factory.schemas.has_key(child.uri):
                # only try if we might win -- needs work
                table = schema.vElementTable
                e = None
                # Guard the lookup so that a missing declaration reaches the
                # error branch below instead of raising KeyError.
                if table.has_key(q):
                    e = table[q]
                if e:
                    print("validating it against %s" % e)
                    if not validateElement(child, e.effectiveType, schema):
                        v = 0
                else:
                    where(child.where)
                    print("can't find a type for <any>-matching element %s" % q)
                    v = 0
        elif child.type:
            if not validateElement(child, child.type, schema):
                v = 0
        else:
            where(child.where)
            print("undeclared element %s" % QName(None,child.local,child.uri))
            v = 0
    return v
1.2 ht 267:
def validateKeys(decl, elt):
    """Check the key, unique and keyref constraints of decl against elt.

    A fresh elt.keyTabs dictionary is installed first.  The key and unique
    checks fill it in and the keyref check then reads it, so all three
    always run, in this order.  Returns a true value only if all three
    checks succeed.
    """
    elt.keyTabs = {}
    keysOk = validateKeys1(elt, decl.keys, 1)
    uniquesOk = validateKeys1(elt, decl.uniques, 0)
    refsOk = validateKeyRefs(elt, decl.keyrefs)
    return keysOk and uniquesOk and refsOk
274:
def validateKeys1(elt, kds, reqd):
    """Check a list of key or unique declarations against elt.

    elt  -- the element the constraints are scoped to; its keyTabs
            dictionary receives one table per declaration, keyed by the
            declaration's name
    kds  -- the declarations; each supplies selector, field and name
    reqd -- true for keys (every field must be present), false for
            uniques (a candidate with missing fields is skipped)

    Returns 1 if no field was missing from a key and no duplicate value
    was found, 0 otherwise.
    """
    res = 1
    for key in kds:
        tab = {}
        candidates = xpath.XPath(key.selector).find(elt)
        if candidates:
            fps = [xpath.XPath(f) for f in key.field]
            for s in candidates:
                keyKey = buildKey(s, fps)
                if not keyKey:
                    if reqd:
                        where(s.where)
                        print("missing one or more fields %s from key %s" % (key.field, key.name))
                        res = 0
                        break
                    # A unique constraint does not apply to a candidate
                    # lacking a field.  The original fell through and
                    # crashed on len(None) / [][0] here.
                    continue
                if len(keyKey) > 1:
                    keyKey = tuple(keyKey)
                else:
                    keyKey = keyKey[0]
                if keyKey in tab:
                    where(s.where)
                    # No newline: the location of the first appearance is
                    # printed on the same line by the where() call below.
                    sys.stdout.write("duplicate key %s, first appearance was " % str(keyKey))
                    where(tab[keyKey].where)
                    res = 0
                else:
                    tab[keyKey] = s
        # Recorded even when empty, so keyrefs can tell that the constraint
        # was declared.
        elt.keyTabs[key.name] = tab
    return res
304:
def buildKey(s, fps):
    """Collect the field values that make up the key of candidate s.

    s   -- the candidate node selected by a key/unique/keyref selector
    fps -- the field paths; each is asked to find() its value below s

    Returns None as soon as one field finds nothing.  Otherwise returns a
    list with one entry per field: the text of the first hit when it is
    an element, or the hit itself when it is a string.  An element with
    no leading text contributes the empty string, so that the entries
    stay aligned with the fields.  A hit of any other kind is reported
    and contributes nothing.
    """
    keyKey = []
    for fp in fps:
        kv = fp.find(s)
        if not kv:
            return None
        if len(kv) > 1:
            print("oops, multiple field hits for %s at %s: %s" % (fp.str, s, kv))
        first = kv[0]
        if isinstance(first, Element):
            if (len(first.children) > 0 and
                isinstance(first.children[0], Pcdata)):
                keyKey.append(first.children[0].value)
            else:
                # XPath says in this case value is the empty string
                keyKey.append("")
        elif type(first) == types.StringType:
            keyKey.append(first)
        else:
            print("oops, key value %s:%s" % (type(first), first))
    return keyKey
326:
def validateKeyRefs(elt, krds):
    """Check each keyref declaration in krds against the tables on elt.

    elt  -- the element the keyrefs are scoped to; elt.keyTabs maps the
            name of each key/unique declared on it to its table of values
    krds -- the keyref declarations; each supplies refer, selector, field
            and name

    A keyref naming a constraint with no table is reported once; the name
    is then marked 'bogus' in elt.keyTabs so later keyrefs to it stay
    quiet.  Checking then moves on to the next keyref instead of
    abandoning the rest.  Likewise a candidate lacking a field is skipped
    instead of ending the scan of the remaining candidates.

    Returns 1 if every reference was resolved, 0 otherwise.
    """
    res = 1
    for ref in krds:
        if ref.refer in elt.keyTabs:
            keyTab = elt.keyTabs[ref.refer]
            if keyTab == 'bogus':
                # already reported for an earlier keyref
                continue
        else:
            elt.keyTabs[ref.refer] = 'bogus'
            print("No key or unique constraint named %s declared, refed by keyref %s" % (ref.refer, ref.name))
            res = 0
            continue
        candidates = xpath.XPath(ref.selector).find(elt)
        if candidates:
            fps = [xpath.XPath(f) for f in ref.field]
            for s in candidates:
                keyKey = buildKey(s, fps)
                if not keyKey:
                    # no complete value, so nothing to look up
                    continue
                if len(keyKey) > 1:
                    keyKey = tuple(keyKey)
                else:
                    keyKey = keyKey[0]
                if keyKey not in keyTab:
                    where(s.where)
                    print("no key in %s for %s" % (ref.refer, str(keyKey)))
                    res = 0
    return res
356:
def findSchemaLocs(element):
    """Collect xsi:schemaLocation pairs from element and its descendants.

    The value of each xsi:schemaLocation attribute is split on whitespace
    and consumed two tokens at a time.  A trailing token without a
    partner is reported and ignored instead of raising IndexError.

    Returns a list of (first-token, second-token) tuples, this element's
    own pairs first, followed by those of its child elements in order.
    """
    pairs = []
    for a in element.attrs.values():
        if a.uri == xsi and a.local == "schemaLocation":
            tokens = a.value.split()
            for i in range(0, len(tokens) - 1, 2):
                pairs.append((tokens[i], tokens[i + 1]))
            if len(tokens) % 2:
                print("ignoring unpaired xsi:schemaLocation token %s" % tokens[-1])
    for c in element.children:
        if isinstance(c, Element):
            pairs.extend(findSchemaLocs(c))
    return pairs
369:
def runit(en, rns=[]):
    """Schema-validate the instance document en and print the outcome.

    en  -- name of the instance document, passed to readXML
    rns -- names of schema documents to load; the first becomes the
           primary schema.  The default list is only read, never mutated.

    Schemas named by xsi:schemaLocation attributes in the instance are
    checked in as well.  The type of the document element is looked up in
    the primary schema (or, when none was named, in the factory's schema)
    and, if found, the document is validated against it.

    Side effects: sets the module globals s (schema), e (document
    element) and t (its type, or None), and prints progress and results.
    """
    global s, e, t
    s = None

    print("schema-validating %s using schemas %s" % (en, rns))

    f = newFactory()

    e = readXML(en)

    if rns:
        s = fromFile(rns[0], f)
        for rn in rns[1:]:
            fromFile(rn, f)

    schemaLocs = findSchemaLocs(e)
    print("schemaLocations from instance: %s" % schemaLocs)
    for (ns, sl) in schemaLocs:
        checkinSchema(f, ns, sl)

    if not rns and not schemaLocs:
        print("didn't find any schemas!")

    prepare(f)

    # Split off the namespace prefix of the document element's name.  The
    # original searched for the name inside ":" (arguments swapped), so the
    # prefix was never found.
    cl = e.name.find(':')
    if cl > -1:
        prefix = e.name[0:cl]
    else:
        prefix = ''
    typename = QName(prefix, e.local, e.uri)

    if not s:
        # any one will do
        s = f.schema

    if s and s.vElementTable.has_key(typename):
        t = s.vElementTable[typename].effectiveType
    else:
        print("can't validate, because can't find type for %s" % typename)
        t = None
    if e and t and s:
        print("validating with type %s" % typename)
        print("validate returns %s" % validate(e, t, s))
1.2 ht 414:
# Script entry point: the first command-line argument names the instance
# document and any further arguments name schema documents.  With no
# arguments the bundled trivial example is validated.  This runs whenever
# the module is executed or imported.
if len(sys.argv) <= 1:
    runit("triv.xml", ["triv.xsd"])
else:
    runit(sys.argv[1], sys.argv[2:])
1.24 ht 419:
1.25 ht 420: # $Log: applyschema.py,v $
1.31 ! richard 421: # Revision 1.31.2.1 2000/02/08 21:43:39 ht
! 422: # fork private branch to track internal drafts
! 423: # change calling sequence of checkinSchema
1.30 richard 424: #
425: # Revision 1.31.1.1 2000/02/08 13:54:25 ht
426: # fork branch for non-public changes
1.29 ht 427: # calling sequence to checkinSchema changed
428: #
429: # Revision 1.31 2000/01/13 16:55:42 richard
430: # Finally do something with xsi:type
1.28 ht 431: #
432: # Revision 1.30 2000/01/10 17:36:34 richard
433: # changes for xsi:schemaLocation
1.27 richard 434: #
435: # Revision 1.29 2000/01/08 23:33:50 ht
436: # towards support for xsi:schemaLocation
1.26 ht 437: #
438: # Revision 1.28 2000/01/08 12:07:38 ht
439: # Change command-line arg sequence in preparation for use of schemaLocation!!!!!
1.25 ht 440: # Add debug printout for schemaLocation for now
441: #
442: # Revision 1.27 2000/01/07 17:08:26 richard
443: # start on xsi:type
444: #
1.24 ht 445: # Revision 1.26 2000/01/06 14:59:38 ht
1.1 ht 446: # fix command line bug, display args on entry
447: #
448: # Revision 1.25 2000/01/06 14:38:56 ht
449: # detect cross-scope keyref and signal error
450: #
451: # Revision 1.24 2000/01/03 17:02:37 ht
452: # Include result of sub-ordinate key checking in overall result
453: # Accommodate new calling sequence for xpath.find
454: # add Log and Id
455: #
456: #
Webmaster