Annotation of xmlschema/applyschema.py, revision 1.32.2.4.2.2
1.24 ht 1: # actually apply a schema to an instance
1.32.2.4.2.2! ht 2: # $Id: applyschema.py,v 1.32.2.4.2.1 2000/04/05 12:12:36 ht Exp $
1.24 ht 3:
1.2 ht 4: from PyXML import *
5: from XML import *
6: from schema import *
1.28 ht 7: import layer
1.3 aqw 8: import sys
1.4 richard 9: import re
1.18 ht 10: import xpath
1.20 ht 11: import types
1.4 richard 12:
13: whitespace = re.compile("^[ \t\r\n]*$")
1.27 richard 14: xsi = "http://www.w3.org/1999/XMLSchema/instance"
1.2 ht 15:
def readXML(url):
    """Read the XML document at *url* and return the Element built from it.

    The document is opened with the NSL_read, NSL_read_namespaces and
    NSL_read_defaulted_attributes flags.  The input handle is closed on
    every exit path, including when building the element tree raises.
    """
    source = Open(url,
                  NSL_read | NSL_read_namespaces | NSL_read_defaulted_attributes)
    try:
        # NOTE(review): the meaning of the second argument (1) is defined by
        # the Element constructor, which is not visible here -- confirm.
        return Element(source, 1)       # error return??
    finally:
        # Release the handle even if Element() fails part-way through.
        Close(source)
1.2 ht 23:
def validate(element, typedef, schema):
    """Validate *element* against *typedef*; return the error count.

    The counter ``schema.factory.errors`` is reset to zero before the
    element is validated, and its value afterwards is returned.
    """
    schema.factory.errors = 0
    validateElement(element, typedef, schema)
    errorCount = schema.factory.errors
    return errorCount
1.2 ht 28:
def validateElement(element, type, schema, eltDecl=None):
    """Validate *element* against the type definition *type*.

    Steps, in order:
      1. If the element carries an xsi:type attribute, resolve it, check
         that it names a subtype of *type*, and validate against its
         effective type instead.
      2. Simple types (AbInitio or simpleType) are handed to
         validateElementSimple.
      3. Otherwise attributes, child types, the content model and the
         children themselves are checked.
      4. Identity constraints (keys/uniques/keyrefs) of the element's
         declaration are checked.  The declaration is *eltDecl* if given,
         else it is looked up in ``schema.vElementTable`` by the element's
         qualified name.

    Returns None after an xsi:type error, the result of
    validateElementSimple for simple types, and otherwise
    ``v1 and v2 and v3 and v4``.  The callers in this file (validate,
    validateChildTypes) ignore the return value and rely on the error
    counter maintained by verror.
    """
    global vel, vtype
    # Record the element/type currently being validated at module level.
    # NOTE(review): presumably a debugging aid -- nothing in this file reads
    # them back.
    vel = element
    vtype = type

    xsiTypeKey = (xsi, "type")
    if element.nsattrs.has_key(xsiTypeKey):
        t = element.nsattrs[xsiTypeKey].value
        try:
            qt = QName(t, element.nsdict)
        except SchemaError:
            verror(element, "namespace not found for xsi:type %s" % t, schema)
            return
        if schema.vComplexTypeTable.has_key(qt):
            xsitype = schema.vComplexTypeTable[qt]
        elif schema.vSimpleTypeTable.has_key(qt):
            xsitype = schema.vSimpleTypeTable[qt]
        else:
            verror(element, "xsi:type %s undefined" % qt, schema)
            return
        if not xsitype.isSubtype(type):
            verror(element,
                   "xsi:type %s is not a subtype of the declared type %s" % (qt, type.name),
                   schema)
            return
        vwarn(element,
              "using xsi:type %s instead of original %s" % (qt, type.name))
        type = xsitype.effectiveType

    if isinstance(type, AbInitio):
        return validateElementSimple(element, type, schema)
    if isinstance(type, simpleType):
        return validateElementSimple(element, type.primitiveType, schema)

    v1 = assignAttributeTypes(element, type.attributeDeclarations,
                              type.prohibitedSubstitutions, schema)
    if v1:
        v1 = validateAttributeTypes(element.attrs, type.attributeDeclarations,
                                    schema)
    v2 = assignChildTypes(element.children, type.elementTable(),
                          type.prohibitedSubstitutions, schema)
    # we must look at the content model before checking the types, so that
    # we know which children matched <any>
    v3 = validateContentModel(element, type, schema)
    if v2:
        v2 = validateChildTypes(element.children, schema)

    # Use the schema we were handed rather than the module global ``s``:
    # that global only exists after runit() has run, and need not be the
    # schema this call is validating against.
    eqn = QName(None, element.local, element.uri)
    if not eltDecl and schema.vElementTable.has_key(eqn):
        eltDecl = schema.vElementTable[eqn]
    if eltDecl:
        v4 = validateKeys(eltDecl, element)
    else:
        v4 = 1
    return v1 and v2 and v3 and v4
1.2 ht 78:
def validateElementSimple(element, type, schema):
    """Validate an element whose type is a simple type.

    Checks that:
      - it has no attributes
      - it has one pcdata child, and if so
      - the text of the pcdata matches the type
    The last two checks are delegated to validateTextModel, whose result
    is returned.  When attributes are present an error is reported and
    None is returned.
    """
    name = element.name
    attributes = element.attrs
    if not attributes:
        return validateTextModel(element, type, schema)
    verror(element,
           "element %s has attributes %s but has type %s" % (name, attributes, type),
           schema)
    return None
89:
def validateText(text, type, schema):
    """Check *text* against the simple type *type*.

    Placeholder: no checking is performed; every text is accepted and 1
    is returned.
    """
    accepted = 1
    return accepted
92:
def assignAttributeTypes(element, attrdefs, extendable, schema):
    """Record on each attribute of *element* the declaration it matches.

    For every attribute, ``attr.type`` is set to its entry in *attrdefs*,
    or to None when the attribute is undeclared.  An undeclared attribute
    is only a warning if *attrdefs* contains a "#any" entry, otherwise it
    is an error.  Attributes in the xsi namespace get no type: xsi:type
    and xsi:schemaLocation are accepted silently, any other xsi attribute
    is an error.

    *extendable* is accepted but not consulted.  Always returns None.
    """
    handledXsiAttrs = ("type", "schemaLocation")
    for attr in element.attrs.values():
        attrName = QName(None, attr.local, attr.uri)
        if attr.uri == xsi:
            # xsi:type and xsi:schemaLocation have already been handled
            if attr.local not in handledXsiAttrs:
                verror(element, "unknown xsi attribute %s" % attrName, schema)
            continue
        if attrdefs.has_key(attrName):
            attr.type = attrdefs[attrName]
            continue
        if attrdefs.has_key("#any"):
            # XXX check the namespaces
            vwarn(element,
                  "allowing undeclared attribute %s because anyAttribute(%s)" % (attrName, attrdefs["#any"]))
            attr.type = None
            continue
        verror(element, "undeclared attribute %s" % attrName, schema)
        attr.type = None
    return None
1.2 ht 119:
def validateAttributeTypes(attrs, attrdefs, schema):
    """Stub for attribute validation; currently does nothing.

    Intended (not yet implemented) behaviour:
      - check that each attribute matches its type
      - check that all required attributes are present
      - add defaulted attributes (shouldn't need to check their types)
    Always returns None.
    """
    return None
1.2 ht 125:
def assignChildTypes(children, elementTable, extendable, schema):
    """Look up each child element's tag and record its type on the child.

    ``child.type`` becomes item [1] of the child's entry in *elementTable*,
    or None when the table has no entry.  A missing entry may not be an
    error; that is not known until the content model shows what the child
    matched.  Non-element children are skipped.

    *extendable* and *schema* are accepted but not consulted.
    Always returns 1.
    """
    for node in children:
        if node.__class__ != Element:
            continue
        key = QName(None, node.local, node.uri)
        if elementTable.has_key(key):
            node.type = elementTable[key][1]
        else:
            node.type = None
    return 1
138:
def validateContentModel(element, type, schema):
    """Check the children of *element* against the content model of *type*.

    Dispatches on ``type.content``: "empty" goes to validateEmptyModel,
    "textOnly" to validateTextModel, and anything else to
    validateElementModel with ``type.fsm`` (text being allowed only when
    the content is "mixed").  Returns whatever the chosen checker returns.

    Tracing a path through the content model also tells us which children
    matched an <any>: such a child should be validated with its own type
    if it has one, and it is not an error if no type can be found for it.
    """
    content = type.content
    if content == "empty":
        return validateEmptyModel(element, type, schema)
    if content == "textOnly":
        return validateTextModel(element, type, schema)
    allowText = (content == "mixed")
    return validateElementModel(element, type.fsm, allowText, schema)
1.2 ht 154:
def validateEmptyModel(element, type, schema):
    """Report an error if *element* has any children.  Returns None."""
    if len(element.children) == 0:
        return None
    verror(element,
           "element %s must be empty but is not" % element.name,
           schema)
1.2 ht 158:
def validateTextModel(element, type, schema):
    """Check that *element* holds at most one child and that it is text.

    More than one child, or a single child that is not Pcdata, is
    reported as an error and None is returned.  Otherwise the text (the
    empty string when there are no children) is passed to validateText
    and 1 is returned; the result of validateText is not consulted.
    """
    name = element.name
    kids = element.children
    count = len(kids)
    if count > 1:
        verror(element,
               "element %s has %s (> 1) children but has type %s" % (name, count, type),
               schema)
        return None
    if count == 0:
        text = ""
    elif kids[0].__class__ != Pcdata:
        verror(element,
               "element %s has non-text children but has type %s" % (name, type),
               schema)
        return None
    else:
        text = kids[0].value
    validateText(text, type, schema)
    return 1
179:
def validateElementModel(element, fsm, mixed, schema):
    """Run the children of *element* through the content-model automaton.

    Starting at ``fsm.startNode``, each child element follows the edge
    whose label equals its qualified name.  If there is none, the last
    edge seen whose label is an Any is followed instead and that label is
    recorded as the child's type.  If neither exists an error is reported,
    the automaton is printed to stderr, and scanning continues from the
    same node.

    Text children are an error unless *mixed* is true or the text is all
    whitespace; that error ends the check immediately.  After the last
    child the current node must be an end node.  Always returns None.
    """
    state = fsm.startNode
    for child in element.children:
        kind = child.__class__
        if kind == Pcdata:
            if not (mixed or whitespace.match(child.value)):
                verror(child,
                       "text not allowed in element %s: |%s|" % (element.name, child.value),
                       schema)
                return None
            continue
        if kind != Element:
            continue

        qname = QName(None, child.local, child.uri)
        exact = None
        wildcard = None
        for edge in state.edges:
            if edge.label == qname:
                exact = edge.dest
                break
            if isinstance(edge.label, Any):
                # XXX check the namespaces
                wildcard = edge.dest
                wildcardLabel = edge.label

        if exact:
            state = exact
        elif wildcard:
            state = wildcard
            # A child that matched <any> carries the Any label as its type;
            # validateChildTypes decides later how to treat it.
            child.type = wildcardLabel
        else:
            verror(child,
                   "element %s not allowed here in element %s" % (qname, QName(None, element.local, element.uri)),
                   schema)
            fsm.printme(sys.stderr)

    if not state.isEndNode:
        verror(element,
               "content of %s is not allowed to end here" % element.name,
               schema, 1)
        fsm.printme(sys.stderr)
    return None
1.2 ht 221:
def validateChildTypes(children, schema):
    """Validate each child element against the type recorded on it.

    - A child with no type is reported as an undeclared element.
    - A child whose type is an ordinary definition is validated against it.
    - A child whose type is an Any (it matched a wildcard) is allowed with
      a warning.  Unless processContents is 'skip', and only if a schema
      for the child's namespace is loaded, its declaration is looked up in
      ``schema.vElementTable``: if found the child is validated against
      the declaration's effective type, otherwise it is an error when
      processContents is 'strict'.

    Non-element children are ignored.  Returns None.
    """
    for child in children:
        if child.__class__ != Element:
            continue
        declared = child.type
        if not declared:
            verror(child,
                   "undeclared element %s" % QName(None, child.local, child.uri),
                   schema)
            continue
        if declared.__class__ != Any:
            validateElement(child, declared, schema)
            continue

        q = QName(None, child.local, child.uri)
        vwarn(child, "allowing %s because it matched <any>" % q)
        mode = declared.any.processContents
        if mode == 'skip':
            continue
        if not schema.factory.schemas.has_key(child.uri):
            # only try if we might win -- needs work
            continue
        try:
            decl = schema.vElementTable[q]
        except KeyError:
            decl = None
        if decl:
            vwarn(None, "validating it against %s" % decl)
            validateElement(child, decl.effectiveType, schema)
        elif mode == 'strict':
            verror(child,
                   "can't find a type for <any>-matching element %s" % q,
                   schema)
1.2 ht 250:
def validateKeys(decl, elt):
    """Check the identity constraints declared by *decl* on element *elt*.

    Resets ``elt.keyTabs`` to an empty table, then checks ``decl.keys``
    (all fields required), ``decl.uniques`` (fields optional) and finally
    ``decl.keyrefs``.  Returns None.
    """
    elt.keyTabs = {}
    for constraintAttr, fieldsRequired in (("keys", 1), ("uniques", 0)):
        validateKeys1(elt, getattr(decl, constraintAttr), fieldsRequired)
    validateKeyRefs(elt, decl.keyrefs)
1.22 ht 256:
def validateKeys1(elt, kds, reqd):
    """Check the key or unique constraints *kds* beneath element *elt*.

    For each constraint the selector XPath picks the candidate elements
    and the field XPaths build each candidate's key value (see buildKey).
    A key value seen twice is reported as a duplicate.  The table of key
    values found is stored in ``elt.keyTabs`` under the constraint's name,
    where validateKeyRefs looks it up.

    *reqd* is true for key constraints: a candidate with missing fields is
    then an error and ends the scan for that constraint.  For unique
    constraints such a candidate is skipped.  Returns None.
    """
    for key in kds:
        tab = {}
        candidates = xpath.XPath(key.selector).find(elt)
        if candidates:
            # Build a real list once; it is reused for every candidate.
            fieldPaths = []
            for f in key.field:
                fieldPaths.append(xpath.XPath(f))
            for cand in candidates:
                keyKey = buildKey(cand, fieldPaths)
                if not keyKey:
                    if reqd:
                        # Report against the constraint's own schema, as the
                        # duplicate-key report below does; no ``schema``
                        # name exists in this function.
                        verror(cand,
                               "missing one or more fields %s from key %s" % (key.field, key.name),
                               key.schema)
                        break
                    # unique constraint: with no complete key value there is
                    # nothing to compare, so skip instead of indexing into
                    # a missing/empty key.
                    continue
                if len(keyKey) > 1:
                    keyKey = tuple(keyKey)
                else:
                    keyKey = keyKey[0]
                if tab.has_key(keyKey):
                    verror(cand,
                           "duplicate key %s, first appearance was" % str(keyKey),
                           key.schema)
                    where(tab[keyKey].where)
                else:
                    tab[keyKey] = cand
        elt.keyTabs[key.name] = tab
283:
def buildKey(s, fps):
    """Collect the key field values of candidate *s*.

    Each field path in *fps* is evaluated against *s*; only the first hit
    is used (extra hits produce a warning).  An element hit contributes
    the value of its first child when that child is Pcdata; a string hit
    contributes itself; any other hit produces a warning and contributes
    nothing.

    Returns the list of collected values, or None as soon as one field
    path finds nothing.
    """
    values = []
    for path in fps:
        hits = path.find(s)
        if not hits:
            return None
        if len(hits) > 1:
            vwarn(s, "oops, multiple field hits for %s at %s: %s" % (path.str, s, hits))
        first = hits[0]
        if isinstance(first, Element):
            kids = first.children
            if len(kids) > 0 and isinstance(kids[0], Pcdata):
                values.append(kids[0].value)
            # else: XPath says in this case value is the empty string
            # NOTE(review): nothing is appended here, so the result is
            # shorter than fps for such a field -- confirm whether "" should
            # be appended instead.
        elif type(first) == types.StringType:
            values.append(first)
        else:
            vwarn(s, "oops, key value %s:%s" % (type(first), first))
    return values
305:
def validateKeyRefs(elt, krds):
    """Check the keyref constraints *krds* beneath element *elt*.

    Each keyref must refer to a key/unique table already stored in
    ``elt.keyTabs``.  If the table is missing, the name is marked 'bogus'
    in ``elt.keyTabs``, an error is reported and checking stops; meeting
    an already-'bogus' table also stops checking.

    Otherwise every candidate picked by the keyref's selector has its key
    value built from the field paths and looked up in the referenced
    table; a value not found there is an error.  A candidate with no key
    value ends the scan for that keyref.  Returns None.
    """
    for ref in krds:
        target = ref.refer
        if not elt.keyTabs.has_key(target):
            elt.keyTabs[target] = 'bogus'
            verror(ref.elt,
                   "No key or unique constraint named %s declared, refed by keyref %s" % (target, ref.name),
                   ref.schema)
            break
        keyTab = elt.keyTabs[target]
        if keyTab == 'bogus':
            break

        candidates = xpath.XPath(ref.selector).find(elt)
        if not candidates:
            continue
        fieldPaths = []
        for f in ref.field:
            fieldPaths.append(xpath.XPath(f))
        for cand in candidates:
            keyKey = buildKey(cand, fieldPaths)
            if not keyKey:
                break
            if len(keyKey) > 1:
                keyKey = tuple(keyKey)
            else:
                keyKey = keyKey[0]
            if not keyTab.has_key(keyKey):
                verror(cand,
                       "no key in %s for %s" % (target, str(keyKey)),
                       ref.schema)
1.21 ht 333:
def findSchemaLocs(element):
    """Collect (namespace, location) pairs from xsi:schemaLocation attributes.

    The attribute value is a whitespace-separated list read two items at a
    time.  The attributes of *element* are examined first, then those of
    every descendant element, in document order.  An unpaired trailing
    item in a value is ignored with a warning.

    Returns a list of 2-tuples of strings (possibly empty).
    """
    # Imported here so the function does not depend on ``string`` reaching
    # the module namespace through one of the star imports.
    import string

    pairs = []
    for a in element.attrs.values():
        if a.uri == xsi and a.local == "schemaLocation":
            tokens = string.split(a.value)
            if len(tokens) % 2:
                # A malformed instance must not crash the validator.
                vwarn(element,
                      "ignoring unpaired item %s in xsi:schemaLocation" % tokens[-1])
                tokens = tokens[:-1]
            while tokens:
                pairs.append((tokens[0], tokens[1]))
                tokens = tokens[2:]
    for c in element.children:
        if isinstance(c, Element):
            pairs = pairs + findSchemaLocs(c)
    return pairs
346:
def runit(en, rns=None):
    """Schema-validate the instance document *en*.

    *rns* lists schema documents to load; schemas named by
    xsi:schemaLocation attributes in the instance are loaded as well.
    Progress and results are written to stderr.  The module globals
    ``s`` (schema), ``e`` (instance root element) and ``t`` (type of the
    root element) are set as a side effect, and the module global ``k``
    (the -k switch) decides whether to carry on when the schemas
    themselves contain errors.

    Returns 1 if validation found errors and 0 if it found none.  Returns
    None if validation was not attempted: schema errors without -k, no
    type found for the root element, or no instance/schema available.
    """
    global s, e, t
    # Imported here so the function does not depend on ``string`` reaching
    # the module namespace through one of the star imports.
    import string

    if rns is None:
        rns = []
    s = None

    sys.stderr.write("schema-validating %s using schemas %s\n" % (en, rns))

    f = newFactory()

    if rns:
        # The first schema named becomes the one validation starts from;
        # the others are only loaded into the same factory.
        s = fromFile(rns[0], f)
        for rn in rns[1:]:
            fromFile(rn, f)

    e = readXML(en)                     # error return?

    schemaLocs = findSchemaLocs(e)
    sys.stderr.write("schemaLocations from instance: %s\n" % schemaLocs)
    for (ns, sl) in schemaLocs:
        checkinSchema(s, ns, sl, e)

    if not rns and not schemaLocs:
        sys.stderr.write("didn't find any schemas!\n")

    ecount = prepare(f)

    if ecount:
        if k:
            km = "continuing"
        else:
            km = "stopping without validating instance"
        em = "%d errors in schemas, %s" % (ecount, km)
        if not k:
            sys.stderr.write("%s\n" % em)
            return
    else:
        em = "Schema(s) OK"
    sys.stderr.write("%s\n" % em)

    # string.find takes (string, substring): look for the colon inside the
    # element name, not the name inside ":".
    cl = string.find(e.name, ':')
    if cl > -1:
        prefix = e.name[0:cl]
    else:
        prefix = ''
    eltname = QName(prefix, e.local, e.uri)

    if not s:
        # any one will do
        s = f.schema

    t = None
    if s and s.vElementTable.has_key(eltname):
        t = s.vElementTable[eltname].typeDefinition
    if not t:
        sys.stderr.write("can't validate, because can't find type for %s\n" % eltname)
        return

    if e and s:
        sys.stderr.write("validating with type %s\n" % t)
        validate(e, t, s)
        if s.factory.errors:
            sys.stderr.write("%d validation errors\n" % s.factory.errors)
            return 1
        else:
            sys.stderr.write("No errors\n")
            return 0
413:
def verror(elt, message, schema, two=0):
    """Report a validation error on stderr and count it.

    Writes "Validation error: ", then calls where() on the location of
    *elt* (``elt.where2`` when *two* is true, otherwise ``elt.where``),
    then writes a space, *message* and a newline.  Finally
    ``schema.factory.errors`` is incremented by one.
    """
    out = sys.stderr
    out.write("Validation error: ")
    if two:
        location = elt.where2
    else:
        location = elt.where
    where(location)
    for piece in (" ", message, "\n"):
        out.write(piece)
    factory = schema.factory
    factory.errors = factory.errors + 1
424:
def vwarn(elt, message):
    """Report a validation warning on stderr.

    Writes "Validation warning: ", calls where() on ``elt.where`` when
    *elt* is true, then writes *message* and a newline.  Warnings are not
    counted.
    """
    out = sys.stderr
    out.write("Validation warning: ")
    if elt:
        where(elt.where)
    for piece in (message, "\n"):
        out.write(piece)
431:
# Command line: any number of leading -k switches, then optionally the
# instance document followed by schema documents.  With no documents
# named, tiny.xml is validated against tiny.xsd.
argl = sys.argv[1:]

# k is read by runit(): when set, the instance is validated even if the
# schemas themselves contain errors.
k = 0
while argl and argl[0] == '-k':
    k = 1
    argl = argl[1:]

if not argl:
    runit("tiny.xml", ["tiny.xsd"])
else:
    runit(argl[0], argl[1:])
1.24 ht 446:
1.25 ht 447: # $Log: applyschema.py,v $
1.32.2.4.2.2! ht 448: # Revision 1.32.2.4.2.1 2000/04/05 12:12:36 ht
! 449: # accommodate changes in schema.py
! 450: #
1.32.2.4.2.1 ht 451: # Revision 1.32.2.4 2000/04/01 18:01:25 ht
452: # various minor compatibility fixes
453: #
1.32.2.4 ht 454: # Revision 1.32.2.3 2000/03/25 12:12:27 ht
455: # restructure error handling/reporting;
456: # allow for switching 208 on and off
457: #
1.32.2.3 ht 458: # Revision 1.32.2.2 2000/03/21 15:57:23 ht
459: # fix bug in skip,
460: # allow 208 override
461: #
1.32.2.2 ht 462: # Revision 1.32.2.1 2000/03/20 17:22:52 ht
463: # better coverage of <any>, including beginning of processcontents
464: #
1.32.2.1 ht 465: # Revision 1.33 2000/03/20 17:20:53 ht
466: # better coverage of <any>, including beginning of processcontents
467: #
1.32 ht 468: # allow 208 override
469: #
470: # Revision 1.32.2.1 2000/03/20 17:22:52 ht
471: # better coverage of <any>, including beginning of processcontents
472: #
473: # Revision 1.33 2000/03/20 17:20:53 ht
474: # better coverage of <any>, including beginning of processcontents
475: #
476: # Revision 1.32 2000/03/08 15:28:46 ht
477: # merge private branches back into public after 20000225 release
478: #
479: # Revision 1.31.2.3 2000/02/24 23:40:32 ht
480: # fix any bug
481: #
482: # Revision 1.31.2.2 2000/02/21 09:18:13 ht
483: # bug in <any> handling
484: #
1.31 richard 485: # Revision 1.31.2.1 2000/02/08 21:43:39 ht
486: # fork private branch to track internal drafts
487: # change calling sequence of checkinSchema
1.30 richard 488: #
489: # Revision 1.31.1.1 2000/02/08 13:54:25 ht
490: # fork branch for non-public changes
1.29 ht 491: # calling sequence to checkinSchema changed
492: #
493: # Revision 1.31 2000/01/13 16:55:42 richard
494: # Finally do something with xsi:type
1.28 ht 495: #
496: # Revision 1.30 2000/01/10 17:36:34 richard
497: # changes for xsi:schemaLocation
1.27 richard 498: #
499: # Revision 1.29 2000/01/08 23:33:50 ht
500: # towards support for xsi:schemaLocation
1.26 ht 501: #
502: # Revision 1.28 2000/01/08 12:07:38 ht
503: # Change command-line arg sequence in preparation for use of schemaLocation!!!!!
1.25 ht 504: # Add debug printout for schemaLocation for now
505: #
506: # Revision 1.27 2000/01/07 17:08:26 richard
507: # start on xsi:type
508: #
1.24 ht 509: # Revision 1.26 2000/01/06 14:59:38 ht
1.1 ht 510: # fix command line bug, display args on entry
511: #
512: # Revision 1.25 2000/01/06 14:38:56 ht
513: # detect cross-scope keyref and signal error
514: #
515: # Revision 1.24 2000/01/03 17:02:37 ht
516: # Include result of sub-ordinate key checking in overall result
517: # Accommodate new calling sequence for xpath.find
518: # add Log and Id
519: #
520: #
Webmaster