Annotation of xmlschema/applyschema.py, revision 1.35
1.24 ht 1: # actually apply a schema to an instance
1.35 ! ht 2: # $Id: applyschema.py,v 1.34 2000/04/20 15:45:08 ht Exp $
1.24 ht 3:
1.2 ht 4: from PyXML import *
5: from XML import *
6: from schema import *
1.28 ht 7: import layer
1.3 aqw 8: import sys
1.4 richard 9: import re
1.18 ht 10: import xpath
1.20 ht 11: import types
1.4 richard 12:
# Namespace URI of the XML Schema instance attributes (xsi:type,
# xsi:schemaLocation) that this module looks for on instance elements.
xsi = "http://www.w3.org/1999/XMLSchema-instance"
# Matches a string consisting only of space, tab, CR and LF characters
# (including the empty string); used to tolerate padding text between
# child elements of element-only content.
whitespace = re.compile("^[ \t\r\n]*$")
1.5 richard 15:
def readXML(url):
    """Read the XML document at `url` and return its document element.

    The source is opened with namespace processing and defaulted
    attributes switched on.  It is always closed again, even when
    building the element tree raises, so a malformed document no longer
    leaks the open stream.

    Errors from Open()/Element() propagate to the caller; there is no
    dedicated error return value.
    """
    source = Open(url, NSL_read|NSL_read_namespaces|NSL_read_defaulted_attributes)
    try:
        # NOTE(review): the second argument (1) is passed through unchanged;
        # its meaning is defined by the Element constructor, not visible here.
        elem = Element(source, 1)
    finally:
        Close(source)
    return elem
1.2 ht 23:
def validate(element, typedef, schema):
    """Validate `element` against `typedef` and return the error count.

    The error counter on schema.factory is reset to zero first, so the
    value returned counts only the errors reported during this run.
    """
    schema.factory.errors = 0
    validateElement(element, typedef, schema)
    errorCount = schema.factory.errors
    return errorCount
1.2 ht 28:
def validateElement(element, type, schema, eltDecl=None):
    """Validate one element (and, recursively, its content) against `type`.

    element -- the instance element to check
    type    -- the declared type; may be false/None when called for a child
               that matched an <any> wildcard and has no known type
    schema  -- the schema whose tables are consulted and whose factory
               accumulates the error count
    eltDecl -- the element declaration, if the caller already knows it;
               otherwise it is looked up by the element's qualified name

    An xsi:type attribute on the element, if present, replaces the declared
    type provided it names a known type that is a subtype of the declared
    one.  Problems are reported through verror(); nothing is raised.
    Returns the result of validateElementSimple() for simple types and
    None otherwise.
    """
    global vel, vtype
    # Record the element/type currently being processed in module globals
    # (nothing in this file reads them back; presumably a debugging aid).
    vel = element
    vtype = type
    xsiTypeKey = (xsi, "type")
    if element.nsattrs.has_key(xsiTypeKey):
        typeName = element.nsattrs[xsiTypeKey].value
        try:
            qt = QName(typeName, element.nsdict)
        except SchemaError:
            verror(element, "namespace not found for xsi:type %s" % typeName, schema)
            return
        if schema.vComplexTypeTable.has_key(qt):
            xsitype = schema.vComplexTypeTable[qt]
        elif schema.vSimpleTypeTable.has_key(qt):
            xsitype = schema.vSimpleTypeTable[qt]
        else:
            verror(element, "xsi:type %s undefined" % qt, schema)
            return
        if not xsitype.isSubtype(type):
            verror(element, "xsi:type %s is not a subtype of the declared type %s" % (qt, type.name), schema)
            return
        vwarn(element, "using xsi:type %s instead of original %s" % (qt, type.name))
        type = xsitype
    if type:
        # might have none in case of recursive call inside <any/>
        if isinstance(type, AbInitio):
            return validateElementSimple(element, type, schema)
        if isinstance(type, simpleType):
            return validateElementSimple(element, type.primitiveType, schema)
        assignAttributeTypes(element, type.attributeDeclarations,
                             type.prohibitedSubstitutions, schema)
        validateAttributeTypes(element, element.attrTable,
                               type.attributeDeclarations, schema)
        assignChildTypes(element.children, type.elementTable,
                         type.prohibitedSubstitutions, schema)
        # we must look at the content model before checking the types, so that
        # we know which children matched <any>
        validateContentModel(element, type, schema)
        validateChildTypes(element.children, schema)
    # Identity constraints hang off the element declaration.  Look it up in
    # the schema we were handed rather than in the module global `s`, which
    # only exists after runit() has run.
    eqn = QName(None, element.local, element.uri)
    if not eltDecl and schema.vElementTable.has_key(eqn):
        eltDecl = schema.vElementTable[eqn]
    if eltDecl:
        validateKeys(eltDecl, element)
1.2 ht 75:
def validateElementSimple(element, type, schema):
    """Validate an element whose type is a simple type.

    Such an element may carry no attributes, and its content (at most one
    text child) must match `type`.  Attributes are reported as an error and
    end the check; otherwise the text check is delegated to
    validateTextModel() and its result is returned.
    """
    name = element.name
    if not element.attrs:
        return validateTextModel(element, type, schema)
    verror(element, "element %s has attributes %s but has type %s" % (name, element.attrs, type), schema)
    return
86:
def validateText(text, type, schema):
    """Check the string `text` against `type`.

    Returns whatever the type's checkString() returns (callers treat a
    true value as a description of the failure).  A simpleType is checked
    through its primitiveType, except urType, which accepts anything and
    yields None.  `schema` is accepted for symmetry with the other
    validators but is not used.
    """
    if not isinstance(type, simpleType):
        return type.checkString(text)
    if type == urType:
        return
    return type.primitiveType.checkString(text)
1.2 ht 95:
def assignAttributeTypes(element, attrdefs, extendable, schema):
    """Attach a declaration to every attribute of `element`.

    Builds element.attrTable, mapping each attribute's qualified name to
    the attribute object, and sets a.type on each non-xsi attribute:
    the entry from `attrdefs` when declared, otherwise None (with a
    warning if `attrdefs` has an "#any" entry, an error if not).
    xsi:type and xsi:schemaLocation are skipped because they are handled
    elsewhere; any other xsi attribute is an error.  `extendable` is
    currently unused.
    """
    table = {}
    element.attrTable = table
    for attr in element.attrs.values():
        an = QName(None, attr.local, attr.uri)
        table[an] = attr
        if attr.uri == xsi:
            # xsi:type and xsi:schemaLocation have already been dealt with
            if attr.local not in ("type", "schemaLocation"):
                verror(element, "unknown xsi attribute %s" % an, schema)
            continue
        if attrdefs.has_key(an):
            attr.type = attrdefs[an]
            continue
        if attrdefs.has_key("#any"):
            # XXX check the namespaces
            vwarn(element, "allowing undeclared attribute %s because anyAttribute(%s)" % (an, attrdefs["#any"]))
        else:
            verror(element, "undeclared attribute %s" % an, schema)
        attr.type = None
    return
1.2 ht 124:
def validateAttributeTypes(element, attrs, attrdefs, schema):
    """Check required attributes are present and attribute values are valid.

    attrs    -- table of qualified name -> attribute, as built by
                assignAttributeTypes()
    attrdefs -- table of qualified name -> attribute declaration

    Every declaration with minOccurs == 1 must have a matching attribute.
    Every non-xsi attribute that was given a type has its value checked
    against that type's attribute declaration.  Failures are reported
    through verror().
    """
    for declName, decl in attrdefs.items():
        if decl.minOccurs == 1 and not attrs.has_key(declName):
            verror(element, "required attribute %s not present" % declName, schema)
    for attrName, attr in attrs.items():
        if attrName.uri == xsi or not attr.type:
            # xsi attributes and untyped attributes are not value-checked
            continue
        value = attr.value
        declaredType = attr.type.attributeDeclaration.typeDefinition
        problem = validateText(value, declaredType, schema)
        if problem:
            verror(element,
                   "attribute type check failed for %s: %s%s" % (attrName,
                                                                 attr.value,
                                                                 problem),
                   schema)
1.2 ht 141:
def assignChildTypes(children, elementTable, extendable, schema):
    """Record on each child element the type its parent's type declares.

    For every Element in `children`, child.type is set to the second item
    of the elementTable entry for its qualified name, or to None when the
    name is not in the table.  A missing entry is not reported here: the
    child may still be accepted by a wildcard in the content model.
    Non-element children are left alone.  Always returns 1.
    """
    for node in children:
        if node.__class__ != Element:
            continue
        qname = QName(None, node.local, node.uri)
        if not elementTable.has_key(qname):
            node.type = None
        else:
            node.type = elementTable[qname][1]
    return 1
154:
def validateContentModel(element, type, schema):
    """Check the children of `element` against the content model of `type`.

    Dispatches on type.contentType: "empty" and "textOnly" have dedicated
    checks; anything else is traced through the type's finite-state
    machine, with text permitted only when the content type is "mixed".
    Children that match an <any> wildcard are marked by the element-model
    check so that later type validation knows how to treat them.
    """
    kind = type.contentType
    if kind == "empty":
        validateEmptyModel(element, type, schema)
        return
    if kind == "textOnly":
        validateTextModel(element, type.model, schema)
        return
    allowText = (kind == "mixed")
    validateElementModel(element, type.fsm, allowText, schema)
1.2 ht 170:
def validateEmptyModel(element, type, schema):
    """Report an error if `element` has any children at all."""
    if len(element.children) == 0:
        return
    verror(element, "element %s must be empty but is not" % element.name, schema)
1.2 ht 174:
def validateTextModel(element, type, schema):
    """Check that `element` holds at most one text child matching `type`.

    More than one child, or a single child that is not Pcdata, is an
    error.  Otherwise the text (the empty string when there is no child)
    is checked with validateText() and any failure is reported.
    """
    name = element.name
    kids = element.children
    count = len(kids)
    if count > 1:
        verror(element, "element %s has %s (> 1) children but has type %s" % (name, count, type), schema)
        return
    if count > 0 and kids[0].__class__ != Pcdata:
        verror(element, "element %s has non-text children but has type %s" % (name, type), schema)
        return
    if count == 0:
        text = ""
    else:
        text = kids[0].value
    problem = validateText(text, type, schema)
    if problem:
        verror(element, "element content failed type check: %s%s" % (text, problem),
               schema)
1.2 ht 196:
def validateElementModel(element, fsm, mixed, schema):
    """Trace the children of `element` through the content-model automaton.

    fsm   -- automaton with a startNode; each node has `edges` (each with
             a `label` and a `dest`) and an `isEndNode` flag
    mixed -- true if non-whitespace text is allowed between children

    For each child element the edge labelled with its qualified name is
    followed.  If there is none but the node has a wildcard (AnyWrap)
    edge, that edge is followed instead and the child's type is set to the
    wildcard label.  A child with no usable edge is reported and the
    automaton stays where it is.  Disallowed text ends the check at once.
    Finally the automaton must be in an end node.
    """
    state = fsm.startNode
    for kid in element.children:
        if kid.__class__ == Pcdata:
            if mixed or whitespace.match(kid.value):
                continue
            verror(kid, "text not allowed in element %s: |%s|" % (element.name, kid.value), schema)
            return
        if kid.__class__ != Element:
            continue
        qname = QName(None, kid.local, kid.uri)
        target = None
        wildTarget = None
        wildLabel = None
        for edge in state.edges:
            if edge.label == qname:
                target = edge.dest
                break
            if isinstance(edge.label, AnyWrap):
                # XXX check the namespaces
                wildTarget = edge.dest
                wildLabel = edge.label
        if target:
            state = target
        elif wildTarget:
            # Matching a wildcard overrides whatever type was assigned
            # earlier; validateChildTypes() recognises the AnyWrap later.
            state = wildTarget
            kid.type = wildLabel
        else:
            verror(kid, "element %s not allowed here in element %s" % (qname, QName(None, element.local, element.uri)), schema)
            fsm.printme(sys.stderr)
    if not state.isEndNode:
        verror(element, "content of %s is not allowed to end here" % element.name,
               schema, 1)
        fsm.printme(sys.stderr)
        return
1.2 ht 238:
def validateChildTypes(children, schema):
    """Validate each child element against the type recorded on it.

    A child without a type is an undeclared element.  A child whose type
    is an AnyWrap matched an <any> wildcard: unless the wildcard says
    'skip', and provided a schema is loaded for the child's namespace, the
    child is validated against its global element declaration if one
    exists; with 'strict' processing a missing declaration is an error.
    Every other child is validated against its recorded type.
    """
    for kid in children:
        if kid.__class__ != Element:
            continue
        if not kid.type:
            verror(kid,
                   "undeclared element %s" % QName(None, kid.local, kid.uri),
                   schema)
            continue
        if kid.type.__class__ != AnyWrap:
            validateElement(kid, kid.type, schema)
            continue
        q = QName(None, kid.local, kid.uri)
        vwarn(kid, "allowing %s because it matched <any>" % q)
        if kid.type.any.processContents == 'skip':
            continue
        if not schema.factory.schemas.has_key(kid.uri):
            # only try if we might win -- needs work
            continue
        try:
            decl = schema.vElementTable[q]
        except KeyError:
            decl = None
        if decl:
            vwarn(None, "validating it against %s" % decl)
            validateElement(kid, decl.typeDefinition, schema)
        elif kid.type.any.processContents == 'strict':
            verror(kid, "can't find a type for <any>-matching element %s" % q, schema)
1.2 ht 267:
def validateKeys(decl, elt):
    """Check the identity constraints declared on `decl` within `elt`.

    Starts a fresh elt.keyTabs table, fills it from the key constraints
    (fields required) and the unique constraints (fields optional), then
    checks the keyrefs against the tables just built.
    """
    elt.keyTabs = {}
    for constraints, required in ((decl.keys, 1), (decl.uniques, 0)):
        validateKeys1(elt, constraints, required)
    validateKeyRefs(elt, decl.keyrefs)
1.22 ht 273:
def validateKeys1(elt, kds, reqd):
    """Build and check the key tables for a list of key/unique constraints.

    elt  -- element in whose scope the constraints apply; receives one
            table per constraint in elt.keyTabs[constraint.name]
    kds  -- constraint declarations, each with `selector`, `field`, `name`
            and `schema` attributes
    reqd -- true for <key> (every selected node must supply all fields),
            false for <unique> (nodes lacking a field are simply skipped)

    Each table maps a key value (a single value, or a tuple for
    multi-field keys) to the first node that carried it.  Duplicates and,
    for keys, missing fields are reported through verror().
    """
    for key in kds:
        tab = {}
        candidates = xpath.XPath(key.selector).find(elt)
        if candidates:
            # Build a real list: the field paths are reused for every
            # candidate, which a lazily evaluated map() would not allow.
            fps = []
            for f in key.field:
                fps.append(xpath.XPath(f))
            for node in candidates:
                keyKey = buildKey(node, fps)
                if not keyKey:
                    if reqd:
                        # Report against the constraint's own schema, as the
                        # duplicate-key report below does.
                        verror(node,
                               "missing one or more fields %s from key %s" % (key.field,
                                                                              key.name),
                               key.schema)
                        break
                    # A node without a complete value does not take part in
                    # a unique constraint; previously this crashed on
                    # len(None) / keyKey[0].
                    continue
                if len(keyKey) > 1:
                    keyKey = tuple(keyKey)
                else:
                    keyKey = keyKey[0]
                if tab.has_key(keyKey):
                    verror(node, "duplicate key %s, first appearance was" % str(keyKey),
                           key.schema)
                    where(tab[keyKey].where)
                else:
                    tab[keyKey] = node
        elt.keyTabs[key.name] = tab
300:
def buildKey(s, fps):
    """Collect the field values of node `s` for an identity constraint.

    fps -- field path objects; each is evaluated with fp.find(s)

    Returns None as soon as any field path finds nothing.  Otherwise
    returns a list of values taken from the first hit of each path: the
    text of the first Pcdata child for an element hit, or the hit itself
    when it is a string.  An element hit without leading text, or a hit of
    any other kind, contributes no value, so the list can be shorter than
    `fps`.  Multiple hits and unexpected hit types only produce warnings.
    """
    values = []
    for fp in fps:
        hits = fp.find(s)
        if not hits:
            return None
        if len(hits) > 1:
            vwarn(s, "oops, multiple field hits for %s at %s: %s" % (fp.str, s, hits))
        first = hits[0]
        if isinstance(first, Element):
            kids = first.children
            if len(kids) > 0 and isinstance(kids[0], Pcdata):
                values.append(kids[0].value)
            # otherwise: XPath says the value is the empty string, but
            # nothing is recorded for this field
        elif type(first) == types.StringType:
            values.append(first)
        else:
            vwarn(s, "oops, key value %s:%s" % (type(first), first))
    return values
322:
def validateKeyRefs(elt, krds):
    """Check every keyref declared on an element against the key tables.

    elt  -- element whose elt.keyTabs was filled in by validateKeys1()
    krds -- keyref declarations, each with `refer`, `selector`, `field`,
            `name`, `elt` and `schema` attributes

    A keyref naming a constraint with no table in this scope is reported
    once; the name is then marked 'bogus' so that further keyrefs to the
    same name stay quiet.  Checking carries on with the remaining keyrefs
    (it used to stop at the first such problem, which left the 'bogus'
    marker without effect).  For a resolvable keyref, every selected node
    that supplies all its fields must match an entry in the referenced
    table; nodes with an incomplete value are skipped individually rather
    than ending the scan.
    """
    for ref in krds:
        if not elt.keyTabs.has_key(ref.refer):
            elt.keyTabs[ref.refer] = 'bogus'
            verror(ref.elt,
                   "No key or unique constraint named %s declared, refed by keyref %s" % (ref.refer, ref.name),
                   ref.schema)
            continue
        keyTab = elt.keyTabs[ref.refer]
        if keyTab == 'bogus':
            # already complained about this name
            continue
        candidates = xpath.XPath(ref.selector).find(elt)
        if not candidates:
            continue
        # A real list, because the paths are reused for every candidate.
        fps = []
        for f in ref.field:
            fps.append(xpath.XPath(f))
        for node in candidates:
            keyKey = buildKey(node, fps)
            if not keyKey:
                continue
            if len(keyKey) > 1:
                keyKey = tuple(keyKey)
            else:
                keyKey = keyKey[0]
            if not keyTab.has_key(keyKey):
                verror(node, "no key in %s for %s" % (ref.refer, str(keyKey)), ref.schema)
1.21 ht 350:
def findSchemaLocs(element):
    """Collect all xsi:schemaLocation hints in the tree rooted at `element`.

    Returns a list of (namespace, location) pairs in document order: those
    on `element` itself first, then those of its element children,
    recursively.  The attribute value is a whitespace-separated list of
    alternating namespace names and locations; a trailing value without a
    partner is dropped with a warning instead of raising IndexError.
    """
    pairs = []
    for a in element.attrs.values():
        if a.uri == xsi and a.local == "schemaLocation":
            tokens = string.split(a.value)
            if len(tokens) % 2:
                vwarn(element,
                      "odd number of values in xsi:schemaLocation, ignoring the last: %s" % tokens[-1])
                tokens = tokens[:-1]
            i = 0
            while i < len(tokens):
                pairs.append((tokens[i], tokens[i+1]))
                i = i + 2
    for c in element.children:
        if isinstance(c, Element):
            pairs = pairs + findSchemaLocs(c)
    return pairs
363:
def runit(en, rns=None):
    """Schema-validate the instance document `en`, reporting on stderr.

    en  -- name/URL of the instance document
    rns -- optional list of schema documents to load first; further
           schemas come from xsi:schemaLocation attributes in the
           instance and, failing that, from the root element's namespace
           URI itself

    Returns 1 if validation found errors and 0 if it found none.  Returns
    None when validation was not attempted: the schemas contain errors
    (and the module-level flag `k` is not set), no declaration can be
    found for the root element, or there is no instance/schema to use.

    The schema, instance root and root type are left in the module
    globals s, e and t.
    """
    global s, e, t

    if rns is None:
        rns = []

    s = None

    sys.stderr.write("schema-validating %s using schemas %s\n" % (en, rns))

    f = newFactory()
    ren = resolveURL(f.fileNames[0], en)

    if rns:
        # The first schema becomes the working schema; the others are only
        # read into the same factory.
        s = fromFile(rns[0], f)
        for rn in rns[1:]:
            fromFile(rn, f)
    else:
        s = schema(f, None)
        s.targetNS = '##dummy'

    e = readXML(en)  # error return?

    schemaLocs = findSchemaLocs(e)
    sys.stderr.write("schemaLocations from instance: %s\n" % schemaLocs)
    for (ns, sl) in schemaLocs:
        checkinSchema(s, ns, sl, e, ren)

    if (e.uri and
        (e.uri not in ('http://www.w3.org/XML/1998/namespace',
                       'http://www.w3.org/1999/XMLSchema-instance')) and
        not f.schemas.has_key(e.uri)):
        # Nothing loaded for the root namespace: try the namespace URI as
        # a schema location.  The attempt is announced only once it has
        # finished, as before.
        try:
            checkinSchema(s, e.uri, e.uri, e, ren)
            outcome = "ok.\n"
        except XMLinter.error:
            outcome = "failed.\n"
        sys.stderr.write("no schema yet for %s, trying namespace URI itself. . ." %
                         e.uri)
        sys.stderr.write(outcome)

    ecount = prepare(f)

    if ecount:
        if k:
            km = "continuing"
        else:
            km = "stopping without validating instance"
        em = "%d errors in schemas, %s" % (ecount, km)
        if not k:
            sys.stderr.write("%s\n" % em)
            return
    else:
        em = "Schema(s) OK"
    sys.stderr.write("%s\n" % em)

    # string.find takes the string to search first and the substring
    # second; with the arguments reversed the prefix was never found.
    cl = string.find(e.name, ':')
    if cl > -1:
        prefix = e.name[0:cl]
    else:
        prefix = ''
    eltname = QName(prefix, e.local, e.uri)

    if not s:
        # any one will do
        s = f.sfors
    t = None

    if s and s.vElementTable.has_key(eltname):
        t = s.vElementTable[eltname].typeDefinition
    if not t:
        sys.stderr.write("can't validate, because can't find type for %s\n" % eltname)
        return

    if e and s:
        if t.name:
            sys.stderr.write("validating with type %s\n" % t.name)
        else:
            sys.stderr.write("validating with anonymous type\n")
        validate(e, t, s)
        if s.factory.errors:
            sys.stderr.write("%d validation errors\n" % s.factory.errors)
            return 1
        else:
            sys.stderr.write("No errors\n")
            return 0
1.30 richard 448:
def verror(elt, message, schema, two=0):
    """Report a validation error on stderr and count it.

    elt     -- node the error is about; its location is printed first
    message -- text of the error
    schema  -- schema whose factory.errors counter is incremented
    two     -- if true, print elt.where2 instead of elt.where
    """
    sys.stderr.write("Validation error: ")
    if two:
        location = elt.where2
    else:
        location = elt.where
    where(location)
    for piece in (" ", message, "\n"):
        sys.stderr.write(piece)
    schema.factory.errors = schema.factory.errors + 1
459:
def vwarn(elt, message):
    """Write a validation warning to stderr.

    The location of `elt` is printed before the message when `elt` is
    given; pass None for a warning with no location.  Warnings are not
    counted as errors.
    """
    sys.stderr.write("Validation warning: ")
    if elt:
        where(elt.where)
    for piece in (message, "\n"):
        sys.stderr.write(piece)
466:
# Command-line driver:  [-k] [instance [schema ...]]
argl = sys.argv[1:]

# Leading -k flags set k, which runit() consults to decide whether to carry
# on validating the instance when the schemas themselves contain errors.
k = 0
while argl and argl[0] == '-k':
    k = 1
    argl = argl[1:]

# With no instance named, fall back to the bundled tiny example.
if not argl:
    runit("tiny.xml", ["tiny.xsd"])
else:
    runit(argl[0], argl[1:])
1.24 ht 481:
1.25 ht 482: # $Log: applyschema.py,v $
1.35 ! ht 483: #
! 484: # Revision 1.36 2000/04/21 09:32:21 ht
! 485: # another dose of resolveURL
1.34 ht 486: # use tiny only if run from command line
487: #
488: # Revision 1.35 2000/04/20 22:12:43 ht
1.33 ht 489: # use resolveURL on input, schemaLocs
490: #
491: # Revision 1.34 2000/04/20 15:45:08 ht
492: # better handling of use of ns uri for loc
493: #
494: # Revision 1.33 2000/04/20 14:26:59 ht
495: # merge in private and comp branches
496: #
497: # Revision 1.32.2.5 2000/04/20 14:25:54 ht
498: # merge in comp branch
499: #
500: # Revision 1.32.2.4.2.9 2000/04/20 14:22:39 ht
501: # manage document validation schema creation and search better
502: #
503: # Revision 1.32.2.4.2.8 2000/04/20 12:03:21 ht
504: # Remove a few lingering effectiveTypes
505: # Allow better for absent types etc.
506: #
507: # Revision 1.32.2.4.2.7 2000/04/14 21:18:27 ht
508: # minor attr names/path changes to track schema
509: #
510: # Revision 1.32.2.4.2.6 2000/04/13 23:04:39 ht
511: # allow for urType as simple type (?)
512: # track Any->AnyWrap change
513: #
514: # Revision 1.32.2.4.2.5 2000/04/12 17:29:37 ht
515: # begin work on model merger,
516: #
517: # Revision 1.32.2.4.2.4 2000/04/11 18:13:17 ht
518: # interpolate attributeUse between complexType and attributeDeclaration,
519: # parallel to particle
520: #
521: # Revision 1.32.2.4.2.3 2000/04/10 15:48:46 ht
522: # put modest attribute validation in place
523: #
524: # Revision 1.32.2.4.2.2 2000/04/09 16:13:26 ht
525: # working on complex type, attribute;
526: # back out component.qname
527: #
528: # Revision 1.32.2.4.2.1 2000/04/05 12:12:36 ht
529: # accommodate changes in schema.py
530: #
531: # Revision 1.32.2.4 2000/04/01 18:01:25 ht
532: # various minor compatibility fixes
533: #
534: # Revision 1.32.2.3 2000/03/25 12:12:27 ht
535: # restructure error handling/reporting;
536: # allow for switching 208 on and off
537: #
538: # Revision 1.32.2.2 2000/03/21 15:57:23 ht
539: # fix bug in skip,
1.32 ht 540: # allow 208 override
541: #
542: # Revision 1.32.2.1 2000/03/20 17:22:52 ht
543: # better coverage of <any>, including beginning of processcontents
544: #
545: # Revision 1.33 2000/03/20 17:20:53 ht
546: # better coverage of <any>, including beginning of processcontents
547: #
548: # Revision 1.32 2000/03/08 15:28:46 ht
549: # merge private branches back into public after 20000225 release
550: #
551: # Revision 1.31.2.3 2000/02/24 23:40:32 ht
552: # fix any bug
553: #
554: # Revision 1.31.2.2 2000/02/21 09:18:13 ht
555: # bug in <any> handling
556: #
1.31 richard 557: # Revision 1.31.2.1 2000/02/08 21:43:39 ht
558: # fork private branch to track internal drafts
559: # change calling sequence of checkinSchema
1.30 richard 560: #
561: # Revision 1.31.1.1 2000/02/08 13:54:25 ht
562: # fork branch for non-public changes
1.29 ht 563: # calling sequence to checkinSchema changed
564: #
565: # Revision 1.31 2000/01/13 16:55:42 richard
566: # Finally do something with xsi:type
1.28 ht 567: #
568: # Revision 1.30 2000/01/10 17:36:34 richard
569: # changes for xsi:schemaLocation
1.27 richard 570: #
571: # Revision 1.29 2000/01/08 23:33:50 ht
572: # towards support for xsi:schemaLocation
1.26 ht 573: #
574: # Revision 1.28 2000/01/08 12:07:38 ht
575: # Change command-line arg sequence in preparation for use of schemaLocation!!!!!
1.25 ht 576: # Add debug printout for schemaLocation for now
577: #
578: # Revision 1.27 2000/01/07 17:08:26 richard
579: # start on xsi:type
580: #
1.24 ht 581: # Revision 1.26 2000/01/06 14:59:38 ht
1.1 ht 582: # fix command line bug, display args on entry
583: #
584: # Revision 1.25 2000/01/06 14:38:56 ht
585: # detect cross-scope keyref and signal error
586: #
587: # Revision 1.24 2000/01/03 17:02:37 ht
588: # Include result of sub-ordinate key checking in overall result
589: # Accommodate new calling sequence for xpath.find
590: # add Log and Id
591: #
592: #
Webmaster