Annotation of xmlschema/applyschema.py, revision 1.31

1.24      ht          1: # actually apply a schema to an instance
1.31    ! richard     2: # $Id: applyschema.py,v 1.30 2000/01/10 17:36:34 richard Exp $
1.24      ht          3: 
1.2       ht          4: from PyXML import *
                      5: from XML import *
                      6: from schema import *
1.28      ht          7: import layer
1.3       aqw         8: import sys
1.4       richard     9: import re
1.18      ht         10: import xpath
1.20      ht         11: import types
1.4       richard    12: 
                     13: whitespace = re.compile("^[ \t\r\n]*$")
1.27      richard    14: xsi = "http://www.w3.org/1999/XMLSchema/instance"
1.2       ht         15: 
1.6       richard    16: def where(w):
                     17:   if w:
                     18:     print "In %s at line %d char %d of %s:" % w
1.5       richard    19: 
def readXML(url):
  """Read the document at url and return it as an Element.

  The source is opened with the NSL_read, NSL_read_namespaces and
  NSL_read_defaulted_attributes flags.  It is closed again before
  returning, including when building the Element raises.
  """
  source = Open(url, NSL_read|NSL_read_namespaces|NSL_read_defaulted_attributes)
  try:
    elem = Element(source, 1)
  finally:
    # never leak the open input, even on a parse failure
    Close(source)
  return elem
                     27: 
def validate(element, typedef, schema):
  """Validate element against typedef, then check schema-level keys.

  Returns the validateElement result; when schema.keys is non-empty
  that result is combined with the outcome of validateKeys.
  """
  ok = validateElement(element, typedef, schema)
  if not schema.keys:
    return ok
  # hack: hang the element under a synthetic "#root" parent before
  # running the key checks declared on the schema itself
  root = Element("#root")
  root.children.append(element)
  return validateKeys(schema, root) and ok
1.2       ht         36: 
1.21      ht         37: def validateElement(element, type, schema,eltDecl=None):
1.4       richard    38:   global vel, vtype
                     39:   vel = element
                     40:   vtype = type
1.2       ht         41: #  print "validating element %s against %s" % (element.name, type)
1.31    ! richard    42:   if element.nsattrs.has_key((xsi, "type")):
        !            43:     t = element.nsattrs[(xsi, "type")].value;
        !            44:     try:
        !            45:       qt = QName(t, element.nsdict)
        !            46:     except SchemaError:
        !            47:       print "namespace not found for xsi:type %s" % t
        !            48:       return 0
        !            49:     if schema.vComplexTypeTable.has_key(qt):
        !            50:       xsitype=schema.vComplexTypeTable[qt]
        !            51:     elif schema.vSimpleTypeTable.has_key(qt):
        !            52:       xsitype=schema.vSimpleTypeTable[qt]
        !            53:     else:
        !            54:       print "xsi:type %s undefined" % qt
        !            55:       return 0
        !            56:     if not xsitype.isSubtype(type):
        !            57:       print "xsi:type %s is not a subtype of the declared type %s" % (qt, type.name)
        !            58:       return 0
        !            59:     print "using xsi:type %s instead of original %s" % (qt, type.name)
        !            60:     type = xsitype.effectiveType
1.4       richard    61:   if isinstance(type, AbInitio):
1.2       ht         62:     return validateElementSimple(element, type, schema)
1.9       ht         63:   v1 = assignAttributeTypes(element, type.attrTable, type.extendable, schema)
1.7       richard    64:   if v1:
                     65:     v1 = validateAttributeTypes(element.attrs, type.attrTable, schema)
1.12      richard    66: #  print "assigning types for %s" % element.name
1.7       richard    67:   v2 = assignChildTypes(element.children, type.elementTable(), type.extendable, schema)
1.13      richard    68:   # we must look at the content model before checking the types, so that
                     69:   # we know which children matched <any>
                     70:   v3 = validateContentModel(element, type, schema)
1.7       richard    71:   if v2:
                     72:     v2 = validateChildTypes(element.children, schema)
1.21      ht         73:   eqn=QName(None,element.local,element.uri)
                     74:   if not eltDecl and s.vElementTable.has_key(eqn):
                     75:     eltDecl=s.vElementTable[eqn]
                     76:   if eltDecl:
                     77:     v4=validateKeys(eltDecl,element)
1.24      ht         78:   else:
                     79:     v4=1
                     80:   return v1 and v2 and v3 and v4
1.2       ht         81: 
                     82: def validateElementSimple(element, type, schema):
                     83:   # check that:
                     84:   #   it has no attributes
                     85:   #   it has one pcdata child, and if so
                     86:   #     the text of the pcdata matches the type
                     87:   name = element.name
                     88:   if element.attrs:
1.6       richard    89:     where(element.where)
1.2       ht         90:     print "element %s has attributes %s but has type %s" % (name, element.attrs, type)
1.7       richard    91:     return 0
1.2       ht         92:   return validateTextModel(element, type, schema)
                     93: 
                     94: def validateText(text, type, schema):
                     95:   return 1
                     96: 
1.9       ht         97: def assignAttributeTypes(element, attrdefs, extendable, schema):
1.2       ht         98:   # look up each attribute in attrdefs and assign its type
                     99:   # error if attr declaration is not found and type is not extendable
1.15      richard   100: #  print "assigning attrs for %s {%s}%s" % (element.name, element.uri, element.local)
1.7       richard   101:   v = 1
1.9       ht        102:   for a in element.attrs.values():
1.15      richard   103: #    print "assigning attr %s {%s}%s" % (a.name, a.uri, a.local)
1.16      ht        104:     an=QName(None,a.local,a.uri)
1.27      richard   105:     if a.uri == xsi:
                    106:       if a.local == "type":
1.31    ! richard   107:         # we've already handled it
        !           108:         pass
1.27      richard   109:       elif a.local == "schemaLocation":
1.30      richard   110:         # we've already handled it
                    111:         pass
1.27      richard   112:       else:
                    113:         print "unknown xsi attribute %s" % an
                    114:         v = 0
                    115:     elif attrdefs.has_key(an):
1.14      ht        116:       a.type = attrdefs[an].effectiveType
1.17      richard   117:     elif attrdefs.has_key("#any"):
                    118:       # XXX check the namespaces
                    119:       print "allowing undeclared attribute %s because anyAttribute(%s)" % (an, attrdefs["#any"])
                    120:       a.type = None
1.2       ht        121:     else:
1.9       ht        122:       where(element.where)
1.14      ht        123:       print "undeclared attribute %s" % an
1.2       ht        124:       a.type = None
1.7       richard   125:       v = 0
                    126:   return v
1.2       ht        127: 
                    128: def validateAttributeTypes(attrs, attrdefs, schema):
                    129:   # check that each attribute matches its type
                    130:   # check that all required attributes are present
                    131:   # add defaulted attributes (shouldn't need to check their types)
                    132:   return 1
                    133: 
                    134: def assignChildTypes(children, elementTable, extendable, schema):
                    135:   # look up each child tag and record the type
                    136:   # (it may not be an error if it is not declared; we don't know that
                    137:   #  until we see what it matches in the content model)
                    138:   for child in children:
                    139:     if child.__class__ == Element:
1.10      richard   140:       qname = QName(None,child.local,child.uri)
                    141:       if elementTable.has_key(qname):
                    142:        child.type = elementTable[qname][1]
1.2       ht        143:       else:
                    144:        child.type = None
                    145:   return 1
                    146: 
                    147: def validateContentModel(element, type, schema):
                    148:   # trace a path through the content model
                    149:   # if a child matches an <any tag=... type=...> we need to indicate
                    150:   # that that child should be validated with its xsd:type if it has one
                    151:   # if a child matches some other kind of <any> we need to indicate
                    152:   # that it's not an error if we can't find its type
                    153: 
1.4       richard   154: #  print "validating model for %s content %s" % (element.name, type.content)
1.2       ht        155:   if type.content == "empty":
                    156:     return validateEmptyModel(element, type, schema)
                    157:   elif type.content == "textOnly":
                    158:     return validateTextModel(element, type, schema)
                    159: 
1.4       richard   160:   return validateElementModel(element, type.fsm, 
                    161:                              type.content == "mixed", schema)
1.2       ht        162: 
                    163: def validateEmptyModel(element, type, schema):
                    164:   if len(element.children) != 0:
1.6       richard   165:     where(element.where)
1.2       ht        166:     print "element %s must be empty but is not" % element.name
1.7       richard   167:     return 0
1.2       ht        168:   return 1
                    169: 
                    170: def validateTextModel(element, type, schema):
                    171:   # check that:
                    172:   #   it has one pcdata child, and if so
                    173:   #     the text of the pcdata matches the type
                    174:   name = element.name
                    175:   n = len(element.children)
                    176:   if n > 1:
1.6       richard   177:     where(element.where)
1.2       ht        178:     print "element %s has %s (> 1) children but has type %s" % (name, n, type)
1.7       richard   179:     return 0
1.2       ht        180:   elif n > 0 and element.children[0].__class__ != Pcdata:
1.6       richard   181:     where(element.where)
1.2       ht        182:     print "element %s has non-text children but has type %s" % (name, type)
1.7       richard   183:     return 0
1.2       ht        184:   else:
                    185:     if n == 0:
                    186:       text = ""
                    187:     else:
                    188:       text = element.children[0].value
                    189:     validateText(text, type, schema)
                    190: 
                    191:   return 1
                    192: 
1.4       richard   193: def validateElementModel(element, fsm, mixed, schema):
                    194: #  print "validating element model for %s" % element.name
1.13      richard   195:   v = 1
1.4       richard   196:   n = fsm.startNode
                    197:   for c in element.children:
                    198:     if c.__class__ == Pcdata:
1.19      ht        199:       if (not mixed) and (not whitespace.match(c.value)):
1.6       richard   200:        where(c.where)
1.19      ht        201:        print "text not allowed in element %s: |%s|" % (element.name,c.value)
1.4       richard   202:        return 0
                    203:     elif c.__class__ == Element:
1.10      richard   204:       qname = QName(None, c.local, c.uri)
1.8       richard   205:       next = None
1.13      richard   206:       anynext = None
1.4       richard   207:       for e in n.edges:
1.10      richard   208:         if e.label == qname:
1.8       richard   209:          next = e.dest
1.4       richard   210:          break
1.8       richard   211:         if isinstance(e.label, Gensym):
1.17      richard   212:           # XXX check the namespaces
1.13      richard   213:           anynext = e.dest
1.8       richard   214:       if not next:
1.13      richard   215:         if anynext:
                    216:           n = anynext
1.17      richard   217: # this is no longer an error, but something more complicated is XXX
                    218: #          if c.type:
                    219: #            where(child.where)
                    220: #            print "element matched <any> but had a type assigned"
                    221: #            v = 0
                    222: #          else:
                    223: #            c.type = "<any>"
                    224:           c.type = "<any>"
1.13      richard   225:         else:
                    226:           where(c.where)
                    227:           print "element %s not allowed here in element %s" % (qname, QName(None,element.local,element.uri))
                    228:           fsm.printme()
                    229:           return 0
                    230:       else:
                    231:         n = next
1.4       richard   232:   if not n.isEndNode:
1.6       richard   233:     where(element.where2)
1.4       richard   234:     print "content of %s is not allowed to end here" % element.name
1.13      richard   235:     fsm.printme()
1.7       richard   236:     return 0
1.13      richard   237:   return v
1.2       ht        238: 
                    239: def validateChildTypes(children, schema):
                    240:   # validate each child element against its type, if we know it
                    241:   # report an error if we don't know it and it's not in <any>
1.7       richard   242:   v = 1
1.2       ht        243:   for child in children:
                    244:     if child.__class__ == Element:
1.13      richard   245:       if child.type == "<any>":
                    246:         q = QName(prefix,child.local,child.uri)
                    247:         print "allowing %s because it matched <any>" % q
1.19      ht        248:        if schema.factory.schemas.has_key(child.uri):
                    249:          # only try if we might win -- needs work
                    250:          e = schema.vElementTable[q]
                    251:          if e:
                    252:            print "validating it against %s" % e
                    253:            if not validateElement(child, e.effectiveType, schema):
                    254:              v = 0
                    255:          else:
                    256:            where(child.where)
                    257:            print "can't for a type for <any>-matching element %s" % q
                    258:            v = 0
1.13      richard   259:       elif child.type:
1.7       richard   260:         if not validateElement(child, child.type, schema):
                    261:           v = 0
1.2       ht        262:       else:
1.6       richard   263:        where(child.where)
1.12      richard   264:        print "undeclared element %s" % QName(None,child.local,child.uri)
1.7       richard   265:         v = 0
                    266:   return v
1.2       ht        267: 
def validateKeys(decl,elt):
  """Run the key, unique and keyref checks of decl over elt.

  elt.keyTabs is reset first.  All three checks always run (there is no
  short-circuiting between them); the and of their results is returned.
  """
  elt.keyTabs = {}
  results = (validateKeys1(elt, decl.keys, 1),
             validateKeys1(elt, decl.uniques, 0),
             validateKeyRefs(elt, decl.keyrefs))
  return results[0] and results[1] and results[2]
                    274: 
                    275: def validateKeys1(elt,kds,reqd):
1.21      ht        276:   res=1
1.22      ht        277:   for key in kds:
1.21      ht        278:     tab={}
                    279:     sp=xpath.XPath(key.selector)
1.24      ht        280:     candidates=sp.find(elt)
1.21      ht        281:     if candidates:
                    282:       fps=map(lambda f:xpath.XPath(f),key.field)
                    283:       for s in candidates:
1.22      ht        284:         keyKey=buildKey(s,fps)
                    285:         if reqd and not keyKey:
1.24      ht        286:           where(s.where)
                    287:           print "missing one or more fields %s from key %s"%(key.field,
                    288:                                                              key.name)
1.22      ht        289:           res=0
                    290:           break
1.21      ht        291:        if len(keyKey)>1:
                    292:          keyKey=tuple(keyKey)
                    293:        else:
                    294:          keyKey=keyKey[0]
                    295:        if tab.has_key(keyKey):
1.23      ht        296:          where(s.where)
                    297:          print "duplicate key %s, first appearance was"%str(keyKey),
1.21      ht        298:          where(tab[keyKey].where)
                    299:          res=0
                    300:        else:
                    301:          tab[keyKey]=s
1.22      ht        302:     elt.keyTabs[key.name]=tab
                    303:   return res
                    304: 
                    305: def buildKey(s,fps):
                    306:   keyKey=[]
                    307:   for fp in fps:
1.24      ht        308:     kv=fp.find(s)
1.22      ht        309:     if kv:
                    310:       if len(kv)>1:
                    311:         print "oops, multiple field hits for %s at %s: %s"%(fp.str,s,kv)
                    312:       if isinstance(kv[0],Element):
                    313:         if (len(kv[0].children)>0 and
                    314:             isinstance(kv[0].children[0],Pcdata)):
                    315:           keyKey.append(kv[0].children[0].value)
                    316:         else:
                    317:           # XPath says in this case value is the empty string
                    318:           pass
                    319:       elif type(kv[0])==types.StringType:
                    320:         keyKey.append(kv[0])
                    321:       else:
                    322:         print "oops, key value %s:%s"%(type(kv[0]),kv[0])
                    323:     else:
                    324:       return None
                    325:   return keyKey
                    326: 
                    327: def validateKeyRefs(elt,krds):
                    328:   res=1
                    329:   for ref in krds:
1.25      ht        330:     if elt.keyTabs.has_key(ref.refer):
                    331:       keyTab=elt.keyTabs[ref.refer]
                    332:       if keyTab=='bogus':
                    333:        break
                    334:     else:
                    335:       elt.keyTabs[ref.refer]='bogus'
                    336:       print "No key or unique constraint named %s declared, refed by keyref %s"%(ref.refer,ref.name)
                    337:       res=0
                    338:       break
1.22      ht        339:     sp=xpath.XPath(ref.selector)
1.24      ht        340:     candidates=sp.find(elt)
1.22      ht        341:     if candidates:
                    342:       fps=map(lambda f:xpath.XPath(f),ref.field)
                    343:       for s in candidates:
                    344:         keyKey=buildKey(s,fps)
                    345:         if not keyKey:
                    346:           break
                    347:        if len(keyKey)>1:
                    348:          keyKey=tuple(keyKey)
                    349:        else:
                    350:          keyKey=keyKey[0]
1.25      ht        351:        if not keyTab.has_key(keyKey):
1.23      ht        352:          where(s.where)
1.22      ht        353:          print "no key in %s for %s"%(ref.refer,str(keyKey))
                    354:          res=0
1.21      ht        355:   return res
                    356: 
1.30      richard   357: def findSchemaLocs(element):
                    358:   pairs = []
                    359:   for a in element.attrs.values():
                    360:     if a.uri == xsi and a.local == "schemaLocation":
                    361:       scls=string.split(a.value)
                    362:       while scls:
                    363:         pairs.append((scls[0], scls[1]))
                    364:         scls=scls[2:]
                    365:   for c in element.children:
                    366:     if isinstance(c, Element):
                    367:       pairs = pairs + findSchemaLocs(c)
                    368:   return pairs
                    369:   
1.28      ht        370: def runit(en,rns=[]):
1.21      ht        371:   global s,e,t
1.30      richard   372:   s = None
                    373:   
1.28      ht        374:   print "schema-validating %s using schemas %s"%(en,rns)
1.30      richard   375: 
                    376:   f=newFactory()
                    377: 
                    378:   e=readXML(en)
                    379: 
1.28      ht        380:   if rns:
1.30      richard   381:     s = fromFile(rns[0],f)
                    382:     for rn in rns[1:]:
                    383:       fromFile(rn,f)
                    384: 
                    385:   schemaLocs = findSchemaLocs(e)
                    386:   print "schemaLocations from instance: %s" % schemaLocs
                    387:   for (ns, sl) in schemaLocs:
                    388:     checkinSchema(f, ns, sl)
                    389:   
                    390:   if not rns and not schemaLocs:
                    391:     print "didn't find any schemas!"
                    392:     
                    393:   prepare(f)
                    394: 
1.28      ht        395:   cl=string.find(':',e.name)
                    396:   if cl>-1:
                    397:     prefix=e.name[0:cl]
                    398:   else:
                    399:     prefix=''
                    400:   typename = QName(prefix,e.local,e.uri)
1.30      richard   401: 
                    402:   if not s:
                    403:     # any one will do
                    404:     s = f.schema
                    405: 
1.28      ht        406:   if s and s.vElementTable.has_key(typename):
                    407:     t=s.vElementTable[typename].effectiveType
1.21      ht        408:   else:
1.28      ht        409:     print "can't validate, because can't find type for %s" % typename
                    410:     t=None
1.27      richard   411:   if e and t and s:
1.31    ! richard   412:     print "validating with type %s" % typename
1.27      richard   413:     print "validate returns %s" % validate(e, t, s)
1.2       ht        414: 
# Script entry: the first argument is the instance document and any
# further arguments are schema documents; with no arguments fall back
# to the trivial example pair.
if len(sys.argv) <= 1:
  runit("triv.xml", ["triv.xsd"])
else:
  runit(sys.argv[1], sys.argv[2:])
1.24      ht        419: 
1.25      ht        420: # $Log: applyschema.py,v $
1.31    ! richard   421: # Revision 1.31.2.1  2000/02/08 21:43:39  ht
        !           422: # fork private branch to track internal drafts
        !           423: # change calling sequence of checkinSchema
1.30      richard   424: #
                    425: # Revision 1.31.1.1  2000/02/08 13:54:25  ht
                    426: # fork branch for non-public changes
1.29      ht        427: # calling sequence to checkinSchema changed
                    428: #
                    429: # Revision 1.31  2000/01/13 16:55:42  richard
                    430: # Finally do something with xsi:type
1.28      ht        431: #
                    432: # Revision 1.30  2000/01/10 17:36:34  richard
                    433: # changes for xsi:schemaLocation
1.27      richard   434: #
                    435: # Revision 1.29  2000/01/08 23:33:50  ht
                    436: # towards support for xsi:schemaLocation
1.26      ht        437: #
                    438: # Revision 1.28  2000/01/08 12:07:38  ht
                    439: # Change command-line arg sequence in preparation for use of schemaLocation!!!!!
1.25      ht        440: # Add debug printout for schemaLocation for now
                    441: #
                    442: # Revision 1.27  2000/01/07 17:08:26  richard
                    443: # start on xsi:type
                    444: #
1.24      ht        445: # Revision 1.26  2000/01/06 14:59:38  ht
1.1       ht        446: # fix command line bug, display args on entry
                    447: #
                    448: # Revision 1.25  2000/01/06 14:38:56  ht
                    449: # detect cross-scope keyref and signal error
                    450: #
                    451: # Revision 1.24  2000/01/03 17:02:37  ht
                    452: # Include result of sub-ordinate key checking in overall result
                    453: # Accommodate new calling sequence for xpath.find
                    454: # add Log and Id
                    455: #
                    456: #

Webmaster