Annotation of xmlschema/applyschema.py, revision 1.66.2.1

1.44      ht          1: # Copyright (C) 2000 LTG -- See accompanying COPYRIGHT and COPYING files
1.24      ht          2: # actually apply a schema to an instance
1.66.2.1! ht          3: # $Id: applyschema.py,v 1.67 2000/08/31 11:48:41 ht Exp $
1.58      ht          4: 
                      5: # TODO: enforce datatype constraints on xsi:type,
                      6: #   null, (noNamespace)schemaLocation
1.24      ht          7: 
1.63      ht          8: from PyLTXML import *
1.2       ht          9: from XML import *
1.58      ht         10: import os
1.41      ht         11: import XMLSchema
1.28      ht         12: import layer
1.3       aqw        13: import sys
1.4       richard    14: import re
1.18      ht         15: import xpath
1.20      ht         16: import types
1.41      ht         17: import string
1.53      ht         18: from urlparse import urljoin
1.58      ht         19: import tempfile
1.62      ht         20: import traceback
1.4       richard    21: 
                     22: whitespace = re.compile("^[ \t\r\n]*$")
1.33      ht         23: xsi = "http://www.w3.org/1999/XMLSchema-instance"
1.66.2.1! ht         24: vsraw="$Revision: 1.67 $ of $Date: 2000/08/31 11:48:41 $"
1.40      ht         25: vss=string.split(vsraw)
1.41      ht         26: vs="XSV %s/%s of %s %s"%(string.split(XMLSchema.versionString)[0],
1.40      ht         27:                          vss[1],vss[5],vss[6])
1.5       richard    28: 
def readXML(url):
  # Open url, read the whole document into an element tree and close
  # the input again.
  # Returns (elem, encoding): elem is the tree, or None if building it
  # raised; encoding comes from the input's doctype (its xencoding when
  # the plain encoding is 'unknown') and is None when there is no tree.
  flags = NSL_read|NSL_read_namespaces|NSL_read_defaulted_attributes
  source = Open(url, flags)
  encoding = None
  try:
    root = Element(source, 1)
  except:
    # any failure while building the tree just means "no document"
    root = None
  if root:
    doctype = source.doctype
    encoding = doctype.encoding
    if encoding == 'unknown':
      encoding = doctype.xencoding
  Close(source)
  return (root, encoding)
1.2       ht         46: 
def validate(element, typedef, schema, eltDecl):
  # Entry point for validating an element: make sure the schema's
  # factory has an error counter, run validateElement, and hand back
  # whatever the counter holds afterwards.
  factory = schema.factory
  if not hasattr(factory, 'errors'):
    factory.errors = 0
  validateElement(element, typedef, schema, eltDecl)
  return schema.factory.errors
1.2       ht         52: 
def validateElement(element, type, schema, eltDecl=None):
  # Assess one instance element; problems are reported through verror
  # and vwarn, and no meaningful value is returned.
  #   element -- the element to validate
  #   type    -- its type definition, or a false value for lax
  #              assessment (recursive call inside <any/>, or top level)
  #   schema  -- the schema whose element and type tables are consulted
  #   eltDecl -- the element declaration if the caller has it; otherwise
  #              it is looked up in schema by qualified name
  global vel, vtype
  # publish the element and type currently being processed at module level
  vel = element
  vtype = type
  if not eltDecl:
    eqn=XMLSchema.QName(None,element.local,element.uri)
    # consult the schema we were handed, not the module global `s`,
    # which only exists once runit has been called
    if schema.vElementTable.has_key(eqn):
      eltDecl=schema.vElementTable[eqn]
  nullable = eltDecl and eltDecl.nullable # TODO: is this right if no eltDecl?
  nulled = 0
  if element.nsattrs.has_key((xsi, "null")):
    if not nullable:
      verror(element,
             "xsi:null specified on non-nullable element %s" % element.name,
             schema,"cvc-elt.1.1")
      return
    nulled = (element.nsattrs[(xsi, "null")].value == "true")
  if element.nsattrs.has_key((xsi, "type")):
    # an xsi:type attribute replaces the declared type, provided it names
    # a known type that is a subtype of the declared one (if any)
    t = element.nsattrs[(xsi, "type")].value
    qt = XMLSchema.QName(t, element.nsdict)
    if schema.vTypeTable.has_key(qt):
      xsitype=schema.vTypeTable[qt]
    else:
      verror(element,"xsi:type %s undefined" % qt,schema,"cvc-elt.2.2")
      return
    if type and not xsitype.isSubtype(type):
      verror(element,
             "xsi:type %s is not a subtype of the declared type %s"%(qt,
                                                                     type.name),
             schema,"cvc-elt.2.3")
      return
    if type:
      vwarn(element,
            "using xsi:type %s instead of original %s" % (qt, type.name))
    else:
      vwarn(element,"using xsi:type %s" % qt)
    type = xsitype
  lax = not type
  # might have none in case of recursive call inside <any/>, or at top level
  if nulled:
    return validateElementNull(element, type, schema)
  if type:
    # TODO: check type is not abstract
    if ((not type==XMLSchema.urType) and
        (isinstance(type, XMLSchema.AbInitio) or
         isinstance(type, XMLSchema.SimpleType))):
      validateElementSimple(element, type, schema)
      if eltDecl:
        validateKeys(eltDecl,element)
      return
    # a complexType
    ad=type.attributeDeclarations
    ps=type.prohibitedSubstitutions
    et=type.elementTable
  else:
    # no type at all: nothing is declared, everything is assessed laxly
    ps=[]
    ad={}
    et={}
  assignAttributeTypes(element, ad, ps, schema, lax)
  validateAttributeTypes(element, element.attrTable, ad, schema)
  assignChildTypes(element.children, et, ps, schema, lax)
  # we must look at the content model before checking the types, so that
  # we know which children matched <any>
  if type:
    validateContentModel(element, type, schema)
  validateChildTypes(element.children, schema, lax)
  if eltDecl:
    validateKeys(eltDecl,element)
1.2       ht        123: 
def validateElementNull(element, type, schema):
  # An element that has been nulled must have no children at all;
  # report an error if it has any.
  # TODO: should check for fixed value constraint
  if len(element.children) == 0:
    return
  verror(element,
         "element %s is nulled but is not empty" % element.name,
         schema,
         "cvc-elt.1.2.1")
1.48      richard   129: 
def validateElementSimple(element, type, schema):
  # An element of simple type may carry no attributes other than those
  # in the xsi namespace.  The first offending attribute is reported and
  # ends the check; otherwise the text content is checked against the
  # type and validateTextModel's result is returned.
  if element.attrs:
    for attr in element.attrs.values():
      if attr.uri == xsi:
        continue
      message = ("element {%s}%s with simple type not allowed attributes"%
                 (element.uri, element.name))
      verror(element, message, schema, "cvc-elt.4.1.1")
      return
  return validateTextModel(element, type, schema)
                    144: 
def assignAttributeTypes(element, attrdefs, extendable, schema, lax):
  # Index the element's attributes by qualified name in element.attrTable
  # and give each non-xsi attribute a .type:
  #   - the declaration from attrdefs when there is one;
  #   - under lax assessment, the schema's top-level declaration for a
  #     namespaced attribute, else None;
  #   - the "#any" wildcard from attrdefs when it allows the namespace;
  #   - otherwise None, after reporting the attribute as undeclared.
  # xsi attributes are only checked against the list of known names.
  # (extendable is accepted but not consulted here.)
  knownXsi = ('type','null','schemaLocation','noNamespaceSchemaLocation')
  table = {}
  element.attrTable = table
  for attr in element.attrs.values():
    qname = XMLSchema.QName(None,attr.local,attr.uri)
    table[qname] = attr
    if attr.uri == xsi:
      if attr.local not in knownXsi:
        verror(element,"unknown xsi attribute %s" % qname,schema,
               "cvc-complex-type.1.3")
      continue
    if attrdefs.has_key(qname):
      attr.type = attrdefs[qname].attributeDeclaration
      continue
    if lax:
      if attr.uri and schema.vAttributeTable.has_key(qname):
        attr.type = schema.vAttributeTable[qname]
      else:
        attr.type = None
      continue
    if attrdefs.has_key("#any"):
      wildcard = attrdefs["#any"].attributeDeclaration
      if wildcard.allows(attr.uri):
        attr.type = wildcard
        continue
    verror(element,"undeclared attribute %s" % qname,schema,
           "cvc-complex-type.1.3")
    attr.type = None
  return
1.2       ht        173: 
def validateAttributeTypes(element,attrs, attrdefs, schema):
  # First pass: every declaration in attrdefs whose minOccurs is 1 must
  # have a matching entry in attrs.
  # Second pass: every non-xsi attribute that was given a type is checked
  # against it -- a wildcard validates the attribute itself, any other
  # declaration checks the value against its type definition (if it has
  # one).  A true result from either check is reported as an error.
  for (name,use) in attrdefs.items():
    if use.minOccurs==1 and not attrs.has_key(name):
      verror(element,"required attribute %s not present"%name,schema,
             'cvc-complex-type.1.4')
  for (name,attr) in attrs.items():
    if name.uri==xsi or not attr.type:
      continue
    decl = attr.type
    if isinstance(decl,XMLSchema.Wildcard):
      problem = decl.validate(attr,schema,'attribute',element)
    elif decl.typeDefinition:
      problem = decl.typeDefinition.validateText(attr.value,element, schema)
    else:
      problem = None
    if problem:
      verror(element,
             "attribute type check failed for %s: %s%s"%(name,
                                                         attr.value,
                                                         problem),
             schema,'cvc-attribute.1.2')
1.2       ht        196: 
def assignChildTypes(children, elementTable, extendable, schema, lax):
  # Record a provisional .type on every element child: the entry from
  # elementTable if the name is there, else (lax only, namespaced names
  # only) the type of the schema's top-level declaration, else None.
  # A missing type is not an error yet; that depends on what the child
  # matches in the content model.  Always returns 1.
  for kid in children:
    if not isinstance(kid,Element):
      continue
    qname = XMLSchema.QName(None,kid.local,kid.uri)
    if elementTable.has_key(qname):
      found = elementTable[qname][1]
    elif lax and kid.uri and schema.vElementTable.has_key(qname):
      found = schema.vElementTable[qname].typeDefinition
    else:
      found = None
    kid.type = found
  return 1
                    211: 
                    212: def validateContentModel(element, type, schema):
                    213:   # trace a path through the content model
                    214:   # if a child matches an <any tag=... type=...> we need to indicate
                    215:   # that that child should be validated with its xsd:type if it has one
                    216:   # if a child matches some other kind of <any> we need to indicate
                    217:   # that it's not an error if we can't find its type
                    218: 
1.4       richard   219: #  print "validating model for %s content %s" % (element.name, type.content)
1.33      ht        220:   if type.contentType == "empty":
                    221:     validateEmptyModel(element, type, schema)
                    222:   elif type.contentType == "textOnly":
                    223:     validateTextModel(element, type.model, schema)
                    224:   else:
                    225:     validateElementModel(element, type.fsm,
                    226:                          type.contentType == "mixed", schema)
1.2       ht        227: 
                    228: def validateEmptyModel(element, type, schema):
                    229:   if len(element.children) != 0:
1.58      ht        230:     verror(element,"element %s must be empty but is not" % element.name,schema,
                    231:            "cvc-complex-type.1.2")
1.2       ht        232: 
                    233: def validateTextModel(element, type, schema):
                    234:   # check that:
                    235:   #   it has one pcdata child, and if so
                    236:   #     the text of the pcdata matches the type
                    237:   name = element.name
1.58      ht        238:   n=0
                    239:   for child in element.children:
                    240:     if isinstance(child,Pcdata):
                    241:       n=1
                    242:     elif isinstance(child,Element):
                    243:       verror(element,
                    244:              "element {%s}%s with simple type not allowed element children"%
                    245:              (element.uri,name),schema,"cvc-complex-type.1.2.2")
1.66      ht        246:       # TODO: mark this (and any others) as not validated
1.62      ht        247:       return
1.2       ht        248:   else:
                    249:     if n == 0:
                    250:       text = ""
                    251:     else:
                    252:       text = element.children[0].value
1.58      ht        253:     res=type.validateText(text, element, schema)
1.33      ht        254:     if res:
                    255:       verror(element,"element content failed type check: %s%s"%(text,res),
1.58      ht        256:              schema,"cvc-complex-type.1.2.2")
1.2       ht        257: 
def validateElementModel(element, fsm, mixed, schema):
  # Walk the element's children through the content-model automaton.
  #   - Text that is not pure whitespace is an error unless mixed is
  #     true; that error ends the walk.
  #   - An element child follows the edge labelled with its name; failing
  #     that, the edge of the last wildcard that allows its namespace, in
  #     which case the wildcard becomes the child's .type.
  #   - A child matching no edge is reported and the walk carries on from
  #     the same node.
  # Finally the node reached must be an end node.
  node = fsm.startNode
  for kid in element.children:
    if isinstance(kid,Pcdata):
      if mixed or whitespace.match(kid.value):
        continue
      verror(kid,
             "text not allowed in element %s: |%s|" %
             (element.name,kid.value),
             schema,"cvc-complex-type.1.2.3")
      return
    if not isinstance(kid,Element):
      continue
    qname = XMLSchema.QName(None, kid.local, kid.uri)
    exact = None
    wild = None
    for edge in node.edges:
      if edge.label == qname:
        exact = edge.dest
        break
      if isinstance(edge.label, XMLSchema.Wildcard):
        if edge.label.allows(kid.uri):
          wild = edge.dest
          wildLabel = edge.label
    if exact:
      node = exact
    elif wild:
      node = wild
      kid.type = wildLabel
    else:
      verror(kid,
             "element %s not allowed here in element %s:\n"%
             (qname, XMLSchema.QName(None,element.local,element.uri)),
             schema,"cvc-complex-type.1.2.4",0,fsm.asXML())
  if not node.isEndNode:
    verror(element,
           "content of %s is not allowed to end here:\n"%
           element.name,
           schema,"cvc-complex-type.1.2.4",1,fsm.asXML())
  return
1.2       ht        305: 
def validateChildTypes(children, schema, lax):
  # Validate every element child: against its recorded .type when it has
  # one; recursively with no type when assessment is lax; otherwise the
  # child is reported as undeclared.
  for kid in children:
    if not isinstance(kid,Element):
      continue
    if kid.type:
      kid.type.validate(kid,schema,'element',kid)
      continue
    if lax:
      # TODO: record impact of missing type in PSVI
      validateElement(kid,None,schema) # will be lax because no type
      continue
    verror(kid,
           "undeclared element %s"%
           XMLSchema.QName(None,kid.local,kid.uri),
           schema,"src-resolve")
1.2       ht        322: 
def validateKeys(decl,elt):
  # Check the identity constraints declared on decl against elt.
  # elt.keyTabs is reset first; the key and unique passes fill it with
  # one table per constraint, and the keyref pass then reads from it.
  elt.keyTabs = {}
  validateKeys1(elt, decl.keys, 1)      # keys: fields are required
  validateKeys1(elt, decl.uniques, 0)   # uniques: fields are optional
  validateKeyRefs(elt, decl.keyrefs)
1.22      ht        328: 
                    329: def validateKeys1(elt,kds,reqd):
                    330:   for key in kds:
1.21      ht        331:     tab={}
                    332:     sp=xpath.XPath(key.selector)
1.24      ht        333:     candidates=sp.find(elt)
1.21      ht        334:     if candidates:
                    335:       fps=map(lambda f:xpath.XPath(f),key.field)
                    336:       for s in candidates:
1.22      ht        337:         keyKey=buildKey(s,fps)
1.59      ht        338:         if keyKey:
                    339:           if len(keyKey)>1:
                    340:             keyKey=tuple(keyKey)
                    341:           else:
                    342:             keyKey=keyKey[0]
                    343:         else:
                    344:           if reqd:
                    345:             verror(s,
                    346:                    "missing one or more fields %s from key %s"%(key.field,
                    347:                                                                 key.name),
                    348:                    key.schema,"cvc-identity-constraint.2.2.2")
1.22      ht        349:           break
1.21      ht        350:        if tab.has_key(keyKey):
1.58      ht        351:           if reqd:
                    352:             code="cvc-identity-constraint.2.2.3"
                    353:           else:
                    354:             code="cvc-identity-constraint.2.1.2"
                    355:          verror(s,"duplicate key %s, first appearance was %s"%
                    356:                  (str(keyKey),
                    357:                   XMLSchema.whereString(tab[keyKey].where)),
                    358:                  key.schema,code)
1.21      ht        359:        else:
                    360:          tab[keyKey]=s
1.22      ht        361:     elt.keyTabs[key.name]=tab
                    362: 
                    363: def buildKey(s,fps):
                    364:   keyKey=[]
                    365:   for fp in fps:
1.24      ht        366:     kv=fp.find(s)
1.22      ht        367:     if kv:
                    368:       if len(kv)>1:
1.58      ht        369:         # TODO error or shouldnt?
1.33      ht        370:         vwarn(s,"oops, multiple field hits for %s at %s: %s"%(fp.str,s,kv))
1.22      ht        371:       if isinstance(kv[0],Element):
                    372:         if (len(kv[0].children)>0 and
                    373:             isinstance(kv[0].children[0],Pcdata)):
                    374:           keyKey.append(kv[0].children[0].value)
                    375:         else:
                    376:           # XPath says in this case value is the empty string
                    377:           pass
1.54      ht        378:       elif somestring(type(kv[0])):
1.22      ht        379:         keyKey.append(kv[0])
                    380:       else:
1.58      ht        381:         # TODO error or shouldnt?
1.33      ht        382:         vwarn(s,"oops, key value %s:%s"%(type(kv[0]),kv[0]))
1.22      ht        383:     else:
                    384:       return None
                    385:   return keyKey
                    386: 
                    387: def validateKeyRefs(elt,krds):
                    388:   res=1
                    389:   for ref in krds:
1.25      ht        390:     if elt.keyTabs.has_key(ref.refer):
                    391:       keyTab=elt.keyTabs[ref.refer]
                    392:       if keyTab=='bogus':
                    393:        break
                    394:     else:
                    395:       elt.keyTabs[ref.refer]='bogus'
1.52      richard   396:       verror(elt,
1.33      ht        397:              "No key or unique constraint named %s declared, refed by keyref %s"%(ref.refer,ref.name),
1.58      ht        398:              ref.schema,"cvc-identity-constraint.2.3.2")
1.25      ht        399:       break
1.22      ht        400:     sp=xpath.XPath(ref.selector)
1.24      ht        401:     candidates=sp.find(elt)
1.22      ht        402:     if candidates:
                    403:       fps=map(lambda f:xpath.XPath(f),ref.field)
                    404:       for s in candidates:
                    405:         keyKey=buildKey(s,fps)
                    406:         if not keyKey:
                    407:           break
                    408:        if len(keyKey)>1:
                    409:          keyKey=tuple(keyKey)
                    410:        else:
                    411:          keyKey=keyKey[0]
1.25      ht        412:        if not keyTab.has_key(keyKey):
1.58      ht        413:          verror(s,"no key in %s for %s"%(ref.refer,str(keyKey)),ref.schema,
                    414:                  "cvc-identity-constraint.2.3.2")
1.21      ht        415: 
def findSchemaLocs(element,schema):
  # Gather schema location hints from element and, recursively, from all
  # of its element children.  Returns a list of (namespace, location)
  # pairs: xsi:schemaLocation supplies its whitespace-separated members
  # two at a time (a left-over odd member is reported as an error), and
  # xsi:noNamespaceSchemaLocation supplies (None, value).
  pairs = []
  for attr in element.attrs.values():
    if attr.uri != xsi:
      continue
    if attr.local == "schemaLocation":
      members = string.split(attr.value)
      i = 0
      while i < len(members):
        if i+1 < len(members):
          pairs.append((members[i], members[i+1]))
        else:
          verror(element,"xsi:schemaLocation must be a list with an even number of members: %s"%members,schema,"???")
        i = i+2
    elif attr.local == "noNamespaceSchemaLocation":
      pairs.append((None,attr.value))
  for kid in element.children:
    if isinstance(kid, Element):
      nested = findSchemaLocs(kid,schema)
      if nested:
        for pair in nested:
          pairs.append(pair)
  return pairs
                    436:   
def runitAndShow(en,rns=[],k=0,style=None,enInfo=None):
  # Run the validator via runit and print the result element on stderr.
  #   en, rns, k -- passed straight through to runit
  #   style      -- if given, href for an xml-stylesheet processing
  #                 instruction written after the XML declaration
  #   enInfo     -- if given, a dictionary of extra attributes added to
  #                 the result element before it is printed
  # The output encoding is the one runit reports, or UTF-8 when it
  # reports none.  Returns the result element.
  (res,encoding)=runit(en,rns,k)
  encoding = encoding or 'UTF-8'
  errout=OpenStream(sys.stderr,
                    CharacterEncodingNames[encoding],
                    NSL_write+NSL_write_plain)
  # only a non-default encoding is named in the XML declaration
  if encoding=='UTF-8':
    es=""
  else:
    es=" encoding='%s'"%encoding
  sys.stderr.write("<?xml version='1.0'%s?>\n"%es)
  if style:
    sys.stderr.write("<?xml-stylesheet type='text/xsl' href='%s'?>\n"%style)
  if enInfo:
    for (attrName,attrValue) in enInfo.items():
      res.addAttr(attrName,attrValue)
  res.printme(errout)
  sys.stderr.write("\n")
  Close(errout)
  return res
                    458: 
1.37      ht        459: def runit(en,rns=[],k=0):
1.58      ht        460:   global s,e,t,f,res,ed
1.33      ht        461: 
1.45      ht        462:   ss = s = None
1.33      ht        463: 
1.41      ht        464:   f=XMLSchema.newFactory()
1.58      ht        465:   f.errors=0
1.36      ht        466:   base=f.fileNames[0]
1.53      ht        467:   ren=urljoin(base,en)
1.30      richard   468: 
1.58      ht        469:   res=Element("xsv")
                    470:   f.resElt=res
                    471:   res.addAttr("xmlns","http://www.w3.org/2000/05/xsv")
                    472:   res.addAttr("version",vs)
                    473:   res.addAttr("target",ren)
                    474:   if rns:
                    475:     res.addAttr("schemaDocs",string.join(rns,' '))
                    476: 
                    477:   rdn=tempfile.mktemp("xsverrs")
                    478:   redirect=open(rdn,"w+")
                    479:   savedstderr=os.dup(2)                        # save stderr
                    480:   os.dup2(redirect.fileno(),2)
                    481:   try:
                    482:     (e,encoding)=readXML(ren)
1.63      ht        483:   except LTXMLinter.error:
1.58      ht        484:     e=None
                    485:     encoding=None
                    486:   if not e:
                    487:     res.addAttr('outcome',"validation not attempted")
                    488:     sys.stderr.flush()
                    489:     registerRawErrors(redirect,res)
                    490:     # put stderr back
                    491:     os.dup2(savedstderr,2)
                    492:     return (res,None)
                    493: 
                    494:   # TODO: check each schema doc against schema for schemas, if possible,
                    495:   # unless caller explicitly opts out (?)
1.28      ht        496:   if rns:
1.49      ht        497:     try:
1.53      ht        498:       s = XMLSchema.fromFile(urljoin(base,rns[0]),f)
1.63      ht        499:     except LTXMLinter.error:
1.49      ht        500:       pass
1.30      richard   501:     for rn in rns[1:]:
1.49      ht        502:       try:
1.53      ht        503:         ss=ss or XMLSchema.fromFile(urljoin(base,rn),f)
1.63      ht        504:       except LTXMLinter.error:
1.49      ht        505:         pass
1.30      richard   506: 
1.45      ht        507:   if not s:
1.65      ht        508:     if ss:
                    509:       s=ss
                    510:     else:
                    511:       s = XMLSchema.Schema(f,None)
                    512:       s.targetNS='##dummy'
1.45      ht        513: 
1.58      ht        514:   schemaLocs = findSchemaLocs(e,s)
                    515:   res.addAttr('schemaLocs',join(map(lambda p:"%s -> %s"%(p[0] or 'None',p[1]),
                    516:                                     schemaLocs),
                    517:                                 '; '))
1.30      richard   518:   for (ns, sl) in schemaLocs:
1.49      ht        519:     try:
                    520:       XMLSchema.checkinSchema(f, ns, sl,e,ren)
1.63      ht        521:     except LTXMLinter.error:
1.49      ht        522:       pass
1.30      richard   523:   
1.58      ht        524:   res.addAttr('docElt',"{%s}%s"%(e.uri,e.local))
1.34      ht        525:   if (e.uri and
                    526:       (e.uri not in ('http://www.w3.org/XML/1998/namespace',
                    527:                    'http://www.w3.org/1999/XMLSchema-instance')) and
                    528:       not f.schemas.has_key(e.uri)):
                    529:     try:
1.49      ht        530:       XMLSchema.checkinSchema(f,e.uri,e.uri,e,ren)
1.58      ht        531:       res.addAttr('nsURIDeref','success')
1.63      ht        532:     except LTXMLinter.error:
1.58      ht        533:       res.addAttr('nsURIDeref','failure')
1.30      richard   534:     
1.58      ht        535:   sys.stderr.flush()
                    536:   registerRawErrors(redirect,res)
                    537:   # put stderr back
                    538:   os.dup2(savedstderr,2)
                    539: 
                    540:   try:
                    541:     ecount=XMLSchema.prepare(f)
                    542:   except:
                    543:     ecount=-1
                    544:     pfe=Element("bug")
                    545:     pfe.children=[Pcdata("Factory preparation failure:\n%s"%
                    546:                          traceback.format_exception(sys.exc_type,
                    547:                                                     sys.exc_value,
                    548:                                                     sys.exc_traceback))]
                    549:     res.children.append(pfe)
1.33      ht        550:   
1.58      ht        551:   kgm="true"
                    552:   kg=1
1.33      ht        553:   if ecount:
1.58      ht        554:     if ecount<0:
                    555:       kg=0
1.33      ht        556:     else:
1.58      ht        557:       if not k:
                    558:         kg=0
                    559:     if not kg:
                    560:      kgm="false"
                    561:   res.addAttr('schemaErrors',str(ecount))
                    562:   res.addAttr('instanceAssessed',kgm)
                    563:   if not kg:
                    564:     return (res,encoding)
1.30      richard   565: 
1.28      ht        566:   cl=string.find(':',e.name)
                    567:   if cl>-1:
                    568:     prefix=e.name[0:cl]
                    569:   else:
                    570:     prefix=''
1.41      ht        571:   eltname = XMLSchema.QName(prefix,e.local,e.uri)
1.30      richard   572: 
                    573:   if not s:
                    574:     # any one will do
1.33      ht        575:     s = f.sfors
                    576:   t=None
                    577:   
1.55      ht        578:   ed=None
1.33      ht        579:   if s and s.vElementTable.has_key(eltname):
1.55      ht        580:     ed=s.vElementTable[eltname]
                    581:     t=ed.typeDefinition
1.58      ht        582:   if t:
                    583:     if t.name:
1.66      ht        584:       if hasattr(t,'qname'):
                    585:         tn=t.qname.string()
                    586:       else:
                    587:         tn=t.name
1.58      ht        588:     else:
                    589:       tn='[Anonymous]'
                    590:     res.addAttr('rootType',tn)
                    591:     res.addAttr('validation','strict')
                    592:   else:
                    593:     res.addAttr('validation','lax')
1.33      ht        594: 
                    595:   if e and s:
1.55      ht        596:     validate(e, t, s, ed)
1.58      ht        597:     res.addAttr('instanceErrors',str(s.factory.errors))
                    598:     return (res,encoding)
1.30      richard   599: 
def registerRawErrors(redirect,res):
  """Transfer captured raw output from redirect into the result element.

  redirect -- a seekable file-like object; it is always closed before
              this function returns normally
  res      -- element whose children list receives an XMLMessages
              element when redirect holds any output

  Nothing is added to res when the current position of redirect is 0.
  """
  if not redirect.tell():
    # nothing was written, just release the file
    redirect.close()
    return
  # rewind so the complete capture is read back
  redirect.seek(0)
  messages=Element("XMLMessages")
  captured="\n%s"%redirect.read()
  messages.children=[Pcdata(captured)]
  res.children.append(messages)
  redirect.close()
                    608: 
                    609: def verror(elt,message,schema,code=None,two=0,daughter=None):
                    610:   # code argument identifies CVC
                    611:   ve=Element("invalid")
                    612:   ve.children=[Pcdata(message)]
                    613:   if code:
                    614:     ve.addAttr("code",code)
1.33      ht        615:   if two:
1.58      ht        616:     XMLSchema.where(ve,elt.where2)
1.33      ht        617:   else:
1.58      ht        618:     XMLSchema.where(ve,elt.where)
                    619:   if daughter:
                    620:     ve.children.append(daughter)
                    621:   res.children.append(ve)
1.33      ht        622:   schema.factory.errors=schema.factory.errors+1
                    623: 
                    624: def vwarn(elt,message):
1.58      ht        625:   ve=Element("warning")
                    626:   ve.children=[Pcdata(message)]
1.33      ht        627:   if elt:
1.58      ht        628:     XMLSchema.where(ve,elt.where)
                    629:   res.children.append(ve)
1.24      ht        630: 
1.41      ht        631: # validation methods for schema components
                    632: 
def av(self,child,schema,kind,elt):
  """Validate an item that was admitted by this wildcard.

  child  -- the element or attribute that matched the wildcard
  schema -- schema supplying the declaration tables and the factory
  kind   -- 'element' for an element; any other value is treated as an
            attribute
  elt    -- item used as the location for warnings and errors

  A warning is always issued noting that the wildcard matched.  Unless
  processContents is 'skip', and provided the factory knows a schema for
  the child's namespace, a declaration for the child is looked up and
  the child is validated against its type.  For an attribute the result
  of the type's validateText is returned; every other path returns None.
  """
  qn=XMLSchema.QName(None,child.local,child.uri)
  vwarn(elt,"allowing %s because it matched wildcard(%s)" %
        (qn,self.allowed))
  if self.processContents=='skip':
    return
  if not schema.factory.schemas.has_key(child.uri):
    # only try if we might win -- needs work
    return
  isElement=(kind=='element')
  try:
    if isElement:
      decl=schema.vElementTable[qn]
    else:
      decl=schema.vAttributeTable[qn]
  except KeyError:
    decl=None
  if decl:
    declType=decl.typeDefinition
  else:
    declType=None
  if declType:
    vwarn(None,"validating it against %s" %
          (declType.name or 'anonymous type'))
    if isElement:
      validateElement(child,declType,schema)
      return
    return declType.validateText(child.value,elt,schema)
  # no usable declaration: strict wildcards complain, unless an element
  # carries its own xsi:type
  if (self.processContents=='strict' and
      (not isElement or not child.nsattrs.has_key((xsi, "type")))):
    # TODO check this against actual def'n of missing component
    verror(elt,
           "can't find a type for wildcard-matching %s %s" %(kind, qn),
           schema,
           "src-resolve")
    return
  if isElement:
    vwarn(None,"validating it laxly")
    validateElement(child,None,schema)

XMLSchema.Wildcard.validate=av
1.42      ht        666: 
def tv(self,child,schema,kind,elt):
  """Validate child against this type by handing it to validateElement.

  kind and elt are accepted so the signature matches the wildcard
  validate method, but they are not used here.
  """
  validateElement(child,self,schema)

# installed as the validate method of both type classes
XMLSchema.Type.validate=tv
XMLSchema.AbInitio.validate=tv
1.41      ht        671: 
def validateText(self, text, context, schema):
  """Check text against this simple type, dispatching on its variety.

  text    -- the string to check
  context -- passed through to checkString
  schema  -- passed through to nested validateText calls

  Returns a false value when the text is accepted (always the case for
  the urType); otherwise a true value describing the failure, as
  produced by the underlying check.
  """
  if self==XMLSchema.urType:
    return
  variety=self.variety
  if variety=='atomic':
    # ref may have failed
    primitive=self.primitiveType
    return (primitive and
            primitive.checkString(text,context))
  if variety=='list':
    itemType=self.itemType
    # TODO: what about post-list facets?
    for token in string.split(text):
      problem=itemType.validateText(token,context,schema)
      if problem:
        # first bad item decides the outcome
        return problem+' in list'
    return
  if variety=='union':
    failures=[]
    # TODO: what about post-union facets?
    for member in self.memberTypes:
      problem=member.validateText(text,context,schema)
      if not problem:
        # bingo
        return
      failures.append(problem)
    # no subtypes won, we lose
    return " all members of union failed: %s"%failures
  XMLSchema.shouldnt('vv')

XMLSchema.SimpleType.validateText=validateText
                    705: 
                    706: def validateText(self, text, context, schema):
                    707:     return self.checkString(text,context)
                    708: 
                    709: XMLSchema.AbInitio.validateText=validateText
                    710: 
                    711: 
# command-line handling; the validation run only starts when this file
# is executed as a script

def parseCommandLine(argl):
  """Split leading options off a command-line argument list.

  argl -- list of argument strings (without the program name)

  Recognised leading options:
    -k        set the k flag (returned as 1, otherwise 0)
    -s STYLE  use STYLE as the stylesheet (otherwise None)
  Option scanning stops at the first argument that is not one of these.

  Returns a tuple (k, style, rest), where rest is the list of remaining
  arguments.  Raises ValueError when -s is the last argument, i.e. has
  no value following it.
  """
  k=0
  style=None
  while argl:
    flag=argl[0]
    if flag=='-k':
      k=1
      argl=argl[1:]
    elif flag=='-s':
      if len(argl)<2:
        # previously this surfaced as a bare IndexError
        raise ValueError("-s requires a stylesheet argument")
      style=argl[1]
      argl=argl[2:]
    else:
      break
  return (k,style,argl)

if __name__=='__main__':
  try:
    (k,style,argl)=parseCommandLine(sys.argv[1:])
  except ValueError:
    sys.stderr.write("applyschema: %s\n"%sys.exc_info()[1])
    sys.exit(2)

  if argl:
    # first argument is the instance, the rest are schema documents
    runitAndShow(argl[0],argl[1:],k,style)
  else:
    # no arguments: fall back to the bundled sample, which always uses
    # the default stylesheet
    runitAndShow("tiny.xml",["tiny.xsd"],k,"xsv.msxsl")
1.41      ht        732: 
1.25      ht        733: # $Log: applyschema.py,v $
1.66.2.1! ht        734: # Revision 1.67  2000/08/31 11:48:41  ht
        !           735: # Direct support for validating lists and unions
        !           736: #
        !           737: # Revision 1.67  2000/08/31 11:48:41  ht
        !           738: # Direct support for validating lists and unions
        !           739: #
1.66      ht        740: # Revision 1.66  2000/08/22 13:11:30  ht
                    741: # handle type w/o qname as document validation type
1.65      ht        742: # remove special treatment for AbInitio simple types on elements,
1.64      ht        743: # thereby fixing list validation bug
                    744: #
                    745: # Revision 1.65  2000/07/12 09:31:58  ht
1.63      ht        746: # try harder to always have a schema
                    747: #
                    748: # Revision 1.64  2000/07/10 14:39:02  ht
                    749: # prepare for fileinfo to runit
1.62      ht        750: #
                    751: # Revision 1.63  2000/07/05 09:05:37  ht
                    752: # change name to PyLTXML
1.61      ht        753: #
                    754: # Revision 1.62  2000/07/03 09:37:38  ht
                    755: # bail out if textonly has elt daughter(s)
1.60      ht        756: # add missing import
                    757: #
                    758: # Revision 1.61  2000/06/27 09:25:51  ht
1.59      ht        759: # attempt to handle interaction between xsi:type and <any>
                    760: #
                    761: # Revision 1.60  2000/06/24 11:17:07  ht
1.58      ht        762: # fix bug in unqualified xsi:type
                    763: #
                    764: # Revision 1.59  2000/06/22 10:31:33  ht
                    765: # Bug in unique processing -- broke on missing field
                    766: #
                    767: # Revision 1.58  2000/06/20 08:07:42  ht
                    768: # merge xmlout branches back in to main line
                    769: #
                    770: 
                    771: # Revision 1.57  2000/05/18 08:01:25  ht
                    772: # fix bug in handling of xsi:type
                    773: #
                    774: # Revision 1.56  2000/05/14 12:19:34  ht
                    775: # add context to checkSting calls
                    776: #
                    777: # Revision 1.55  2000/05/11 11:55:57  ht
                    778: # just better handling of lax validation from other branch
                    779: #
                    780: # Revision 1.54.2.16  2000/06/15 16:03:20  ht
                    781: # cover several missing definition cases
                    782: #
                    783: # Revision 1.54.2.15  2000/06/03 16:29:30  ht
                    784: # oops, removing debugging comment
                    785: #
                    786: # Revision 1.54.2.14  2000/06/03 13:45:55  ht
                    787: # catch arity bug in xsi:schemaLoc
                    788: #
                    789: # Revision 1.54.2.13  2000/05/30 09:35:43  ht
                    790: # fix encoding bug when things break down altogether
                    791: #
                    792: # Revision 1.54.2.12  2000/05/29 08:46:53  ht
                    793: # strong enforcement of nullable
                    794: # add error codes to all errors
                    795: # remove remaining __class__ tests
                    796: # change error reporting wrt disallowed content
                    797: #
                    798: # Revision 1.54.2.11  2000/05/24 20:46:47  ht
                    799: # make validateText a method, split across SimpleType and AbInitio
                    800: #
                    801: # Revision 1.54.2.10  2000/05/24 12:03:28  ht
                    802: # modest effort to validate list types
                    803: # fix bug in noNamespaceSchemaLocation handling at validation time
                    804: #
                    805: # Revision 1.54.2.9  2000/05/22 16:11:52  ht
                    806: # use OpenStream, take more control of encoding
                    807: #
                    808: # Revision 1.54.2.8  2000/05/18 17:37:40  ht
                    809: # parameterise stylesheet,
                    810: # remove formatting from xsv:xsv attributes,
                    811: # add namespace decl
                    812: #
                    813: # Revision 1.54.2.7  2000/05/18 07:59:48  ht
                    814: # fix xsi:type validation bug
                    815: #
                    816: # Revision 1.54.2.6  2000/05/16 16:31:11  ht
                    817: # fix bug handling un-typed element declarations == urType validation
                    818: #
                    819: # Revision 1.54.2.5  2000/05/14 12:29:59  ht
                    820: # merge QName checking from main branch
                    821: #
                    822: # Revision 1.54.2.4  2000/05/12 15:15:01  ht
                    823: # process keys even if type is simple,
                    824: # add a few codes to get started
                    825: #
                    826: # Revision 1.54.2.3  2000/05/11 13:59:11  ht
                    827: # convert verror/vwarn to produce elements
                    828: # eliminate a few special error outputs in favour of special
                    829: # sub-elements
1.57      ht        830: #
                    831: # Revision 1.54.2.2  2000/05/11 11:14:00  ht
                    832: # more error protection
1.56      ht        833: # handle lax recursively and at the start
                    834: #
                    835: # Revision 1.54.2.1  2000/05/10 11:36:47  ht
1.55      ht        836: # begin converting to XML output
                    837: #
                    838: # Revision 1.56  2000/05/14 12:19:34  ht
1.54      ht        839: # add context to checkSting calls
                    840: #
                    841: # Revision 1.55  2000/05/11 11:55:57  ht
                    842: # just better handling of lax validation from other branch
1.53      ht        843: #
                    844: # Revision 1.54  2000/05/09 14:52:52  ht
                    845: # Check for strings in a way that works with or without 16-bit support
1.52      richard   846: #
                    847: # Revision 1.53  2000/05/09 12:27:58  ht
                    848: # replace our hack with python's url parsing stuff
1.51      ht        849: # make f global for debugging
                    850: #
                    851: # Revision 1.52  2000/05/05 15:15:45  richard
1.50      richard   852: # wrong (?) elt arg to verror in validateKeyRefs
                    853: #
                    854: # Revision 1.51  2000/05/04 07:56:35  ht
1.49      ht        855: # Fix typo in opportunistic attribute validation
                    856: #
                    857: # Revision 1.50  2000/05/01 15:07:00  richard
1.48      richard   858: # bug fix schema -> key.schema
                    859: #
                    860: # Revision 1.49  2000/05/01 10:05:43  ht
                    861: # catch various missing file errors more gracefully
1.47      richard   862: #
                    863: # Revision 1.48  2000/04/28 15:40:01  richard
                    864: # Implement xsi:null (still don't check nullable)
1.46      ht        865: #
                    866: # Revision 1.47  2000/04/28 15:11:23  richard
                    867: # allow xsi: attributes on simple type
                    868: # moved eltDecl code up validateElement ready for implementing xsi:null
1.45      ht        869: #
                    870: # Revision 1.46  2000/04/27 09:41:18  ht
                    871: # remove raw types from error messages
1.44      ht        872: #
                    873: # Revision 1.45  2000/04/27 09:30:21  ht
                    874: # check that inputs are actually schemas,
                    875: # remove schema arg to doImport, checkInSchema
                    876: #
                    877: # Revision 1.44  2000/04/26 13:00:40  ht
1.43      ht        878: # add copyright
                    879: #
                    880: # Revision 1.43  2000/04/24 20:46:40  ht
1.42      ht        881: # cleanup residual bugs with massive rename,
                    882: # rename Any to Wildcard,
                    883: # replace AnyAttribute with Wildcard,
                    884: # get validation of Wildcard working in both element and attribute contexts
1.41      ht        885: #
                    886: # Revision 1.42  2000/04/24 15:08:34  ht
                    887: # minor glitches, tiny.xml works again
1.40      ht        888: #
                    889: # Revision 1.41  2000/04/24 15:00:09  ht
                    890: # wholesale name changes -- init. caps for all classes,
1.39      ht        891: # schema.py -> XMLSchema.py
                    892: #
                    893: # Revision 1.40  2000/04/24 11:09:17  ht
1.38      ht        894: # make version string universally available
                    895: #
                    896: # Revision 1.39  2000/04/24 10:06:59  ht
1.37      ht        897: # add version info to message
                    898: #
                    899: # Revision 1.38  2000/04/24 10:02:39  ht
                    900: # change invocation message
1.36      ht        901: #
                    902: # Revision 1.37  2000/04/24 09:41:43  ht
                    903: # clean up invocation some more, add k arg't to runit
1.35      ht        904: #
                    905: # Revision 1.36  2000/04/21 09:32:21  ht
                    906: # another dose of resolveURL
1.34      ht        907: # use tiny only if run from command line
                    908: #
                    909: # Revision 1.35  2000/04/20 22:12:43  ht
1.33      ht        910: # use resolveURL on input, schemaLocs
                    911: #
                    912: # Revision 1.34  2000/04/20 15:45:08  ht
                    913: # better handling of use of ns uri for loc
                    914: #
                    915: # Revision 1.33  2000/04/20 14:26:59  ht
                    916: # merge in private and comp branches
                    917: #
                    918: # Revision 1.32.2.5  2000/04/20 14:25:54  ht
                    919: # merge in comp branch
                    920: #
                    921: # Revision 1.32.2.4.2.9  2000/04/20 14:22:39  ht
                    922: # manage document validation schema creation and search better
                    923: #
                    924: # Revision 1.32.2.4.2.8  2000/04/20 12:03:21  ht
                    925: # Remove a few lingering effectiveTypes
                    926: # Allow better for absent types etc.
                    927: #
                    928: # Revision 1.32.2.4.2.7  2000/04/14 21:18:27  ht
                    929: # minor attr names/path changes to track schema
                    930: #
                    931: # Revision 1.32.2.4.2.6  2000/04/13 23:04:39  ht
                    932: # allow for urType as simple type (?)
                    933: # track Any->AnyWrap change
                    934: #
                    935: # Revision 1.32.2.4.2.5  2000/04/12 17:29:37  ht
                    936: # begin work on model merger,
                    937: #
                    938: # Revision 1.32.2.4.2.4  2000/04/11 18:13:17  ht
                    939: # interpolate attributeUse between complexType and attributeDeclaration,
                    940: # parallel to particle
                    941: #
                    942: # Revision 1.32.2.4.2.3  2000/04/10 15:48:46  ht
                    943: # put modest attribute validation in place
                    944: #
                    945: # Revision 1.32.2.4.2.2  2000/04/09 16:13:26  ht
                    946: # working on complex type, attribute;
                    947: # back out component.qname
                    948: #
                    949: # Revision 1.32.2.4.2.1  2000/04/05 12:12:36  ht
                    950: # accommodate changes in schema.py
                    951: #
                    952: # Revision 1.32.2.4  2000/04/01 18:01:25  ht
                    953: # various minor compatibility fixes
                    954: #
                    955: # Revision 1.32.2.3  2000/03/25 12:12:27  ht
                    956: # restructure error handling/reporting;
                    957: # allow for switching 208 on and off
                    958: #
                    959: # Revision 1.32.2.2  2000/03/21 15:57:23  ht
                    960: # fix bug in skip,
1.32      ht        961: # allow 208 override
                    962: #
                    963: # Revision 1.32.2.1  2000/03/20 17:22:52  ht
                    964: # better coverage of <any>, including beginning of processcontents
                    965: #
                    966: # Revision 1.33  2000/03/20 17:20:53  ht
                    967: # better coverage of <any>, including beginning of processcontents
                    968: #
                    969: # Revision 1.32  2000/03/08 15:28:46  ht
                    970: # merge private branches back into public after 20000225 release
                    971: #
                    972: # Revision 1.31.2.3  2000/02/24 23:40:32  ht
                    973: # fix any bug
                    974: #
                    975: # Revision 1.31.2.2  2000/02/21 09:18:13  ht
                    976: # bug in <any> handling
                    977: #
1.31      richard   978: # Revision 1.31.2.1  2000/02/08 21:43:39  ht
                    979: # fork private branch to track internal drafts
                    980: # change calling sequence of checkinSchema
1.30      richard   981: #
                    982: # Revision 1.31.1.1  2000/02/08 13:54:25  ht
                    983: # fork branch for non-public changes
1.29      ht        984: # calling sequence to checkinSchema changed
                    985: #
                    986: # Revision 1.31  2000/01/13 16:55:42  richard
                    987: # Finally do something with xsi:type
1.28      ht        988: #
                    989: # Revision 1.30  2000/01/10 17:36:34  richard
                    990: # changes for xsi:schemaLocation
1.27      richard   991: #
                    992: # Revision 1.29  2000/01/08 23:33:50  ht
                    993: # towards support for xsi:schemaLocation
1.26      ht        994: #
                    995: # Revision 1.28  2000/01/08 12:07:38  ht
                    996: # Change command-line arg sequence in preparation for use of schemaLocation!!!!!
1.25      ht        997: # Add debug printout for schemaLocation for now
                    998: #
                    999: # Revision 1.27  2000/01/07 17:08:26  richard
                   1000: # start on xsi:type
                   1001: #
1.24      ht       1002: # Revision 1.26  2000/01/06 14:59:38  ht
1.1       ht       1003: # fix command line bug, display args on entry
                   1004: #
                   1005: # Revision 1.25  2000/01/06 14:38:56  ht
                   1006: # detect cross-scope keyref and signal error
                   1007: #
                   1008: # Revision 1.24  2000/01/03 17:02:37  ht
                   1009: # Include result of sub-ordinate key checking in overall result
                   1010: # Accommodate new calling sequence for xpath.find
                   1011: # add Log and Id
                   1012: #
                   1013: #

Webmaster