def encode_error(self, validation, obj, encoder, reason=None, source=None,
                 namespaces=None, **_kwargs):
    """
    Helper method for generating encode errors. Incompatible with 'skip'
    validation mode. If validation mode is 'lax' the error is returned,
    if it is 'strict' the error is raised.

    :param validation: an error-compatible validation mode: can be 'lax' or 'strict'.
    :param obj: the not validated XML data.
    :param encoder: the XML encoder.
    :param reason: the detailed reason of failed validation.
    :param source: the XML resource that contains the error.
    :param namespaces: is an optional mapping from namespace prefix to URI.
    :param _kwargs: keyword arguments of the validation process that are not used.
    :return: the built XMLSchemaEncodeError instance when validation is 'lax'.
    :raises XMLSchemaValueError: if the validation mode is 'skip' or is unknown.
    """
    # The error instance is always built first, whatever the validation mode is.
    error = XMLSchemaEncodeError(self, obj, encoder, reason, source, namespaces)

    if validation == 'lax':
        return error
    if validation == 'strict':
        raise error

    # Any other mode cannot be used for generating errors.
    if validation == 'skip':
        message = "validation mode 'skip' incompatible with error generation."
    else:
        message = "unknown validation mode %r" % validation
    raise XMLSchemaValueError(message)
def get_xsd_component(elem, required=True, strict=True):
    """
    Returns the first XSD component child, as produced by `iter_xsd_components`.

    :param elem: the parent Element.
    :param required: if `True` (the default) a missing component is an error, \
    with `False` a missing component is reported returning `None`.
    :param strict: if `True` (the default) the presence of a second component \
    is an error, with `False` the additional components are ignored.
    :raises XMLSchemaValueError: if a required component is missing or if \
    there is more than one component in strict mode.
    """
    children = iter_xsd_components(elem)

    try:
        first = next(children)
    except StopIteration:
        if not required:
            return None
        raise XMLSchemaValueError("missing XSD component")

    if strict:
        # Pull one more item only to verify that the first one is also the last.
        try:
            next(children)
        except StopIteration:
            pass
        else:
            raise XMLSchemaValueError("too many XSD components")

    return first
def get_fields(self, context, decoders=None):
    """
    Get fields for a schema or instance context element.

    :param context: Context Element or XsdElement
    :param decoders: Context schema fields decoders. If provided it's indexed \
    with the position of the field; a `None` entry means no decoding.
    :return: A tuple with field values. An empty field is replaced by `None`.
    :raises XMLSchemaValueError: if a field selects more than one value or if \
    a field selects nothing and the instance is an XsdKey.
    """
    values = []
    for index, field in enumerate(self.fields):
        selected = field.xpath_selector.select(context)

        if not selected:
            # Only keys require a value for each field.
            if isinstance(self, XsdKey):
                raise XMLSchemaValueError("%r key field must have a value!" % field)
            values.append(None)
            continue

        if len(selected) != 1:
            raise XMLSchemaValueError("%r field selects multiple values!" % field)

        decoder = None if decoders is None else decoders[index]
        if decoder is None:
            values.append(selected[0])
        else:
            values.append(decoder.decode(selected[0], validation="skip"))

    return tuple(values)
def __setattr__(self, name, value):
    """
    Validates the assignments of the attributes 'model' and '_group' before
    storing the value with the parent class implementation.

    :raises XMLSchemaValueError: if the model is not in XSD_GROUP_MODELS, if \
    an already defined model other than 'all' is changed to a different value, \
    or if '_group' contains items that are not tuples or ParticleMixin instances.
    """
    if name == 'model' and value is not None:
        if value not in XSD_GROUP_MODELS:
            raise XMLSchemaValueError("invalid model group %r." % value)

        current = self.model
        # Once set, only an 'all' model can be replaced by a different one.
        if current is not None and value != current and current != 'all':
            raise XMLSchemaValueError(
                "cannot change group model from %r to %r" % (current, value)
            )
    elif name == '_group':
        for item in value:
            if not isinstance(item, (tuple, ParticleMixin)):
                raise XMLSchemaValueError(
                    "XsdGroup's items must be tuples or ParticleMixin instances."
                )

    super(ModelGroup, self).__setattr__(name, value)
def __setattr__(self, name, value):
    """
    Validates the assignments of the public attributes of the resource.
    Setting 'source' also reloads the private attributes `_root`, `_document`,
    `_text` and `_url` from the result of `_fromsource`.

    :raises XMLSchemaValueError: for a 'defuse' value not in DEFUSE_MODES, \
    for a 'timeout' that is not a positive integer or for a 'lazy' that is \
    not a boolean.
    """
    if name == 'source':
        self._root, self._document, self._text, self._url = self._fromsource(value)
    elif name == 'defuse':
        if value not in DEFUSE_MODES:
            raise XMLSchemaValueError(
                u"'defuse' attribute: {!r} is not a defuse mode.".format(value)
            )
    elif name == 'timeout':
        if not isinstance(value, int) or value <= 0:
            raise XMLSchemaValueError(
                u"'timeout' attribute must be a positive integer: {!r}".format(value)
            )
    elif name == 'lazy':
        if not isinstance(value, bool):
            raise XMLSchemaValueError(
                u"'lazy' attribute must be a boolean: {!r}".format(value)
            )

    super(XMLResource, self).__setattr__(name, value)
def __init__(self, namespaces=None, dict_class=None, list_class=None, text_key='$',
             attr_prefix='@', cdata_prefix=None, etree_element_class=None, indent=4,
             **kwargs):
    """
    Initializes the converter.

    :param namespaces: passed as first argument to the parent class initializer.
    :param dict_class: dictionary class to use, for default `dict`.
    :param list_class: list class to use, for default `list`.
    :param text_key: stored as `text_key` attribute.
    :param attr_prefix: stored as `attr_prefix` attribute.
    :param cdata_prefix: stored as `cdata_prefix` attribute.
    :param etree_element_class: the class used for building elements, can be \
    `etree_element` or `lxml_etree_element`. For default `etree_element` is used.
    :param indent: stored as `indent` attribute.
    :param kwargs: additional keyword arguments, accepted but not used here.
    :raises XMLSchemaValueError: if the element class is not a supported one.
    """
    if etree_element_class is None:
        etree_element_class = etree_element
    elif etree_element_class not in (etree_element, lxml_etree_element):
        # The offending class has to be interpolated into the message.
        raise XMLSchemaValueError("%r: unsupported element." % (etree_element_class,))

    self.dict = dict_class or dict
    self.list = list_class or list
    self.text_key = text_key
    self.attr_prefix = attr_prefix
    self.cdata_prefix = cdata_prefix
    self.etree_element_class = etree_element_class
    self.indent = indent

    # Choose the registration function that matches the element class really
    # in use: the default (None) resolves to `etree_element`, so it must not
    # fall into the lxml branch.
    if etree_element_class is etree_element:
        register_namespace = etree_register_namespace
    else:
        register_namespace = lxml_etree_register_namespace
    super(XMLSchemaConverter, self).__init__(namespaces, register_namespace)
def __setattr__(self, name, value):
    """
    Blocks the change of the converter's symbols: 'text_key' must be '$',
    'attr_prefix' must be '@' and 'cdata_prefix' must be '#'. Other
    attributes are stored without checks.

    :raises XMLSchemaValueError: if one of the fixed symbols is set to a \
    different value.
    """
    fixed_symbols = {'text_key': '$', 'attr_prefix': '@', 'cdata_prefix': '#'}

    if name in fixed_symbols and value != fixed_symbols[name]:
        raise XMLSchemaValueError(
            'Wrong value %r for the attribute %r of a %r.' % (value, name, type(self))
        )

    # Note: the call skips the __setattr__ of XMLSchemaConverter itself.
    super(XMLSchemaConverter, self).__setattr__(name, value)
def extend_namespace(self, other):
    """
    Extends in place the 'namespace' attribute of the wildcard with the
    namespace of another wildcard.

    :param other: the other wildcard, an object with the attributes \
    `namespace` and `target_namespace` and the method `iter_namespaces()`.
    :raises XMLSchemaValueError: if one of the wildcards is '##other' and the \
    union cannot be expressed as '##any' or '##other'.
    """
    own = self.namespace
    theirs = other.namespace

    # Nothing to do if this wildcard already covers the other one.
    if own == '##any' or own == theirs:
        return
    if theirs == '##any':
        self.namespace = theirs
        return

    if theirs == '##other':
        w1, w2 = other, self
    elif own == '##other':
        w1, w2 = self, other
    else:
        # Both the wildcards are explicit lists of namespaces: merge them.
        if self.target_namespace == other.target_namespace:
            merged = set(theirs.split() + own.split())
        else:
            merged = set(list(other.iter_namespaces()) + own.split())
        self.namespace = ' '.join(merged)
        return

    # Here w1 is the '##other' wildcard and w2 is the remaining one.
    namespaces = set(w2.iter_namespaces())
    if w1.target_namespace in namespaces and '' in namespaces:
        self.namespace = '##any'
    elif '' not in namespaces and w1.target_namespace == w2.target_namespace:
        self.namespace = '##other'
    else:
        msg = "not expressible wildcard namespace union: {!r} V {!r}:"
        raise XMLSchemaValueError(msg.format(other.namespace, self.namespace))
def is_restriction(self, other, check_occurs=True):
    """
    Checks if the group is a restriction of another particle.

    :param other: the base particle, has to be a ParticleMixin instance.
    :param check_occurs: passed along to the recursive `is_restriction` calls.
    :return: the result of the applicable check. An empty group is always a \
    restriction. If the model of the other group is not one of 'sequence', \
    'all' or 'choice' and no previous rule applies, `None` is returned.
    :raises XMLSchemaValueError: if `other` is not a ParticleMixin instance.
    """
    if not self:
        return True
    if self.ref is not None:
        # A group reference delegates to the referred group.
        return self[0].is_restriction(other, check_occurs)
    if not isinstance(other, ParticleMixin):
        raise XMLSchemaValueError(
            "the argument 'base' must be a %r instance" % ParticleMixin
        )
    if not isinstance(other, XsdGroup):
        return self.is_element_restriction(other)
    if not other:
        return False
    if other.ref:
        return self.is_restriction(other[0], check_occurs)

    if len(other) == other.min_occurs == other.max_occurs == 1:
        # The other group is a plain wrapper for a single particle.
        if len(self) > 1:
            return self.is_restriction(other[0], check_occurs)
        elif isinstance(self[0], XsdGroup) and self[0].is_pointless(parent=self):
            return self[0].is_restriction(other[0], check_occurs)

    # Compare model with model
    own_model = self.model
    base_model = other.model
    if own_model != base_model and own_model != 'sequence' and len(self) > 1:
        return False
    if own_model == base_model or base_model == 'sequence':
        return self.is_sequence_restriction(other)
    if base_model == 'all':
        return self.is_all_restriction(other)
    if base_model == 'choice':
        return self.is_choice_restriction(other)
def fetch_resource(location, base_url=None, timeout=30):
    """
    Fetch a resource trying to access it. If the resource is accessible
    returns the URL, otherwise raises an error (XMLSchemaURLError).

    :param location: an URL or a file path.
    :param base_url: reference base URL for normalizing local and relative URLs.
    :param timeout: the timeout in seconds for the connection attempt in case of remote data.
    :return: a normalized URL.
    :raises XMLSchemaValueError: if `location` is empty.
    :raises XMLSchemaURLError: if both the access attempts fail. The reported \
    reason is the one of the first failed attempt.
    """
    if not location:
        raise XMLSchemaValueError("'location' argument must contains a not empty string.")

    url = normalize_url(location, base_url)
    try:
        resource = urlopen(url, timeout=timeout)
    except URLError as err:
        # fallback joining the path without a base URL
        url = normalize_url(location)
        try:
            resource = urlopen(url, timeout=timeout)
        except URLError:
            raise XMLSchemaURLError(reason=err.reason)

    # The resource is opened only for checking that it's accessible.
    resource.close()
    return url
def __init__(self, validation='strict'):
    """
    Initializes the instance with a validation mode and an empty list of errors.

    :param validation: the validation mode, has to be one of XSD_VALIDATION_MODES.
    :raises XMLSchemaValueError: if the validation mode is not an admitted one.
    """
    if validation in XSD_VALIDATION_MODES:
        self.validation = validation
        self.errors = []
    else:
        raise XMLSchemaValueError(
            "validation argument can be 'strict', 'lax' or 'skip': %r" % validation
        )
def boolean_to_python(s):
    """
    Converts the lexical representation of a boolean to a Python boolean.

    :param s: one of 'true', '1', 'false' or '0'.
    :return: `True` for 'true' and '1', `False` for 'false' and '0'.
    :raises XMLSchemaValueError: for any other value.
    """
    if s in ('true', '1'):
        return True
    if s in ('false', '0'):
        return False
    raise XMLSchemaValueError('not a boolean value: %r' % s)
def encode(self, obj, *args, **kwargs):
    """
    Encodes data to XML using the XSD schema/component.

    :param obj: the data to be encoded to XML.
    :param args: arguments that maybe passed to :func:`XMLSchema.iter_encode`.
    :param kwargs: keyword arguments from the ones included in the optional \
    arguments of the :func:`XMLSchema.iter_encode`. The *validation* argument \
    is extracted here, for default is 'strict'.
    :return: the first result of the encoding that is not an error. \
    If *validation* argument is 'lax' a 2-items tuple is returned, where the \
    first item is the encoded object and the second item is a list containing \
    the errors collected until then. Returns `None` if the encoding doesn't \
    produce any non-error result.
    :raises: :exc:`XMLSchemaValidationError` when an error is produced and \
    ``validation='strict'`` is provided, :exc:`XMLSchemaValueError` if the \
    validation mode is not an admitted one.
    """
    validation = kwargs.pop('validation', 'strict')
    if validation not in XSD_VALIDATION_MODES:
        raise XMLSchemaValueError(
            "validation argument can be 'strict', 'lax' or 'skip': %r" % validation
        )

    lax = validation == 'lax'
    errors = []
    for result in self.iter_encode(obj, validation=validation, *args, **kwargs):
        if not isinstance(result, XMLSchemaValidationError):
            # The first regular result ends the encoding.
            return (result, errors) if lax else result

        if validation == 'strict':
            raise result
        if lax:
            errors.append(result)
        # In the other cases the yielded errors are ignored.
def children_validation_error(self, validation, elem, index, particle, occurs=0,
                              expected=None, source=None, namespaces=None, **_kwargs):
    """
    Helper method for generating model validation errors. Incompatible with
    'skip' validation mode. If validation mode is 'strict' raises the error,
    otherwise returns the error.

    :param validation: the validation mode. Can be 'lax' or 'strict'.
    :param elem: the instance Element.
    :param index: the child index.
    :param particle: the XSD component (subgroup or element) associated to the child.
    :param occurs: the child tag occurs.
    :param expected: the expected element tags/object names.
    :param source: the XML resource related to the validation process.
    :param namespaces: is an optional mapping from namespace prefix to URI.
    :param _kwargs: keyword arguments of the validation process that are not used.
    :raises XMLSchemaValueError: if the validation mode is 'skip'.
    """
    if validation == 'skip':
        raise XMLSchemaValueError(
            "validation mode 'skip' incompatible with error generation."
        )

    error = XMLSchemaChildrenValidationError(
        self, elem, index, particle, occurs, expected, source, namespaces
    )
    if validation != 'strict':
        # Every mode other than 'strict' and 'skip' gets the error back.
        return error
    raise error
def check_code_point(cp):
    """
    Checks a code point or code point range.

    :param cp: an integer code point or a sequence whose first two items are \
    the bounds of a range of code points, with the upper bound excluded.
    :return: a valid code point range. For an integer code point the range \
    `(cp, cp + 1)` is returned, a range is returned unchanged.
    :raises XMLSchemaValueError: if the argument is not a valid code point or \
    a valid code point range.
    """
    if isinstance(cp, int):
        if not (0 <= cp <= maxunicode):
            raise XMLSchemaValueError("not a Unicode code point: %r" % cp)
        return cp, cp + 1

    # Check the types before comparing: the ordering of an integer with a
    # non-numeric bound raises a TypeError instead of the expected error.
    if not isinstance(cp[0], int) or not isinstance(cp[1], int) \
            or not (0 <= cp[0] < cp[1] <= maxunicode + 1):
        # The range is wrapped in a 1-tuple, otherwise a tuple argument is
        # unpacked by the % operator and the formatting fails.
        raise XMLSchemaValueError("not a Unicode code point range: %r" % (cp,))
    return cp
def open(self):
    """
    Returns an opened resource reader object for the instance URL.

    :raises XMLSchemaValueError: if the instance has no URL associated.
    :raises XMLSchemaURLError: if the access to the URL fails.
    """
    url = self._url
    if url is None:
        raise XMLSchemaValueError("can't open, the resource has no URL associated.")

    try:
        return urlopen(url, timeout=self.timeout)
    except URLError as err:
        reason = "cannot access to resource %r: %s" % (self._url, err.reason)
        raise XMLSchemaURLError(reason=reason)
def element_encode(self, obj, xsd_element, level=0):
    """
    Extracts the data of an element from a list-based representation: the
    first item is the tag, an optional second item is a mapping with the
    attributes and the remaining items are the content.

    :param obj: a not empty list containing the element's data.
    :param xsd_element: the XSD element used for matching the tag and for \
    checking if the type is simple or has a simple content.
    :param level: not used by this implementation.
    :return: an ElementData instance, built with the name of the XSD element. \
    Namespace declarations found between the attributes ('xmlns' and \
    'xmlns:*') are registered in the converter instead of being returned.
    :raises XMLSchemaValueError: if the data is not a list with at least one \
    item or if the tag doesn't match the XSD element.
    """
    unmap_qname = self.unmap_qname
    attributes = self.dict()
    if not isinstance(obj, (self.list, list)) or not obj:
        # The data is wrapped in a 1-tuple: a tuple is a likely wrong input
        # and as a bare argument it would be unpacked by the % operator,
        # raising a TypeError instead of this error.
        raise XMLSchemaValueError(
            "Wrong data format, a not empty list required: %r." % (obj,)
        )

    data_len = len(obj)
    if data_len == 1:
        if not xsd_element.is_matching(unmap_qname(obj[0]), self.get('')):
            raise XMLSchemaValueError("Unmatched tag")
        return ElementData(xsd_element.name, None, None, attributes)

    unmap_attribute_qname = self._unmap_attribute_qname
    try:
        for k, v in obj[1].items():
            if k == 'xmlns':
                self[''] = v
            elif k.startswith('xmlns:'):
                self[k.split('xmlns:')[1]] = v
            else:
                attributes[unmap_attribute_qname(k)] = v
    except AttributeError:
        # The second item is not a mapping, so it's already part of the content.
        content_index = 1
    else:
        content_index = 2

    # The tag is matched only now because the default namespace could have
    # been changed by an 'xmlns' declaration.
    if not xsd_element.is_matching(unmap_qname(obj[0]), self.get('')):
        raise XMLSchemaValueError("Unmatched tag")

    if data_len <= content_index:
        return ElementData(xsd_element.name, None, [], attributes)
    elif data_len == content_index + 1 and (
            xsd_element.type.is_simple() or xsd_element.type.has_simple_content()):
        return ElementData(xsd_element.name, obj[content_index], [], attributes)
    else:
        # List items are child elements, keyed by their tag; the other items
        # are character data, keyed by a progressive number.
        cdata_num = iter(range(1, data_len))
        list_types = list if self.list is list else (self.list, list)
        content = [
            (unmap_qname(e[0]), e) if isinstance(e, list_types) else (next(cdata_num), e)
            for e in obj[content_index:]
        ]
        return ElementData(xsd_element.name, None, content, attributes)
def __setattr__(self, name, value):
    """
    Checks the symbols used by the converter ('attr_prefix', 'text_key' and
    'cdata_prefix'), that cannot include letters or underscores. Setting
    'attr_prefix' also updates the attribute `ns_prefix`.

    :raises XMLSchemaValueError: if a symbol includes a letter or an underscore.
    """
    if name in ('attr_prefix', 'text_key', 'cdata_prefix'):
        if value is not None:
            for char in value:
                if char in string.ascii_letters or char == '_':
                    raise XMLSchemaValueError(
                        '%r cannot includes letters or underscores: %r' % (name, value)
                    )

        if name == 'attr_prefix':
            # Namespace declarations share the prefix of the attributes.
            self.ns_prefix = (value or '') + 'xmlns'

    super(XMLSchemaConverter, self).__setattr__(name, value)
def __init__(self, source, base_url=None, defuse='remote', timeout=300, lazy=True):
    """
    Initializes the resource. The attribute 'source' is assigned for last,
    after all the other attributes have been set.

    :param source: the source of the XML data.
    :param base_url: an optional base URL, has to be a string if provided.
    :param defuse: stored as `defuse` attribute.
    :param timeout: stored as `timeout` attribute.
    :param lazy: stored as `_lazy` attribute.
    :raises XMLSchemaValueError: if `base_url` is provided and is not a string.
    """
    if not (base_url is None or isinstance(base_url, string_base_type)):
        raise XMLSchemaValueError(
            u"'base_url' argument has to be a string: {!r}".format(base_url)
        )

    self._root = None
    self._document = None
    self._url = None
    self._text = None
    self._base_url = base_url
    self.defuse = defuse
    self.timeout = timeout
    self._lazy = lazy
    self.source = source
def element_encode(self, obj, xsd_element, level=0):
    """
    Extracts the data of an element from a representation based on
    dictionaries, where the optional key 'attributes' maps the attributes
    and the key 'children' contains the content.

    :param obj: the data of the element. If it's not a dictionary it's used \
    as the text of the element (an empty list is replaced by `None`).
    :param xsd_element: the XSD element, used for the tag and for looking up \
    the child elements that match a list of simple values.
    :param level: with 0 the qualified name of the XSD element is used as \
    tag, otherwise the name is used.
    :return: an ElementData instance.
    :raises XMLSchemaValueError: if the children are a sequence of more than \
    one item whose first item is not a dictionary.
    """
    tag = xsd_element.qualified_name if level == 0 else xsd_element.name
    dict_types = (self.dict, dict)

    if not isinstance(obj, dict_types):
        if obj == []:
            obj = None
        return ElementData(tag, obj, None, self.dict())

    unmap_qname = self.unmap_qname
    unmap_attribute_qname = self._unmap_attribute_qname
    attributes = self.dict()
    try:
        attributes.update(
            [(unmap_attribute_qname(k), v) for k, v in obj['attributes'].items()]
        )
    except KeyError:
        # Without attributes the whole dictionary is the content.
        children = obj
    else:
        children = obj.get('children', [])

    if isinstance(children, dict_types):
        children = [children]
    elif children and not isinstance(children[0], dict_types):
        # A sequence with a single non-dictionary item is a simple content.
        if len(children) > 1:
            raise XMLSchemaValueError("Wrong format")
        return ElementData(tag, children[0], None, attributes)

    list_types = (self.list, list)
    content = []
    for child in children:
        for name, value in child.items():
            if not isinstance(value, list_types) or not value:
                content.append((unmap_qname(name), value))
                continue

            ns_name = unmap_qname(name)
            if isinstance(value[0], dict_types + list_types):
                content.extend((ns_name, item) for item in value)
                continue

            # A list of simple values: it's a single value if the matching
            # child element has a list type, otherwise it's a list of
            # occurrences of the child.
            for xsd_child in xsd_element.type.content_type.iter_elements():
                matched_element = xsd_child.match(ns_name, self.get(''))
                if matched_element is None:
                    continue
                if matched_element.type.is_list():
                    content.append((ns_name, value))
                else:
                    content.extend((ns_name, item) for item in value)
                break
            else:
                content.append((ns_name, value))

    return ElementData(tag, None, content, attributes)
def validation_error(self, validation, error, obj=None, source=None,
                     namespaces=None, **_kwargs):
    """
    Helper method for generating and updating validation errors. Incompatible
    with 'skip' validation mode. If validation mode is 'lax' returns the error,
    if it is 'strict' raises the error.

    :param validation: an error-compatible validation mode: can be 'lax' or 'strict'.
    :param error: an error instance or the detailed reason of failed validation. \
    For an error instance only the attributes that are still `None` are updated.
    :param obj: the instance related to the error.
    :param source: the XML resource related to the validation process.
    :param namespaces: is an optional mapping from namespace prefix to URI.
    :param _kwargs: keyword arguments of the validation process that are not used.
    :raises XMLSchemaValueError: if the validation mode is 'skip' or is unknown.
    """
    if isinstance(error, XMLSchemaValidationError):
        # Complete the error with the missing information.
        if error.obj is None and obj is not None:
            error.obj = obj
        if error.namespaces is None and namespaces is not None:
            error.namespaces = namespaces
        if error.elem is None and is_etree_element(obj):
            error.elem = obj
        if error.source is None and source is not None:
            error.source = source
    else:
        error = XMLSchemaValidationError(self, obj, error, source, namespaces)

    if validation == 'lax':
        return error
    if validation == 'strict':
        raise error

    if validation == 'skip':
        message = "validation mode 'skip' incompatible with error generation."
    else:
        message = "unknown validation mode %r" % validation
    raise XMLSchemaValueError(message)
def __setitem__(self, key, value):
    """
    Stores an attribute in the group, checking the coherence between the key
    and the attribute's name.

    :param key: `None` for an XsdAnyAttribute, otherwise the local name or \
    the qualified name (starting with '{') of the attribute.
    :param value: an XsdAnyAttribute instance if the key is `None`, otherwise \
    an XsdAttribute instance.
    :raises XMLSchemaValueError: if the key doesn't match the attribute's name.
    """
    if key is None:
        assert isinstance(value, XsdAnyAttribute), 'An XsdAnyAttribute instance is required.'
        self._attribute_group[key] = value
        return

    assert isinstance(value, XsdAttribute), 'An XsdAttribute instance is required.'

    if key[0] == '{':
        if value.qualified_name != key:
            raise XMLSchemaValueError("%r name and key %r mismatch." % (value.name, key))
    else:
        if value.local_name != key:
            raise XMLSchemaValueError("%r name and key %r mismatch." % (value.name, key))
        if value.target_namespace != self.target_namespace:
            # Qualify attributes of other namespaces
            key = value.qualified_name

    self._attribute_group[key] = value
def parse_error(self, error, elem=None):
    """
    Helper method for registering parse errors. Does nothing if validation
    mode is 'skip'. If validation mode is 'lax' collects the error into the
    list `self.errors`, otherwise raises the error.

    :param error: can be a parse error, another exception or an error message.
    :param elem: the Element instance related to the error, for default uses the 'elem' \
    attribute of the validator, if it's present.
    :raises XMLSchemaValueError: if `elem` is neither an Element nor `None`, \
    or if `error` is neither an exception nor a string.
    """
    if self.validation == 'skip':
        return

    if not is_etree_element(elem):
        if elem is not None:
            raise XMLSchemaValueError(
                "'elem' argument must be an Element instance, not %r." % elem
            )
        elem = getattr(self, 'elem', None)

    if isinstance(error, XMLSchemaParseError):
        # Bind the existing parse error to this validator.
        error.validator = self
        error.namespaces = getattr(self, 'namespaces', None)
        error.elem = elem
        error.source = getattr(self, 'source', None)
    elif isinstance(error, Exception):
        message = unicode_type(error).strip('\'" ')
        error = XMLSchemaParseError(self, message, elem)
    elif isinstance(error, string_base_type):
        error = XMLSchemaParseError(self, error, elem)
    else:
        raise XMLSchemaValueError(
            "'error' argument must be an exception or a string, not %r." % error
        )

    if self.validation != 'lax':
        raise error
    self.errors.append(error)
def __setattr__(self, name, value):
    """
    Checks the assignments of the attributes 'elem' and 'schema'. After the
    assignment of 'elem' the method `_parse` is called.

    :raises XMLSchemaTypeError: if the value for 'elem' is not an Element.
    :raises XMLSchemaValueError: if the tag of the Element is not an admitted \
    one, or if an existing 'schema' is replaced with a schema that has a \
    different target namespace.
    """
    if name == "elem":
        if not is_etree_element(value):
            raise XMLSchemaTypeError(
                "%r attribute must be an Etree Element: %r" % (name, value)
            )
        if value.tag not in self._admitted_tags:
            admitted_names = [local_name(tag) for tag in self._admitted_tags]
            raise XMLSchemaValueError(
                "wrong XSD element %r for %r, must be one of %r."
                % (local_name(value.tag), self, admitted_names)
            )

        # The component is parsed again each time its element is assigned.
        super(XsdComponent, self).__setattr__(name, value)
        self._parse()
        return

    if name == "schema" and hasattr(self, 'schema'):
        if self.schema.target_namespace != value.target_namespace:
            raise XMLSchemaValueError(
                "cannot change 'schema' attribute of %r: the actual %r has a different "
                "target namespace than %r." % (self, self.schema, value)
            )

    super(XsdComponent, self).__setattr__(name, value)
def discard(self, s):
    """
    Removes from the instance the characters described by a string, that is
    split into parts using the `_re_char_group` pattern. Each part can be a
    key of CHARACTER_ESCAPES, a Unicode subset reference (\\p or \\P prefixed)
    or a string of characters to remove from the positive part.

    :param s: the string that describes the characters to discard.
    :raises XMLSchemaValueError: if a Unicode subset reference doesn't match \
    the `_re_unicode_ref` pattern.
    """
    for part in self._re_char_group.split(s):
        if part in CHARACTER_ESCAPES:
            value = CHARACTER_ESCAPES[part]
            if isinstance(value, string_base_type):
                self.positive.difference_update(value)
            elif part[-1].islower():
                self.positive -= value
            else:
                # An escape ending with a non-lowercase char is a complement.
                self.negative -= value
            continue

        is_subset = part.startswith('\\p')
        is_complement = not is_subset and part.startswith('\\P')
        if not is_subset and not is_complement:
            self.positive.difference_update(part)
            continue

        if self._re_unicode_ref.search(part) is None:
            raise XMLSchemaValueError("wrong Unicode subset specification %r" % part)

        # The slice excludes the three leading chars and the last one.
        subset = get_unicode_subset(part[3:-1])
        if is_subset:
            self.positive -= subset
        else:
            self.negative -= subset
def retrieve_schema_source(self, source):
    """
    Returns a schema source that can be used to create an XMLSchema instance.

    :param source: A string or an ElementTree's Element. An Element that is \
    not a schema has to be a global definition/declaration without namespace, \
    that is wrapped into a new 'schema' element. A string that doesn't start \
    with '<' is considered a path, otherwise is inserted in `SCHEMA_TEMPLATE`.
    :return: An schema source string, an ElementTree's Element or a full pathname.
    :raises XMLSchemaValueError: if the Element has a namespace or if it's not \
    an XSD global definition/declaration.
    """
    if is_etree_element(source):
        if source.tag in (XSD_SCHEMA, 'schema'):
            return source
        elif get_namespace(source.tag):
            raise XMLSchemaValueError("source %r namespace has to be empty." % source)
        elif source.tag not in {
            'element', 'attribute', 'simpleType', 'complexType',
            'group', 'attributeGroup', 'notation'
        }:
            # Use an explicit %r: the sequence "% i" is parsed as an integer
            # conversion, that fails with a TypeError for an Element.
            raise XMLSchemaValueError(
                "%r is not an XSD global definition/declaration." % source
            )

        root = etree_element('schema', attrib={
            'xmlns:ns': "ns",
            'xmlns': "http://www.w3.org/2001/XMLSchema",
            'targetNamespace': "ns",
            'elementFormDefault': "qualified",
            'version': self.schema_class.XSD_VERSION,
        })
        root.append(source)
        return root
    else:
        source = source.strip()
        if not source.startswith('<'):
            return self.casepath(source)
        else:
            return self.SCHEMA_TEMPLATE.format(self.schema_class.XSD_VERSION, source)
def get_xsd_form_attribute(elem, attribute):
    """
    Get an XSD form attribute, checking the value. If the attribute is
    missing returns `None`.

    :param elem: the Element instance.
    :param attribute: the attribute name (maybe 'form', or 'elementFormDefault' \
    or 'attributeFormDefault').
    :return: a string, that can be 'qualified' or 'unqualified', or `None`.
    :raises XMLSchemaValueError: if the attribute has a different value.
    """
    value = elem.get(attribute)
    if value is None:
        return None
    if value in ('qualified', 'unqualified'):
        return value

    raise XMLSchemaValueError(
        "wrong value %r for attribute %r, it must be 'qualified' or 'unqualified'."
        % (value, attribute)
    )
def lookup(self, tag, qname):
    """
    Dispatches the lookup of a global XSD component to the specific lookup
    method, selected by the tag of the definition/declaration.

    :param tag: the tag of an XSD global definition/declaration.
    :param qname: the name passed to the selected lookup method.
    :return: the result of the selected lookup method.
    :raises XMLSchemaValueError: if the tag is not one of the managed tags.
    """
    dispatch = (
        (XSD_SIMPLE_TYPE, 'lookup_type'),
        (XSD_COMPLEX_TYPE, 'lookup_type'),
        (XSD_ELEMENT, 'lookup_element'),
        (XSD_GROUP, 'lookup_group'),
        (XSD_ATTRIBUTE, 'lookup_attribute'),
        (XSD_ATTRIBUTE_GROUP, 'lookup_attribute_group'),
        (XSD_NOTATION, 'lookup_notation'),
    )
    for xsd_tag, method_name in dispatch:
        if tag == xsd_tag:
            return getattr(self, method_name)(qname)

    raise XMLSchemaValueError(
        "wrong tag {!r} for an XSD global definition/declaration".format(tag)
    )
def update_occurs(self, counter):
    """
    Update group occurrences. A 'sequence' or 'all' group is counted when
    all its not emptiable items have a nonzero count, a 'choice' group when
    at least one item has a nonzero count. When the group is counted the
    counts of all its items are set to zero.

    :param counter: a Counter object that trace occurrences for elements and groups.
    :raises XMLSchemaValueError: if the group has not one of the three models.
    """
    model = self.model
    if model in ('sequence', 'all'):
        completed = all(counter[item] for item in self if not item.is_emptiable())
    elif model == 'choice':
        completed = any(counter[item] for item in self)
    else:
        raise XMLSchemaValueError("the group %r has no model!" % self)

    if completed:
        counter[self] += 1
        for item in self:
            counter[item] = 0
def __setattr__(self, name, value):
    """
    Checks the value assigned to 'elem' and updates the attribute 'path'
    each time 'elem' or 'source' are assigned.

    :raises XMLSchemaValueError: if the value for 'elem' is neither `None` \
    nor an Element.
    """
    if name == 'elem' and value is not None and not is_etree_element(value):
        raise XMLSchemaValueError(
            "'elem' attribute requires an Element, not %r." % type(value)
        )
    super(XMLSchemaValidatorError, self).__setattr__(name, value)

    if name not in ('elem', 'source'):
        return

    # Calculate and set the element's path: it has to be calculated asap
    # because if the XML resource is lazy the intermediate nodes could be
    # deleted.
    elem, root = self.elem, self.root
    if is_etree_element(elem) and is_etree_element(root):
        self.path = etree_getpath(
            elem, root, self.namespaces, relative=False, add_position=True
        )
    else:
        self.path = None