def from_xml(self, source, validation='strict', **kwargs):
    """
    Load XML data into this document, optionally validating it.

    :param source: an :class:`xmlschema.XMLResource` instance (used as-is), \
    or a filepath to an XML file or a string containing XML data.
    :param validation: validation mode. With 'strict' the schema's \
    ``validate`` is called on the data; with 'lax' the errors produced by \
    ``iter_errors`` are collected; any other value (e.g. 'skip') performs \
    no validation.
    :param kwargs: other options for creating the :class:`xmlschema.XMLResource` \
    instance used for reading the XML data. Ignored when *source* already is \
    an XMLResource.

    Nothing is returned: the outcome is stored in ``self.root``, \
    ``self.errors``, ``self._namespaces``, ``self.filename`` and ``self.format``.
    """
    if isinstance(source, xmlschema.XMLResource):
        resource = source
    else:
        resource = xmlschema.XMLResource(source, **kwargs)

    collected = []
    if validation == 'strict':
        self.schema.validate(resource)
    elif validation == 'lax':
        collected = list(self.schema.iter_errors(resource))

    self.root = resource.root
    self.errors = collected
    self._namespaces = resource.get_namespaces()

    # Data that was not loaded from a URL has neither a filename nor a format.
    location = resource.url
    if location is not None:
        self.filename = removeprefix(location, 'file://')
        self.format = 'xml'
    else:
        self.filename = None
        self.format = None
def lazy_decode(source, repeat=1):
    """
    Decode *source* through a lazy XMLResource, *repeat* times over.

    Sources whose name ends with '.xsd' are decoded with the XSD meta-schema,
    everything else with the module-level ``xmlschema.to_dict``. Every item
    obtained by iterating the decoded result is dropped immediately; nothing
    is returned (presumably a profiling helper -- confirm against callers).
    """
    if source.endswith('.xsd'):
        decoder = xmlschema.XMLSchema.meta_schema
    else:
        decoder = xmlschema

    for _ in range(repeat):
        for item in decoder.to_dict(xmlschema.XMLResource(source, lazy=True),
                                    path='*'):
            # Release the reference right away instead of keeping it alive
            # until the next iteration rebinds the name.
            del item
def get_schema(
        source: Union[xmlschema.XMLResource, str]) -> xmlschema.XMLSchema:
    """Build the schema for an XML source from its declared locations.

    The ``(namespace, location)`` pairs returned by the resource's
    ``get_locations()`` are tried in order; the first one for which
    ``_build_schema`` succeeds wins.

    Parameters
    ----------
    source : XMLResource or str
        An :class:`xmlschema.XMLResource` instance, or any value accepted by
        the :class:`xmlschema.XMLResource` constructor (it is wrapped in one
        when it is not already a resource).

    Returns
    -------
    xmlschema.XMLSchema
        The first schema that could be built for the source.

    Raises
    ------
    XMLSchemaValueError
        If the resource declares no location, or every declared location
        fails with ``XMLSchemaParseError``.
    """
    if isinstance(source, xmlschema.XMLResource):
        resource = source
    else:
        resource = xmlschema.XMLResource(source)

    for namespace, location in resource.get_locations():
        try:
            schema = _build_schema(namespace, location)
        except XMLSchemaParseError:
            # Unparsable schema at this location: fall through to the next.
            continue
        return schema

    raise XMLSchemaValueError(
        f"Could not find a schema for XML resource {resource!r}.")
def lazy_validate(source):
    """
    Validate *source* through a lazy XMLResource and return the result.

    A source whose name ends with '.xsd' is validated against the XSD
    meta-schema with ``path='*'``; any other source goes through the
    module-level ``xmlschema.validate`` with ``path=None``.
    """
    is_schema = source.endswith('.xsd')
    validator = xmlschema.XMLSchema.meta_schema if is_schema else xmlschema
    path = '*' if is_schema else None
    resource = xmlschema.XMLResource(source, lazy=True)
    return validator.validate(resource, path=path)
def lazy_validate(source, repeat=1):
    """
    Validate *source* through a lazy XMLResource, *repeat* times over.

    A source whose name ends with '.xsd' is validated against the XSD
    meta-schema with ``path='*'``; any other source goes through the
    module-level ``xmlschema.validate`` with ``path=None``. A fresh lazy
    resource is created for every repetition and nothing is returned.
    """
    if source.endswith('.xsd'):
        validate = xmlschema.XMLSchema.meta_schema.validate
        path = '*'
    else:
        validate = xmlschema.validate
        path = None

    for _ in range(repeat):
        validate(xmlschema.XMLResource(source, lazy=True), path=path)
def convert_to_text(xml_file, schema=False):
    """
    Return the content of *xml_file* serialized as a text string.

    :param xml_file: the XML source handed to the parser.
    :param schema: when true the file is loaded as an \
    :class:`xmlschema.XMLResource` and serialized with its ``tostring`` \
    method; otherwise it is parsed with ``et.parse`` and serialized as UTF-8.
    :return: the serialized XML text.
    """
    if not schema:
        parsed = et.parse(xml_file)
        return et.tostring(parsed, encoding='utf-8').decode('utf-8')

    resource = xmlschema.XMLResource(xml_file)
    return resource.tostring(indent=' ', spaces_for_tab=2)
def to_xml_str(self) -> str:
    """Serialize the instance as an XML string.

    The dictionary from ``to_dict_xml()`` is dumped to JSON, converted to an
    element tree using the logging XML schema and rendered to text. The
    declaration of the schema file's default namespace is then removed from
    the rendered text.
    """
    schema = xmlschema.XMLSchema(PATH_TO_XML_SCHEMA_FOR_LOGGING)
    payload = json.dumps(self.to_dict_xml(), indent=2)
    element = xmlschema.from_json(payload, schema)
    rendered = xmlschema.etree_tostring(element)

    # The namespace mapped to the empty prefix in the schema file itself.
    schema_resource = xmlschema.XMLResource(PATH_TO_XML_SCHEMA_FOR_LOGGING)
    default_ns = schema_resource.get_namespaces()['']
    return rendered.replace('xmlns="{}"'.format(default_ns), '')
def _parse(self, answer):
    """
    Decode an answer document into a mapping keyed by question identifier.

    The XML in *answer* is decoded with ``self.answer_schema``; the decoded
    data must contain an 'Answer' list whose items each carry a
    'QuestionIdentifier'. For every item the mapped value is its 'FreeText'
    when present, otherwise a copy of the item without the identifier.

    :return: a dict mapping each 'QuestionIdentifier' to its answer value.
    """
    resource = xmlschema.XMLResource(answer)
    decoded = self.answer_schema.to_dict(resource)
    assert 'Answer' in decoded
    entries = decoded['Answer']
    assert isinstance(entries, list)

    result = {}
    for entry in entries:
        assert 'QuestionIdentifier' in entry
        if 'FreeText' in entry:
            value = entry['FreeText']
        else:
            # The identifier becomes the key, so leave it out of the value.
            value = {
                key: item for key, item in entry.items()
                if key != 'QuestionIdentifier'
            }
        result[entry['QuestionIdentifier']] = value
    return result
def from_xml(self, source, validation='strict', **kwargs):
    """
    Load XML data into this document, optionally validating it.

    :param source: a filepath to an XML file or a string containing XML \
    data. A string that contains a newline, or that starts with '<' once \
    stripped, is treated as XML data; anything else as a file path.
    :param validation: validation mode. With 'strict' the schema's \
    ``validate`` is called on the source; with 'lax' the errors produced by \
    ``iter_errors`` are collected; any other value (e.g. 'skip') performs \
    no validation.
    :param kwargs: other options for creating the :class:`xmlschema.XMLResource` \
    instance used for reading the location hints and namespaces.
    :raises TypeError: if *source* is not a string.

    Nothing is returned: the outcome is stored in ``self.root``, \
    ``self.errors``, ``self.filename``, ``self.format`` and \
    ``self._namespaces``. A warning is logged when the location hints of \
    the data name a schema file different from the one of ``self.schema``.
    """
    if not isinstance(source, str):
        raise TypeError("the source argument must be a string!")

    multiline = '\n' in source
    if multiline or source.strip().startswith('<'):
        root = ElementTree.XML(source)
        filename = None
    else:
        root = ElementTree.parse(source).getroot()
        filename = source.strip()

    resource = xmlschema.XMLResource(source, **kwargs)

    # Base names of the schema files hinted for the document's own namespace.
    schema_names = []
    for ns, location in resource.iter_location_hints():
        if ns == resource.namespace:
            schema_names.append(os.path.basename(location))

    if schema_names and self.schema.url is not None \
            and not self.schema.url.endswith(tuple(schema_names)):
        hinted = schema_names[0]
        if multiline:
            logger.warning("XML data seems built for schema {!r}".format(
                hinted))
        else:
            logger.warning("XML data {!r} seems built for schema {!r}".format(
                source, hinted))

    collected = []
    if validation == 'strict':
        self.schema.validate(source)
    elif validation == 'lax':
        collected = list(self.schema.iter_errors(source))

    self.root = root
    self.errors = collected
    self.filename = filename
    if filename:
        self.format = 'xml'
    else:
        self.format = None
    self._namespaces = resource.get_namespaces()
def __call__(self, xosc: Path) -> bool:
    """
    Validate the file at *xosc* against ``self.schema`` and report the result.

    On success "[OK] <path>" is printed when ``self.verbose`` is set. Any
    exception raised while validating is reported as "[NG] <path>" followed
    by the error text, instead of being propagated.

    :return: True when validation completed without raising, False otherwise.
    """
    target = str(xosc)
    try:
        self.schema.validate(xmlschema.XMLResource(target))
        if self.verbose:
            print(f"[OK] {target}")
    except Exception as error:
        print(f"[NG] {target}")
        print()
        print(f"Error: {error!s}")
        return False
    else:
        return True
def __init__(self, source=None, schema=None):
    """
    Create a document from XML data and/or a schema.

    :param source: optional XML data. If its namespace is the XSD namespace \
    it is treated as the schema itself and no data is loaded. Otherwise the \
    location hints for its namespace are used to look for a schema through \
    ``fetch_schema``.
    :param schema: optional schema: an ``xmlschema.XMLSchemaBase`` instance, \
    a schema location, or schema source text. A location whose base name \
    differs from the schema found through *source* is replaced by the latter.
    :raises XmlDocumentError: if both arguments are missing, or if no schema \
    is given and none can be found from *source*.
    """
    self.root = None
    self.filename = None
    self.format = None
    self.errors = []
    self._namespaces = {}

    if source is None and schema is None:
        raise XmlDocumentError("missing both initialization arguments!")

    if source is not None:
        resource = xmlschema.XMLResource(source)
        if resource.namespace == XSD_NAMESPACE:
            # The "data" is a schema: use it as such and load no XML data.
            alt_schema, source = source, None
        else:
            alt_schema = None
            for ns, location in resource.iter_location_hints():
                if ns == resource.namespace:
                    fetched = self.fetch_schema(location)
                    if fetched is not None:
                        alt_schema = fetched
                        break

        if schema is None:
            if alt_schema is None:
                raise XmlDocumentError("missing schema for XML data!")
            schema = alt_schema
        elif alt_schema is not None and isinstance(schema, str) \
                and '\n' not in schema \
                and not schema.strip().startswith('<') \
                and os.path.basename(schema) != os.path.basename(alt_schema):
            # A schema location naming a different file than the one the
            # data points to: the data's schema takes precedence.
            schema = alt_schema

    if isinstance(schema, xmlschema.XMLSchemaBase):
        self.schema = schema
    else:
        looks_like_location = (
            isinstance(schema, str)
            and '\n' not in schema
            and not schema.strip().startswith('<')
        )
        if looks_like_location:
            schema = self.fetch_schema(schema) or schema
        self.schema = xmlschema.XMLSchema(schema)

    if source is not None:
        self.from_xml(source, validation='lax')
def __init__(self, source=None, schema=None):
    """
    Create a document from XML data and/or a schema.

    The schema is taken, in order of precedence, from: the *schema*
    argument, the first schema that ``fetch_schema`` finds among the
    location hints for the namespace of *source*, or the class attribute
    ``DEFAULT_SCHEMA``. When *source* is given it is finally loaded with
    lax validation.

    :param source: optional XML data, as an :class:`xmlschema.XMLResource` \
    or any value accepted by its constructor.
    :param schema: optional schema: an ``xmlschema.XMLSchemaBase`` instance, \
    a schema location, or any other source accepted by ``xmlschema.XMLSchema``.
    :raises XmlDocumentError: if *source* is itself an XSD schema, or if no \
    schema can be determined.
    """
    self.root = None
    self.filename = None
    self.format = None
    self.errors = []
    self._namespaces = {}

    source_schema = None
    if source is not None:
        if not isinstance(source, xmlschema.XMLResource):
            source = xmlschema.XMLResource(source)
        if source.namespace == XSD_NAMESPACE:
            raise XmlDocumentError("source is an XSD schema")

        for ns, location in source.iter_location_hints():
            if ns != source.namespace:
                continue
            candidate = self.fetch_schema(location)
            if candidate is not None:
                source_schema = candidate
                break

    if isinstance(schema, xmlschema.XMLSchemaBase):
        self.schema = schema
    else:
        if schema is not None:
            if isinstance(schema, str) and '\n' not in schema \
                    and not schema.lstrip().startswith('<'):
                # A location rather than schema text: try to resolve it.
                schema_source = self.fetch_schema(schema) or schema
            else:
                schema_source = schema
        elif source_schema is not None:
            schema_source = source_schema
        elif self.DEFAULT_SCHEMA is not None:
            schema_source = self.fetch_schema(self.DEFAULT_SCHEMA)
        else:
            raise XmlDocumentError("missing schema for XML data!")
        self.schema = xmlschema.XMLSchema(schema_source)

    if source is not None:
        self.from_xml(source, validation='lax')
def test_document_validate_api_lazy(self):
    """
    Check decoding of a truncated tree and lazy validation of the file.

    With its first two children emptied, the in-memory tree must fail a
    plain decode of the 'collection' element, while a strict decode limited
    to ``max_depth=1`` must run to completion. The untouched file must still
    validate in lazy mode.
    """
    resource = xmlschema.XMLResource(self.col_xml_file, lazy=False)
    namespaces = resource.get_namespaces()
    for index in (0, 1):
        resource.root[index].clear()  # Drop internal elements

    xsd_element = self.col_schema.elements['collection']

    with self.assertRaises(XMLSchemaValidationError):
        xsd_element.decode(resource.root, namespaces=namespaces)

    results = xsd_element.iter_decode(
        resource.root, 'strict', namespaces=namespaces,
        source=resource, max_depth=1,
    )
    for chunk in results:
        del chunk

    self.assertIsNone(xmlschema.validate(self.col_xml_file, lazy=True))
def from_xml_str(self, xml_source: str):
    """Import a logging configuration from XML.

    The default namespace of the logging XML schema is injected into the
    '<simulators' tag before the text is checked against that schema. Valid
    text is decoded and passed to ``from_dict_xml``; invalid text is handed
    to the schema's ``validate`` so that the failure is reported by it.

    Args:
        xml_source(str): File path or string content of XML file to import
    """
    schema = xmlschema.XMLSchema(PATH_TO_XML_SCHEMA_FOR_LOGGING)

    if os.path.isfile(xml_source):
        with open(xml_source, 'rt') as handle:
            xml_text = handle.read()
    else:
        xml_text = xml_source

    schema_resource = xmlschema.XMLResource(PATH_TO_XML_SCHEMA_FOR_LOGGING)
    default_ns = schema_resource.get_namespaces()['']
    xml_text = xml_text.replace(
        '<simulators', '<simulators xmlns="{}" '.format(default_ns))

    if not schema.is_valid(xml_text):
        schema.validate(xml_text)
        return
    self.from_dict_xml(schema.to_dict(xml_text))
def main(xsd_pth, xml_gz_pth, out_folder):
    """Split a gzipped XML file into one JSON file per decoded entry.

    The XML is read lazily and every child of the root (``path='*'``) is
    decoded with the schema loaded from ``xsd_pth``. Each decoded entry is
    written to ``<out_folder>/<accession>.json``, where the accession is the
    first item of the entry's 'accession' value.

    Args:
        xsd_pth: Location of the XSD schema used for decoding.
        xml_gz_pth: Path of the gzip-compressed XML file to read.
        out_folder: Folder receiving the JSON files (not created here).
    """
    logger.info(f'Load XML schema from {xsd_pth}')
    xs = xmlschema.XMLSchema(xsd_pth)

    logger.info(f'Read XML file {xml_gz_pth} ...')
    # Bind the counter up front: when the decoder yields nothing the loop
    # never assigns it and the final log line would raise UnboundLocalError.
    i = 0
    with gzip.open(xml_gz_pth, 'rt') as f:
        r = xmlschema.XMLResource(f, lazy=True)
        for i, entry_d in enumerate(xs.iter_decode(r, path='*'), start=1):
            try:
                uniprot_acc = entry_d['accession'][0]
            except TypeError:
                # The decoder produced something that cannot be subscripted
                # like an entry (presumably a validation error -- confirm).
                logger.warning(
                    f'Skip an element not an UniProt entry: {entry_d}')
                continue

            out_pth = Path(out_folder, f'{uniprot_acc}.json')
            with out_pth.open('wb') as of:
                of.write(orjson.dumps(entry_d))

            if i % 1000 == 0:
                logger.info(f'... processed {i:,d} entries')

    # NOTE: the total counts every yielded item, skipped ones included.
    logger.info(f'Total processed {i:,d} entries')
if len(missing) > 0: msg='''Required parts of the spatial element are either missing, misspelled, or not delimited properly. These elements could not be found: {}. The invalid element that was submitted was:\n {}.'''.format(''.join(missing),xmldict['spatial']) else: msg=None return msg problems={} noproblems=[] my_schema = xmls.XMLSchema(schemafile) for file in os.listdir(xmlfolder): if file[-4:]=='.xml': filepath=os.path.join(xmlfolder,file) my_xml = xmls.XMLResource(filepath) test=my_schema.is_valid(my_xml) if test==True: msg=spatial_check(my_xml) if msg==None: noproblems.append(file) else: problems[file]=msg else: try: my_schema.validate(my_xml) except xmls.XMLSchemaException as e: problems[file]=e continue if len(problems)>0: