예제 #1
0
파일: documents.py 프로젝트: QEF/qeschema
    def from_xml(self, source, validation='strict', **kwargs):
        """
        Read XML data into this document and check it against ``self.schema``.

        Nothing is returned: the outcome is stored on the instance as ``root``,
        ``errors``, ``_namespaces``, ``filename`` and ``format``.

        :param source: an :class:`xmlschema.XMLResource` instance, or any value
            accepted by its constructor (e.g. a filepath or a string of XML data).
        :param validation: 'strict' calls ``self.schema.validate()`` (expected to
            raise on invalid data); 'lax' collects the items yielded by
            ``self.schema.iter_errors()`` into ``self.errors``; any other value
            (e.g. 'skip') performs no validation.
        :param kwargs: extra options for building the :class:`xmlschema.XMLResource`;
            only used when *source* is not already a resource.
        """
        if isinstance(source, xmlschema.XMLResource):
            resource = source
        else:
            resource = xmlschema.XMLResource(source, **kwargs)

        detected = []
        if validation == 'strict':
            self.schema.validate(resource)
        elif validation == 'lax':
            detected.extend(self.schema.iter_errors(resource))

        self.root = resource.root
        self.errors = detected
        self._namespaces = resource.get_namespaces()

        # A resource without a URL was built from in-memory data, so there is
        # no file name or on-disk format to record.
        loaded_from_url = resource.url is not None
        self.filename = (
            removeprefix(resource.url, 'file://') if loaded_from_url else None
        )
        self.format = 'xml' if loaded_from_url else None
예제 #2
0
def lazy_decode(source, repeat=1):
    """Decode *source* through a lazy XMLResource, *repeat* times over.

    Paths ending in '.xsd' are decoded with the XSD meta-schema; anything else
    goes through the module-level ``xmlschema.to_dict``. The decoded items are
    discarded as soon as they are produced.
    """
    if source.endswith('.xsd'):
        decoder = xmlschema.XMLSchema.meta_schema
    else:
        decoder = xmlschema

    for _ in range(repeat):
        decoded = decoder.to_dict(
            xmlschema.XMLResource(source, lazy=True), path='*')
        for result in decoded:
            # Drop the reference immediately; only the decoding work matters.
            del result
예제 #3
0
def get_schema(
        source: Union[xmlschema.XMLResource, str]) -> xmlschema.XMLSchema:
    """Return the first schema that can be built from the source's locations.

    Parameters
    ----------
    source : XMLResource or str
        Either an :class:`xmlschema.XMLResource` instance or any value its
        constructor accepts (the original documentation lists: a file-like
        object, a file path, a resource URI, an Element or ElementTree
        instance, or a string containing the XML data).

    Returns
    -------
    xmlschema.XMLSchema
        The result of ``_build_schema`` for the first (namespace, location)
        pair that does not fail with ``XMLSchemaParseError``.

    Raises
    ------
    XMLSchemaValueError
        If no location of the resource yields a schema.
    """
    already_resource = isinstance(source, xmlschema.XMLResource)
    if not already_resource:
        source = xmlschema.XMLResource(source)

    for namespace, location in source.get_locations():
        try:
            schema = _build_schema(namespace, location)
        except XMLSchemaParseError:
            # This location could not be parsed as a schema; try the next one.
            continue
        else:
            return schema

    raise XMLSchemaValueError(
        f"Could not find a schema for XML resource {source!r}.")
예제 #4
0
def lazy_validate(source):
    """Validate *source* through a lazy XMLResource and return the result.

    Paths ending in '.xsd' are checked with the XSD meta-schema using
    ``path='*'``; other paths go through ``xmlschema.validate`` with
    ``path=None``.
    """
    is_xsd = source.endswith('.xsd')
    validator = xmlschema.XMLSchema.meta_schema if is_xsd else xmlschema
    path = '*' if is_xsd else None

    resource = xmlschema.XMLResource(source, lazy=True)
    return validator.validate(resource, path=path)
예제 #5
0
def lazy_validate(source, repeat=1):
    """Validate *source* through a fresh lazy XMLResource, *repeat* times.

    Paths ending in '.xsd' are checked with the XSD meta-schema using
    ``path='*'``; other paths go through ``xmlschema.validate`` with
    ``path=None``. Nothing is returned.
    """
    if not source.endswith('.xsd'):
        validator, path = xmlschema, None
    else:
        validator, path = xmlschema.XMLSchema.meta_schema, '*'

    for _ in range(repeat):
        # A new resource is built on every pass.
        resource = xmlschema.XMLResource(source, lazy=True)
        validator.validate(resource, path=path)
예제 #6
0
 def convert_to_text(xml_file, schema=False):
     """Return the content of *xml_file* serialized as a string.

     With ``schema`` truthy the file is loaded as an ``xmlschema.XMLResource``
     and serialized with two-space indentation; otherwise it is parsed with
     ``et.parse`` and serialized as UTF-8 text.
     """
     if not schema:
         parsed = et.parse(xml_file)
         return et.tostring(parsed, encoding='utf-8').decode('utf-8')

     resource = xmlschema.XMLResource(xml_file)
     return resource.tostring(indent='  ', spaces_for_tab=2)
예제 #7
0
    def to_xml_str(self) -> str:
        """Serialize the instance to an XML string without the default xmlns.

        The dictionary from ``self.to_dict_xml()`` is dumped to JSON, encoded
        to an element tree against the logging schema, and rendered as text.
        The ``xmlns="..."`` declaration for the namespace that the schema file
        maps to the empty prefix is then removed from the text.
        """
        schema = xmlschema.XMLSchema(PATH_TO_XML_SCHEMA_FOR_LOGGING)
        payload = json.dumps(self.to_dict_xml(), indent=2)
        tree = xmlschema.from_json(payload, schema)
        rendered = xmlschema.etree_tostring(tree)

        schema_resource = xmlschema.XMLResource(PATH_TO_XML_SCHEMA_FOR_LOGGING)
        default_namespace = schema_resource.get_namespaces()['']
        return rendered.replace('xmlns="%s"' % default_namespace, '')
예제 #8
0
 def _parse(self, answer):
     """Decode an answer document into a ``{QuestionIdentifier: value}`` dict.

     *answer* is wrapped in an ``xmlschema.XMLResource`` and decoded with
     ``self.answer_schema``. For each item of the decoded 'Answer' list the
     value is the 'FreeText' entry when present, otherwise a copy of the
     item without its 'QuestionIdentifier' key.
     """
     decoded = self.answer_schema.to_dict(xmlschema.XMLResource(answer))
     assert 'Answer' in decoded
     entries = decoded['Answer']
     assert isinstance(entries, list)

     by_question = {}
     for entry in entries:
         assert 'QuestionIdentifier' in entry
         if 'FreeText' in entry:
             value = entry['FreeText']
         else:
             # The identifier is already the key, so drop it from the value.
             value = dict(entry)
             value.pop('QuestionIdentifier')
         by_question[entry['QuestionIdentifier']] = value
     return by_question
예제 #9
0
    def from_xml(self, source, validation='strict', **kwargs):
        """
        Load XML data from a file path or an XML string and validate it
        against ``self.schema``.

        Nothing is returned: the parsed root, the collected errors, the file
        name, the format and the namespace map are stored on the instance as
        ``root``, ``errors``, ``filename``, ``format`` and ``_namespaces``.

        :param source: a filepath to an XML file or a string containing XML data.
            A string with no newline that does not start with '<' (after
            stripping) is treated as a filepath.
        :param validation: validation mode. 'strict' calls
            ``self.schema.validate()`` (expected to raise on invalid data),
            'lax' collects the items yielded by ``self.schema.iter_errors()``,
            any other value (e.g. 'skip') performs no validation.
        :param kwargs: other options for creating the :class:`xmlschema.XMLResource`
            instance used for reading the XML data.
        :raises TypeError: if *source* is not a string.
        """
        if not isinstance(source, str):
            raise TypeError("the source argument must be a string!")
        # Heuristic: a single-line string not starting with '<' is a file path.
        elif '\n' not in source and not source.strip().startswith('<'):
            root = ElementTree.parse(source).getroot()
            filename = source.strip()
        else:
            # Otherwise the string is parsed directly as XML text.
            root = ElementTree.XML(source)
            filename = None

        resource = xmlschema.XMLResource(source, **kwargs)
        # Base names of the schema locations hinted for the root's namespace.
        schema_names = [
            os.path.basename(location)
            for ns, location in resource.iter_location_hints()
            if ns == resource.namespace
        ]
        # Warn only when there are hints, the schema has a URL, and that URL
        # matches none of the hinted schema file names.
        if not schema_names or self.schema.url is None or \
                any(self.schema.url.endswith(x) for x in schema_names):
            pass
        elif '\n' in source:
            # Multi-line XML text: leave the data itself out of the log message.
            logger.warning("XML data seems built for schema {!r}".format(
                schema_names[0]))
        else:
            logger.warning("XML data {!r} seems built for schema {!r}".format(
                source, schema_names[0]))

        errors = []
        if validation == 'strict':
            self.schema.validate(source)
        elif validation == 'lax':
            errors.extend(e for e in self.schema.iter_errors(source))

        self.root = root
        self.errors = errors
        self.filename = filename
        # The format is recorded only when the data came from a file.
        self.format = 'xml' if filename else None
        self._namespaces = resource.get_namespaces()
예제 #10
0
    def __call__(self, xosc: Path) -> bool:
        """Validate the file at *xosc* against ``self.schema``.

        Returns True when validation raises nothing (printing an "[OK]" line
        if ``self.verbose`` is set). Any exception is reported on stdout as
        an "[NG]" line followed by the error text, and False is returned.
        """
        path_text = str(xosc)

        try:
            self.schema.validate(xmlschema.XMLResource(path_text))
            if self.verbose:
                print("[OK] " + path_text)
        except Exception as error:
            print("[NG] " + path_text)
            print()
            print("Error: " + str(error))
            return False

        return True
예제 #11
0
    def __init__(self, source=None, schema=None):
        """
        Initialize the document, resolve the schema and optionally load data.

        :param source: optional XML data, passed to ``xmlschema.XMLResource``.
            If its namespace is the XSD namespace it is treated as a schema
            candidate and no XML data is loaded.
        :param schema: optional schema: an ``xmlschema.XMLSchemaBase`` instance
            or a value accepted by ``xmlschema.XMLSchema``.
        :raises XmlDocumentError: if both arguments are missing, or if no
            schema is given and none can be derived from *source*.
        """
        self.root = None
        self.filename = None
        self.format = None
        self.errors = []
        self._namespaces = {}

        if source is None and schema is None:
            raise XmlDocumentError("missing both initialization arguments!")
        elif source is not None:
            resource = xmlschema.XMLResource(source)
            if resource.namespace == XSD_NAMESPACE:
                # The source is itself an XSD: use it as the schema candidate
                # and load no XML data.
                alt_schema = source
                source = None
            else:
                # Use the first location hint for the root's namespace that
                # fetch_schema() resolves to a non-None value.
                # NOTE(review): fetch_schema is defined elsewhere; presumably it
                # maps a hinted location to a usable schema path - confirm.
                for ns, location in resource.iter_location_hints():
                    if ns != resource.namespace:
                        continue
                    location = self.fetch_schema(location)
                    if location is not None:
                        alt_schema = location
                        break
                else:
                    # No hint could be resolved.
                    alt_schema = None

            if schema is None:
                if alt_schema is None:
                    raise XmlDocumentError("missing schema for XML data!")
                schema = alt_schema
            elif alt_schema is None or not isinstance(schema, str):
                # Nothing to compare against, or schema is not a string: keep it.
                pass
            # A path-like schema string (single line, not starting with '<')
            # whose base name differs from the candidate's is replaced by the
            # candidate derived from the source.
            elif '\n' not in schema and not schema.strip().startswith('<') \
                    and os.path.basename(schema) != os.path.basename(alt_schema):
                schema = alt_schema

        if isinstance(schema, xmlschema.XMLSchemaBase):
            # Already a schema instance: use it directly.
            self.schema = schema
        elif not isinstance(schema, str) or '\n' in schema or \
                schema.strip().startswith('<'):
            # Non-string values and XML text are handed to XMLSchema unchanged.
            self.schema = xmlschema.XMLSchema(schema)
        else:
            # Path-like string: try fetch_schema() first, fall back to the
            # string itself when it returns a falsy value.
            schema = self.fetch_schema(schema) or schema
            self.schema = xmlschema.XMLSchema(schema)

        if source is not None:
            # 'lax' mode collects validation errors instead of calling validate().
            self.from_xml(source, validation='lax')
예제 #12
0
파일: documents.py 프로젝트: QEF/qeschema
    def __init__(self, source=None, schema=None):
        """
        Initialize the document, pick a schema and optionally load XML data.

        The schema is chosen in this order: an ``xmlschema.XMLSchemaBase``
        instance passed as *schema*; any other non-None *schema* value (a
        path-like string is first passed through ``self.fetch_schema``); a
        schema resolved from the location hints of *source*; and finally
        ``self.DEFAULT_SCHEMA``.

        :param source: optional XML data, an ``xmlschema.XMLResource`` or a
            value accepted by its constructor.
        :param schema: optional schema instance or schema source.
        :raises XmlDocumentError: if *source* is an XSD schema, or if no schema
            can be determined.
        """
        self.root = None
        self.filename = None
        self.format = None
        self.errors = []
        self._namespaces = {}

        source_schema = None
        if source is not None:
            if not isinstance(source, xmlschema.XMLResource):
                source = xmlschema.XMLResource(source)

            if source.namespace == XSD_NAMESPACE:
                raise XmlDocumentError("source is an XSD schema")

            # Keep the first hint for the root's namespace that can be fetched;
            # if none can, source_schema ends up as None.
            for ns, location in source.iter_location_hints():
                if ns != source.namespace:
                    continue
                source_schema = self.fetch_schema(location)
                if source_schema is not None:
                    break

        if isinstance(schema, xmlschema.XMLSchemaBase):
            self.schema = schema
        else:
            if schema is not None:
                # A single-line string not starting with '<' is treated as a
                # location and resolved through fetch_schema() when possible.
                path_like = (
                    isinstance(schema, str)
                    and '\n' not in schema
                    and not schema.lstrip().startswith('<')
                )
                if path_like:
                    schema_source = self.fetch_schema(schema) or schema
                else:
                    schema_source = schema
            elif source_schema is not None:
                schema_source = source_schema
            elif self.DEFAULT_SCHEMA is not None:
                schema_source = self.fetch_schema(self.DEFAULT_SCHEMA)
            else:
                raise XmlDocumentError("missing schema for XML data!")
            self.schema = xmlschema.XMLSchema(schema_source)

        if source is not None:
            self.from_xml(source, validation='lax')
예제 #13
0
    def test_document_validate_api_lazy(self):
        """Check depth-limited decoding and lazy validation of the collection.

        With the first two children of the root emptied, a full decode must
        raise XMLSchemaValidationError, while a strict ``iter_decode`` limited
        to ``max_depth=1`` must run to completion. Lazy validation of the
        untouched file must return None.
        """
        resource = xmlschema.XMLResource(self.col_xml_file, lazy=False)
        nsmap = resource.get_namespaces()
        for index in (0, 1):
            resource.root[index].clear()  # Drop internal elements
        collection = self.col_schema.elements['collection']

        with self.assertRaises(XMLSchemaValidationError):
            collection.decode(resource.root, namespaces=nsmap)

        shallow_decode = collection.iter_decode(
            resource.root,
            'strict',
            namespaces=nsmap,
            source=resource,
            max_depth=1,
        )
        for chunk in shallow_decode:
            del chunk

        outcome = xmlschema.validate(self.col_xml_file, lazy=True)
        self.assertIsNone(outcome)
예제 #14
0
    def from_xml_str(self, xml_source: str):
        """Import a logging configuration from an XML file or XML text.

        The default namespace of the logging schema is injected into the
        ``<simulators`` tag before validation. Valid data is decoded and
        passed to ``self.from_dict_xml``; invalid data is passed to the
        schema's ``validate`` so that the validation error surfaces.

        Args:
            xml_source(str): File path or string content of XML file to import
        """
        schema = xmlschema.XMLSchema(PATH_TO_XML_SCHEMA_FOR_LOGGING)

        if not os.path.isfile(xml_source):
            xml_text = xml_source
        else:
            with open(xml_source, 'rt') as handle:
                xml_text = handle.read()

        schema_resource = xmlschema.XMLResource(PATH_TO_XML_SCHEMA_FOR_LOGGING)
        default_namespace = schema_resource.get_namespaces()['']
        xml_text = xml_text.replace(
            '<simulators', '<simulators xmlns="%s" ' % default_namespace)

        if not schema.is_valid(xml_text):
            # Re-run validation so the specific error is raised to the caller.
            schema.validate(xml_text)
            return
        self.from_dict_xml(schema.to_dict(xml_text))
def main(xsd_pth, xml_gz_pth, out_folder):
    """Split a gzipped XML file into one JSON file per decoded entry.

    The XML is read lazily and every child of the root (``path='*'``) is
    decoded with the schema at *xsd_pth*. Each entry is written to
    ``<out_folder>/<accession>.json``, where the accession is the first item
    of the entry's 'accession' value. Elements for which that lookup raises
    TypeError are logged and skipped.

    :param xsd_pth: location of the XSD schema used for decoding.
    :param xml_gz_pth: path of the gzip-compressed XML file.
    :param out_folder: folder receiving the JSON files; it is not created here.
    """
    logger.info(f'Load XML schema from {xsd_pth}')
    xs = xmlschema.XMLSchema(xsd_pth)

    logger.info(f'Read XML file {xml_gz_pth} ...')
    # Pre-bind the counter: the for loop never assigns it when the file yields
    # no elements, and the final log line would raise UnboundLocalError.
    i = 0
    with gzip.open(xml_gz_pth, 'rt') as f:
        r = xmlschema.XMLResource(f, lazy=True)
        xml_iter = xs.iter_decode(r, path='*')
        # The counter includes skipped elements, as in the progress messages.
        for i, entry_d in enumerate(xml_iter, start=1):
            try:
                uniprot_acc = entry_d['accession'][0]
            except TypeError:
                logger.warning(f'Skip an element not an UniProt entry: {entry_d}')
                continue

            out_pth = Path(out_folder, f'{uniprot_acc}.json')
            with out_pth.open('wb') as of:
                of.write(orjson.dumps(entry_d))

            if i % 1000 == 0:
                logger.info(f'... processed {i:,d} entries')

    logger.info(f'Total processed {i:,d} entries')
예제 #16
0
    if len(missing) > 0:
        msg='''Required parts of the spatial element are either missing, misspelled, 
        or not delimited properly. These elements could not be found: {}. The 
        invalid element that was submitted was:\n {}.'''.format(''.join(missing),xmldict['spatial'])
    else:
        msg=None
    return msg
    
# Validate every '.xml' file in xmlfolder against the schema and sort the
# file names into two buckets.
# problems: file name -> spatial_check() message or schema exception.
problems={}
# noproblems: names of files that pass both schema validation and spatial_check().
noproblems=[]
my_schema = xmls.XMLSchema(schemafile)

for file in os.listdir(xmlfolder):
    # Only files whose name ends in '.xml' are examined.
    if file[-4:]=='.xml':
        filepath=os.path.join(xmlfolder,file)
        my_xml = xmls.XMLResource(filepath)
        test=my_schema.is_valid(my_xml)
        if test==True:
            # Schema-valid: run the additional spatial element check, which
            # returns None when nothing is missing.
            msg=spatial_check(my_xml)
            if msg==None:
                noproblems.append(file)
            else:
                problems[file]=msg
        else:
            # Schema-invalid: validate again only to capture the exception
            # object describing the failure.
            try:
                my_schema.validate(my_xml)
            except xmls.XMLSchemaException as e:
                problems[file]=e
                continue
            
if len(problems)>0: