def clean_dte_xml_file(input_file_path: str,
                       output_file_path: str) -> Iterable[bytes]:
    """
    Clean the DTE XML file at ``input_file_path`` and write it to ``output_file_path``.

    The input file is read as bytes, parsed, cleaned with both the
    ``set_missing_xmlns`` and ``remove_doc_personalizado`` options enabled,
    validated, and finally serialized to the output path.

    Any exception raised while parsing, cleaning or validating propagates to
    the caller unchanged; in that case the output file is not written.

    :param input_file_path: path of the XML file to read
    :param output_file_path: path the cleaned XML document is written to
    :return: the lines (bytes) of a unified diff between the original file
        content and the rewritten file content, as produced by
        ``difflib.diff_bytes``
    """
    with open(input_file_path, 'rb') as input_file:
        original_bytes = input_file.read()

    parsed_doc = xml_utils.parse_untrusted_xml(original_bytes)

    # The "was it modified?" flag is not needed here: the caller gets the diff.
    cleaned_doc, _modified = cl_sii.dte.parse.clean_dte_xml(
        parsed_doc,
        set_missing_xmlns=True,
        remove_doc_personalizado=True,
    )

    # TODO: add exception with a nice message for the caller.
    cl_sii.dte.parse.validate_dte_xml(cleaned_doc)

    with open(output_file_path, 'w+b') as output_file:
        xml_utils.write_xml_doc(cleaned_doc, output_file)

    # Read back what actually landed on disk so the diff reflects the real output.
    with open(output_file_path, 'rb') as output_file:
        rewritten_bytes = output_file.read()

    # note: another way to compute the difference in a similar format is
    #   `diff -Naur $input_file_path $output_file_path`
    original_lines = original_bytes.splitlines()
    rewritten_lines = rewritten_bytes.splitlines()
    return difflib.diff_bytes(difflib.unified_diff, original_lines, rewritten_lines)
Пример #2
0
def _set_dte_xml_missing_xmlns(xml_doc: XmlElement) -> Tuple[XmlElement, bool]:
    """
    Make sure the root 'DTE' element of ``xml_doc`` carries the DTE XML namespace.

    - If the root tag is already the namespaced one, ``xml_doc`` is returned as is.
    - If the root tag is the plain (non-namespaced) 'DTE', an ``xmlns`` attribute
      is set on the root element, the document is serialized and parsed again,
      and the newly parsed document is returned.
    - Any other root tag is rejected.

    :param xml_doc: the XML document (element) to inspect
    :return: a 2-tuple ``(xml_doc, modified)`` where ``modified`` tells whether
        the namespace had to be added
    :raises Exception: if the root element tag is neither the plain nor the
        namespaced 'DTE' tag
    """
    # source: name of the XML element without namespace.
    #   cl_sii/data/ref/factura_electronica/schemas-xml/DTE_v10.xsd#L22 (f57a326)
    #   cl_sii/data/ref/factura_electronica/schemas-xml/EnvioDTE_v10.xsd#L92 (f57a326)
    plain_tag = 'DTE'

    namespace = DTE_XMLNS
    qualified_tag = '{%s}%s' % (namespace, plain_tag)

    # Tag of 'DTE' should be ...
    assert qualified_tag == '{http://www.sii.cl/SiiDte}DTE'

    root = xml_doc.getroottree().getroot()
    actual_tag = root.tag

    # Nothing to do: the namespace is already there.
    if actual_tag == qualified_tag:
        return xml_doc, False

    if actual_tag != plain_tag:
        exc_msg = "XML root element tag does not match the expected simple or namespaced name."
        raise Exception(exc_msg, plain_tag, qualified_tag, actual_tag)

    # Add the attribute, then round-trip through bytes.
    # NOTE(review): presumably the re-parse is what makes the parser treat the
    #   new attribute as a real namespace declaration -- confirm.
    root.set('xmlns', namespace)
    buffer = io.BytesIO()
    xml_utils.write_xml_doc(xml_doc, buffer)
    reparsed_doc = xml_utils.parse_untrusted_xml(buffer.getvalue())

    return reparsed_doc, True
Пример #3
0
    def test_clean_dte_xml_ok_3(self) -> None:
        """
        Clean the "bad XML 3" fixture and check the resulting diff.

        The raw document has a non-namespaced 'DTE' root and fails schema
        validation. After cleaning it must validate, survive a
        serialize/parse round trip, and differ from the input only in the
        XML declaration and the root element's ``xmlns`` attribute.
        """
        original_bytes = self.dte_bad_xml_3_xml_bytes
        parsed_doc = xml_utils.parse_untrusted_xml(original_bytes)

        root_tag = parsed_doc.getroottree().getroot().tag
        self.assertEqual(root_tag, 'DTE')

        # Before cleaning, validation fails because the root has no namespace.
        expected_exc_args = (
            "Element 'DTE': No matching global declaration available for the validation root., "
            "line 2",
        )
        with self.assertRaises(xml_utils.XmlSchemaDocValidationError) as cm:
            validate_dte_xml(parsed_doc)
        self.assertSequenceEqual(cm.exception.args, expected_exc_args)

        cleaned_doc, was_modified = clean_dte_xml(
            parsed_doc,
            set_missing_xmlns=True,
            remove_doc_personalizado=True,
        )
        self.assertTrue(was_modified)

        # This will not raise.
        validate_dte_xml(cleaned_doc)

        buffer = io.BytesIO()
        xml_utils.write_xml_doc(cleaned_doc, buffer)
        rewritten_bytes = buffer.getvalue()
        del buffer

        # The serialized output must itself parse and validate.
        reparsed_doc = xml_utils.parse_untrusted_xml(rewritten_bytes)
        validate_dte_xml(reparsed_doc)

        expected_diff_lines = (
            b'--- \n',
            b'+++ \n',
            b'@@ -1,5 +1,5 @@\n',
            b'-<?xml version="1.0" encoding="windows-1252"?>',
            b'-<DTE version="1.0">',
            b"+<?xml version='1.0' encoding='WINDOWS-1252'?>",
            b'+<DTE xmlns="http://www.sii.cl/SiiDte" version="1.0">',
            b' <Documento ID="DTE-33-2336600">',
            b' <Encabezado>',
            b' <IdDoc>',
        )

        actual_diff_lines = list(
            difflib.diff_bytes(
                difflib.unified_diff,
                original_bytes.splitlines(),
                rewritten_bytes.splitlines(),
            )
        )
        self.assertSequenceEqual(actual_diff_lines, expected_diff_lines)
    def test_clean_dte_xml_ok_2(self) -> None:
        """
        Clean the "bad XML 2" fixture and check the resulting diff.

        The raw document has a non-namespaced 'DTE' root and fails schema
        validation. After cleaning it must validate and survive a
        serialize/parse round trip. The expected diff covers the XML
        declaration, the root element's ``xmlns`` attribute, and the
        whitespace before ``/>`` in several self-closing signature elements.
        """
        original_bytes = self.dte_bad_xml_2_xml_bytes
        parsed_doc = xml_utils.parse_untrusted_xml(original_bytes)

        root_tag = parsed_doc.getroottree().getroot().tag
        self.assertEqual(root_tag, 'DTE')

        # Before cleaning, validation fails because the root has no namespace.
        expected_exc_args = (
            "Element 'DTE': No matching global declaration available for the validation root., "
            "line 2",
        )
        with self.assertRaises(xml_utils.XmlSchemaDocValidationError) as cm:
            validate_dte_xml(parsed_doc)
        self.assertSequenceEqual(cm.exception.args, expected_exc_args)

        cleaned_doc, was_modified = clean_dte_xml(
            parsed_doc,
            set_missing_xmlns=True,
            remove_doc_personalizado=True,
        )
        self.assertTrue(was_modified)

        # This will not raise.
        validate_dte_xml(cleaned_doc)

        buffer = io.BytesIO()
        xml_utils.write_xml_doc(cleaned_doc, buffer)
        rewritten_bytes = buffer.getvalue()
        del buffer

        # The serialized output must itself parse and validate.
        reparsed_doc = xml_utils.parse_untrusted_xml(rewritten_bytes)
        validate_dte_xml(reparsed_doc)

        expected_diff_lines = (
            b'--- \n',
            b'+++ \n',
            b'@@ -1,5 +1,5 @@\n',
            b'-<?xml version="1.0" encoding="ISO-8859-1"?>',
            b'-<DTE version="1.0">',
            b"+<?xml version='1.0' encoding='ISO-8859-1'?>",
            b'+<DTE xmlns="http://www.sii.cl/SiiDte" version="1.0">',
            b'   <!-- O Win32 Chrome 73 central VERSION: v20190227 -->',
            b' <Documento ID="MiPE76399752-6048">',
            b'     <Encabezado>',
            b'@@ -64,13 +64,13 @@\n',
            b'   </Documento>',
            b' <Signature xmlns="http://www.w3.org/2000/09/xmldsig#">',
            b' <SignedInfo>',
            b'-<CanonicalizationMethod Algorithm="http://www.w3.org/TR/2001/REC-xml-c14n-20010315" />',  # noqa: E501
            b'-<SignatureMethod Algorithm="http://www.w3.org/2000/09/xmldsig#rsa-sha1" />',
            b'+<CanonicalizationMethod Algorithm="http://www.w3.org/TR/2001/REC-xml-c14n-20010315"/>',  # noqa: E501
            b'+<SignatureMethod Algorithm="http://www.w3.org/2000/09/xmldsig#rsa-sha1"/>',
            b' <Reference URI="#MiPE76399752-6048">',
            b' <Transforms>',
            b'-<Transform Algorithm="http://www.w3.org/TR/2001/REC-xml-c14n-20010315" />',
            b'+<Transform Algorithm="http://www.w3.org/TR/2001/REC-xml-c14n-20010315"/>',
            b' </Transforms>',
            b'-<DigestMethod Algorithm="http://www.w3.org/2000/09/xmldsig#sha1" />',
            b'+<DigestMethod Algorithm="http://www.w3.org/2000/09/xmldsig#sha1"/>',
            b' <DigestValue>tk/D3mfO/KtdWyFXYZHe7dtYijg=</DigestValue>',
            b' </Reference>',
            b' </SignedInfo>',
        )

        actual_diff_lines = list(
            difflib.diff_bytes(
                difflib.unified_diff,
                original_bytes.splitlines(),
                rewritten_bytes.splitlines(),
            )
        )
        self.assertSequenceEqual(actual_diff_lines, expected_diff_lines)