def test_type_error(self) -> None:
        # A value that is not ``bytes`` must make the parser raise ``TypeError``
        # carrying exactly one argument: the explanatory message.
        not_bytes = 1  # type: ignore
        expected_args = ("Value to be parsed as XML must be bytes.", )

        with self.assertRaises(TypeError) as raised:
            parse_untrusted_xml(not_bytes)

        self.assertSequenceEqual(raised.exception.args, expected_args)
    def test_attack_quadratic_blowup(self) -> None:
        # The "quadratic blowup entity expansion" sample file must be refused
        # with ``XmlFeatureForbidden``.
        attack_bytes = read_test_file_bytes(
            'test_data/xml/attacks/quadratic-blowup-entity-expansion.xml')
        expected_args = ("XML uses or contains a forbidden feature.", )

        with self.assertRaises(XmlFeatureForbidden) as raised:
            parse_untrusted_xml(attack_bytes)

        self.assertSequenceEqual(raised.exception.args, expected_args)
    def test_attack_external_entity_expansion_remote(self) -> None:
        # The "external entity expansion (remote)" sample file must be refused
        # with ``XmlFeatureForbidden``.
        attack_bytes = read_test_file_bytes(
            'test_data/xml/attacks/external-entity-expansion-remote.xml')
        expected_args = ("XML uses or contains a forbidden feature.", )

        with self.assertRaises(XmlFeatureForbidden) as raised:
            parse_untrusted_xml(attack_bytes)

        self.assertSequenceEqual(raised.exception.args, expected_args)
    def test_bytes_text(self) -> None:
        # Bytes that are plain text rather than XML must be reported as an
        # ``XmlSyntaxError`` with the position of the problem in the message.
        plain_text_bytes = b'not xml'  # type: ignore
        expected_args = (
            "XML syntax error. Start tag expected, '<' not found, line 1, column 1.",
        )

        with self.assertRaises(XmlSyntaxError) as raised:
            parse_untrusted_xml(plain_text_bytes)

        self.assertSequenceEqual(raised.exception.args, expected_args)
    def test_attack_billion_laughs_2(self) -> None:
        # The second "billion laughs" sample file must be rejected as an
        # ``XmlSyntaxError`` (entity reference loop).
        attack_bytes = read_test_file_bytes(
            'test_data/xml/attacks/billion-laughs-2.xml')
        expected_args = (
            "XML syntax error. Detected an entity reference loop, line 1, column 4.",
        )

        with self.assertRaises(XmlSyntaxError) as raised:
            parse_untrusted_xml(attack_bytes)

        self.assertSequenceEqual(raised.exception.args, expected_args)
    def test_clean_dte_xml_ok_3(self) -> None:
        # Flow under test: the sample document has a root tag without namespace and
        # fails schema validation; after ``clean_dte_xml`` it validates, both in
        # memory and after being written out and parsed again.
        original_bytes = self.dte_bad_xml_3_xml_bytes
        xml_doc = xml_utils.parse_untrusted_xml(original_bytes)

        # The root tag is the bare element name (no namespace prefix).
        self.assertEqual(xml_doc.getroottree().getroot().tag, 'DTE')

        expected_validation_error_args = (
            "Element 'DTE': No matching global declaration available for the validation root., "
            "line 2",
        )
        with self.assertRaises(xml_utils.XmlSchemaDocValidationError) as raised:
            validate_dte_xml(xml_doc)
        self.assertSequenceEqual(raised.exception.args, expected_validation_error_args)

        xml_doc_cleaned, modified = clean_dte_xml(
            xml_doc,
            set_missing_xmlns=True,
            remove_doc_personalizado=True,
        )
        self.assertTrue(modified)

        # This will not raise.
        validate_dte_xml(xml_doc_cleaned)

        # Serialize the cleaned document to bytes.
        with io.BytesIO() as buffer:
            xml_utils.write_xml_doc(xml_doc_cleaned, buffer)
            rewritten_bytes = buffer.getvalue()

        # The serialized form must still validate once parsed again.
        validate_dte_xml(xml_utils.parse_untrusted_xml(rewritten_bytes))

        # The only expected differences are the XML declaration and the root 'xmlns'.
        expected_diff_lines = (
            b'--- \n',
            b'+++ \n',
            b'@@ -1,5 +1,5 @@\n',
            b'-<?xml version="1.0" encoding="windows-1252"?>',
            b'-<DTE version="1.0">',
            b"+<?xml version='1.0' encoding='WINDOWS-1252'?>",
            b'+<DTE xmlns="http://www.sii.cl/SiiDte" version="1.0">',
            b' <Documento ID="DTE-33-2336600">',
            b' <Encabezado>',
            b' <IdDoc>',
        )
        actual_diff_lines = list(
            difflib.diff_bytes(
                dfunc=difflib.unified_diff,
                a=original_bytes.splitlines(),
                b=rewritten_bytes.splitlines()))

        self.assertSequenceEqual(actual_diff_lines, expected_diff_lines)
    def test_validate_dte_xml_ok_dte_2(self) -> None:
        # The clean sample document #2 must pass validation (a raise fails the test)
        # and its root tag must be the namespaced 'DTE'.
        xml_doc = xml_utils.parse_untrusted_xml(self.dte_clean_xml_2_xml_bytes)
        validate_dte_xml(xml_doc)

        root_tag = xml_doc.getroottree().getroot().tag
        expected_root_tag = '{%s}DTE' % DTE_XMLNS
        self.assertEqual(root_tag, expected_root_tag)
    def test_parse_dte_xml_ok_2(self) -> None:
        # Parsing the clean sample document #2 must yield exactly these field values.
        xml_doc = xml_utils.parse_untrusted_xml(self.dte_clean_xml_2_xml_bytes)

        actual_fields = dict(parse_dte_xml(xml_doc).as_dict())

        expected_firma_documento_dt = tz_utils.convert_naive_dt_to_tz_aware(
            dt=datetime(2019, 3, 28, 13, 59, 52),
            tz=DteDataL2.DATETIME_FIELDS_TZ)
        expected_fields = {
            'emisor_rut': Rut('76399752-9'),
            'tipo_dte': cl_sii.dte.constants.TipoDteEnum.FACTURA_ELECTRONICA,
            'folio': 25568,
            'fecha_emision_date': date(2019, 3, 29),
            'receptor_rut': Rut('96874030-K'),
            'monto_total': 230992,
            'emisor_razon_social': 'COMERCIALIZADORA INNOVA MOBEL SPA',
            'receptor_razon_social': 'EMPRESAS LA POLAR S.A.',
            'fecha_vencimiento_date': None,
            'firma_documento_dt': expected_firma_documento_dt,
            'signature_value': self._TEST_DTE_2_SIGNATURE_VALUE,
            'signature_x509_cert_der': self.dte_clean_xml_2_cert_der,
            'emisor_giro': 'COMERCIALIZACION DE PRODUCTOS PARA EL HOGAR',
            'emisor_email': '*****@*****.**',
            'receptor_email': None,
        }

        self.assertDictEqual(actual_fields, expected_fields)
    def test_parse_dte_xml_ok_1b(self) -> None:
        # Parsing the clean sample document "1b" must yield exactly these field values.
        # Note that the expected signature value and certificate are those of sample #1.
        xml_doc = xml_utils.parse_untrusted_xml(self.dte_clean_xml_1b_xml_bytes)

        actual_fields = dict(parse_dte_xml(xml_doc).as_dict())

        expected_firma_documento_dt = tz_utils.convert_naive_dt_to_tz_aware(
            dt=datetime(2019, 4, 1, 1, 36, 40),
            tz=DteDataL2.DATETIME_FIELDS_TZ)
        expected_fields = {
            'emisor_rut': Rut('76354771-K'),
            'tipo_dte': cl_sii.dte.constants.TipoDteEnum.FACTURA_ELECTRONICA,
            'folio': 170,
            'fecha_emision_date': date(2019, 4, 1),
            'receptor_rut': Rut('96790240-3'),
            'monto_total': 2996301,
            'emisor_razon_social': 'INGENIERIA ENACON SPA',
            'receptor_razon_social': 'MINERA LOS PELAMBRES',
            'fecha_vencimiento_date': None,
            'firma_documento_dt': expected_firma_documento_dt,
            'signature_value': self._TEST_DTE_1_SIGNATURE_VALUE,
            'signature_x509_cert_der': self.dte_clean_xml_1_cert_der,
            'emisor_giro': 'Ingenieria y Construccion',
            'emisor_email': None,
            'receptor_email': None,
        }

        self.assertDictEqual(actual_fields, expected_fields)
示例#10
0
def _set_dte_xml_missing_xmlns(xml_doc: XmlElement) -> Tuple[XmlElement, bool]:
    """
    Make sure the root 'DTE' element of ``xml_doc`` is in the DTE XML namespace.

    If the root tag is already the namespaced one, the document is returned as is.
    If the root tag is the bare name, an 'xmlns' attribute is set on the root
    element (this mutates the given document), and the document is serialized and
    parsed again; the re-parsed document is the one returned.

    :returns: tuple ``(xml_doc, modified)``, where ``modified`` tells whether
        the namespace had to be set
    :raises Exception: if the root tag is neither the bare nor the namespaced name

    """
    # source: name of the XML element without namespace.
    #   cl_sii/data/ref/factura_electronica/schemas-xml/DTE_v10.xsd#L22 (f57a326)
    #   cl_sii/data/ref/factura_electronica/schemas-xml/EnvioDTE_v10.xsd#L92 (f57a326)
    em_tag_simple = 'DTE'

    em_namespace = DTE_XMLNS
    em_tag_namespaced = '{%s}%s' % (em_namespace, em_tag_simple)

    # Tag of 'DTE' should be ...
    assert em_tag_namespaced == '{http://www.sii.cl/SiiDte}DTE'

    root_em = xml_doc.getroottree().getroot()
    root_em_tag = root_em.tag

    # Nothing to do: the root element is already namespaced.
    if root_em_tag == em_tag_namespaced:
        return xml_doc, False

    # Anything other than the bare name is not a document this function can fix.
    if root_em_tag != em_tag_simple:
        exc_msg = "XML root element tag does not match the expected simple or namespaced name."
        raise Exception(exc_msg, em_tag_simple, em_tag_namespaced, root_em_tag)

    # Set the namespace declaration as an attribute, then round-trip through bytes
    # and use the re-parsed document as the result.
    root_em.set('xmlns', em_namespace)
    with io.BytesIO() as buffer:
        xml_utils.write_xml_doc(xml_doc, buffer)
        new_xml_doc_bytes = buffer.getvalue()

    return xml_utils.parse_untrusted_xml(new_xml_doc_bytes), True
def clean_dte_xml_file(input_file_path: str,
                       output_file_path: str) -> Iterable[bytes]:
    """
    Clean the DTE XML file at ``input_file_path`` and save it to ``output_file_path``.

    The input file is parsed and cleaned with the options ``set_missing_xmlns``
    and ``remove_doc_personalizado`` enabled, and the result is validated.
    The output file is written only after validation; any exception raised by
    parsing, cleaning or validation propagates to the caller.

    :returns: the lines (bytes) of a unified diff between the content of the
        input file and the content of the output file as read back from disk

    """
    with open(input_file_path, mode='rb') as input_file:
        original_bytes = input_file.read()

    parsed_doc = xml_utils.parse_untrusted_xml(original_bytes)
    cleaned_doc, _modified = cl_sii.dte.parse.clean_dte_xml(
        parsed_doc,
        set_missing_xmlns=True,
        remove_doc_personalizado=True,
    )

    # TODO: add exception with a nice message for the caller.
    cl_sii.dte.parse.validate_dte_xml(cleaned_doc)

    with open(output_file_path, 'w+b') as output_file:
        xml_utils.write_xml_doc(cleaned_doc, output_file)

    # Read back what was actually written, so the diff reflects the file on disk.
    with open(output_file_path, mode='rb') as output_file:
        rewritten_bytes = output_file.read()

    # note: another way to compute the difference in a similar format is
    #   `diff -Naur $input_file_path $output_file_path`
    return difflib.diff_bytes(
        dfunc=difflib.unified_diff,
        a=original_bytes.splitlines(),
        b=rewritten_bytes.splitlines())
示例#12
0
    def test_parse_dte_xml_ok_3(self) -> None:
        # Parsing the clean sample document #3 must yield exactly these field values.
        xml_doc = xml_utils.parse_untrusted_xml(self.dte_clean_xml_3_xml_bytes)

        actual_fields = dict(parse_dte_xml(xml_doc).as_dict())

        expected_firma_documento_dt = tz_utils.convert_naive_dt_to_tz_aware(
            dt=datetime(2019, 8, 9, 9, 41, 9),
            tz=DteDataL2.DATETIME_FIELDS_TZ)
        expected_fields = {
            'emisor_rut': Rut('60910000-1'),
            'tipo_dte': cl_sii.dte.constants.TipoDteEnum.FACTURA_ELECTRONICA,
            'folio': 2336600,
            'fecha_emision_date': date(2019, 8, 8),
            'receptor_rut': Rut('76555835-2'),
            'monto_total': 10642,
            'emisor_razon_social': 'Universidad de Chile',
            'receptor_razon_social': 'FYNPAL SPA',
            'fecha_vencimiento_date': date(2019, 8, 8),
            'firma_documento_dt': expected_firma_documento_dt,
            'signature_value': self._TEST_DTE_3_SIGNATURE_VALUE,
            'signature_x509_cert_der': self.dte_clean_xml_3_cert_der,
            # The run of spaces inside this value is part of the expected data.
            'emisor_giro': (
                'Corporación Educacional y Servicios                 Profesionales'
            ),
            'emisor_email': None,
            'receptor_email': None,
        }

        self.assertDictEqual(actual_fields, expected_fields)
示例#13
0
    def test_parse_dte_xml_fail_3(self) -> None:
        # The "bad" sample document #3 can be parsed as XML, but ``parse_dte_xml``
        # must reject it with a ``ValueError``.
        xml_doc = xml_utils.parse_untrusted_xml(self.dte_bad_xml_3_xml_bytes)
        expected_args = ("Top level XML element 'Document' is required.", )

        with self.assertRaises(ValueError) as raised:
            parse_dte_xml(xml_doc)

        self.assertSequenceEqual(raised.exception.args, expected_args)
 def test_parse_untrusted_xml_valid(self) -> None:
     # Well-formed XML bytes must parse into an ``XmlElement`` that serializes
     # back (without pretty-printing) to exactly the input bytes.
     xml_bytes = (b'<root>\n'
                  b'   <element key="value">text</element>\n'
                  b'   <element>text</element>tail\n'
                  b'   <empty-element/>\n'
                  b'</root>')

     parsed = parse_untrusted_xml(xml_bytes)
     self.assertIsInstance(parsed, XmlElement)

     serialized = lxml.etree.tostring(parsed, pretty_print=False)
     self.assertEqual(serialized, xml_bytes)
示例#15
0
    def test_validate_dte_xml_fail_dte_3(self) -> None:
        # The "bad" sample document #3 has a root tag without namespace, and schema
        # validation must fail with ``XmlSchemaDocValidationError``.
        xml_doc = xml_utils.parse_untrusted_xml(self.dte_bad_xml_3_xml_bytes)

        root_tag = xml_doc.getroottree().getroot().tag
        self.assertEqual(root_tag, 'DTE')

        expected_args = (
            "Element 'DTE': No matching global declaration available for the validation root., "
            "line 2",
        )
        with self.assertRaises(xml_utils.XmlSchemaDocValidationError) as raised:
            validate_dte_xml(xml_doc)

        self.assertSequenceEqual(raised.exception.args, expected_args)
    def test_clean_dte_xml_ok_2(self) -> None:
        # Flow under test: the sample document has a root tag without namespace and
        # fails schema validation; after ``clean_dte_xml`` it validates, both in
        # memory and after being written out and parsed again.
        original_bytes = self.dte_bad_xml_2_xml_bytes
        xml_doc = xml_utils.parse_untrusted_xml(original_bytes)

        # The root tag is the bare element name (no namespace prefix).
        self.assertEqual(xml_doc.getroottree().getroot().tag, 'DTE')

        expected_validation_error_args = (
            "Element 'DTE': No matching global declaration available for the validation root., "
            "line 2",
        )
        with self.assertRaises(xml_utils.XmlSchemaDocValidationError) as raised:
            validate_dte_xml(xml_doc)
        self.assertSequenceEqual(raised.exception.args, expected_validation_error_args)

        xml_doc_cleaned, modified = clean_dte_xml(
            xml_doc,
            set_missing_xmlns=True,
            remove_doc_personalizado=True,
        )
        self.assertTrue(modified)

        # This will not raise.
        validate_dte_xml(xml_doc_cleaned)

        # Serialize the cleaned document to bytes.
        with io.BytesIO() as buffer:
            xml_utils.write_xml_doc(xml_doc_cleaned, buffer)
            rewritten_bytes = buffer.getvalue()

        # The serialized form must still validate once parsed again.
        validate_dte_xml(xml_utils.parse_untrusted_xml(rewritten_bytes))

        # Expected differences: the XML declaration, the root 'xmlns', and the
        # serialization of empty elements ('<x />' becomes '<x/>').
        expected_diff_lines = (
            b'--- \n',
            b'+++ \n',
            b'@@ -1,5 +1,5 @@\n',
            b'-<?xml version="1.0" encoding="ISO-8859-1"?>',
            b'-<DTE version="1.0">',
            b"+<?xml version='1.0' encoding='ISO-8859-1'?>",
            b'+<DTE xmlns="http://www.sii.cl/SiiDte" version="1.0">',
            b'   <!-- O Win32 Chrome 73 central VERSION: v20190227 -->',
            b' <Documento ID="MiPE76399752-6048">',
            b'     <Encabezado>',
            b'@@ -64,13 +64,13 @@\n',
            b'   </Documento>',
            b' <Signature xmlns="http://www.w3.org/2000/09/xmldsig#">',
            b' <SignedInfo>',
            b'-<CanonicalizationMethod Algorithm="http://www.w3.org/TR/2001/REC-xml-c14n-20010315" />',  # noqa: E501
            b'-<SignatureMethod Algorithm="http://www.w3.org/2000/09/xmldsig#rsa-sha1" />',
            b'+<CanonicalizationMethod Algorithm="http://www.w3.org/TR/2001/REC-xml-c14n-20010315"/>',  # noqa: E501
            b'+<SignatureMethod Algorithm="http://www.w3.org/2000/09/xmldsig#rsa-sha1"/>',
            b' <Reference URI="#MiPE76399752-6048">',
            b' <Transforms>',
            b'-<Transform Algorithm="http://www.w3.org/TR/2001/REC-xml-c14n-20010315" />',
            b'+<Transform Algorithm="http://www.w3.org/TR/2001/REC-xml-c14n-20010315"/>',
            b' </Transforms>',
            b'-<DigestMethod Algorithm="http://www.w3.org/2000/09/xmldsig#sha1" />',
            b'+<DigestMethod Algorithm="http://www.w3.org/2000/09/xmldsig#sha1"/>',
            b' <DigestValue>tk/D3mfO/KtdWyFXYZHe7dtYijg=</DigestValue>',
            b' </Reference>',
            b' </SignedInfo>',
        )
        actual_diff_lines = list(
            difflib.diff_bytes(
                dfunc=difflib.unified_diff,
                a=original_bytes.splitlines(),
                b=rewritten_bytes.splitlines()))

        self.assertSequenceEqual(actual_diff_lines, expected_diff_lines)