Exemplo n.º 1
0
    def test_complete_mobile_v0(self):
        """Detection of phone number

        Builds a ``RegexNer`` with a ``MOBILE`` proximity rule (keyword
        ``movil``, ``left_span_len`` of 20, ``right_span_len`` of 0) and
        checks that ``regex_detection`` reports a ``MOBILE`` entry whose
        first match cleans up to ``780000000``.

        The first line of this docstring is what ``shortDescription()``
        injects into the failure messages below.
        """
        sample = "móvil: ESP 780000000"

        mobile_rule = {
            "left_span_len": 20,
            "right_span_len": 0,
            "word_list": ["movil"],
        }
        detector = ner_regex.RegexNer(
            regexp_config_dict={"MOBILE": mobile_rule})

        detected = detector.regex_detection(sample, full_text=sample)

        # The entity type must be present before its matches are inspected.
        self.assertTrue(
            "MOBILE" in detected,
            "{} {} {}".format(
                self.shortDescription(), MSG_MOBILE_NOT_DETECTED, detected))

        # Only the first reported match is compared.
        first_match = detected["MOBILE"][0]
        self.assertEqual(
            clean_text(first_match[0]),
            "780000000",
            MSG_DETECTED.format(
                self.shortDescription(),
                MSG_MOBILE_WRONG_DETECTED,
                first_match))
Exemplo n.º 2
0
    def test_is_not_iban_V0(self):
        """Test IBAN is not detected

        Runs the ``strict`` rules over a sentence containing an account
        number with one extra 4-digit group and compares the match labelled
        ``BROAD_REG_IBAN_APPROX_V1`` with the first 24 characters of that
        number.

        NOTE(review): despite the name and summary line, the assertions
        require a ``FINANCIAL_DATA`` detection — confirm the intended
        wording.

        If no match carries the rule name, the index stays at -1 and the
        last ``FINANCIAL_DATA`` match is the one compared.
        """
        sample = ("This is the IBAN of the account ES91 2100 0418 " +
                  "4502 0005 1332 4576 .")
        detector = ner_regex.RegexNer()

        detected = detector._detect_regexp(sample, "strict")

        self.assertTrue(
            "FINANCIAL_DATA" in detected,
            "{} {} {}".format(
                self.shortDescription(), MSG_IBAN_NOT_DETECTED, detected))

        # Position of the first match attributed to the rule, else -1.
        matches = detected["FINANCIAL_DATA"]
        position = next(
            (pos for pos, match in enumerate(matches)
             if match[1] == "BROAD_REG_IBAN_APPROX_V1"),
            -1)
        chosen = matches[position]

        self.assertEqual(
            clean_text(chosen[0]),
            "ES9121000418450200051332",
            MSG_DETECTED.format(
                self.shortDescription(), MSG_IBAN_WRONG_DETECTED, chosen))
Exemplo n.º 3
0
    def test_complete_phone_number(self):
        """Detection of phone number

        Builds a ``RegexNer`` with a ``PHONE`` proximity rule (keyword
        ``tel.``, ``left_span_len`` of 20, ``right_span_len`` of 0) and
        checks that ``regex_detection`` reports a ``PHONE`` entry whose
        first match cleans up to ``980000007``.

        The first line of this docstring is what ``shortDescription()``
        injects into the failure messages below.
        """
        sample = "tel.: ESP 980000007."

        phone_rule = {
            "left_span_len": 20,
            "right_span_len": 0,
            "word_list": ["tel."],
        }
        detector = ner_regex.RegexNer(
            regexp_config_dict={"PHONE": phone_rule})
        detected = detector.regex_detection(sample, full_text=sample)

        # The entity type must be present before its matches are inspected.
        self.assertTrue(
            "PHONE" in detected,
            "{} {} {}".format(
                self.shortDescription(), MSG_PHONE_NOT_DETECTED, detected))

        # Only the first reported match is compared.
        first_match = detected["PHONE"][0]
        self.assertEqual(
            clean_text(first_match[0]),
            "980000007",
            MSG_DETECTED.format(
                self.shortDescription(),
                MSG_PHONE_WRONG_DETECTED,
                first_match))
Exemplo n.º 4
0
    def test_complete_iban_V1(self):
        """Test the detection of the IBAN account

        Builds a ``RegexNer`` with a ``FINANCIAL_DATA`` proximity rule
        (keyword ``iban``, ``left_span_len`` of 20, ``right_span_len`` of 0)
        and compares the match labelled ``BROAD_REG_IBAN_APPROX_V1`` with the
        account number stripped of its trailing ``P``.

        If no match carries the rule name, the index stays at -1 and the
        last ``FINANCIAL_DATA`` match is the one compared.
        """
        sample = "This is the IBAN of the account ES91 2100 4334471600021142P ."

        iban_rule = {
            "left_span_len": 20,
            "right_span_len": 0,
            "word_list": ["iban"],
        }
        detector = ner_regex.RegexNer(
            regexp_config_dict={"FINANCIAL_DATA": iban_rule})
        detected = detector.regex_detection(sample, full_text=sample)

        self.assertTrue(
            "FINANCIAL_DATA" in detected,
            "{} {} {}".format(
                self.shortDescription(), MSG_IBAN_NOT_DETECTED, detected))

        # Position of the first match attributed to the rule, else -1.
        matches = detected["FINANCIAL_DATA"]
        position = next(
            (pos for pos, match in enumerate(matches)
             if match[1] == "BROAD_REG_IBAN_APPROX_V1"),
            -1)
        chosen = matches[position]

        self.assertEqual(
            clean_text(chosen[0]),
            "ES9121004334471600021142",
            MSG_DETECTED.format(
                self.shortDescription(), MSG_IBAN_WRONG_DETECTED, chosen))
Exemplo n.º 5
0
    def test_3_broad_phone_number_v0(self):
        """Test the detection of a wrong phone number

        Runs the ``broad`` rules over a sentence in which the number is
        preceded by an extra ``45`` group and checks that the match labelled
        ``BROAD_REG_PHONE_NUMBER_GEN_V3`` cleans up to ``988888888``.

        The first line of this docstring is what ``shortDescription()``
        injects into the failure messages below.
        """
        test = "Mi teléfono es 45 988 888 888"
        ner = ner_regex.RegexNer()

        result = ner._detect_regexp(test, "broad")

        # NOTE(review): the other PHONE tests in this file pass
        # MSG_PHONE_NOT_DETECTED here — confirm MSG_PHONE_DETECTED is intended.
        self.assertTrue(
            "PHONE" in result, "{} {} {}".format(self.shortDescription(),
                                                 MSG_PHONE_DETECTED, result))

        # Find the match produced by the rule under test and fail explicitly
        # when there is none; leaving ``idx`` unbound would surface as an
        # UnboundLocalError below instead of a readable test failure.
        rule_name = "BROAD_REG_PHONE_NUMBER_GEN_V3"
        idx = next(
            (i for i, _regexp in enumerate(result["PHONE"])
             if _regexp[1] == rule_name),
            None)
        if idx is None:
            self.fail("{} {} {}".format(self.shortDescription(),
                                        MSG_PHONE_NOT_DETECTED,
                                        result["PHONE"]))

        self.assertEqual(
            clean_text(result["PHONE"][idx][0].strip()), "988888888",
            MSG_EXTRACTED.format(self.shortDescription(),
                                 MSG_PHONE_NOT_DETECTED, result["PHONE"][idx]))
Exemplo n.º 6
0
    def test_0_broad_phone_number_v0(self):
        """Test the detection of a phone number

        Runs the ``broad`` rules over a sentence containing ``988 888 888``
        and checks that the match labelled
        ``BROAD_REG_PHONE_NUMBER_APPROX_V3`` cleans up to ``988888888``.

        If no match carries the rule name, the index stays at -1 and the
        last ``PHONE`` match is the one compared.
        """
        test = "Mi teléfono es 988 888 888 "
        ner = ner_regex.RegexNer()

        result = ner._detect_regexp(test, "broad")

        self.assertTrue(
            "PHONE" in result,
            "{} {} {}".format(self.shortDescription(), MSG_PHONE_NOT_DETECTED,
                              result))

        idx = -1
        for i, _regexp in enumerate(result["PHONE"]):
            if _regexp[1] == "BROAD_REG_PHONE_NUMBER_APPROX_V3":
                idx = i
                break

        # The message must be formatted explicitly: a plain string literal
        # with ``{...}`` placeholders would be reported verbatim on failure.
        self.assertEqual(
            clean_text(result["PHONE"][idx][0].strip()), "988888888",
            "{} {}. Extracted {}".format(self.shortDescription(),
                                         MSG_PHONE_NOT_DETECTED,
                                         result["PHONE"][idx]))
Exemplo n.º 7
0
    def test_0_CP_MOBILE_NUMBER_V0(self):
        """Test the detection of a phone number

        Runs the ``broad`` rules over a sentence containing ``688 888 888``
        and checks that the ``MOBILE`` match labelled
        ``BROAD_REG_MOBILE_NUMBER_GEN_V3`` cleans up to ``688888888``.

        The first line of this docstring is what ``shortDescription()``
        injects into the failure messages below.
        """
        test = "Mi teléfono móvil es 688 888 888 "
        ner = ner_regex.RegexNer()

        result = ner._detect_regexp(test, "broad")

        self.assertTrue(
            "MOBILE" in result,
            "{} {} {}".format(self.shortDescription(), MSG_MOBILE_NOT_DETECTED,
                              result))

        # Find the match produced by the rule under test and fail explicitly
        # when there is none; leaving ``idx`` unbound would surface as an
        # UnboundLocalError below instead of a readable test failure.
        rule_name = "BROAD_REG_MOBILE_NUMBER_GEN_V3"
        idx = next(
            (i for i, _regexp in enumerate(result["MOBILE"])
             if _regexp[1] == rule_name),
            None)
        if idx is None:
            self.fail("{} {} {}".format(self.shortDescription(),
                                        MSG_MOBILE_NOT_DETECTED,
                                        result["MOBILE"]))

        self.assertEqual(
            clean_text(result["MOBILE"][idx][0].strip()), "688888888",
            MSG_EXTRACTED.format(self.shortDescription(),
                                 MSG_MOBILE_NOT_DETECTED,
                                 result["MOBILE"][idx]))
Exemplo n.º 8
0
    def test_broad_phone_number_v6(self):
        """Detection of phone number

        Runs the ``broad`` rules over ``teléfono: ESP 980000001A`` and checks
        that the match labelled ``BROAD_REG_PHONE_NUMBER_GEN_V3`` cleans up
        to ``980000001`` (without the trailing ``A``).

        If no match carries the rule name, the index stays at -1 and the
        last ``PHONE`` match is the one compared.
        """
        sample = "teléfono: ESP 980000001A"

        detector = ner_regex.RegexNer()
        detected = detector._detect_regexp(sample, "broad")

        self.assertTrue(
            "PHONE" in detected,
            "{} {} {}".format(
                self.shortDescription(), MSG_PHONE_NOT_DETECTED, detected))

        # Position of the first match attributed to the rule, else -1.
        matches = detected["PHONE"]
        position = next(
            (pos for pos, match in enumerate(matches)
             if match[1] == "BROAD_REG_PHONE_NUMBER_GEN_V3"),
            -1)
        chosen = matches[position]

        self.assertEqual(
            clean_text(chosen[0]),
            "980000001",
            MSG_DETECTED.format(
                self.shortDescription(), MSG_PHONE_WRONG_DETECTED, chosen))
Exemplo n.º 9
0
    def test_strict_dni_v2(self):
        """Detection of DNI

        Runs the ``strict`` rules over the bare string ``15373458B`` and
        checks that the ``ID_DOCUMENT`` match labelled ``STRICT_REG_DNI_V0``
        cleans up to the same value.

        If no match carries the rule name, the index stays at -1 and the
        last ``ID_DOCUMENT`` match is the one compared.
        """
        sample = "15373458B"

        detector = ner_regex.RegexNer()
        detected = detector._detect_regexp(sample, "strict")

        self.assertTrue(
            "ID_DOCUMENT" in detected,
            "{} {} {}".format(
                self.shortDescription(),
                MSG_ID_DOCUMENT_NOT_DETECTED,
                detected))

        # Position of the first match attributed to the rule, else -1.
        matches = detected["ID_DOCUMENT"]
        position = next(
            (pos for pos, match in enumerate(matches)
             if match[1] == "STRICT_REG_DNI_V0"),
            -1)
        chosen = matches[position]

        self.assertEqual(
            clean_text(chosen[0]),
            "15373458B",
            MSG_DETECTED.format(
                self.shortDescription(), MSG_DNI_WRONG_DETECTED, chosen))
Exemplo n.º 10
0
    def test_dni_with_dash(self):
        """Detection of DNI v0 rule with letter separated by dash

        Runs the ``strict`` rules over a sentence containing ``66666666-Y``
        and checks that the ``ID_DOCUMENT`` match labelled
        ``STRICT_REG_DNI_V0`` cleans up to ``66666666Y``.

        If no match carries the rule name, the index stays at -1 and the
        last ``ID_DOCUMENT`` match is the one compared.
        """
        sample = "el dni de Juan es 66666666-Y."

        detector = ner_regex.RegexNer()
        detected = detector._detect_regexp(sample, "strict")

        self.assertTrue(
            "ID_DOCUMENT" in detected,
            "{} {} {}".format(
                self.shortDescription(),
                MSG_ID_DOCUMENT_NOT_DETECTED,
                detected))

        # Position of the first match attributed to the rule, else -1.
        matches = detected["ID_DOCUMENT"]
        position = next(
            (pos for pos, match in enumerate(matches)
             if match[1] == "STRICT_REG_DNI_V0"),
            -1)
        chosen = matches[position]

        self.assertEqual(
            clean_text(chosen[0]),
            "66666666Y",
            MSG_DETECTED.format(
                self.shortDescription(), MSG_DNI_WRONG_DETECTED, chosen))