Exemplo n.º 1
0
    def test_valid_jcb_credit_card(self):
        """Analyze a lone 16-character JCB sample number.

        Expects exactly one result, validated by ``assert_result`` against
        ``entities[0]``, the span 0-16 and ``EntityRecognizer.MAX_SCORE``.
        """
        card_number = '3528000700000000'
        matches = credit_card_recognizer.analyze(card_number, entities)

        assert len(matches) == 1
        only_match = matches[0]
        assert_result(only_match, entities[0], 0, 16,
                      EntityRecognizer.MAX_SCORE)
Exemplo n.º 2
0
    def test_valid_email_no_context(self):
        """Analyze an e-mail address given on its own, with no other text.

        Expects exactly one result, validated by ``assert_result`` against
        ``entities[0]``, the span 0-18 and ``EntityRecognizer.MAX_SCORE``.
        """
        # NOTE(review): the literal below is 14 characters long and looks
        # masked, while the expected span ends at 18 - confirm the intended
        # address before relying on this test.
        address = '*****@*****.**'
        matches = email_recognizer.analyze(address, entities)

        assert len(matches) == 1
        only_match = matches[0]
        assert_result(only_match, entities[0], 0, 18,
                      EntityRecognizer.MAX_SCORE)
Exemplo n.º 3
0
    def test_valid_cartebleue_credit_card(self):
        """Analyze a lone 16-character Carte Bleue sample number.

        Expects exactly one result, validated by ``assert_result`` against
        ``entities[0]``, the span 0-16 and ``EntityRecognizer.MAX_SCORE``.
        """
        card_number = '5555555555554444'
        matches = credit_card_recognizer.analyze(card_number, entities)

        assert len(matches) == 1
        only_match = matches[0]
        assert_result(only_match, entities[0], 0, 16,
                      EntityRecognizer.MAX_SCORE)
Exemplo n.º 4
0
    def test_valid_maestro_credit_card(self):
        """Analyze a lone 16-character Maestro sample number.

        Expects exactly one result, validated by ``assert_result`` against
        ``entities[0]``, the span 0-16 and ``EntityRecognizer.MAX_SCORE``.
        """
        card_number = '6759649826438453'
        matches = credit_card_recognizer.analyze(card_number, entities)

        assert len(matches) == 1
        only_match = matches[0]
        assert_result(only_match, entities[0], 0, 16,
                      EntityRecognizer.MAX_SCORE)
Exemplo n.º 5
0
    def test_valid_diners_credit_card(self):
        """Analyze a lone 14-character Diners sample number.

        Expects exactly one result, validated by ``assert_result`` against
        ``entities[0]``, the span 0-14 and ``EntityRecognizer.MAX_SCORE``.
        """
        card_number = '30569309025904'
        matches = credit_card_recognizer.analyze(card_number, entities)

        assert len(matches) == 1
        only_match = matches[0]
        assert_result(only_match, entities[0], 0, 14,
                      EntityRecognizer.MAX_SCORE)
Exemplo n.º 6
0
    def test_valid_btc(self):
        """Analyze a 34-character wallet string given on its own.

        Expects exactly one result, validated by ``assert_result`` against
        ``entities[0]``, the span 0-34 and ``EntityRecognizer.MAX_SCORE``.
        """
        wallet_address = '16Yeky6GMjeNkAiNcBY7ZhrLoMSgg1BoyZ'
        matches = crypto_recognizer.analyze(wallet_address, entities)

        assert len(matches) == 1
        only_match = matches[0]
        assert_result(only_match, entities[0], 0, 34,
                      EntityRecognizer.MAX_SCORE)
Exemplo n.º 7
0
    def test_valid_discover_credit_card(self):
        """Analyze a lone 16-character Discover sample number.

        Expects exactly one result, validated by ``assert_result`` against
        ``entities[0]``, the span 0-16 and ``EntityRecognizer.MAX_SCORE``.
        """
        card_number = '6011000400000000'
        matches = credit_card_recognizer.analyze(card_number, entities)

        assert len(matches) == 1
        only_match = matches[0]
        assert_result(only_match, entities[0], 0, 16,
                      EntityRecognizer.MAX_SCORE)
Exemplo n.º 8
0
    def test_person_title_and_last_name_is_also_a_date_expected_person_only(
            self):
        """Analyze 'Mr. May' and expect a single result covering 'May'.

        The result is validated by ``assert_result`` against ``entities[0]``,
        the span 4-7 and ``NER_STRENGTH``.
        """
        sentence = 'Mr. May'
        found = self.prepare_and_analyze(nlp_engine, sentence)

        assert len(found) == 1
        surname_hit = found[0]
        assert_result(surname_hit, entities[0], 4, 7, NER_STRENGTH)
Exemplo n.º 9
0
    def test_valid_visa_electron_credit_card(self):
        """Analyze a lone 16-character Visa Electron sample number.

        Expects exactly one result, validated by ``assert_result`` against
        ``entities[0]``, the span 0-16 and ``EntityRecognizer.MAX_SCORE``.
        """
        card_number = '4917300800000000'
        matches = credit_card_recognizer.analyze(card_number, entities)

        assert len(matches) == 1
        only_match = matches[0]
        assert_result(only_match, entities[0], 0, 16,
                      EntityRecognizer.MAX_SCORE)
Exemplo n.º 10
0
    def test_valid_airplus_credit_card(self):
        """Analyze a lone 15-character AirPlus sample number.

        Expects exactly one result, validated by ``assert_result`` against
        ``entities[0]``, the span 0-15 and ``EntityRecognizer.MAX_SCORE``.
        """
        card_number = '122000000000003'
        matches = credit_card_recognizer.analyze(card_number, entities)

        assert len(matches) == 1
        only_match = matches[0]
        assert_result(only_match, entities[0], 0, 15,
                      EntityRecognizer.MAX_SCORE)
Exemplo n.º 11
0
    def test_valid_amex_credit_card(self):
        """Analyze a lone 15-character Amex sample number.

        Expects exactly one result, validated by ``assert_result`` against
        ``entities[0]``, the span 0-15 and ``EntityRecognizer.MAX_SCORE``.
        """
        card_number = '371449635398431'
        matches = credit_card_recognizer.analyze(card_number, entities)

        assert len(matches) == 1
        only_match = matches[0]
        assert_result(only_match, entities[0], 0, 15,
                      EntityRecognizer.MAX_SCORE)
Exemplo n.º 12
0
    def test_added_pattern_recognizer_works(self):
        """Check that a recognizer added to the mock store yields a result.

        The same request is sent twice through an ``AnalyzerEngine`` built on
        mock registry/NLP objects: once before the "ROCKET" recognizer is
        added to the mock store (no results expected) and once after (a single
        "ROCKET" result at span 0-7 with score 0.8 expected).
        """
        rocket_pattern = Pattern("rocket pattern", r'\W*(rocket)\W*', 0.8)
        rocket_recognizer = PatternRecognizer("ROCKET",
                                              name="Rocket recognizer",
                                              patterns=[rocket_pattern])

        # The store starts without the custom recognizer.
        store_mock = RecognizerStoreApiMock()
        engine = AnalyzerEngine(
            registry=MockRecognizerRegistry(store_mock),
            nlp_engine=MockNlpEngine())

        # Keyword arguments shared by both analyze() calls below.
        request = {
            'text': "rocket is my favorite transportation",
            'entities': ["CREDIT_CARD", "ROCKET"],
            'language': 'en',
            'all_fields': False,
        }

        before_adding = engine.analyze(self.unit_test_guid, **request)
        assert len(before_adding) == 0

        # Register the custom "rocket" recognizer in the store.
        store_mock.add_custom_pattern_recognizer(rocket_recognizer)

        # The very same request must now produce the ROCKET entity.
        after_adding = engine.analyze(self.unit_test_guid, **request)
        assert len(after_adding) == 1
        assert_result(after_adding[0], "ROCKET", 0, 7, 0.8)
Exemplo n.º 13
0
    def test_valid_domain(self):
        """Analyze the bare domain 'microsoft.com'.

        Expects exactly one result, validated by ``assert_result`` against
        ``entities[0]``, the span 0-13 and ``EntityRecognizer.MAX_SCORE``.
        """
        domain_name = 'microsoft.com'
        matches = domain_recognizer.analyze(domain_name, entities)

        assert len(matches) == 1
        only_match = matches[0]
        assert_result(only_match, entities[0], 0, 13,
                      EntityRecognizer.MAX_SCORE)
Exemplo n.º 14
0
    def test_valid_dankort_credit_card(self):
        """Analyze a lone 16-character Dankort sample number.

        Expects exactly one result, validated by ``assert_result`` against
        ``entities[0]``, the span 0-16 and ``EntityRecognizer.MAX_SCORE``.
        """
        card_number = '5019717010103742'
        matches = credit_card_recognizer.analyze(card_number, entities)

        assert len(matches) == 1
        only_match = matches[0]
        assert_result(only_match, entities[0], 0, 16,
                      EntityRecognizer.MAX_SCORE)
Exemplo n.º 15
0
    def test_valid_email_with_context(self):
        """Analyze an e-mail address embedded after 'my email is '.

        Expects exactly one result, validated by ``assert_result`` against
        ``entities[0]``, the span 12-30 and ``EntityRecognizer.MAX_SCORE``.
        """
        # NOTE(review): the literal below is 14 characters long and looks
        # masked, while the expected span (12-30) implies 18 characters -
        # confirm the intended address.
        address = '*****@*****.**'
        sentence = 'my email is {}'.format(address)
        matches = email_recognizer.analyze(sentence, entities)

        assert len(matches) == 1
        only_match = matches[0]
        assert_result(only_match, entities[0], 12, 30,
                      EntityRecognizer.MAX_SCORE)
Exemplo n.º 16
0
    def test_valid_airplus_credit_card_with_extact_context(self):
        """Analyze an AirPlus sample number prefixed by 'my credit card: '.

        Expects exactly one result, validated by ``assert_result`` against
        ``entities[0]``, the span 16-31 and ``EntityRecognizer.MAX_SCORE``.
        """
        card_number = '122000000000003'
        prefix = 'my credit card: '
        sentence = prefix + card_number
        matches = credit_card_recognizer.analyze(sentence, entities)

        assert len(matches) == 1
        only_match = matches[0]
        assert_result(only_match, entities[0], 16, 31,
                      EntityRecognizer.MAX_SCORE)
Exemplo n.º 17
0
    def test_valid_visa_purchasing_credit_card(self):
        """Analyze a lone 16-character Visa Purchasing sample number.

        Expects exactly one result whose ``score`` equals 1.0, then validates
        it with ``assert_result`` against ``entities[0]``, the span 0-16 and
        ``EntityRecognizer.MAX_SCORE``.
        """
        card_number = '4484070000000000'
        matches = credit_card_recognizer.analyze(card_number, entities)

        assert len(matches) == 1
        only_match = matches[0]
        assert only_match.score == 1.0
        assert_result(only_match, entities[0], 0, 16,
                      EntityRecognizer.MAX_SCORE)
Exemplo n.º 18
0
    def test_valid_btc_with_exact_context(self):
        """Analyze a wallet string prefixed by 'my wallet address is: '.

        Expects exactly one result, validated by ``assert_result`` against
        ``entities[0]``, the span 22-56 and ``EntityRecognizer.MAX_SCORE``.
        """
        wallet_address = '16Yeky6GMjeNkAiNcBY7ZhrLoMSgg1BoyZ'
        prefix = 'my wallet address is: '
        sentence = prefix + wallet_address
        matches = crypto_recognizer.analyze(sentence, entities)

        assert len(matches) == 1
        only_match = matches[0]
        assert_result(only_match, entities[0], 22, 56,
                      EntityRecognizer.MAX_SCORE)
Exemplo n.º 19
0
def _assertion_coach_mentions_glob(global_ID, agg_rost_ment_file, roster_file,
                                   mgmt_list, sent_dict, result, team):
    """Run the ``assertions`` checks on the inputs of coach_mentions_glob().

    Each argument is handed to the matching helper of the ``assertions``
    module, in the order listed below.

    NOTE(review): ``sent_dict`` is accepted but no check is run on it here -
    confirm whether that is intentional.
    """
    assertions.assert_global_ID(global_ID)
    # The aggregated roster-mention file is checked together with the roster.
    assertions.assert_agg_roster_ment_file_format(
        agg_rost_ment_file, roster_file)
    assertions.assert_roster_file_format(roster_file)
    assertions.assert_str_list(mgmt_list)
    assertions.assert_result(result)
    assertions.assert_team(team)
Exemplo n.º 20
0
    def test_valid_domains_lemma_text(self):
        """Analyze two domains listed after 'my domains: '.

        Expects two results, validated in order by ``assert_result`` against
        ``entities[0]`` and ``EntityRecognizer.MAX_SCORE`` with the spans
        12-25 and 26-38.
        """
        first_domain = 'microsoft.com'
        second_domain = 'google.co.il'
        sentence = 'my domains: {} {}'.format(first_domain, second_domain)
        matches = domain_recognizer.analyze(sentence, entities)

        assert len(matches) == 2
        expected_spans = [(12, 25), (26, 38)]
        for match, (start, end) in zip(matches, expected_spans):
            assert_result(match, entities[0], start, end,
                          EntityRecognizer.MAX_SCORE)
Exemplo n.º 21
0
    def test_analyze_with_predefined_recognizers_return_results(self):
        """Analyze a sentence with a card and a phone, asking only for cards.

        Only "CREDIT_CARD" is requested from ``self.loaded_analyzer_engine``;
        a single result at span 14-33 with ``EntityRecognizer.MAX_SCORE`` is
        expected.
        """
        sentence = (" Credit card: 4095-2609-9393-4932,"
                    "  my phone is 425 8829090")
        requested_entities = ["CREDIT_CARD"]
        lang_code = "en"
        found = self.loaded_analyzer_engine.analyze(
            sentence, requested_entities, lang_code, all_fields=False)

        assert len(found) == 1
        card_hit = found[0]
        assert_result(card_hit, "CREDIT_CARD", 14, 33,
                      EntityRecognizer.MAX_SCORE)
Exemplo n.º 22
0
    def test_multiple_emails_with_lemma_context(self):
        """Analyze a sentence holding two e-mail addresses joined by ' or '.

        Expects two results, validated in order by ``assert_result`` against
        ``entities[0]`` and ``EntityRecognizer.MAX_SCORE`` with the spans
        24-42 and 46-71.
        """
        # NOTE(review): both literals are 14 characters long and look masked,
        # while the expected spans imply 18 and 25 characters - confirm the
        # intended addresses.
        first_address = '*****@*****.**'
        second_address = '*****@*****.**'
        sentence = 'try one of this emails: {} or {}'.format(
            first_address, second_address)
        matches = email_recognizer.analyze(sentence, entities)

        assert len(matches) == 2
        expected_spans = [(24, 42), (46, 71)]
        for match, (start, end) in zip(matches, expected_spans):
            assert_result(match, entities[0], start, end,
                          EntityRecognizer.MAX_SCORE)
Exemplo n.º 23
0
    def test_black_list_keywords_found(self):
        """Analyze a sentence with a recognizer built only from a black list.

        The ``MockRecognizer`` gets no patterns and the black list
        ["phone", "name"]; two "ENTITY_1" results with score 1.0 are expected,
        at spans 3-8 and 36-40.
        """
        recognizer = MockRecognizer(name=None,
                                    context=None,
                                    black_list=["phone", "name"],
                                    entity="ENTITY_1",
                                    patterns=[])
        sentence = "my phone number is 555-1234, and my name is John"

        found = recognizer.analyze(sentence, ["ENTITY_1"])

        assert len(found) == 2
        expected_spans = [(3, 8), (36, 40)]
        for hit, (start, end) in zip(found, expected_spans):
            assert_result(hit, "ENTITY_1", start, end, 1.0)
Exemplo n.º 24
0
    def test_removed_pattern_recognizer_doesnt_work(self):
        """Check that removing a recognizer from the mock store stops results.

        The same request is sent three times through an ``AnalyzerEngine``
        built on mock registry/NLP objects: before the "SPACESHIP" recognizer
        is added (no results), after it is added (one result at span 0-10
        with score 0.8) and after it is removed again (no results).
        """
        ship_pattern = Pattern("spaceship pattern", r'\W*(spaceship)\W*', 0.8)
        ship_recognizer = PatternRecognizer("SPACESHIP",
                                            name="Spaceship recognizer",
                                            patterns=[ship_pattern])

        # The store starts without the custom recognizer.
        store_mock = RecognizerStoreApiMock()
        engine = AnalyzerEngine(
            registry=MockRecognizerRegistry(store_mock),
            nlp_engine=MockNlpEngine())

        # Keyword arguments shared by all analyze() calls below.
        request = {
            'text': "spaceship is my favorite transportation",
            'entities': ["CREDIT_CARD", "SPACESHIP"],
            'language': 'en',
            'all_fields': False,
        }

        before_adding = engine.analyze(self.unit_test_guid, **request)
        assert len(before_adding) == 0

        # Register the custom "spaceship" recognizer in the store.
        store_mock.add_custom_pattern_recognizer(ship_recognizer)

        # The very same request must now produce the SPACESHIP entity.
        after_adding = engine.analyze(self.unit_test_guid, **request)
        assert len(after_adding) == 1
        assert_result(after_adding[0], "SPACESHIP", 0, 10, 0.8)

        # Drop the recognizer again, addressing it by its name.
        store_mock.remove_recognizer("Spaceship recognizer")

        # With the recognizer gone the request must come back empty.
        after_removal = engine.analyze(self.unit_test_guid, **request)
        assert len(after_removal) == 0
Exemplo n.º 25
0
    def test_analyze_with_multiple_predefined_recognizers(self):
        """Analyze a sentence requesting both card and phone entities.

        Two results are expected: "CREDIT_CARD" at span 14-33 with
        ``EntityRecognizer.MAX_SCORE`` and "PHONE_NUMBER" at span 48-59 with
        the sum of ``UsPhoneRecognizer.MEDIUM_REGEX_SCORE`` and
        ``PatternRecognizer.CONTEXT_SIMILARITY_FACTOR``.
        """
        sentence = (" Credit card: 4095-2609-9393-4932,"
                    "  my phone is 425 8829090")
        lang_code = "en"
        requested_entities = ["CREDIT_CARD", "PHONE_NUMBER"]

        # A dedicated engine is built from the loaded registry only.
        # NOTE(review): the original comment says this engine also loads
        # SpaCy so the phone number can be detected - not verifiable from
        # this snippet, confirm against AnalyzerEngine's defaults.
        spacy_backed_engine = AnalyzerEngine(self.loaded_registry)
        found = spacy_backed_engine.analyze(
            sentence, requested_entities, lang_code, all_fields=False)

        assert len(found) == 2
        card_hit = found[0]
        assert_result(card_hit, "CREDIT_CARD", 14, 33,
                      EntityRecognizer.MAX_SCORE)

        # The original note gives this sum as 0.5 + 0.35 = 0.85; the constant
        # values themselves are defined outside this snippet.
        phone_score = (UsPhoneRecognizer.MEDIUM_REGEX_SCORE
                       + PatternRecognizer.CONTEXT_SIMILARITY_FACTOR)
        phone_hit = found[1]
        assert_result(phone_hit, "PHONE_NUMBER", 48, 59, phone_score)
Exemplo n.º 26
0
    def test_person_full_name_complex(self):
        """Analyze 'Richard (Ric) C. Henderson' and expect three results.

        For each result, in order, the text slice it covers is compared with
        the expected surface form and ``assert_result`` validates it against
        ``entities[0]``, the expected span and ``NER_STRENGTH``.
        """
        text = 'Richard (Ric) C. Henderson'
        found = self.prepare_and_analyze(nlp_engine, text)

        assert len(found) == 3

        # (expected surface form, start offset, end offset), in result order.
        expected_parts = [
            ("Richard", 0, 7),
            ("Ric", 9, 12),
            ("C. Henderson", 14, 26),
        ]
        for hit, (surface, start, end) in zip(found, expected_parts):
            assert text[hit.start:hit.end] == surface
            assert_result(hit, entities[0], start, end, NER_STRENGTH)
Exemplo n.º 27
0
    def test_valid_credit_cards(self):
        """Analyze one card number written plain, with dashes and with spaces.

        The three spellings are joined by single spaces; three results are
        expected, validated in order by ``assert_result`` against
        ``entities[0]`` and ``EntityRecognizer.MAX_SCORE`` with the spans
        0-16, 17-36 and 37-56.
        """
        plain = '4012888888881881'
        dashed = '4012-8888-8888-1881'
        spaced = '4012 8888 8888 1881'
        sentence = '{} {} {}'.format(plain, dashed, spaced)

        matches = credit_card_recognizer.analyze(sentence, entities)

        assert len(matches) == 3
        expected_spans = [(0, 16), (17, 36), (37, 56)]
        for match, (start, end) in zip(matches, expected_spans):
            assert_result(match, entities[0], start, end,
                          EntityRecognizer.MAX_SCORE)
Exemplo n.º 28
0
    def test_us_bank_account_no_context(self):
        """Analyze a lone 12-digit string with the US bank recognizer.

        Expects exactly one result, validated by ``assert_result`` against
        ``entities[0]``, the span 0-12 and the score 0.05.
        """
        account_number = '945456787654'
        matches = us_bank_recognizer.analyze(account_number, entities)

        assert len(matches) == 1
        only_match = matches[0]
        assert_result(only_match, entities[0], 0, 12, 0.05)
Exemplo n.º 29
0
    def test_valid_uk_nhs_with_no_delimeters(self):
        """Analyze a 10-digit NHS sample number written without separators.

        Expects exactly one result, validated by ``assert_result`` against
        ``entities[0]``, the span 0-10 and the score 1.0.
        """
        nhs_number = '0032698674'
        matches = nhs_recognizer.analyze(nhs_number, entities)

        assert len(matches) == 1
        only_match = matches[0]
        assert_result(only_match, entities[0], 0, 10, 1.0)
Exemplo n.º 30
0
    def test_valid_uk_nhs_with_spaces(self):
        """Analyze an NHS sample number written in space-separated groups.

        Expects exactly one result, validated by ``assert_result`` against
        ``entities[0]``, the span 0-12 and the score 1.0.
        """
        nhs_number = '221 395 1837'
        matches = nhs_recognizer.analyze(nhs_number, entities)

        assert len(matches) == 1
        only_match = matches[0]
        assert_result(only_match, entities[0], 0, 12, 1.0)