def test_valid_btc_with_exact_context(self):
    """A BTC address preceded by a context phrase yields one max-score hit."""
    prefix = 'my wallet address is: '
    address = '16Yeky6GMjeNkAiNcBY7ZhrLoMSgg1BoyZ'
    text = prefix + address

    matches = crypto_recognizer.analyze(text, entities)

    assert len(matches) == 1
    # The expected span (22-56) covers only the address, not the prefix.
    assert_result(matches[0], entities[0], 22, 56,
                  EntityRecognizer.MAX_SCORE)
def test_valid_visa_electron_credit_card(self):
    """A bare Visa Electron number is reported once, spanning all 16 digits."""
    card_number = '4917300800000000'

    matches = credit_card_recognizer.analyze(card_number, entities)

    assert len(matches) == 1
    first_match = matches[0]
    assert_result(first_match, entities[0], 0, 16,
                  EntityRecognizer.MAX_SCORE)
def test_person_title_and_last_name_is_also_a_date_expected_person_only(
        self):
    """'Mr. May' must produce exactly one result, covering offsets 4-7."""
    sentence = 'Mr. May'

    found = self.prepare_and_analyze(nlp_engine, sentence)

    # Exactly one result is expected even though "May" could also be read
    # as a date (per the test name).
    assert len(found) == 1
    assert_result(found[0], entities[0], 4, 7, NER_STRENGTH)
def test_valid_maestro_credit_card(self):
    """A bare Maestro number is reported once, spanning all 16 digits."""
    card_number = '6759649826438453'

    matches = credit_card_recognizer.analyze(card_number, entities)

    assert len(matches) == 1
    first_match = matches[0]
    assert_result(first_match, entities[0], 0, 16,
                  EntityRecognizer.MAX_SCORE)
def test_when_all_ibans_then_succeed(
    iban, expected_len, expected_res, recognizer, entities, max_score
):
    """Check the number of matches in ``iban`` and the span of each one.

    ``expected_res`` supplies one ``(start, end)`` pair per expected match;
    every match is checked against ``entities[0]`` and ``max_score``.
    """
    matches = recognizer.analyze(iban, entities)
    assert len(matches) == expected_len

    # zip() stops at the shorter sequence, so only paired items are compared.
    for match, span in zip(matches, expected_res):
        begin, finish = span
        assert_result(match, entities[0], begin, finish, max_score)
def test_added_pattern_recognizer_works(self):
    """A custom pattern recognizer is used once it is added to the store."""
    rocket_pattern = Pattern("rocket pattern", r'\W*(rocket)\W*', 0.8)
    rocket_recognizer = PatternRecognizer(
        "ROCKET", name="Rocket recognizer", patterns=[rocket_pattern])

    store_mock = RecognizerStoreApiMock()
    engine = AnalyzerEngine(
        registry=MockRecognizerRegistry(store_mock),
        nlp_engine=MockNlpEngine())

    sentence = "rocket is my favorite transportation"
    requested = ["CREDIT_CARD", "ROCKET"]

    def run_analysis():
        # Same request both times; only the store contents change in between.
        return engine.analyze(self.unit_test_guid,
                              text=sentence,
                              entities=requested,
                              language='en',
                              all_fields=False)

    # Before the recognizer is registered nothing should be detected.
    before = run_analysis()
    assert len(before) == 0

    # Register the custom recognizer for the word "rocket".
    store_mock.add_custom_pattern_recognizer(rocket_recognizer)

    # Now the entity must be reported with the pattern's score.
    after = run_analysis()
    assert len(after) == 1
    assert_result(after[0], "ROCKET", 0, 7, 0.8)
def test_valid_btc(self):
    """A bare BTC address is reported once, spanning the whole 34-char input."""
    address = '16Yeky6GMjeNkAiNcBY7ZhrLoMSgg1BoyZ'

    matches = crypto_recognizer.analyze(address, entities)

    assert len(matches) == 1
    first_match = matches[0]
    assert_result(first_match, entities[0], 0, 34,
                  EntityRecognizer.MAX_SCORE)
def test_valid_email_no_context(self):
    """An e-mail address on its own is reported once, spanning the input."""
    # NOTE(review): this literal looks masked/redacted -- confirm it is the
    # address the test is really meant to exercise.
    email = '*****@*****.**'

    results = email_recognizer.analyze(email, entities)

    assert len(results) == 1
    # Derive the span from the input instead of hard-coding an end offset
    # (previously 18), which does not match the length of this literal.
    assert_result(results[0], entities[0], 0, len(email),
                  EntityRecognizer.MAX_SCORE)
def test_valid_discover_credit_card(self):
    """A bare Discover number is reported once, spanning all 16 digits."""
    card_number = '6011000400000000'

    matches = credit_card_recognizer.analyze(card_number, entities)

    assert len(matches) == 1
    first_match = matches[0]
    assert_result(first_match, entities[0], 0, 16,
                  EntityRecognizer.MAX_SCORE)
def test_valid_cartebleue_credit_card(self):
    """A bare Carte Bleue number is reported once, spanning all 16 digits."""
    card_number = '5555555555554444'

    matches = credit_card_recognizer.analyze(card_number, entities)

    assert len(matches) == 1
    first_match = matches[0]
    assert_result(first_match, entities[0], 0, 16,
                  EntityRecognizer.MAX_SCORE)
def test_valid_amex_credit_card(self):
    """A bare Amex number is reported once, spanning all 15 digits."""
    card_number = '371449635398431'

    matches = credit_card_recognizer.analyze(card_number, entities)

    assert len(matches) == 1
    first_match = matches[0]
    assert_result(first_match, entities[0], 0, 15,
                  EntityRecognizer.MAX_SCORE)
def test_valid_domain(self):
    """A bare domain name is reported once, spanning the whole input."""
    hostname = 'microsoft.com'

    matches = domain_recognizer.analyze(hostname, entities)

    assert len(matches) == 1
    first_match = matches[0]
    assert_result(first_match, entities[0], 0, 13,
                  EntityRecognizer.MAX_SCORE)
def test_valid_airplus_credit_card(self):
    """A bare AirPlus number is reported once, spanning all 15 digits."""
    card_number = '122000000000003'

    matches = credit_card_recognizer.analyze(card_number, entities)

    assert len(matches) == 1
    first_match = matches[0]
    assert_result(first_match, entities[0], 0, 15,
                  EntityRecognizer.MAX_SCORE)
def test_valid_diners_credit_card(self):
    """A bare Diners number is reported once, spanning all 14 digits."""
    card_number = '30569309025904'

    matches = credit_card_recognizer.analyze(card_number, entities)

    assert len(matches) == 1
    first_match = matches[0]
    assert_result(first_match, entities[0], 0, 14,
                  EntityRecognizer.MAX_SCORE)
def test_when_all_cryptos_then_succeed(
    text, expected_len, expected_positions, recognizer, entities, max_score
):
    """Check the number of matches in ``text`` and the span of each one.

    ``expected_positions`` supplies one ``(start, end)`` pair per expected
    match; every match is checked against ``entities[0]`` and ``max_score``.
    """
    matches = recognizer.analyze(text, entities)
    assert len(matches) == expected_len

    # zip() stops at the shorter sequence, so only paired items are compared.
    for match, span in zip(matches, expected_positions):
        begin, finish = span
        assert_result(match, entities[0], begin, finish, max_score)
def test_valid_dankort_credit_card(self):
    """A bare Dankort number is reported once, spanning all 16 digits."""
    card_number = '5019717010103742'

    matches = credit_card_recognizer.analyze(card_number, entities)

    assert len(matches) == 1
    first_match = matches[0]
    assert_result(first_match, entities[0], 0, 16,
                  EntityRecognizer.MAX_SCORE)
def test_valid_jcb_credit_card(self):
    """A bare JCB number is reported once, spanning all 16 digits."""
    card_number = '3528000700000000'

    matches = credit_card_recognizer.analyze(card_number, entities)

    assert len(matches) == 1
    first_match = matches[0]
    assert_result(first_match, entities[0], 0, 16,
                  EntityRecognizer.MAX_SCORE)
def test_valid_airplus_credit_card_with_extact_context(self):
    """An AirPlus number after a context phrase yields one max-score hit."""
    prefix = 'my credit card: '
    card_number = '122000000000003'
    text = prefix + card_number

    matches = credit_card_recognizer.analyze(text, entities)

    assert len(matches) == 1
    # The expected span (16-31) covers only the digits, not the prefix.
    assert_result(matches[0], entities[0], 16, 31,
                  EntityRecognizer.MAX_SCORE)
def test_valid_domains_lemma_text(self):
    """Two domains in one sentence are both reported, in order of appearance."""
    first_domain = 'microsoft.com'
    second_domain = 'google.co.il'
    text = 'my domains: {} {}'.format(first_domain, second_domain)

    matches = domain_recognizer.analyze(text, entities)

    assert len(matches) == 2
    # Expected spans: 12-25 for the first domain, 26-38 for the second.
    assert_result(matches[0], entities[0], 12, 25,
                  EntityRecognizer.MAX_SCORE)
    assert_result(matches[1], entities[0], 26, 38,
                  EntityRecognizer.MAX_SCORE)
def test_when_nhs_in_text_then_all_uk_nhses_found(
    text, expected_len, expected_positions, recognizer, entities, max_score
):
    """Check the number of matches in ``text`` and the span of each one.

    ``expected_positions`` supplies one ``(start, end)`` pair per expected
    match; every match is checked against ``entities[0]`` and ``max_score``.
    """
    matches = recognizer.analyze(text, entities)
    assert len(matches) == expected_len

    # zip() stops at the shorter sequence, so only paired items are compared.
    for match, span in zip(matches, expected_positions):
        begin, finish = span
        assert_result(match, entities[0], begin, finish, max_score)
def test_valid_email_with_context(self):
    """An e-mail address after a context phrase is reported exactly once."""
    # NOTE(review): this literal looks masked/redacted -- confirm it is the
    # address the test is really meant to exercise.
    email = '*****@*****.**'
    text = 'my email is {}'.format(email)

    results = email_recognizer.analyze(text, entities)

    assert len(results) == 1
    # Derive the span from the text instead of hard-coding offsets
    # (previously 12-30), which do not match the length of this literal.
    start = text.index(email)
    assert_result(results[0], entities[0], start, start + len(email),
                  EntityRecognizer.MAX_SCORE)
def test_valid_visa_purchasing_credit_card(self):
    """A bare Visa Purchasing number is reported once with a score of 1.0."""
    card_number = '4484070000000000'

    matches = credit_card_recognizer.analyze(card_number, entities)

    assert len(matches) == 1
    first_match = matches[0]
    # Explicit score check, kept in addition to the assert_result call below.
    assert first_match.score == 1.0
    assert_result(first_match, entities[0], 0, 16,
                  EntityRecognizer.MAX_SCORE)
def test_when_aba_routing_numbers_then_succeed(
    text, expected_len, expected_positions, expected_score, recognizer, entities
):
    """Check the number of matches in ``text`` and the span of each one.

    ``expected_positions`` supplies one ``(start, end)`` pair per expected
    match; every match is checked against ``entities[0]`` and
    ``expected_score``.
    """
    matches = recognizer.analyze(text, entities)
    assert len(matches) == expected_len

    # zip() stops at the shorter sequence, so only paired items are compared.
    for match, span in zip(matches, expected_positions):
        begin, finish = span
        assert_result(match, entities[0], begin, finish, expected_score)
def test_black_list_keywords_found(self):
    """Both black-listed words in the sentence are reported with score 1.0."""
    recognizer = MockRecognizer(
        patterns=[],
        entity="ENTITY_1",
        black_list=["phone", "name"],
        context=None,
        name=None,
    )
    sentence = "my phone number is 555-1234, and my name is John"

    hits = recognizer.analyze(sentence, ["ENTITY_1"])

    assert len(hits) == 2
    # "phone" sits at offsets 3-8 and "name" at 36-40 in the sentence.
    assert_result(hits[0], "ENTITY_1", 3, 8, 1.0)
    assert_result(hits[1], "ENTITY_1", 36, 40, 1.0)
def test_analyze_with_predefined_recognizers_return_results(self):
    """Only the credit card is reported when only CREDIT_CARD is requested."""
    sentence = " Credit card: 4095-2609-9393-4932, my phone is 425 8829090"
    lang_code = "en"
    requested = ["CREDIT_CARD"]

    hits = self.loaded_analyzer_engine.analyze(
        self.unit_test_guid, sentence, requested, lang_code,
        all_fields=False)

    # The phone number in the sentence must not show up in the results.
    assert len(hits) == 1
    assert_result(hits[0], "CREDIT_CARD", 14, 33,
                  EntityRecognizer.MAX_SCORE)
def test_multiple_emails_with_lemma_context(self):
    """Two e-mail addresses in one sentence are both reported, in order."""
    # NOTE(review): both literals look masked/redacted (and are identical) --
    # confirm the addresses the test is really meant to exercise.
    email1 = '*****@*****.**'
    email2 = '*****@*****.**'
    text = 'try one of this emails: {} or {}'.format(email1, email2)

    results = email_recognizer.analyze(text, entities)

    assert len(results) == 2
    # Derive the spans from the text instead of hard-coding offsets
    # (previously 24-42 and 46-71), which do not match these literals.
    start1 = text.index(email1)
    end1 = start1 + len(email1)
    # Search after the first address so identical addresses are not confused.
    start2 = text.index(email2, end1)
    end2 = start2 + len(email2)
    assert_result(results[0], entities[0], start1, end1,
                  EntityRecognizer.MAX_SCORE)
    assert_result(results[1], entities[0], start2, end2,
                  EntityRecognizer.MAX_SCORE)
def test_create_account():
    """Exercise the ``/create-account`` endpoint without and with a username.

    The first POST sends an empty payload and is checked with
    ``assert_result(response, False)``; the second adds ``username`` and is
    checked with ``assert_result(response, True)``.
    """
    print(">>> Testing account creation")
    data = {}
    # Use the session as a context manager so it is closed even when one of
    # the checks below fails.
    with requests.Session() as session:
        response = session.post(url + "/create-account", data)
        assert_result(response, False)

        data["username"] = "******"
        response = session.post(url + "/create-account", data)
        assert_result(response, True)
def test_all_sg_fins(
    text,
    expected_len,
    expected_positions,
    expected_scores,
    recognizer,
    entities,
):
    """Check the number of matches in ``text`` plus each span and score.

    ``expected_scores`` and ``expected_positions`` are walked in lockstep
    with the results; every match is checked against ``entities[0]``.
    """
    matches = recognizer.analyze(text, entities)
    assert len(matches) == expected_len

    # zip() stops at the shortest sequence, so only complete triples are
    # compared.
    triples = zip(matches, expected_scores, expected_positions)
    for match, wanted_score, span in triples:
        begin, finish = span
        assert_result(match, entities[0], begin, finish, wanted_score)
def test_removed_pattern_recognizer_doesnt_work(unit_test_guid):
    """A custom recognizer stops producing results once it is removed."""
    ship_pattern = Pattern("spaceship pattern", r"\W*(spaceship)\W*", 0.8)
    ship_recognizer = PatternRecognizer(
        "SPACESHIP", name="Spaceship recognizer", patterns=[ship_pattern]
    )

    store_mock = RecognizerStoreApiMock()
    engine = AnalyzerEngine(
        registry=MockRecognizerRegistry(store_mock),
        nlp_engine=NlpEngineMock(),
    )

    sentence = "spaceship is my favorite transportation"
    requested = ["CREDIT_CARD", "SPACESHIP"]

    def run_analysis():
        # Same request every time; only the store contents change in between.
        return engine.analyze(
            correlation_id=unit_test_guid,
            text=sentence,
            entities=requested,
            language="en",
            all_fields=False,
        )

    # Before the recognizer is registered nothing should be detected.
    assert len(run_analysis()) == 0

    # Register the custom recognizer for the word "spaceship".
    store_mock.add_custom_pattern_recognizer(ship_recognizer)

    # Now the entity must be reported with the pattern's score.
    detected = run_analysis()
    assert len(detected) == 1
    assert_result(detected[0], "SPACESHIP", 0, 10, 0.8)

    # Remove the recognizer by name.
    store_mock.remove_recognizer("Spaceship recognizer")

    # After removal the analyzer must return no results again.
    assert len(run_analysis()) == 0
def test_when_analyze_with_predefined_recognizers_then_return_results(
    loaded_analyzer_engine, unit_test_guid, max_score
):
    """Only the credit card is reported when only CREDIT_CARD is requested."""
    sentence = " Credit card: 4095-2609-9393-4932, my phone is 425 8829090"
    lang_code = "en"
    requested = ["CREDIT_CARD"]

    hits = loaded_analyzer_engine.analyze(
        correlation_id=unit_test_guid,
        text=sentence,
        entities=requested,
        language=lang_code,
    )

    # The phone number in the sentence must not show up in the results.
    assert len(hits) == 1
    assert_result(hits[0], "CREDIT_CARD", 14, 33, max_score)