def test_valid_us_itin_weak_match(self):
    """An ITIN given as nine bare digits yields exactly one weak match.

    The single result is handed to ``assert_result_within_score_range``
    with ``entities[0]``, positions 0-9 and score bounds 0.3 / 0.4.
    """
    itin = "911701234"
    matches = us_itin_recognizer.analyze(itin, entities)

    assert len(matches) == 1
    only_match = matches[0]
    assert_result_within_score_range(only_match, entities[0], 0, 9, 0.3, 0.4)
def test_valid_us_ssn_weak_match():
    """An SSN given as nine bare digits yields exactly one weak match.

    The result must have a non-zero score and is then checked with
    ``assert_result_within_score_range`` for ``entities[0]``, positions
    0-9 and score bounds 0.3 / 0.4.
    """
    ssn = "078051120"
    found = us_ssn_recognizer.analyze(ssn, entities)

    assert len(found) == 1
    top = found[0]
    assert top.score != 0
    assert_result_within_score_range(top, entities[0], 0, 9, 0.3, 0.4)
def test_date_time_full_date(self):
    """A standalone full date is reported as one ``entities[1]`` result.

    The result is expected to cover positions 0-13 (the whole input) with
    score bounds ``NER_STRENGTH`` / ``EntityRecognizer.MAX_SCORE``.
    """
    sentence = "May 1st, 1977"
    detected = self.prepare_and_analyze(nlp_engine, sentence)

    assert len(detected) == 1
    hit = detected[0]
    assert_result_within_score_range(
        hit, entities[1], 0, 13, NER_STRENGTH, EntityRecognizer.MAX_SCORE
    )
def test_valid_us_passport_no_context(self):
    """A bare nine-digit passport number yields one low-score match.

    The score must be non-zero; the result is then checked with
    ``assert_result_within_score_range`` for ``entities[0]``, positions
    0-9 and score bounds 0 / 0.1.
    """
    passport_no = "912803456"
    found = us_passport_recognizer.analyze(passport_no, entities)

    assert len(found) == 1
    top = found[0]
    assert top.score != 0
    assert_result_within_score_range(top, entities[0], 0, 9, 0, 0.1)
def test_person_title_and_last_name_is_also_a_date_with_context_expected_person_only(self):
    """"Mr. May" after a lead-in phrase is reported once, as ``entities[0]``.

    The analysed text is "They call me Mr. May"; the single expected
    result covers positions 17-20 (the substring "May") with score bounds
    ``NER_STRENGTH`` / ``EntityRecognizer.MAX_SCORE``.
    """
    person = "Mr. May"
    lead_in = "They call me"
    sentence = "{} {}".format(lead_in, person)

    detected = self.prepare_and_analyze(nlp_engine, sentence)

    assert len(detected) == 1
    hit = detected[0]
    assert_result_within_score_range(
        hit, entities[0], 17, 20, NER_STRENGTH, EntityRecognizer.MAX_SCORE
    )
def test_valid_us_itin_medium_match(self):
    """A fully hyphenated ITIN yields exactly one medium-strength match.

    The single result is handed to ``assert_result_within_score_range``
    with ``entities[0]``, positions 0-11 and score bounds 0.5 / 0.6.
    """
    itin = "911-70-1234"
    matches = us_itin_recognizer.analyze(itin, entities)

    assert len(matches) == 1
    only_match = matches[0]
    assert_result_within_score_range(only_match, entities[0], 0, 11, 0.5, 0.6)
def test_phone_number_strong_match_no_context(self):
    """A formatted phone number with no surrounding text matches once.

    The score must not be exactly 1; the result is then checked with
    ``assert_result_within_score_range`` for ``entities[0]``, positions
    0-14 and score bounds 0.7 / ``EntityRecognizer.MAX_SCORE``.
    """
    phone = "(425) 882 9090"
    found = phone_recognizer.analyze(phone, entities)

    assert len(found) == 1
    top = found[0]
    assert top.score != 1
    assert_result_within_score_range(
        top, entities[0], 0, 14, 0.7, EntityRecognizer.MAX_SCORE
    )
def test_valid_us_ssn_medium_match():
    """A fully hyphenated SSN yields exactly one medium-strength match.

    The result must have a non-zero score, pass
    ``assert_result_within_score_range`` for ``entities[0]``, positions
    0-11 and score bounds 0.5 / 0.6, and additionally satisfy the explicit
    open-interval check ``0.49 < score < 0.6``.
    """
    ssn = "078-05-1120"
    found = us_ssn_recognizer.analyze(ssn, entities)

    assert len(found) == 1
    top = found[0]
    assert top.score != 0
    assert_result_within_score_range(top, entities[0], 0, 11, 0.5, 0.6)
    assert 0.49 < top.score < 0.6
def test_valid_ipv4(self):
    """An IPv4 address preceded by a host name is matched exactly once.

    The analysed text is "microsoft.com 192.168.0.1"; the expected result
    covers positions 14-25 (the address) for ``entities[0]`` with score
    bounds 0.6 / 0.81.
    """
    address = "192.168.0.1"
    prefix = "microsoft.com "
    sentence = prefix + address

    found = ip_recognizer.analyze(sentence, entities)

    assert len(found) == 1
    assert_result_within_score_range(found[0], entities[0], 14, 25, 0.6, 0.81)
def test_date_time_day_in_month_with_year_with_context(self):
    """A full date at the end of a sentence is reported once.

    The analysed text is "I bought my car on May 1st, 1977"; the single
    expected ``entities[1]`` result covers positions 19-32 (the date) with
    score bounds ``NER_STRENGTH`` / ``EntityRecognizer.MAX_SCORE``.
    """
    when = "May 1st, 1977"
    lead_in = "I bought my car on"
    sentence = "{} {}".format(lead_in, when)

    detected = self.prepare_and_analyze(nlp_engine, sentence)

    assert len(detected) == 1
    hit = detected[0]
    assert_result_within_score_range(
        hit, entities[1], 19, 32, NER_STRENGTH, EntityRecognizer.MAX_SCORE
    )
def test_person_first_name_with_context(self):
    """A first name after "my name is" is reported once.

    The analysed text is "my name is Dan"; the single expected
    ``entities[0]`` result covers positions 11-14 (the name) with score
    bounds ``NER_STRENGTH`` / ``EntityRecognizer.MAX_SCORE``.
    """
    person = "Dan"
    lead_in = "my name is"
    sentence = "{} {}".format(lead_in, person)

    detected = self.prepare_and_analyze(nlp_engine, sentence)

    assert len(detected) == 1
    hit = detected[0]
    assert_result_within_score_range(
        hit, entities[0], 11, 14, NER_STRENGTH, EntityRecognizer.MAX_SCORE
    )
def test_person_full_name_with_context(self):
    """A full name at the start of a sentence is reported once.

    The analysed text is "John Oliver is the funniest comedian"; the
    single expected ``entities[0]`` result covers positions 0-11 (the
    name) with score bounds ``NER_STRENGTH`` /
    ``EntityRecognizer.MAX_SCORE``.
    """
    person = "John Oliver"
    trailer = " is the funniest comedian"
    # The trailer carries its own leading space, hence no separator here.
    sentence = "{}{}".format(person, trailer)

    detected = self.prepare_and_analyze(nlp_engine, sentence)

    assert len(detected) == 1
    hit = detected[0]
    assert_result_within_score_range(
        hit, entities[0], 0, 11, NER_STRENGTH, EntityRecognizer.MAX_SCORE
    )
def test_valid_us_driver_license_weak_WA(self):
    """Two space-separated licence strings each produce a weak match.

    Both 12-character values are analysed in one text; the results are
    expected at positions 0-12 and 13-25 respectively, each checked for
    ``entities[0]`` with score bounds 0.3 / 0.4.
    """
    first_license = "AA1B2**9ABA7"
    second_license = "A*1234AB*CD9"
    sentence = "{} {}".format(first_license, second_license)

    found = us_license_recognizer.analyze(sentence, entities)

    assert len(found) == 2
    assert_result_within_score_range(found[0], entities[0], 0, 12, 0.3, 0.4)
    assert_result_within_score_range(found[1], entities[0], 13, 25, 0.3, 0.4)
def test_valid_us_ssn_very_weak_match():
    """Two SSNs with a single, oddly placed hyphen each match very weakly.

    Both values are analysed in one space-separated text. Exactly two
    results are expected, at positions 0-10 and 11-21. Each result must
    have a non-zero score and pass ``assert_result_within_score_range``
    for ``entities[0]`` with score bounds 0 / 0.3.
    """
    num1 = '078-051120'
    num2 = '07805-1120'
    results = us_ssn_recognizer.analyze('{} {}'.format(num1, num2), entities)

    assert len(results) == 2

    assert results[0].score != 0
    assert_result_within_score_range(results[0], entities[0], 0, 10, 0, 0.3)

    # Check the second result's own score; the original re-checked
    # results[0] here, so a zero score on results[1] was never caught.
    assert results[1].score != 0
    assert_result_within_score_range(results[1], entities[0], 11, 21, 0, 0.3)
def test_person_last_name_is_also_a_date_with_context_expected_person_only(self):
    """"Dan May" at the start of a sentence is reported once, as a whole.

    The analysed text is "Dan May has a bank account"; the single expected
    ``entities[0]`` result covers positions 0-7 (the full name) with score
    bounds ``NER_STRENGTH`` / ``EntityRecognizer.MAX_SCORE``. The score,
    entity type and matched substring are printed before the final check.
    """
    person = "Dan May"
    trailer = "has a bank account"
    sentence = "{} {}".format(person, trailer)

    detected = self.prepare_and_analyze(nlp_engine, sentence)

    assert len(detected) == 1
    hit = detected[0]

    # Diagnostic output describing what was actually detected.
    print(hit.score)
    print(hit.entity_type)
    print(sentence[hit.start:hit.end])

    assert_result_within_score_range(
        hit, entities[0], 0, 7, NER_STRENGTH, EntityRecognizer.MAX_SCORE
    )
def test_valid_us_itin_very_weak_match(self):
    """Two ITINs with a single, oddly placed hyphen each match very weakly.

    Both values are analysed in one space-separated text. Exactly two
    results are expected, at positions 0-10 and 11-21; each must have a
    non-zero score and pass ``assert_result_within_score_range`` for
    ``entities[0]`` with score bounds 0 / 0.3.
    """
    first_itin = "911-701234"
    second_itin = "91170-1234"
    sentence = "{} {}".format(first_itin, second_itin)

    matches = us_itin_recognizer.analyze(sentence, entities)

    assert len(matches) == 2

    leading = matches[0]
    assert leading.score != 0
    assert_result_within_score_range(leading, entities[0], 0, 10, 0, 0.3)

    trailing = matches[1]
    assert trailing.score != 0
    assert_result_within_score_range(trailing, entities[0], 11, 21, 0, 0.3)
def test_when_using_spacy_then_all_spacy_result_found(
    text,
    expected_len,
    expected_positions,
    entity_num,
    nlp_engine,
    nlp_recognizer,
    entities,
    ner_strength,
    max_score,
):
    """Check the count, positions and score range of results for ``text``.

    ``prepare_and_analyze`` is run on ``text``; the number of results must
    equal ``expected_len``. Results are then paired in order with
    ``expected_positions`` (``(start, end)`` pairs) and each pair is
    checked against ``entities[entity_num]`` with score bounds
    ``ner_strength`` / ``max_score``.
    """
    found = prepare_and_analyze(nlp_engine, nlp_recognizer, text, entities)
    assert len(found) == expected_len

    expected_entity = entities[entity_num]
    for result, span in zip(found, expected_positions):
        start, end = span
        assert_result_within_score_range(
            result, expected_entity, start, end, ner_strength, max_score
        )
def test_when_driver_licenes_in_text_then_all_us_driver_licenses_found(
    text,
    expected_len,
    expected_positions,
    expected_score_ranges,
    recognizer,
    entities,
    max_score,
):
    """Check the count, positions and score ranges of results for ``text``.

    The number of results from ``recognizer.analyze`` must equal
    ``expected_len``. Results are paired in order with
    ``expected_positions`` (``(start, end)``) and ``expected_score_ranges``
    (``(low, high)``); a ``high`` of ``"max"`` is replaced by ``max_score``.
    Every pairing is checked against ``entities[0]``.
    """
    found = recognizer.analyze(text, entities)
    assert len(found) == expected_len

    for result, span, score_range in zip(
        found, expected_positions, expected_score_ranges
    ):
        start, end = span
        low, high = score_range
        # "max" is a placeholder for the max_score argument.
        upper = max_score if high == "max" else high
        assert_result_within_score_range(
            result, entities[0], start, end, low, upper
        )
def test_all_us_passports(
    text,
    expected_len,
    expected_positions,
    expected_score_ranges,
    recognizer,
    entities,
    max_score,
):
    """Check the count, positions and score ranges of results for ``text``.

    The number of results from ``recognizer.analyze`` must equal
    ``expected_len``. Results are paired in order with
    ``expected_positions`` (``(start, end)``) and ``expected_score_ranges``
    (``(low, high)``); a ``high`` of ``"max"`` is replaced by ``max_score``.
    Every pairing is checked against ``entities[0]``.
    """
    found = recognizer.analyze(text, entities)
    assert len(found) == expected_len

    for result, span, score_range in zip(
        found, expected_positions, expected_score_ranges
    ):
        start, end = span
        low, high = score_range
        # "max" is a placeholder for the max_score argument.
        upper = max_score if high == "max" else high
        assert_result_within_score_range(
            result, entities[0], start, end, low, upper
        )
def test_when_snn_in_text_than_all_us_ssns_are_found(
    text,
    expected_len,
    expected_positions,
    expected_score_ranges,
    recognizer,
    entities,
    max_score,
):
    """Check the count, positions and score ranges of results for ``text``.

    Results from ``recognizer.analyze`` are sorted by their ``start``
    attribute before comparison, so expectations are matched in ascending
    start order. The number of results must equal ``expected_len``.
    Results are paired in order with ``expected_positions``
    (``(start, end)``) and ``expected_score_ranges`` (``(low, high)``); a
    ``high`` of ``"max"`` is replaced by ``max_score``. Every pairing is
    checked against ``entities[0]``.
    """
    ordered = sorted(
        recognizer.analyze(text, entities), key=lambda item: item.start
    )
    assert len(ordered) == expected_len

    for result, span, score_range in zip(
        ordered, expected_positions, expected_score_ranges
    ):
        start, end = span
        low, high = score_range
        # "max" is a placeholder for the max_score argument.
        upper = max_score if high == "max" else high
        assert_result_within_score_range(
            result, entities[0], start, end, low, upper
        )
def test_assert_result_within_score_range_uses_given_range():
    """The helper accepts results whose score lies inside the given bounds.

    Three ``RecognizerResult`` objects (positions 0-10, type
    ``ENTITY_TYPE``) are checked in turn, each with bounds that bracket
    its score; none of the calls is expected to raise.
    """
    # (score, lower bound, upper bound)
    passing_cases = (
        (0.3, 0.2, 0.4),
        (0.1, 0.05, 0.15),
        (0.9, 0.89, 0.91),
    )
    for score, lower, upper in passing_cases:
        result = RecognizerResult(ENTITY_TYPE, 0, 10, score)
        assert_result_within_score_range(result, ENTITY_TYPE, 0, 10, lower, upper)
def test_assert_result_within_score_range_uses_given_range_fails():
    """The helper raises ``AssertionError`` for scores outside the bounds.

    Three ``RecognizerResult`` objects (positions 0-10, type
    ``ENTITY_TYPE``) are checked with bounds that exclude their score:
    0.3 and 0 against 0.4-0.6 (below the lower bound), and 1 against
    0-0.5 (above the upper bound).
    """
    # (score, lower bound, upper bound)
    failing_cases = (
        (0.3, 0.4, 0.6),
        (0, 0.4, 0.6),
        (1, 0, 0.5),
    )
    for score, lower, upper in failing_cases:
        # Build the result outside the raises block so that only the helper
        # call can satisfy the expectation; an AssertionError raised while
        # constructing the result must fail this test, not pass it.
        result = RecognizerResult(ENTITY_TYPE, 0, 10, score)
        with pytest.raises(AssertionError):
            assert_result_within_score_range(
                result, ENTITY_TYPE, 0, 10, lower, upper
            )