def test(self):
    """Run a fake-dedupe rebuild after a person was deleted and check ids.

    Scenario: person 99 owns sections 1 and 2, while section 3 has no
    person.  The permalinks database is written in that state; afterwards
    the two sections are detached and the person row is deleted.  After
    the dedupe run the test expects exactly one person, and expects all
    three sections to point at the original person id.
    """
    self.initialize()

    person_id = 99
    person = models.Person(id=person_id)
    person.save()
    section1 = self.create_section(1, "Иванов Иван Иванович", person=person)
    section2 = self.create_section(2, "Иванов И. И.", person=person)
    self.create_section(3, "Петров И. И.")

    # Write the permalinks database while the person is still attached.
    permalinks_folder = os.path.dirname(__file__)
    permalinks = TPermaLinksPerson(permalinks_folder)
    permalinks.create_db()
    permalinks.save_dataset(setup_logging())
    permalinks.recreate_auto_increment_table()
    permalinks.close_db()

    # Detach both sections, then remove the person itself.
    for owned_section in (section1, section2):
        owned_section.person = None
        owned_section.save()
    person.delete()

    dedupe_options = {
        "permalinks_folder": permalinks_folder,
        "write_to_db": True,
        "fake_dedupe": True,
        "surname_bounds": ',',
        "take_sections_with_empty_income": True,
        "rebuild": True,
    }
    command = RunDedupe(None, None)
    command.handle(None, **dedupe_options)

    self.assertEqual(models.Person.objects.count(), 1)
    for section_id in (1, 2, 3):
        stored_section = models.Section.objects.get(id=section_id)
        self.assertEqual(stored_section.person_id, person_id)
def test(self):
    """Run the real dedupe model on an SQL fixture and check the result.

    The fixture script ``disclosures.sql.person_id_5295.n`` is loaded, the
    permalinks database is written from it, and ``RunDedupe`` is run with
    the random forest model file and a threshold of 0.6.  The test then
    expects three persons, expects person 5295 to keep its declarator
    person id, and compares every section against a canonical list.
    """
    logger = setup_logging(logger_name="test_real_dedupe")
    sql_script = os.path.join(
        os.path.dirname(__file__), "disclosures.sql.person_id_5295.n")
    run_sql_script(logger, sql_script)

    permalinks_folder = os.path.dirname(__file__)
    db = TPermaLinksPerson(permalinks_folder)
    db.create_db()
    db.save_dataset(setup_logging())
    db.recreate_auto_increment_table()
    db.close_db()

    model_path = os.path.join(
        os.path.dirname(__file__),
        "../../../deduplicate/model/random_forest.pickle")
    run_dedupe = RunDedupe(None, None)
    run_dedupe.handle(
        None,
        permalinks_folder=permalinks_folder,
        write_to_db=True,
        surname_bounds=',',
        model_file=model_path,
        threshold=0.6,
    )

    person_id = 5295
    self.assertEqual(models.Person.objects.count(), 3)
    person = models.Person.objects.get(id=person_id)
    self.assertIsNotNone(person)
    self.assertEqual(5295, person.declarator_person_id)

    # Each tuple is (section id, person id, dedupe_score is not None),
    # listed in ascending section id order.
    canon_sections = [
        (451721, 5295, True),
        (452066, 5295, True),
        (452420, 5295, True),
        (453686, 5295, False),
        (455039, 5295, False),
        (1801614, 5296, True),
        (5105303, 5295, True),
        (6437989, 5297, True),
        (6672563, 5297, True),
        (6674154, 5297, True),
        (6773981, 5297, True),
    ]
    # Order explicitly by id: a queryset without an ordering returns rows
    # in an unspecified order, which would make the list comparison flaky.
    sections = [
        (s.id, s.person_id, s.dedupe_score is not None)
        for s in models.Section.objects.order_by("id")
    ]
    self.assertListEqual(canon_sections, sections)
def test(self):
    """Run a fake-dedupe rebuild with one existing person and check ids.

    Scenario: person 2 (declarator person id 1111) owns section 1, while
    section 2 has no person.  The permalinks database is written in that
    state and the dedupe command is run.  The test expects a single
    person with the same id and declarator person id, and expects both
    sections to reference that person.
    """
    self.initialize()

    person_id = 2
    declarator_person_id = 1111
    existing_person = models.Person(
        id=person_id,
        declarator_person_id=declarator_person_id,
        person_name="Иванов Иван Иванович",
    )
    existing_person.save()
    self.create_section(1, "Иванов Иван Иванович", existing_person)
    self.create_section(2, "Иванов И. И.")

    # Write the permalinks database for the current state.
    permalinks_folder = os.path.dirname(__file__)
    permalinks = TPermaLinksPerson(permalinks_folder)
    permalinks.create_db()
    permalinks.save_dataset(setup_logging())
    permalinks.recreate_auto_increment_table()
    permalinks.close_db()

    dedupe_options = {
        "permalinks_folder": permalinks_folder,
        "write_to_db": True,
        "fake_dedupe": True,
        "surname_bounds": ',',
        "take_sections_with_empty_income": True,
        "rebuild": True,
    }
    command = RunDedupe(None, None)
    command.handle(None, **dedupe_options)

    self.assertEqual(models.Person.objects.count(), 1)
    person = models.Person.objects.get(id=person_id)
    self.assertIsNotNone(person)
    self.assertEqual(declarator_person_id, person.declarator_person_id)
    for section_id in (1, 2):
        stored_section = models.Section.objects.get(id=section_id)
        self.assertEqual(stored_section.person_id, person.id)