def test(self):
    """Run the dedupe command with the pickled model on dumped objects.

    All sections are deleted first and the auto-increment table is rebuilt
    from the permalinks database stored next to this file. ``RunDedupe`` is
    then run with ``recreate_db=True`` on ``dedupe_objects.dump`` and the
    test checks that section 757036 is attached to person 1406125.
    """
    setup_logging(logger_name="test_real_dedupe")
    models.Section.objects.all().delete()

    test_dir = os.path.dirname(__file__)
    permalinks = TPermaLinksPerson(test_dir)
    permalinks.open_db_read_only()
    permalinks.recreate_auto_increment_table()
    permalinks.close_db()

    # Both input files are addressed relative to this test module.
    dump_path = os.path.join(os.path.dirname(__file__), "dedupe_objects.dump")
    forest_path = os.path.join(
        os.path.dirname(__file__),
        "../../../deduplicate/model/random_forest.pickle")

    command = RunDedupe(None, None)
    command.handle(
        None,
        permalinks_folder=test_dir,
        input_dedupe_objects=dump_path,
        model_file=forest_path,
        threshold=0.6,
        recreate_db=True,
        surname_bounds=',',
        write_to_db=True,
    )

    section = models.Section.objects.get(id=757036)
    self.assertEqual(1406125, section.person_id)
def test(self):
    """Check that copying person ids keeps a person that already exists.

    A person with id 1 and no declarator id is saved and attached to the
    first test section. The permalinks storage is created, then the
    copy-person-id command is run. Afterwards there must still be exactly
    one person, with id 1 and ``self.declarator_person_id`` as its
    declarator id.
    """
    self.create_test_db()
    person_id = 1
    person = models.Person(id=person_id, person_name=self.fio)
    self.assertIsNone(person.declarator_person_id)
    person.save()

    section1 = models.Section.objects.get(id=self.section_id1)
    section1.person = person
    section1.save()

    folder = CopyPersonIdTestCaseBase.permalinks_folder
    TPermaLinksPerson(folder).create_and_save_empty_db()
    CreatePermalinksStorageCommand(None, None).handle(None, directory=folder)

    permalinks_db = TPermaLinksPerson(folder)
    permalinks_db.open_db_read_only()
    try:
        permalinks_db.recreate_auto_increment_table()
    finally:
        # The handle is not used again in this test; close it so it does
        # not stay open, as the other tests here do with close_db().
        permalinks_db.close_db()

    self.run_copy_person_id(False, False)

    self.assertEqual(models.Person.objects.count(), 1)
    section1 = models.Section.objects.get(id=self.section_id1)
    self.assertEqual(section1.person.declarator_person_id,
                     self.declarator_person_id)
    self.assertEqual(section1.person.id, person_id)
def test(self):
    """Check that a second copy-person-id run reuses the old person ids.

    The command is run once against an empty permalinks database, the
    permalinks storage is rebuilt from the result, and the command is run
    again. The test then asserts that
    ``get_last_inserted_id_for_testing()`` returns ``None``
    (presumably meaning that no new id was allocated -- TODO confirm).
    """
    folder = CopyPersonIdTestCaseBase.permalinks_folder
    TPermaLinksPerson(folder).create_and_save_empty_db()
    self.run_copy_person_id(False, False)

    # check that we reuse old person ids
    CreatePermalinksStorageCommand(None, None).handle(None, directory=folder)
    permalinks_db = TPermaLinksPerson(folder)
    permalinks_db.open_db_read_only()
    try:
        permalinks_db.recreate_auto_increment_table()
        self.run_copy_person_id(False, False)
        self.assertIsNone(permalinks_db.get_last_inserted_id_for_testing())
    finally:
        # Close only after the final check, which still uses the handle,
        # and even when an assertion above fails.
        permalinks_db.close_db()
def test(self):
    """Check that a rebuild with fake dedupe restores the deleted person id.

    Two sections are attached to person 99 and a third has no person. The
    permalinks database is saved in that state, then the two sections are
    detached and the person is deleted. After ``RunDedupe`` runs with
    ``fake_dedupe`` and ``rebuild``, exactly one person must exist and all
    three sections must point to id 99.
    """
    self.initialize()
    old_person_id = 99
    old_person = models.Person(id=old_person_id)
    old_person.save()
    first = self.create_section(1, "Иванов Иван Иванович", person=old_person)
    second = self.create_section(2, "Иванов И. И.", person=old_person)
    self.create_section(3, "Петров И. И.")

    folder = os.path.dirname(__file__)
    storage = TPermaLinksPerson(folder)
    storage.create_db()
    storage.save_dataset(setup_logging())
    storage.recreate_auto_increment_table()
    storage.close_db()

    # Drop the links and the person so only the permalinks remember id 99.
    for section in (first, second):
        section.person = None
        section.save()
    old_person.delete()

    RunDedupe(None, None).handle(
        None,
        permalinks_folder=folder,
        write_to_db=True,
        fake_dedupe=True,
        surname_bounds=',',
        take_sections_with_empty_income=True,
        rebuild=True,
    )

    self.assertEqual(models.Person.objects.count(), 1)
    for section_id in (1, 2, 3):
        stored = models.Section.objects.get(id=section_id)
        self.assertEqual(stored.person_id, old_person_id)
def test(self):
    """Run the real dedupe model on the person-5295 SQL fixture.

    The fixture ``disclosures.sql.person_id_5295.n`` is loaded, the
    permalinks database is built from it, and ``RunDedupe`` is run with the
    random-forest model. The test expects three persons, person 5295 with
    declarator id 5295, and an exact list of
    ``(section id, person id, has dedupe score)`` triples.
    """
    logger = setup_logging(logger_name="test_real_dedupe")
    test_dir = os.path.dirname(__file__)
    run_sql_script(
        logger, os.path.join(test_dir, "disclosures.sql.person_id_5295.n"))

    db = TPermaLinksPerson(test_dir)
    db.create_db()
    db.save_dataset(setup_logging())
    db.recreate_auto_increment_table()
    db.close_db()

    model_path = os.path.join(
        test_dir, "../../../deduplicate/model/random_forest.pickle")
    RunDedupe(None, None).handle(
        None,
        permalinks_folder=test_dir,
        write_to_db=True,
        surname_bounds=',',
        model_file=model_path,
        threshold=0.6,
    )

    person_id = 5295
    self.assertEqual(models.Person.objects.count(), 3)
    # .get() raises DoesNotExist when the row is missing, so a separate
    # "is not None" check on the result would add nothing.
    person = models.Person.objects.get(id=person_id)
    self.assertEqual(5295, person.declarator_person_id)

    canon_sections = [
        (451721, 5295, True),
        (452066, 5295, True),
        (452420, 5295, True),
        (453686, 5295, False),
        (455039, 5295, False),
        (1801614, 5296, True),
        (5105303, 5295, True),
        (6437989, 5297, True),
        (6672563, 5297, True),
        (6674154, 5297, True),
        (6773981, 5297, True),
    ]
    # Order explicitly by id: the expected list is sorted by id, and the
    # row order of an unordered queryset is not guaranteed.
    sections = [
        (s.id, s.person_id, s.dedupe_score is not None)
        for s in models.Section.objects.order_by("id")
    ]
    self.assertListEqual(canon_sections, sections)
def test(self):
    """Check that a rebuild with fake dedupe keeps an existing person.

    Person 2 (declarator id 1111) owns section 1 and section 2 has no
    person. After the permalinks database is saved and ``RunDedupe`` runs
    with ``fake_dedupe`` and ``rebuild``, there must be exactly one person,
    still with id 2 and declarator id 1111, and both sections must point
    to it.
    """
    self.initialize()
    person_id = 2
    declarator_person_id = 1111
    existing = models.Person(
        id=person_id,
        declarator_person_id=declarator_person_id,
        person_name="Иванов Иван Иванович",
    )
    existing.save()
    self.create_section(1, "Иванов Иван Иванович", existing)
    self.create_section(2, "Иванов И. И.")

    folder = os.path.dirname(__file__)
    storage = TPermaLinksPerson(folder)
    storage.create_db()
    storage.save_dataset(setup_logging())
    storage.recreate_auto_increment_table()
    storage.close_db()

    command = RunDedupe(None, None)
    command.handle(
        None,
        permalinks_folder=folder,
        write_to_db=True,
        fake_dedupe=True,
        surname_bounds=',',
        take_sections_with_empty_income=True,
        rebuild=True,
    )

    self.assertEqual(models.Person.objects.count(), 1)
    reloaded = models.Person.objects.get(id=person_id)
    self.assertIsNotNone(reloaded)
    self.assertEqual(declarator_person_id, reloaded.declarator_person_id)
    for section_id in (1, 2):
        stored = models.Section.objects.get(id=section_id)
        self.assertEqual(stored.person_id, reloaded.id)
def test(self):
    """Check that a changed declarator id is copied onto the same person.

    The copy-person-id command is run once against an empty permalinks
    database and the permalinks storage is rebuilt from the result. The
    command is then run again with ``self.declarator_person_id + 1``.
    There must still be exactly one person, with id 1 and the new
    declarator id.
    """
    folder = CopyPersonIdTestCaseBase.permalinks_folder
    TPermaLinksPerson(folder).create_and_save_empty_db()
    self.run_copy_person_id(False, False)

    # check that we reuse old person ids
    CreatePermalinksStorageCommand(None, None).handle(None, directory=folder)
    permalinks_db = TPermaLinksPerson(folder)
    permalinks_db.open_db_read_only()
    try:
        permalinks_db.recreate_auto_increment_table()
    finally:
        # The handle is not used again in this test; close it so it does
        # not stay open, as the other tests here do with close_db().
        permalinks_db.close_db()

    new_declarator_person_id = self.declarator_person_id + 1
    self.run_copy_person_id(
        False, False, declarator_person_id=new_declarator_person_id)

    self.assertEqual(models.Person.objects.count(), 1)
    section1 = models.Section.objects.get(id=self.section_id1)
    self.assertEqual(section1.person.declarator_person_id,
                     new_declarator_person_id)
    self.assertEqual(section1.person.id, 1)