def create_test_db(self):
    """Reset the tables touched by the test and insert a small fixture.

    The fixture is one source document (id=1) with a declarator file
    reference, and two sections named ``self.fio`` that each get a single
    main-declarant income (``self.income_main1`` / ``self.income_main2``).
    """
    stale_models = (
        models.Section,
        models.Declarator_File_Reference,
        models.Source_Document,
        models.Person,
    )
    for stale_model in stale_models:
        stale_model.objects.all().delete()

    # The sections below use office_id=1; fail early if no office exists.
    self.assertGreater(models.Office.objects.count(), 0)

    document = models.Source_Document(id=1)
    document.save()
    file_reference = models.Declarator_File_Reference(
        source_document=document,
        declarator_document_id=self.declarator_document_id,
    )
    file_reference.save()

    first_section = models.Section(
        id=self.section_id1,
        source_document=document,
        person_name=self.fio,
        office_id=1,
    )
    first_section.save()
    second_section = models.Section(
        id=self.section_id2,
        source_document=document,
        person_name=self.fio,
        office_id=1,
    )
    second_section.save()

    incomes = (
        (first_section, self.income_main1),
        (second_section, self.income_main2),
    )
    for owner, amount in incomes:
        models.Income(
            section=owner,
            size=amount,
            relative=models.Relative.main_declarant_code,
        ).save()
def test(self):
    """Run a fake dedupe rebuild and expect both sections on the same person.

    Section 1 is linked to person id=2 before the run, section 2 is not.
    After ``RunDedupe.handle`` (``fake_dedupe=True``, ``rebuild=True``) both
    sections must reference that person.
    """
    full_name = "Иванов Иван Иванович"
    document = create_default_source_document()
    person = models.Person(id=2, declarator_person_id=1111, person_name=full_name)
    person.save()
    models.Section(
        id=1, source_document=document, person_name=full_name, person=person
    ).save()
    models.Section(
        id=2, source_document=document, person_name="Иванов И. И."
    ).save()
    person.refresh_from_db()

    # Fresh permalinks db whose next Person primary key is 3.
    db_path = os.path.join(os.path.dirname(__file__), "permalinks.dbm")
    permalinks = TPermaLinksDB(db_path)
    permalinks.create_db()
    permalinks.save_next_primary_key_value(models.Person, 3)
    permalinks.create_sql_sequences()
    permalinks.close()

    dedupe_options = {
        "permanent_links_db": db_path,
        "write_to_db": True,
        "fake_dedupe": True,
        "surname_bounds": ',',
        "rebuild": True,
    }
    RunDedupe(None, None).handle(None, **dedupe_options)

    for section_id in (1, 2):
        section = models.Section.objects.get(id=section_id)
        self.assertEqual(section.person_id, person.id)
def test(self):
    """Run a fake dedupe rebuild and expect the person id from the permalinks db.

    Neither section is linked to a person beforehand. The permalinks db holds
    a record for person id=99 whose ``tmp_section_set`` names sections "1"
    and "2"; after ``RunDedupe.handle`` both sections must reference id 99.
    """
    document = create_default_source_document()
    models.Section(
        id=1, source_document=document, person_name="Иванов Иван Иванович"
    ).save()
    models.Section(
        id=2, source_document=document, person_name="Иванов И. И."
    ).save()

    db_path = os.path.join(os.path.dirname(__file__), "permalinks.dbm")
    permalinks = TPermaLinksDB(db_path)
    permalinks.create_db()
    # This person is only registered in the permalinks db; it is never saved
    # through the ORM in this test.
    person = models.Person(id=99)
    person.tmp_section_set = {"1", "2"}
    permalinks.put_record_id(person)
    permalinks.save_next_primary_key_value(models.Person, 100)
    permalinks.create_sql_sequences()
    permalinks.close()

    dedupe_options = {
        "permanent_links_db": db_path,
        "write_to_db": True,
        "fake_dedupe": True,
        "surname_bounds": ',',
        "rebuild": True,
    }
    RunDedupe(None, None).handle(None, **dedupe_options)

    for section_id in (1, 2):
        section = models.Section.objects.get(id=section_id)
        self.assertEqual(section.person_id, person.id)
def test_search_section_by_partial_person_name(self):
    """Index three sections and check how partial names match them.

    Each expectation is (query, number of hits, id of the first hit); the
    first-hit id is only checked where it is not ``None``.
    """
    models.Section.objects.all().delete()
    models.Source_Document.objects.all().delete()
    document = models.Source_Document(id=1)
    document.save()

    fixture = (
        (1, "Один Иван Ильич"),
        (2, "Два Иван Ильич"),
        (3, "Иван Ильич"),
    )
    for section_id, name in fixture:
        models.Section(
            id=section_id,
            person_name=name,
            source_document=document,
            office_id=1,
        ).save()

    BuildElasticIndex(None, None).handle(None, model="section")

    expectations = (
        ("Один Иван", 1, 1),
        ("Два Иван", 1, 2),
        ("Один Иван Ильич", 1, 1),
        ("Иван Ильич", 3, None),
        ("Ильич", 3, None),
        ("Один", 1, None),
    )
    for query, hit_count, first_hit_id in expectations:
        found = self.search_sections_by_fio(query)
        self.assertEqual(len(found), hit_count)
        if first_hit_id is not None:
            self.assertEqual(found[0].id, first_hit_id)
def test_elastic(self):
    """Recreate the section index, rebuild it from three sections, then search.

    The index must be empty after recreation; after the rebuild each of the
    three surnames must match exactly one document and an unknown surname none.
    """

    def count_hits(surname):
        # Number of indexed sections whose person_name matches the surname.
        matches = list(
            ElasticSectionDocument.search().query('match', person_name=surname)
        )
        return len(matches)

    self.assertGreater(models.Office.objects.count(), 0)

    # Drop and recreate the index so that it holds no documents.
    section_index = Index(
        settings.ELASTICSEARCH_INDEX_NAMES['section_index_name'],
        Elasticsearch(),
    )
    section_index.delete()
    section_index.create()
    time.sleep(2)

    # Nothing can be found in the freshly created index.
    self.assertEqual(count_hits('Иванов'), 0)

    models.Section.objects.all().delete()
    models.Source_Document.objects.all().delete()
    office = models.Office.objects.get(id=1)
    document = models.Source_Document(id=1)
    document.save()
    fixture = (
        (1, "Иванов Иван"),
        (2, "Петров Петр"),
        (3, "Сидоров Федор"),
    )
    for section_id, name in fixture:
        models.Section(
            id=section_id,
            person_name=name,
            source_document=document,
            office=office,
        ).save()

    # Rebuild the section index; chunk_size=2 is smaller than the three
    # sections saved above.
    TSectionElasticIndexator.chunk_size = 2
    BuildElasticIndex(None, None).handle(None, model="section")
    time.sleep(2)

    expected_hits = (
        ('Иванов', 1),
        ('Петров', 1),
        ('Сидоров', 1),
        ('Сокирко', 0),
    )
    for surname, hit_count in expected_hits:
        self.assertEqual(count_hits(surname), hit_count)
def test_rating(self):
    """Build the rating for three sections of one person and check the places.

    The sections have main-declarant incomes 1, 2 and 3. After
    ``BuildRatingCommand.handle`` with ``min_members_count=3`` three rating
    items are expected, read back as value 3 / place 1, value 2 / place 2,
    value 1 / place 3.
    """
    for stale_model in (models.Person, models.Section, models.Source_Document):
        stale_model.objects.all().delete()

    document = models.Source_Document(id=1)
    document.save()
    owner = models.Person(id=99)
    owner.save()

    fixture = ((1, "i1"), (2, "i2"), (3, "i3"))
    for section_id, name in fixture:
        models.Section(
            id=section_id,
            source_document=document,
            person_name=name,
            income_year=2019,
            office_id=1,
            person=owner,
        ).save()
    # Each section's income size equals its id.
    for section_id, _ in fixture:
        models.Income(
            section_id=section_id,
            size=section_id,
            relative=models.Relative.main_declarant_code,
        ).save()

    BuildRatingCommand(None, None).handle(None, min_members_count=3)

    self.assertEqual(models.Person_Rating_Items.objects.count(), 3)
    # NOTE(review): the order of items depends on the queryset's default
    # ordering - confirm the model defines one.
    items = list(models.Person_Rating_Items.objects.all())
    expected = ((3, 1), (2, 2), (1, 3))
    for item, (value, place) in zip(items, expected):
        self.assertEqual(item.rating_value, value)
        self.assertEqual(item.person_place, place)
def test_elastic(self):
    """Check that a saved section becomes searchable in the section index.

    The index is emptied first and must return no hits for 'Иванов'. One
    section named "Иванов Иван" is then saved and, after a pause, must be
    the single hit for the same query.
    """
    ElasticSectionDocument.init()
    # Guard: the next statement deletes every document in the index, so only
    # proceed when the index name ends with "_test".
    self.assertTrue(ElasticSectionDocument._index._name.endswith("_test"))
    ElasticSectionDocument.search().query().delete()
    time.sleep(2)

    people = list(ElasticSectionDocument.search().query(
        'match', person_name='Иванов'))
    self.assertEqual(len(people), 0)

    models.Section.objects.all().delete()
    models.Source_Document.objects.all().delete()

    ofc = models.Office()
    ofc.name = "some name"
    ofc.save()

    src_doc = models.Source_Document()
    src_doc.id = 1
    src_doc.office = ofc
    src_doc.save()

    section = models.Section()
    section.id = 1
    section.person_name = "Иванов Иван"
    section.source_document = src_doc
    # No explicit reindex is called here; presumably saving the section
    # triggers indexing - verify against the document's signal setup.
    section.save()

    print("sleep 2 seconds till elastic processes records")
    time.sleep(2)

    people = list(ElasticSectionDocument.search().query(
        'match', person_name='Иванов'))
    print(len(people))
    self.assertEqual(len(people), 1)
def check_case(self, use_only_surname, check_ambiguity):
    """Run CopyPersonIdCommand on one section and check the resulting person.

    A JSON file mapping the section passport to a declarator person id is
    written next to this module, one section with a matching income is
    created, and the command is run against a fresh permalinks db.

    :param use_only_surname: build the passport key from the surname only
        instead of the full name.
    :param check_ambiguity: store "AMBIGUOUS_KEY" instead of the person id;
        the section is then expected to stay without a person.
    """
    models.Section.objects.all().delete()
    self.assertGreater(models.Office.objects.count(), 0)

    fio = "Иванов Иван Иванович"
    document_id = 1784
    income_main = 12534
    declarator_person_id = 178

    fio_key = fio.split()[0] if use_only_surname else fio
    value = "AMBIGUOUS_KEY" if check_ambiguity else declarator_person_id

    person_ids_path = os.path.join(os.path.dirname(__file__), "person_ids.json")
    with open(person_ids_path, "w") as outp:
        record = {
            build_section_passport(document_id, fio_key, income_main): value
        }
        json.dump(record, outp, ensure_ascii=False, indent=4)

    document = models.Source_Document()
    document.office_id = 1
    document.id = 1
    document.save()
    models.Declarator_File_Reference(
        source_document=document,
        declarator_document_id=document_id,
    ).save()

    section = models.Section(source_document=document, person_name=fio)
    section.id = 1
    section.save()
    models.Income(
        section=section,
        size=income_main,
        relative=models.Relative.main_declarant_code,
    ).save()

    permalinks_path = os.path.join(os.path.dirname(__file__), "permalinks.dbm")
    permalinks = TPermaLinksDB(permalinks_path)
    permalinks.create_db()
    permalinks.close_db()

    CopyPersonIdCommand(None, None).handle(
        None,
        read_person_from_json=person_ids_path,
        permanent_links_db=permalinks_path,
    )

    section.refresh_from_db()
    if not check_ambiguity:
        self.assertEqual(section.person.declarator_person_id, declarator_person_id)
    else:
        self.assertEqual(section.person, None)
def test_search_section_by_person_name(self):
    """Index one section and find it by several spellings of the name.

    The first hit for the initials form, the full name and the
    surname-plus-first-name form must each be section 1.
    """
    self.assertGreater(models.Office.objects.count(), 0)
    models.Section.objects.all().delete()
    models.Source_Document.objects.all().delete()

    document = models.Source_Document(id=1)
    document.save()
    models.Section(
        id=1,
        person_name="Иванов Иван Иванович",
        source_document=document,
        office_id=1,
    ).save()
    BuildElasticIndex(None, None).handle(None, model="section")

    for query in ("Иванов И.И.", "Иванов Иван Иванович", "Иванов Иван"):
        first_hit = self.search_sections_by_fio(query)[0]
        self.assertEqual(first_hit.id, 1)
def test_corrected_person(self):
    """Check that a corrected section pair is listed once for its person.

    Two sections of person 1 are created with ids 8048661 and 9798543, which
    ``SECTION_CORRECTIONS`` maps onto each other.
    ``Person.sections_ordered_by_year`` must then contain one section only.
    """
    models.Section.objects.all().delete()
    models.Source_Document.objects.all().delete()
    models.Person.objects.all().delete()
    src_doc = models.Source_Document(id=1)
    src_doc.save()

    original_id = 8048661
    corrected_id = 9798543
    # Precondition on the corrections table. A TestCase assertion is used
    # instead of a bare ``assert`` so the check still runs under ``python -O``
    # and reports both values on failure.
    self.assertEqual(
        SECTION_CORRECTIONS.get_corrected_section_id(original_id),
        corrected_id,
    )

    full_name = "Иванов Иван Ильич"
    models.Person(id=1, person_name=full_name).save()
    for section_id in (original_id, corrected_id):
        models.Section(
            id=section_id,
            income_year=2016,
            person_name=full_name,
            source_document=src_doc,
            office_id=1,
            person_id=1,
        ).save()

    person = models.Person.objects.get(id=1)
    sections = person.sections_ordered_by_year
    self.assertEqual(1, len(sections))
def read_dumped_objects(self, file_name):
    """Load dumped dedupe objects from a JSON-lines file and cluster them.

    Every line of ``file_name`` is parsed into a ``TDeduplicationObject`` and
    appended to ``self.cluster_by_minimal_fio`` under its fio-with-initials
    key. With the ``recreate_db`` option set, a bare ``Section`` (also stored
    in ``self.section_cache``) or ``Person`` row is saved for every object;
    the section table must be empty beforehand.

    :param file_name: path of the dump, one JSON object per line.
    """
    recreate_db = self.options.get('recreate_db')
    if recreate_db:
        assert models.Section.objects.count() == 0

    with open(file_name) as dump:
        for raw_line in dump:
            dedupe_object = TDeduplicationObject().from_json(json.loads(raw_line))
            if recreate_db:
                record = dedupe_object.record_id
                if record.source_table != TDeduplicationObject.SECTION:
                    models.Person(id=record.id).save()
                else:
                    # A dumped section must belong to exactly one office.
                    assert len(dedupe_object.offices) == 1
                    section = models.Section(
                        id=record.id,
                        office_id=list(dedupe_object.offices)[0],
                    )
                    self.section_cache[record.id] = section
                    section.save()
            fio_key = dedupe_object.fio.build_fio_with_initials()
            self.cluster_by_minimal_fio[fio_key].append(dedupe_object)
def create_records(self, records):
    """Replace the test tables with the rows described by ``records``.

    :param records: dict with optional keys 'source_documents', 'persons',
        'sections' and 'redirects'; each value is a list of keyword-argument
        dicts for the matching model. A section dict must contain
        'office_id'; an office with that id (name "aaa") is created when it
        does not exist yet.
    """
    stale_models = (
        models.Section,
        models.Source_Document,
        models.Person,
        models.PersonRedirect,
    )
    for stale_model in stale_models:
        stale_model.objects.all().delete()
    assert models.Office.objects.all().count() > 0

    for document_fields in records.get('source_documents', []):
        models.Source_Document(**document_fields).save()

    for person_fields in records.get('persons', []):
        models.Person(**person_fields).save()

    for section_fields in records.get('sections', []):
        office_id = section_fields['office_id']
        existing_offices = models.Office.objects.filter(id=office_id)
        if not existing_offices:
            models.Office(id=office_id, name="aaa").save()
        models.Section(**section_fields).save()

    for redirect_fields in records.get('redirects', []):
        models.PersonRedirect(**redirect_fields).save()
def create_section(self, section_id, person_name, person=None):
    """Save and return a section in the test office and ``self.src_doc``.

    :param section_id: primary key of the new section.
    :param person_name: value of the section's ``person_name`` field.
    :param person: optional person to link the section to.
    :return: the saved ``models.Section`` instance.
    """
    fields = dict(
        id=section_id,
        office_id=TEST_OFFICE_ID,
        source_document=self.src_doc,
        person_name=person_name,
        person=person,
    )
    new_section = models.Section(**fields)
    new_section.save()
    return new_section