Пример #1
0
    def create_test_db(self):
        """Reset the affected tables and insert two sections with one income each.

        Both sections are attached to a single source document (id=1), share
        ``self.fio`` as the person name and use office 1.  The source document
        is linked to ``self.declarator_document_id`` through a
        ``Declarator_File_Reference`` row.  At least one ``Office`` row must
        already exist, which is asserted right after the cleanup.
        """
        # Clean up in the same order as before: sections, file references,
        # source documents, persons.
        for model in (models.Section,
                      models.Declarator_File_Reference,
                      models.Source_Document,
                      models.Person):
            model.objects.all().delete()
        self.assertGreater(models.Office.objects.count(), 0)

        document = models.Source_Document(id=1)
        document.save()

        file_reference = models.Declarator_File_Reference(
            source_document=document,
            declarator_document_id=self.declarator_document_id)
        file_reference.save()

        # Save both sections first, then their incomes.
        saved_sections = []
        for section_id in (self.section_id1, self.section_id2):
            new_section = models.Section(id=section_id,
                                         source_document=document,
                                         person_name=self.fio,
                                         office_id=1)
            new_section.save()
            saved_sections.append(new_section)

        income_sizes = (self.income_main1, self.income_main2)
        for owner_section, income_size in zip(saved_sections, income_sizes):
            main_income = models.Income(
                section=owner_section,
                size=income_size,
                relative=models.Relative.main_declarant_code)
            main_income.save()
Пример #2
0
    def test(self):
        """Run a fake-dedupe rebuild and expect both sections on the saved person.

        Section 1 is created already linked to person id=2, section 2 is
        created without a person.  After ``RunDedupe`` runs with
        ``rebuild=True`` and ``fake_dedupe=True``, both sections must
        reference that same person.
        """
        source_document = create_default_source_document()

        existing_person = models.Person(id=2,
                                        declarator_person_id=1111,
                                        person_name="Иванов Иван Иванович")
        existing_person.save()

        linked_section = models.Section(id=1,
                                        source_document=source_document,
                                        person_name="Иванов Иван Иванович",
                                        person=existing_person)
        linked_section.save()
        unlinked_section = models.Section(id=2,
                                          source_document=source_document,
                                          person_name="Иванов И. И.")
        unlinked_section.save()

        existing_person.refresh_from_db()

        # Build a fresh permalinks database next to this test module and
        # register 3 as the next Person primary key.
        permalinks_path = os.path.join(os.path.dirname(__file__), "permalinks.dbm")
        permalinks_db = TPermaLinksDB(permalinks_path)
        permalinks_db.create_db()
        permalinks_db.save_next_primary_key_value(models.Person, 3)
        permalinks_db.create_sql_sequences()
        permalinks_db.close()

        dedupe_options = {
            "permanent_links_db": permalinks_path,
            "write_to_db": True,
            "fake_dedupe": True,
            "surname_bounds": ',',
            "rebuild": True,
        }
        RunDedupe(None, None).handle(None, **dedupe_options)

        for section_id in (1, 2):
            stored_section = models.Section.objects.get(id=section_id)
            self.assertEqual(stored_section.person_id, existing_person.id)
Пример #3
0
    def test(self):
        """Run a fake-dedupe rebuild and expect the person id stored in permalinks.

        Two sections without a person are created.  An unsaved ``Person``
        with id=99 whose ``tmp_section_set`` names both sections is recorded
        in the permalinks database.  After ``RunDedupe`` runs with
        ``rebuild=True`` and ``fake_dedupe=True``, both sections must
        reference person 99.
        """
        source_document = create_default_source_document()
        section_names = ((1, "Иванов Иван Иванович"), (2, "Иванов И. И."))
        for section_id, person_name in section_names:
            models.Section(id=section_id,
                           source_document=source_document,
                           person_name=person_name).save()

        permalinks_path = os.path.join(os.path.dirname(__file__), "permalinks.dbm")
        permalinks_db = TPermaLinksDB(permalinks_path)
        permalinks_db.create_db()
        # The person is never saved through the ORM; only its record id is
        # stored in the permalinks database.
        remembered_person = models.Person(id=99)
        remembered_person.tmp_section_set = {"1", "2"}
        permalinks_db.put_record_id(remembered_person)
        permalinks_db.save_next_primary_key_value(models.Person, 100)
        permalinks_db.create_sql_sequences()
        permalinks_db.close()

        dedupe_options = {
            "permanent_links_db": permalinks_path,
            "write_to_db": True,
            "fake_dedupe": True,
            "surname_bounds": ',',
            "rebuild": True,
        }
        RunDedupe(None, None).handle(None, **dedupe_options)

        for section_id in (1, 2):
            stored_section = models.Section.objects.get(id=section_id)
            self.assertEqual(stored_section.person_id, remembered_person.id)
Пример #4
0
    def test_search_section_by_partial_person_name(self):
        """Search indexed sections by full and partial person names.

        Three sections are indexed whose names differ only in the first
        word.  Each query is checked for the number of hits and, where the
        hit is unique and its id matters, for the id of the first hit.
        """
        models.Section.objects.all().delete()
        models.Source_Document.objects.all().delete()
        source_document = models.Source_Document(id=1)
        source_document.save()

        fixtures = (
            (1, "Один Иван Ильич"),
            (2, "Два Иван Ильич"),
            (3, "Иван Ильич"),
        )
        for section_id, person_name in fixtures:
            models.Section(id=section_id,
                           person_name=person_name,
                           source_document=source_document,
                           office_id=1).save()
        BuildElasticIndex(None, None).handle(None, model="section")

        # (query, expected hit count, expected id of the first hit or None
        # when the id is not checked)
        expectations = (
            ("Один Иван", 1, 1),
            ("Два Иван", 1, 2),
            ("Один Иван Ильич", 1, 1),
            ("Иван Ильич", 3, None),
            ("Ильич", 3, None),
            ("Один", 1, None),
        )
        for query, expected_count, expected_first_id in expectations:
            found = self.search_sections_by_fio(query)
            self.assertEqual(len(found), expected_count)
            if expected_first_id is not None:
                self.assertEqual(found[0].id, expected_first_id)
Пример #5
0
    def test_elastic(self):
        """Rebuild the section index and check match queries by surname.

        The index is dropped and recreated, three sections are saved, the
        index is rebuilt with a chunk size of 2, and every stored surname
        must produce exactly one hit while an absent surname produces none.
        """
        def count_matches(surname):
            # Number of indexed sections whose person_name matches the surname.
            hits = ElasticSectionDocument.search().query(
                'match', person_name=surname)
            return len(list(hits))

        self.assertGreater(models.Office.objects.count(), 0)

        # Drop and recreate the index so that it holds no documents.
        section_index = Index(
            settings.ELASTICSEARCH_INDEX_NAMES['section_index_name'],
            Elasticsearch())
        section_index.delete()
        section_index.create()
        time.sleep(2)

        # Nothing has been indexed yet, so the search must come back empty.
        self.assertEqual(count_matches('Иванов'), 0)

        models.Section.objects.all().delete()
        models.Source_Document.objects.all().delete()

        office = models.Office.objects.get(id=1)

        source_document = models.Source_Document(id=1)
        source_document.save()

        fixtures = (
            (1, "Иванов Иван"),
            (2, "Петров Петр"),
            (3, "Сидоров Федор"),
        )
        for section_id, person_name in fixtures:
            models.Section(id=section_id,
                           person_name=person_name,
                           source_document=source_document,
                           office=office).save()

        # Rebuild the section index; the chunk size (2) is smaller than the
        # number of sections (3).
        TSectionElasticIndexator.chunk_size = 2
        BuildElasticIndex(None, None).handle(None, model="section")
        time.sleep(2)

        for surname in ('Иванов', 'Петров', 'Сидоров'):
            self.assertEqual(count_matches(surname), 1)

        self.assertEqual(count_matches('Сокирко'), 0)
Пример #6
0
    def test_rating(self):
        """Build the rating for three sections and check values and places.

        Three 2019 sections in office 1 are created for one person with
        incomes 1, 2 and 3.  After ``BuildRatingCommand`` runs with
        ``min_members_count=3`` there must be three rating items: the
        largest income takes place 1 and the smallest takes place 3.
        """
        models.Person.objects.all().delete()
        models.Section.objects.all().delete()
        models.Source_Document.objects.all().delete()

        src_doc = models.Source_Document(id=1)
        src_doc.save()

        person_id = 99
        person = models.Person(id=person_id)
        person.save()

        section_ids = (1, 2, 3)
        for section_id in section_ids:
            models.Section(id=section_id,
                           source_document=src_doc,
                           person_name="i{}".format(section_id),
                           income_year=2019,
                           office_id=1,
                           person=person).save()
        # The income size equals the section id: 1, 2 and 3.
        for section_id in section_ids:
            models.Income(section_id=section_id,
                          size=section_id,
                          relative=models.Relative.main_declarant_code).save()

        builder = BuildRatingCommand(None, None)
        builder.handle(None, min_members_count=3)

        self.assertEqual(models.Person_Rating_Items.objects.count(), 3)
        # A queryset without an explicit ordering comes back in an
        # unspecified order, so sort by place before indexing into it.
        rating = sorted(models.Person_Rating_Items.objects.all(),
                        key=lambda item: item.person_place)

        expected = ((3, 1), (2, 2), (1, 3))  # (rating_value, person_place)
        for item, (rating_value, person_place) in zip(rating, expected):
            self.assertEqual(item.rating_value, rating_value)
            self.assertEqual(item.person_place, person_place)
Пример #7
0
    def test_elastic(self):
        """Check that a saved section becomes searchable in Elasticsearch.

        Every document is deleted from the section index, a single section
        is saved, and after a short pause a match query on the surname must
        return exactly one hit.  No explicit reindex is called here, so the
        save itself is presumably what triggers indexing (TODO confirm).
        """
        ElasticSectionDocument.init()
        # Safety guard: the next statement deletes every document in the
        # index, so refuse to continue unless this is a test index.
        self.assertTrue(ElasticSectionDocument._index._name.endswith("_test"))
        ElasticSectionDocument.search().query().delete()
        time.sleep(2)
        people = list(ElasticSectionDocument.search().query(
            'match', person_name='Иванов'))
        self.assertEqual(len(people), 0)

        models.Section.objects.all().delete()
        models.Source_Document.objects.all().delete()

        ofc = models.Office()
        ofc.name = "some name"
        ofc.save()

        src_doc = models.Source_Document()
        src_doc.id = 1
        src_doc.office = ofc
        src_doc.save()

        section = models.Section()
        section.id = 1
        section.person_name = "Иванов Иван"
        section.source_document = src_doc
        section.save()
        print("sleep 2 seconds till elastic processes records")
        time.sleep(2)

        people = list(ElasticSectionDocument.search().query(
            'match', person_name='Иванов'))
        self.assertEqual(len(people), 1)
Пример #8
0
    def check_case(self, use_only_surname, check_ambiguity):
        """Copy a person id from a JSON file onto a section and verify it.

        A JSON file with a single section-passport key is written next to
        this module, then one source document, file reference, section and
        income are saved and ``CopyPersonIdCommand`` is run against them.

        :param use_only_surname: build the passport key from the first word
            of the name instead of the full name.
        :param check_ambiguity: store ``"AMBIGUOUS_KEY"`` as the value
            instead of the person id; the section must then stay without a
            person.
        """
        models.Section.objects.all().delete()
        self.assertGreater(models.Office.objects.count(), 0)

        fio = "Иванов Иван Иванович"
        document_id = 1784
        income_main = 12534
        declarator_person_id = 178

        fio_key = fio.split()[0] if use_only_surname else fio
        value = "AMBIGUOUS_KEY" if check_ambiguity else declarator_person_id

        person_ids_path = os.path.join(os.path.dirname(__file__), "person_ids.json")
        with open(person_ids_path, "w") as outp:
            passport = build_section_passport(document_id, fio_key, income_main)
            json.dump({passport: value}, outp, ensure_ascii=False, indent=4)

        source_document = models.Source_Document(id=1, office_id=1)
        source_document.save()

        models.Declarator_File_Reference(
            source_document=source_document,
            declarator_document_id=document_id).save()

        section = models.Section(id=1,
                                 source_document=source_document,
                                 person_name=fio)
        section.save()

        models.Income(section=section,
                      size=income_main,
                      relative=models.Relative.main_declarant_code).save()

        # An empty permalinks database is enough for the copier.
        permalinks_path = os.path.join(os.path.dirname(__file__), "permalinks.dbm")
        permalinks_db = TPermaLinksDB(permalinks_path)
        permalinks_db.create_db()
        permalinks_db.close_db()

        CopyPersonIdCommand(None, None).handle(
            None,
            read_person_from_json=person_ids_path,
            permanent_links_db=permalinks_path)

        section.refresh_from_db()
        if not check_ambiguity:
            self.assertEqual(section.person.declarator_person_id, declarator_person_id)
        else:
            self.assertIsNone(section.person)
Пример #9
0
    def test_search_section_by_person_name(self):
        """Find one indexed section by initials, by full name and by partial name.

        For each spelling of the name, the first search hit must be the
        section with id=1.
        """
        self.assertGreater(models.Office.objects.count(), 0)
        models.Section.objects.all().delete()
        models.Source_Document.objects.all().delete()

        source_document = models.Source_Document(id=1)
        source_document.save()
        only_section = models.Section(id=1,
                                      person_name="Иванов Иван Иванович",
                                      source_document=source_document,
                                      office_id=1)
        only_section.save()
        BuildElasticIndex(None, None).handle(None, model="section")

        queries = ("Иванов И.И.", "Иванов Иван Иванович", "Иванов Иван")
        for query in queries:
            first_hit = self.search_sections_by_fio(query)[0]
            self.assertEqual(first_hit.id, 1)
Пример #10
0
    def test_corrected_person(self):
        """Check that a person with an original and a corrected section lists one.

        ``SECTION_CORRECTIONS`` must map section 8048661 to 9798543.  Both
        sections are saved for the same person and year, and
        ``Person.sections_ordered_by_year`` must then contain one section.
        Presumably the corrected section replaces the original one there
        (TODO confirm which of the two is kept).
        """
        original_section_id = 8048661
        corrected_section_id = 9798543

        models.Section.objects.all().delete()
        models.Source_Document.objects.all().delete()
        models.Person.objects.all().delete()
        src_doc = models.Source_Document(id=1)
        src_doc.save()

        # Precondition on the corrections table.  A unittest assertion is
        # used so the check survives ``python -O`` and reports both values.
        self.assertEqual(
            SECTION_CORRECTIONS.get_corrected_section_id(original_section_id),
            corrected_section_id)

        models.Person(id=1, person_name="Иванов Иван Ильич").save()
        for section_id in (original_section_id, corrected_section_id):
            models.Section(id=section_id,
                           income_year=2016,
                           person_name="Иванов Иван Ильич",
                           source_document=src_doc,
                           office_id=1,
                           person_id=1).save()

        person = models.Person.objects.get(id=1)
        sections = person.sections_ordered_by_year
        self.assertEqual(1, len(sections))
Пример #11
0
 def read_dumped_objects(self, file_name):
     """Load dumped deduplication objects from a file with one JSON value per line.

     Every object is appended to ``self.cluster_by_minimal_fio`` under the
     key returned by ``fio.build_fio_with_initials()``.  When the
     ``recreate_db`` option is set, the section table must be empty
     beforehand and a ``Section`` (also remembered in
     ``self.section_cache``) or a ``Person`` row is saved for each object.

     :param file_name: path to the dump file.
     """
     recreate_db = self.options.get('recreate_db')
     if recreate_db:
         assert models.Section.objects.count() == 0
     with open(file_name) as dump_file:
         for raw_line in dump_file:
             dedupe_object = TDeduplicationObject().from_json(json.loads(raw_line))
             if recreate_db:
                 record_id = dedupe_object.record_id
                 if record_id.source_table != TDeduplicationObject.SECTION:
                     models.Person(id=record_id.id).save()
                 else:
                     # A section needs exactly one office to be recreated.
                     assert len(dedupe_object.offices) == 1
                     section = models.Section(
                         id=record_id.id,
                         office_id=list(dedupe_object.offices)[0])
                     # Cached before saving, as in the original order.
                     self.section_cache[record_id.id] = section
                     section.save()
             fio_key = dedupe_object.fio.build_fio_with_initials()
             self.cluster_by_minimal_fio[fio_key].append(dedupe_object)
Пример #12
0
    def create_records(self, records):
        """Replace the test data with the rows described in ``records``.

        Sections, source documents, persons and person redirects are
        deleted first.  ``records`` is a dict that may contain the keys
        ``'source_documents'``, ``'persons'``, ``'sections'`` and
        ``'redirects'``; each maps to a list of keyword-argument dicts for
        the corresponding model.  A missing key means no rows of that kind.
        An office referenced by a section is created with the name "aaa"
        when it does not exist yet.

        :param records: dict of model keyword-argument lists, see above.
        """
        for model in (models.Section,
                      models.Source_Document,
                      models.Person,
                      models.PersonRedirect):
            model.objects.all().delete()
        assert models.Office.objects.all().count() > 0

        for document_fields in records.get('source_documents', []):
            models.Source_Document(**document_fields).save()

        for person_fields in records.get('persons', []):
            models.Person(**person_fields).save()

        for section_fields in records.get('sections', []):
            office_id = section_fields['office_id']
            # exists() asks the database for a yes/no answer instead of
            # loading the matching rows just to count them.
            if not models.Office.objects.filter(id=office_id).exists():
                models.Office(id=office_id, name="aaa").save()
            models.Section(**section_fields).save()

        for redirect_fields in records.get('redirects', []):
            models.PersonRedirect(**redirect_fields).save()
Пример #13
0
 def create_section(self, section_id, person_name, person=None):
     """Save and return a section in the test office.

     The section is attached to ``self.src_doc`` and ``TEST_OFFICE_ID``.

     :param section_id: primary key of the new section.
     :param person_name: person name stored on the section.
     :param person: optional person to link the section to.
     :return: the saved ``models.Section`` instance.
     """
     new_section = models.Section(
         id=section_id,
         office_id=TEST_OFFICE_ID,
         source_document=self.src_doc,
         person_name=person_name,
         person=person,
     )
     new_section.save()
     return new_section