Пример #1
0
    def to_dict(self):
        """
        Build the Elasticsearch document for this MP/aide link.

        The document starts from ``model_to_dict`` restricted to the ``paid``
        field and is extended with the nested MP-convocation document, the
        aide's id and name, the MP's party as the only company, and the
        searchable name variants / autocomplete suggestions of both people.
        """
        doc = model_to_dict(self, fields=["paid"])

        convocation = self.mp2convocation
        doc["mp"] = convocation.to_dict()

        people = set()
        suggestions = set()
        # MP first, then the aide; each one feeds both lookup sets.
        for person, role in ((convocation.mp, "Депутат"),
                             (self.minion, "Помічник")):
            people |= parse_and_generate(person.name, role)
            suggestions |= autocomplete_suggestions(person.name)

        doc.update({
            "_id": self.id,
            "id": self.minion.id,
            "name": self.minion.name,
            "companies": [convocation.party],
            # Only the person variants are stripped of falsy entries.
            "persons": [variant for variant in people if variant],
            "names_autocomplete": list(suggestions),
        })

        return doc
Пример #2
0
    def to_dict(self):
        """
        Build the Elasticsearch document for this record.

        Starts from the primary key and the dataset bookkeeping timestamps,
        overlays the stored payload (``self.data``) and finally the derived
        search fields:

        * ``companies`` - the office of the main person plus every workplace
          listed in the relatives' careers;
        * ``persons`` / ``names_autocomplete`` - the main person, everybody
          in ``family`` and those ``family_conflicts`` entries whose
          suggestions are not already covered;
        * ``addresses`` - always empty for this dataset.

        Falsy values are dropped from every derived list.
        """
        payload = self.data
        general = payload["general"]
        doc = {
            "_id": self.pk,
            "last_updated_from_dataset": self.last_updated_from_dataset,
            "first_updated_from_dataset": self.first_updated_from_dataset,
        }

        def name_parts(entry):
            # (last name, first name, patronymic) of a payload entry.
            return entry["last_name"], entry["name"], entry["patronymic"]

        def non_empty(values):
            return [value for value in values if value]

        persons = set()
        suggestions = set()
        companies = set()

        last, first, patronymic = name_parts(general)
        persons |= generate_all_names(last, first, patronymic,
                                      general["post"]["post"])
        suggestions |= autocomplete_suggestions(
            concat_name(last, first, patronymic))
        companies.add(general["post"]["office"])

        for relative in general["family"]:
            last, first, patronymic = name_parts(relative)
            persons |= generate_all_names(last, first, patronymic,
                                          relative["relation"])
            suggestions |= autocomplete_suggestions(
                concat_name(last, first, patronymic))
            companies.update(job["workplace"] for job in relative["career"])

        for relative in general["family_conflicts"]:
            last, first, patronymic = name_parts(relative)
            fresh = autocomplete_suggestions(
                concat_name(last, first, patronymic))
            # Nothing to do when the person is already known from above.
            if not fresh or fresh.issubset(suggestions):
                continue

            persons |= generate_all_names(last, first, patronymic,
                                          "Пов'язана особа")
            logger.warning(
                "Oh wow, second section has persons missing from first one {}"
                .format(fresh))
            suggestions |= fresh

        doc.update(payload)
        doc.update({
            "companies": non_empty(companies),
            "addresses": [],  # no address source in this dataset
            "persons": non_empty(persons),
            "names_autocomplete": non_empty(suggestions),
        })

        return doc
Пример #3
0
    def handle(self, *args, **options):
        """
        Recompute the derived search fields of every indexed declaration.

        After applying migrations, each declaration returned by a
        ``match_all`` scan gets: apostrophe-normalised name fields, a
        refreshed ``full_name_suggest`` input, a full-text source made of the
        interesting terms plus their translations, a sorting key, an index
        card, and the ``persons`` / ``names_autocomplete`` lists built from
        the declarant and the listed family members. Every declaration is
        saved back once updated.
        """
        translator = Translator()
        translator.fetch_full_dict_from_db()

        self.apply_migrations()
        declarations = Declaration.search().query('match_all').scan()

        for decl in tqdm.tqdm(declarations):
            # Snapshot is taken BEFORE the name fields are normalised below;
            # ft_src and index_card are derived from it.
            snapshot = decl.to_dict()

            for field in ("full_name", "name", "last_name", "patronymic"):
                cleaned = replace_apostrophes(getattr(decl.general, field))
                setattr(decl.general, field, cleaned)

            first_patronymic_last = ' '.join([
                decl.general.name,
                decl.general.patronymic,
                decl.general.last_name,
            ])
            first_last = ' '.join([
                decl.general.name,
                decl.general.last_name,
            ])
            decl.general.full_name_suggest = {
                'input': [
                    decl.general.full_name,
                    first_patronymic_last,
                    first_last,
                ]
            }

            snapshot["ft_src"] = ""
            terms = filter_only_interesting(snapshot)
            # Materialise the translations first: the list is extended in
            # place and must not be iterated while it grows.
            translations = [
                translator.translate(term)["translation"] for term in terms
            ]
            terms += translations
            decl.ft_src = "\n".join(terms)

            decl.general.full_name_for_sorting = keyword_for_sorting(
                decl.general.full_name)
            decl.index_card = concat_fields(snapshot,
                                            Declaration.INDEX_CARD_FIELDS)

            # (last, first, patronymic, relation); the declarant has no
            # relation.
            name_tuples = [(
                decl.general.last_name,
                decl.general.name,
                decl.general.patronymic,
                None,
            )]
            for relative in decl.general.family:
                last, first, patronymic, _ = parse_fullname(
                    relative.family_name)
                name_tuples.append(
                    (last, first, patronymic, relative.relations))

            persons = set()
            suggestions = set()
            for last, first, patronymic, relation in name_tuples:
                persons |= generate_all_names(
                    last, first, patronymic, relation)
                suggestions |= autocomplete_suggestions(
                    concat_name(last, first, patronymic))

            decl.persons = [variant for variant in persons if variant]
            decl.names_autocomplete = [
                variant for variant in suggestions if variant
            ]

            decl.save()
Пример #4
0
    def to_dict(self):
        """
        Build the Elasticsearch document for this debtor record.

        When ``TIN_S`` is set the record's ``NAME`` is indexed as a company
        (together with the code options generated from ``TIN_S``) and ``PIB``
        as the debtor person; otherwise ``NAME`` itself is the debtor
        person. ``DPI`` is always indexed as a company and ``DPI_BOSS`` as a
        person. The stored payload is copied into the result, followed by
        the derived lists with falsy values removed; ``addresses`` is always
        empty.
        """
        record = self.data
        doc = {
            "_id": self.pk,
            "last_updated_from_dataset": self.last_updated_from_dataset,
            "first_updated_from_dataset": self.first_updated_from_dataset,
        }

        def non_empty(values):
            return [value for value in values if value]

        companies = set()
        persons = set()

        tax_id = record["TIN_S"]
        if tax_id:
            companies |= deal_with_mixed_lang(record["NAME"])
            companies |= generate_edrpou_options(tax_id)
            debtor = record["PIB"]
        else:
            debtor = record["NAME"]
        persons |= parse_and_generate(debtor, "боржник")

        companies |= deal_with_mixed_lang(record["DPI"])
        persons |= parse_and_generate(record["DPI_BOSS"],
                                      "керівник податкової")

        # A new set: companies must not pick up the person suggestions.
        suggestions = (
            companies
            | autocomplete_suggestions(record["NAME"])
            | autocomplete_suggestions(record["PIB"])
            | autocomplete_suggestions(record["DPI_BOSS"])
        )

        doc.update(record)
        doc.update(
            {
                "companies": non_empty(companies),
                "addresses": [],  # no address source in this dataset
                "persons": non_empty(persons),
                "names_autocomplete": non_empty(suggestions),
            }
        )

        return doc
Пример #5
0
    def handle(self, *args, **options):
        """
        Recompute the derived search fields of every indexed declaration.

        After applying migrations, each declaration returned by a
        ``match_all`` scan is reported on stdout and gets: apostrophe-
        normalised name fields, a refreshed ``full_name_suggest`` input, a
        full-text source, a sorting key, an index card, and the ``persons``
        / ``names_autocomplete`` lists built from the declarant and the
        listed family members. Every declaration is saved back once updated.
        """
        self.apply_migrations()

        for decl in Declaration.search().query('match_all').scan():
            # Snapshot is taken BEFORE the name fields are normalised below;
            # ft_src and index_card are derived from it.
            snapshot = decl.to_dict()

            progress_line = 'Processing decl for {}\n'.format(
                decl.general.full_name)
            sys.stdout.write(progress_line)
            sys.stdout.flush()

            for field in ("full_name", "name", "last_name", "patronymic"):
                cleaned = replace_apostrophes(getattr(decl.general, field))
                setattr(decl.general, field, cleaned)

            suggest_inputs = [
                decl.general.full_name,
                ' '.join([
                    decl.general.name,
                    decl.general.patronymic,
                    decl.general.last_name,
                ]),
                ' '.join([decl.general.name, decl.general.last_name]),
            ]
            decl.general.full_name_suggest = {'input': suggest_inputs}

            snapshot["ft_src"] = ""
            decl.ft_src = "\n".join(filter_only_interesting(snapshot))

            decl.general.full_name_for_sorting = keyword_for_sorting(
                decl.general.full_name)
            decl.index_card = concat_fields(snapshot,
                                            Declaration.INDEX_CARD_FIELDS)

            # (last, first, patronymic, relation); the declarant has no
            # relation.
            name_tuples = [(
                decl.general.last_name,
                decl.general.name,
                decl.general.patronymic,
                None,
            )]
            for relative in decl.general.family:
                last, first, patronymic, _ = parse_fullname(
                    relative.family_name)
                name_tuples.append(
                    (last, first, patronymic, relative.relations))

            persons = set()
            suggestions = set()
            for last, first, patronymic, relation in name_tuples:
                persons |= generate_all_names(
                    last, first, patronymic, relation)
                suggestions |= autocomplete_suggestions(
                    concat_name(last, first, patronymic))

            decl.persons = [variant for variant in persons if variant]
            decl.names_autocomplete = [
                variant for variant in suggestions if variant
            ]

            decl.save()
Пример #6
0
    def to_dict(self):
        """
        Build the Elasticsearch document for this donation record.

        ``companies`` holds the party name and the code options generated
        from the donor code, the party and (when present) the branch code
        and branch name. A donor with a ``donator_code`` is indexed as a
        company; a donor without one is indexed as a person and added to the
        autocomplete suggestions. The stored payload is copied into the
        result, followed by the derived lists (falsy values removed) and the
        ``type``, ``period`` and ``ultimate_recepient`` model values.
        """
        record = self.data
        doc = {"_id": self.pk}

        def non_empty(values):
            return [value for value in values if value]

        suggestions = set()
        companies = (
            {record["party"]}
            | generate_edrpou_options(record["donator_code"])
            | generate_edrpou_options(record["party"])
        )

        for optional_key in ("branch_code", "branch_name"):
            value = record.get(optional_key)
            if value:
                companies |= generate_edrpou_options(value)

        addresses = {record["donator_location"]}
        persons = {record.get("candidate_name")}

        donor = record["donator_name"]
        if record["donator_code"]:
            # A donor that has a code is treated as a legal entity.
            companies.add(donor)
        else:
            persons |= parse_and_generate(donor, "Донор")
            suggestions |= autocomplete_suggestions(donor)

        suggestions |= companies
        raw_records = {
            record.get("account_number"),
            record.get("payment_subject"),
            record["transaction_doc_number"],
        }

        doc.update(record)
        doc.update(
            {
                "companies": non_empty(companies),
                "addresses": non_empty(addresses),
                "persons": non_empty(persons),
                "names_autocomplete": non_empty(suggestions),
                "raw_records": non_empty(raw_records),
                "type": self.get_type_display(),
                "period": self.period,
                "ultimate_recepient": self.ultimate_recepient,
            }
        )

        return doc
Пример #7
0
    def to_dict(self):
        """
        Build the Elasticsearch document for a single procurement deal.

        The result starts with the primary key, a ``details_url`` built from
        the deal id and the dataset bookkeeping timestamps; the stored
        payload (``self.data``) is copied on top, followed by the derived
        search fields:

        * ``companies`` - buyer/seller names and the code options generated
          from the buyer, seller and cost dispatcher codes;
        * ``addresses`` - seller and buyer addresses;
        * ``persons`` - name variants of the buyer's representative, if any;
        * ``names_autocomplete`` - the companies plus autocomplete
          suggestions for the buyer's representative;
        * ``raw_records`` - full and short goods names.

        Falsy values are dropped from every derived list.

        :return: dict ready for indexing
        """
        dt = self.data
        res = {
            "_id": self.pk,
            "details_url": "https://z.texty.org.ua/deal/{}".format(dt["id"]),
            "last_updated_from_dataset": self.last_updated_from_dataset,
            "first_updated_from_dataset": self.first_updated_from_dataset,
        }

        purchase = dt["purchase"]
        buyer = purchase["buyer"]
        seller = dt["seller"]

        companies = (
            set([buyer["name"], buyer["name_en"], seller["name"]])
            | generate_edrpou_options(buyer["code"])
            | generate_edrpou_options(seller["code"])
            | generate_edrpou_options(purchase["cost_dispatcher_code"])
        )

        addresses = set([
            seller["address"],
            seller["address_full"],
            buyer["address"],
            buyer["address_en"],
        ])

        raw_records = set(
            [purchase["goods_name"], purchase["goods_name_short"]])

        persons = set()
        # Copy instead of aliasing: the in-place union below must not leak
        # the representative's name suggestions into ``companies``.
        names_autocomplete = set(companies)

        buyer_person = buyer["person"]
        if buyer_person:
            persons |= parse_and_generate(buyer_person,
                                          "Представник замовника")
            names_autocomplete |= autocomplete_suggestions(buyer_person)

        res.update(dt)
        res.update({
            "companies": list(filter(None, companies)),
            "addresses": list(filter(None, addresses)),
            "persons": list(filter(None, persons)),
            "names_autocomplete": list(filter(None, names_autocomplete)),
            "raw_records": list(filter(None, raw_records)),
        })

        return res
Пример #8
0
    def to_dict(self):
        """
        Build the Elasticsearch document for a corruption-register record.

        The result starts with the primary key and the dataset bookkeeping
        timestamps; the stored payload (``self.data``) is copied on top,
        followed by the derived search fields:

        * ``companies`` - the workplace from ``CORR_WORK_PLACE``;
        * ``persons`` / ``names_autocomplete`` - built from the
          ``CORRUPTIONER_*`` name fields;
        * ``raw_records`` - position, activity sphere, offense, punishment,
          case/sentence numbers and codex article data;
        * ``addresses`` - always empty for this dataset.

        Falsy values are dropped from every derived list.

        :return: dict ready for indexing
        """
        dt = self.data
        res = {
            "_id": self.pk,
            "last_updated_from_dataset": self.last_updated_from_dataset,
            "first_updated_from_dataset": self.first_updated_from_dataset,
        }

        # ``set()`` applied to a string would split it into single
        # characters, so a plain string is kept as one company name. Other
        # iterables are still accepted element by element, and a missing
        # value yields no company at all.
        workplace = dt["CORR_WORK_PLACE"]
        if isinstance(workplace, str):
            companies = {workplace}
        else:
            companies = set(workplace or ())

        addresses = set()
        persons = set()

        persons |= generate_all_names(dt["CORRUPTIONER_LAST_NAME"],
                                      dt["CORRUPTIONER_FIRST_NAME"],
                                      dt["CORRUPTIONER_SURNAME"],
                                      "Корупціонер")

        names_autocomplete = autocomplete_suggestions(
            concat_name(dt["CORRUPTIONER_LAST_NAME"],
                        dt["CORRUPTIONER_FIRST_NAME"],
                        dt["CORRUPTIONER_SURNAME"]))

        raw_records = set([
            dt["CORR_WORK_POS"],
            dt["ACTIVITY_SPH_NAME"],
            dt["OFFENSE_NAME"],
            dt["PUNISHMENT"],
            dt["COURT_CASE_NUM"],
            dt["SENTENCE_NUMBER"],
            dt["CODEX_ARTICLES_LIST_CODEX_ART_NUMBER"],
            dt["CODEX_ARTICLES_LIST_CODEX_ART_NAME"],
        ])

        res.update(dt)
        res.update({
            "companies": list(filter(None, companies)),
            "addresses": list(filter(None, addresses)),
            "persons": list(filter(None, persons)),
            "names_autocomplete": list(filter(None, names_autocomplete)),
            "raw_records": list(filter(None, raw_records)),
        })

        return res
Пример #9
0
    def to_dict(self):
        """
        Build the Elasticsearch document for this candidate record.

        The candidate's name feeds ``persons`` and ``names_autocomplete``;
        the party is indexed as a company. When ``body`` mentions
        "область" or "київ" (case-insensitively), the text after the first
        such marker is stored in the payload as ``body_name`` and indexed as
        a company, and ``body`` with that text removed is stored as
        ``body_region``. Note that this writes both keys into ``self.data``
        itself. The payload is then copied into the result, followed by the
        derived lists with falsy values removed.
        """
        record = self.data
        doc = {
            "_id": self.pk,
        }

        def non_empty(values):
            return [value for value in values if value]

        suggestions = set()
        companies = {record["party"]}
        persons = set()

        persons |= parse_and_generate(record["name"], "Кандидат в депутати")
        suggestions |= autocomplete_suggestions(record["name"])

        body = record["body"]
        lowered = body.lower()
        if "область" in lowered or "київ" in lowered:
            # Try the markers in order and stop at the first that splits.
            for marker in (r"область", r"київ"):
                parts = re.split(marker, body, flags=re.I, maxsplit=1)
                if len(parts) == 2:
                    break

            if len(parts) != 2:
                logger.warning("Cannot parse body name out of {}".format(
                    body))
            else:
                body_name = parts[1]
                companies.add(body_name)
                record["body_name"] = body_name
                record["body_region"] = body.replace(body_name, "").strip()

        suggestions |= companies

        doc.update(record)
        doc.update({
            "companies": non_empty(companies),
            "persons": non_empty(persons),
            "names_autocomplete": non_empty(suggestions),
        })

        return doc
Пример #10
0
    def to_dict(self):
        """
        Build the Elasticsearch document for this report.

        Returns ``None`` when ``self.title3_jmespath`` finds no report title
        in the payload; otherwise returns a dict with the report id and
        parsed timestamp, the dataset bookkeeping timestamps, the
        ``report_title`` / ``detailed_title`` sections, the ``associates``
        and ``dismissed_associates`` lists, and the derived ``companies``,
        ``addresses``, ``persons`` and ``names_autocomplete`` lists (falsy
        values removed).

        Date fields (``STD``, ``FID``, ``DAT_PASP``) are replaced with the
        result of ``dt_parse`` directly on the objects returned by the
        JMESPath searches.
        NOTE(review): those objects presumably belong to ``self.data``, so
        the stored payload is likely modified as well - confirm.
        """
        dt = self.data
        res = {
            "_id": self.pk,
            "report_id": dt["report"]["id"],
            "timestamp": dt_parse(dt["report"]["timestamp"]),
            "last_updated_from_dataset": self.last_updated_from_dataset,
            "first_updated_from_dataset": self.first_updated_from_dataset,
        }

        names_autocomplete = set()
        companies = set()
        persons = set()
        addresses = set()

        title1 = self.title1_jmespath.search(dt)
        title2 = self.title2_jmespath.search(dt)
        report_title = self.title3_jmespath.search(dt)
        # Without a report title the record is not converted at all.
        if not report_title:
            return None

        report_title = report_title[0]

        report_title["STD"] = dt_parse(report_title["STD"])
        report_title["FID"] = dt_parse(report_title["FID"])

        # Only the first title found by either of the two title lookups is
        # used.
        titles = title1 + title2
        if titles:
            title = titles[0]

            # Single address line made of the non-empty address parts.
            address = ", ".join(
                filter(
                    None,
                    [
                        title.get("E_CONT"),
                        title.get("E_ADRES"),
                        title.get("E_POST"),
                        title.get("E_RAYON"),
                        title.get("E_STREET"),
                    ],
                )
            )
            addresses.add(address)

            res["detailed_title"] = title
            companies |= deal_with_mixed_lang(title.get("E_NAME"))

            # FIO_PODP is indexed as a person, with POS_PODP as the position.
            if title.get("FIO_PODP"):
                for p in deal_with_mixed_lang(title["FIO_PODP"]):
                    persons |= parse_and_generate(
                        p, title["POS_PODP"] or ""
                    )

                    names_autocomplete |= autocomplete_suggestions(p)

        res["report_title"] = report_title
        companies |= generate_edrpou_options(report_title.get("D_EDRPOU"))
        companies |= deal_with_mixed_lang(report_title.get("D_NAME"))

        associates = self.current_persons_jmespath.search(dt)
        dismissed_associates = self.fired_persons_jmespath.search(dt)

        res["associates"] = associates
        res["dismissed_associates"] = dismissed_associates

        for assoc in associates + dismissed_associates:
            # Guarantees the DAT_PASP key exists (None when it was absent)
            # and parses it when it holds a value.
            assoc["DAT_PASP"] = assoc.get("DAT_PASP")
            if assoc["DAT_PASP"]:
                assoc["DAT_PASP"] = dt_parse(assoc["DAT_PASP"])

            full_name = assoc.get("P_I_B", "") or ""

            if full_name.strip():
                # NOTE(review): parsed_name is never read afterwards.
                parsed_name = ""
                parsed_chunks = []

                # Collect the leading words of the name, stopping at the
                # first word that looks English, is parenthesised or is a
                # dash.
                # TODO: better word splitting
                for chunk in full_name.split():
                    # TODO: better detection of latin
                    chunk = try_to_fix_mixed_charset(chunk)

                    # NOTE(review): `chunk in "-"` is a substring test, so it
                    # is also true for an empty chunk - confirm that stopping
                    # (rather than skipping) is intended in that case.
                    if (
                        is_eng(chunk)
                        or chunk.startswith("(")
                        or chunk.endswith(")")
                        or chunk in "-"
                        or chunk.startswith("-")
                    ):
                        break
                    elif chunk:
                        parsed_chunks.append(chunk)

                # Looks like real person
                if len(parsed_chunks) in [2, 3]:
                    # Both the cleaned-up prefix and the raw full name are
                    # indexed, each with the POSADA value as the position.
                    persons |= parse_and_generate(
                        " ".join(parsed_chunks), assoc.get("POSADA", "") or ""
                    )

                    names_autocomplete |= autocomplete_suggestions(" ".join(parsed_chunks))

                    persons |= parse_and_generate(
                        full_name, assoc.get("POSADA", "") or ""
                    )
                    names_autocomplete |= autocomplete_suggestions(full_name)
                else:
                    # Any other word count is indexed as a company name.
                    companies.add(" ".join(parsed_chunks))
                    companies |= deal_with_mixed_lang(full_name)


        names_autocomplete |= companies

        res.update(
            {
                "companies": list(filter(None, companies)),
                "addresses": list(filter(None, addresses)),
                "persons": list(filter(None, persons)),
                "names_autocomplete": list(filter(None, names_autocomplete)),
            }
        )

        return res
Пример #11
0
    def to_dict(self):
        """
        Build the Elasticsearch document for a company and its history.

        Every related record contributes its locations to ``addresses``,
        phone variants, e-mail and form to ``raw_records``, its profile to
        ``company_profiles`` and its names to ``companies`` /
        ``names_autocomplete``. Every related person contributes
        ``(name, person type)`` pairs to ``raw_persons``, addresses and
        countries to ``addresses`` and the raw record to ``raw_records``;
        the pairs are then expanded into ``persons`` and autocomplete
        suggestions. Falsy values are dropped from every list.

        ``latest_record`` is the serialised record carrying the highest
        revision number, or ``None`` when the company has no record with
        revisions (that case used to raise ``AttributeError``).
        ``internals.flags`` holds the serialised newest snapshot stats, or
        ``None`` when there are none.

        :return: dict ready for indexing
        """
        addresses = set()
        persons = set()
        all_persons = set()
        names_autocomplete = set()
        companies = set()
        company_profiles = set()
        raw_records = set()

        companies.add(self.full_edrpou)
        companies.add(str(self.pk))

        latest_record = None
        latest_revision = 0
        for company_record in (
            self.records.all()
            .defer("company_hash", "location_parsing_quality")
            .nocache()
        ):
            addresses.add(company_record.location)
            addresses.add(company_record.parsed_location)
            addresses.add(company_record.validated_location)
            raw_records |= phone_variants(company_record.phone1)
            raw_records |= phone_variants(company_record.phone2)
            raw_records |= phone_variants(company_record.fax)
            raw_records.add(company_record.email)
            raw_records.add(company_record.form)

            company_profiles.add(company_record.company_profile)
            companies.add(company_record.name)

            names_autocomplete.add(company_record.name)
            names_autocomplete.add(company_record.short_name)
            names_autocomplete.add(self.full_edrpou)
            names_autocomplete.add(str(self.pk))

            companies.add(company_record.short_name)

            if company_record.revisions:
                # Track the record that appears in the newest revision.
                newest_revision = max(company_record.revisions)
                if newest_revision > latest_revision:
                    latest_record = company_record
                    latest_revision = newest_revision
            else:
                logger.warning(
                    "Cannot find revisions for the CompanyRecord {}".format(self.pk)
                )

        for person in (
            self.persons.all().defer("tokenized_record", "share", "revisions").nocache()
        ):
            for name in person.name:
                persons.add((name, person.get_person_type_display()))

                # Addresses and countries are only collected for persons
                # that have at least one name.
                for addr in person.address:
                    addresses.add(addr)

                for country in person.country:
                    addresses.add(country)

            raw_records.add(person.raw_record)

        snapshot = self.snapshot_stats.order_by("-revision_id").first()
        flags = snapshot.to_dict() if snapshot else None

        for name, position in persons:
            all_persons |= parse_and_generate(name, position)
            names_autocomplete |= autocomplete_suggestions(name)

        # No records at all, or none of them with revisions, leaves
        # latest_record unset; emit None instead of failing on .to_dict().
        latest_record_dict = (
            latest_record.to_dict() if latest_record is not None else None
        )

        return {
            "full_edrpou": self.full_edrpou,
            "addresses": list(filter(None, addresses)),
            "raw_persons": list(filter(None, persons)),
            "persons": list(filter(None, all_persons)),
            "companies": list(filter(None, companies)),
            "company_profiles": list(filter(None, company_profiles)),
            "latest_record": latest_record_dict,
            "raw_records": list(filter(None, raw_records)),
            "names_autocomplete": list(filter(None, names_autocomplete)),
            "internals": {"flags": flags},
        }