Exemplo n.º 1
0
    def search_by_name(self, name, lang=("uk",)):
        """Look up Wikidata entity ids whose labels match ``name``.

        One ``wbsearchentities`` request is issued per language code and the
        ``id`` of every entry of the ``search`` list in each JSON response is
        collected.

        :param name: text to search for.
        :param lang: iterable of language codes to query. When falsy
            (``None`` or empty) the codes are derived from ``name``:
            "ru" and "uk" if ``is_cyr(name)``, "el" if ``is_greek(name)``,
            "en" if ``is_eng(name)``.
        :return: set of entity ids. An empty set is returned (and a message
            is written to ``self.stderr``) when any request fails or a
            response body cannot be decoded as JSON; ids gathered before the
            failure are discarded, as before.
        """
        # The default is an immutable tuple so no state can leak between
        # calls through a shared default list.
        if not lang:
            lang = []
            if is_cyr(name):
                lang.extend(["ru", "uk"])

            if is_greek(name):
                lang.append("el")

            if is_eng(name):
                lang.append("en")

        ids = set()

        try:
            for lang_code in lang:
                resp = requests.get("https://www.wikidata.org/w/api.php",
                                    params={
                                        "action": "wbsearchentities",
                                        "search": name,
                                        "language": lang_code,
                                        "format": "json",
                                    },
                                    timeout=60)

                for r in resp.json().get("search", []):
                    ids.add(r["id"])
        except (RequestException, ValueError) as e:
            # ValueError covers a body that is not valid JSON (e.g. an HTML
            # error page); older ``requests`` releases raise it from
            # ``Response.json()`` outside the RequestException hierarchy.
            self.stderr.write(
                "Cannot get a response for name {}, error message is {}".
                format(name, e))
            return set()

        return ids
Exemplo n.º 2
0
    def handle(self, *args, **options):
        """Dump person name parts that have no dictionary entry to a CSV file.

        The first positional argument is the path of the file to write.
        For every ``Person`` the first name, last name and patronymic are
        checked: a part is reported when its lower-cased form is not a term
        of ``Ua2RuDictionary`` and ``is_cyr`` accepts it. Each distinct
        reported value (passed through ``title``) becomes a row with an
        empty second column; first names are written first, then
        patronymics, then last names.

        Raises ``CommandError`` when no positional argument is given.
        """
        if not args:
            raise CommandError('First argument must be a result file')
        file_path = args[0]

        # Only the keys are consulted below; values mirror the dictionary
        # rows with empty translations dropped.
        ru_translations = {
            entry.term.lower(): filter(
                None, [entry.translation, entry.alt_translation])
            for entry in Ua2RuDictionary.objects.all()
        }

        checked_fields = ("first_name", "last_name", "patronymic")
        missing = dict((field, []) for field in checked_fields)

        for person in Person.objects.all():
            for field in checked_fields:
                value = getattr(person, field)
                if value.lower() in ru_translations or not is_cyr(value):
                    continue
                missing[field].append(title(value))

        with open(file_path, "w") as fp:
            csv_out = writer(fp)

            # Output order differs from the check order on purpose: it
            # matches the original file layout.
            for field in ("first_name", "patronymic", "last_name"):
                for name in set(missing[field]):
                    csv_out.writerow([name, ""])
Exemplo n.º 3
0
def company_details(request, company_id):
    """Build the context dict for a single company.

    Looks the company up by primary key (404 when it does not exist) and
    returns a dict with:

    * ``company`` - the model instance;
    * ``filename`` - ``translit`` of the lower-cased, stripped ``name_uk``
      with spaces turned into underscores and newlines removed when
      ``is_cyr`` accepts ``name_uk``, otherwise the primary key;
    * ``feedback_form_override`` - a ``FeedbackForm`` whose ``person``
      initial value is the company name.
    """
    company = get_object_or_404(Company, pk=company_id)
    context = {"company": company}

    name_uk = company.name_uk
    if not is_cyr(name_uk):
        context["filename"] = company.pk
    else:
        normalized = name_uk.lower().strip()
        normalized = normalized.replace(" ", "_")
        normalized = normalized.replace("\n", "")
        context["filename"] = translit(normalized)

    feedback_initial = {"person": unicode(company.name)}
    context["feedback_form_override"] = FeedbackForm(initial=feedback_initial)

    return context
Exemplo n.º 4
0
def company_details(request, company_id):
    """Build the context dict for a single company, including its articles.

    Looks the company up by primary key (404 when it does not exist) and
    returns a dict with:

    * ``company`` - the model instance;
    * ``articles`` - the company's articles filtered by ``kind="i"`` and
      ``publish=True``, ordered by descending ``date``;
    * ``filename`` - ``translit`` of the lower-cased, stripped ``name_uk``
      with spaces turned into underscores and newlines removed when
      ``is_cyr`` accepts ``name_uk``, otherwise the primary key;
    * ``feedback_form_override`` - a ``FeedbackForm`` whose ``person``
      initial value is the company name.
    """
    company = get_object_or_404(Company, pk=company_id)

    published = company.articles.filter(kind="i", publish=True)
    context = {
        "company": company,
        "articles": published.order_by("-date"),
    }

    name_uk = company.name_uk
    if not is_cyr(name_uk):
        context["filename"] = company.pk
    else:
        normalized = name_uk.lower().strip()
        normalized = normalized.replace(" ", "_")
        normalized = normalized.replace("\n", "")
        context["filename"] = translit(normalized)

    feedback_initial = {"person": unicode(company.name)}
    context["feedback_form_override"] = FeedbackForm(initial=feedback_initial)

    return context
Exemplo n.º 5
0
def person_details(request, person_id):
    """Build the context dict for a single person, including scoring data.

    Looks the person up by primary key (404 when it does not exist) and
    returns a dict with the person, an empty ``query``, the results of
    ``get_declarations()`` and ``get_charts_data()``, scoring information
    derived from the person's flags, ``Person2Country`` relationship titles,
    the person's published articles, a ``filename`` and a pre-filled
    ``FeedbackForm``.

    ``scoring_score`` is the sum of ``rule__weight * rule__scale`` over the
    person's flags. ``filename`` is ``translit`` of the normalised Ukrainian
    full name when ``is_cyr`` accepts it, otherwise the primary key.
    """
    person = get_object_or_404(Person, pk=person_id)
    flags_qs = person.flags.select_related("rule").order_by("rule__id")

    context = {"person": person, "query": ""}
    context["all_declarations"] = person.get_declarations()
    context["charts_data"] = person.get_charts_data()

    weights_and_scales = flags_qs.values_list("rule__weight", "rule__scale")
    context["scoring_score"] = sum(
        weight * scale for weight, scale in weights_and_scales)
    context["scoring_flags"] = flags_qs.order_by(
        "-rule__weight", "pk").nocache()
    context["country_connections_titles"] = (
        Person2Country._relationships_explained)

    published = person.articles.filter(kind="i", publish=True)
    context["articles"] = published.order_by("-date")

    name_parts = (
        person.last_name_uk,
        person.first_name_uk,
        person.patronymic_uk,
    )
    full_name = "%s %s %s" % name_parts

    if not is_cyr(full_name):
        context["filename"] = person.pk
    else:
        normalized = full_name.lower().strip()
        normalized = normalized.replace(" ", "_")
        normalized = normalized.replace("\n", "")
        context["filename"] = translit(normalized)

    feedback_initial = {"person": unicode(person)}
    context["feedback_form_override"] = FeedbackForm(initial=feedback_initial)

    return context
Exemplo n.º 6
0
def person_details(request, person_id):
    """Build the context dict for a single person.

    Looks the person up by primary key (404 when it does not exist) and
    returns a dict with:

    * ``person`` - the model instance;
    * ``query`` - always an empty string;
    * ``all_declarations`` - the result of ``person.get_declarations()``;
    * ``filename`` - ``translit`` of the lower-cased, stripped Ukrainian
      "last first patronymic" string with spaces turned into underscores
      and newlines removed when ``is_cyr`` accepts it, otherwise the
      primary key;
    * ``feedback_form_override`` - a ``FeedbackForm`` whose ``person``
      initial value is the text form of the person.
    """
    person = get_object_or_404(Person, pk=person_id)

    context = {"person": person, "query": ""}
    context["all_declarations"] = person.get_declarations()

    name_parts = (
        person.last_name_uk,
        person.first_name_uk,
        person.patronymic_uk,
    )
    full_name = "%s %s %s" % name_parts

    if not is_cyr(full_name):
        context["filename"] = person.pk
    else:
        normalized = full_name.lower().strip()
        normalized = normalized.replace(" ", "_")
        normalized = normalized.replace("\n", "")
        context["filename"] = translit(normalized)

    feedback_initial = {"person": unicode(person)}
    context["feedback_form_override"] = FeedbackForm(initial=feedback_initial)

    return context
Exemplo n.º 7
0
    def handle(self, *args, **options):
        """Match every ``Person`` against ``search_by_name`` and store results.

        For each person the search is widened step by step:

        1. the full name (language "uk") and the English full name ("en");
        2. every non-empty line of ``also_known_as_uk`` and
           ``also_known_as_en`` (language passed as ``None``);
        3. only if nothing was found so far, every non-empty line of
           ``names`` - all of them when ``options["deep_search"]`` is set,
           otherwise only those accepted by ``is_cyr`` - pausing 0.1 s after
           each lookup.

        When ids were found, ``fetch_details`` is called with them and the
        result is saved in the person's ``WikiMatch`` row, which is created
        if missing and otherwise has its ``matched_json`` overwritten.
        A summary of the counters is written to ``self.stdout`` at the end.
        """
        people = Person.objects.all()
        n_created = 0
        n_updated = 0
        n_missed_by_name = 0
        n_missed_entirely = 0

        with tqdm.tqdm(total=people.count()) as pbar:
            for person in people.nocache().iterator():
                pbar.update(1)

                ids = self.search_by_name(person.full_name, ["uk"])
                ids.update(self.search_by_name(person.full_name_en, ["en"]))

                if not ids:
                    n_missed_by_name += 1

                for aka_field in ("also_known_as_uk", "also_known_as_en"):
                    aka_text = getattr(person, aka_field)
                    if not aka_text:
                        continue
                    for aka_name in filter(None, aka_text.split("\n")):
                        ids.update(self.search_by_name(aka_name, None))

                if not ids:
                    for name in filter(None, person.names.split("\n")):
                        # Without deep search only names accepted by is_cyr
                        # are tried.
                        if not (options["deep_search"] or is_cyr(name)):
                            continue
                        ids.update(self.search_by_name(name, None))
                        sleep(0.1)

                if not ids:
                    n_missed_entirely += 1
                    continue

                details = self.fetch_details(ids)

                position = "{} @ {}".format(
                    getattr(person, "last_job_title", ""),
                    getattr(person, "last_workplace", ""),
                )
                match, created = WikiMatch.objects.get_or_create(
                    person_id=person.id,
                    defaults={
                        "pep_name": person.full_name,
                        "pep_position": position,
                        "matched_json": details,
                    },
                )

                if created:
                    n_created += 1
                else:
                    # Existing row: only the matched payload is refreshed.
                    n_updated += 1
                    match.matched_json = details
                    match.save()

        summary = (
            "Created: {}\nUpdated: {}\nNot found by name: {}\nNot found at all: {}"
            .format(n_created, n_updated, n_missed_by_name,
                    n_missed_entirely))
        self.stdout.write(summary)
Exemplo n.º 8
0
    def transliterate(self, person_last_name, person_first_name,
                      person_patronymic):
        """Return a set of spelling variants for a three-part name.

        Each part is looked up (lower-cased) in ``self.ru_translations``;
        a part that is found and accepted by ``is_cyr`` is replaced by its
        list of translations, any other part is kept as is and reported via
        ``self.add_for_translation``.

        The result combines:

        * for the original name, when ``is_cyr`` accepts it: ``translit``
          with every table of ``ALL_UKRAINIAN`` plus the special-case
          variants rendered with ``UkrainianKMU``;
        * for every combination of translated parts: ``translit`` with
          every table of ``ALL_RUSSIAN`` when ``is_ukr`` rejects the name,
          plus the special-case variants rendered with
          ``RussianInternationalPassport``;
        * the untransliterated ``self.get_name`` form of every translated
          combination.

        Special-case variants come from ``self.special_replacements``
        (regex -> replacements applied to the joined name) and
        ``self.special_cases`` (exact part -> replacements applied through
        ``self.replace_item``).
        """
        result = set()

        def candidates_for(part):
            # Translations for a known Cyrillic part, else the part itself
            # (which is also queued for translation).
            key = part.lower()
            if key in self.ru_translations and is_cyr(part):
                return self.ru_translations[key]
            self.add_for_translation(part)
            return [part]

        def add_special_variants(parts, name, table):
            # Regex-driven rewrites of the joined name first, then
            # whole-part substitutions, all rendered with ``table``.
            for pattern, replacements in self.special_replacements.items():
                if not re.search(pattern, name, flags=re.I | re.U):
                    continue
                for repl in replacements:
                    rewritten = re.sub(pattern, repl, name,
                                       flags=re.I | re.U)
                    result.add(translit(title(rewritten), table))

            for special, replacements in self.special_cases.items():
                if special not in parts:
                    continue
                for repl in replacements:
                    swapped = self.replace_item(parts, special, repl)
                    result.add(translit(self.get_name(swapped), table))

        # Lookup order (first, last, patronymic) is kept because each miss
        # calls add_for_translation.
        first_options = candidates_for(person_first_name)
        last_options = candidates_for(person_last_name)
        patronymic_options = candidates_for(person_patronymic)

        translated = [
            (last, first, patronymic)
            for first in first_options
            for patronymic in patronymic_options
            for last in last_options
        ]

        source_parts = (person_last_name, person_first_name,
                        person_patronymic)
        source_name = self.get_name(source_parts)
        if is_cyr(source_name):
            for ua_table in ALL_UKRAINIAN:
                result.add(translit(source_name, ua_table))
            add_special_variants(source_parts, source_name, UkrainianKMU)

        for parts in translated:
            name = self.get_name(parts)
            if not is_ukr(name):
                for ru_table in ALL_RUSSIAN:
                    result.add(translit(name, ru_table))
            add_special_variants(parts, name, RussianInternationalPassport)

        return result.union(self.get_name(parts) for parts in translated)