def search_by_name(self, name, lang=("uk",)):
    """
    Collect Wikidata entity ids that match ``name``.

    One ``wbsearchentities`` request is sent per language and the ids
    found in every response are merged.

    :param name: text to search for
    :param lang: iterable of language codes to query. When it is empty or
        None the languages are derived from ``name`` instead: "ru" and
        "uk" if ``is_cyr(name)``, "el" if ``is_greek(name)``, "en" if
        ``is_eng(name)``. The default is a tuple rather than a list so
        that no mutable object is shared between calls.
    :return: set of entity ids. An empty set is returned (and the error is
        written to ``self.stderr``) when a request fails or a response
        cannot be decoded as JSON.
    """
    if lang:
        # Work on a private copy, the caller's sequence is never touched
        languages = list(lang)
    else:
        languages = []

        if is_cyr(name):
            languages.extend(("ru", "uk"))

        if is_greek(name):
            languages.append("el")

        if is_eng(name):
            languages.append("en")

    ids = set()

    try:
        for code in languages:
            resp = requests.get(
                "https://www.wikidata.org/w/api.php",
                params={
                    "action": "wbsearchentities",
                    "search": name,
                    "language": code,
                    "format": "json",
                },
                timeout=60,
            )

            ids.update(r["id"] for r in resp.json().get("search", []))
    except (RequestException, ValueError) as e:
        # ValueError covers a body that is not valid JSON (e.g. an HTML
        # error page); it is reported the same way as a transport error
        # instead of aborting the whole run.
        self.stderr.write(
            "Cannot get a response for name {}, error message is {}".
            format(name, e))

        return set()

    return ids
def handle(self, *args, **options):
    """
    Write every name part that has no dictionary entry to a CSV file.

    The first positional argument is the path of the file to write.
    A first name, last name or patronymic of a Person is exported when
    its lowercased form is not a term of Ua2RuDictionary and
    ``is_cyr`` accepts it. Each exported value is written once, as a row
    of the value followed by an empty cell: first names, then
    patronymics, then last names.

    :raises CommandError: when no file path was given
    """
    if not args:
        raise CommandError('First argument must be a result file')

    file_path = args[0]

    ru_translations = {
        entry.term.lower(): filter(
            None, [entry.translation, entry.alt_translation])
        for entry in Ua2RuDictionary.objects.all()
    }

    first_names, last_names, patronymics = [], [], []

    # Person attribute -> list that collects its untranslated values
    checks = (
        ("first_name", first_names),
        ("last_name", last_names),
        ("patronymic", patronymics),
    )

    for person in Person.objects.all():
        for attr, bucket in checks:
            value = getattr(person, attr)

            if value.lower() not in ru_translations and is_cyr(value):
                bucket.append(title(value))

    with open(file_path, "w") as fp:
        out = writer(fp)

        # set() drops the duplicates collected above
        for bucket in (first_names, patronymics, last_names):
            for value in set(bucket):
                out.writerow([value, ""])
def company_details(request, company_id):
    """
    Build the template context for a single company.

    Responds with 404 (through get_object_or_404) when no Company has
    the given primary key.

    Context keys:
      * company -- the Company instance
      * filename -- transliterated ``name_uk`` (lowercased, stripped,
        spaces turned into underscores, newlines removed) when
        ``is_cyr`` accepts the name, the primary key otherwise
      * feedback_form_override -- FeedbackForm whose "person" field is
        pre-filled with the company name
    """
    company = get_object_or_404(Company, pk=company_id)

    name_uk = company.name_uk
    if is_cyr(name_uk):
        slug = name_uk.lower().strip()
        slug = slug.replace(" ", "_").replace("\n", "")
        filename = translit(slug)
    else:
        filename = company.pk

    feedback_form = FeedbackForm(initial={"person": unicode(company.name)})

    return {
        "company": company,
        "filename": filename,
        "feedback_form_override": feedback_form,
    }
def company_details(request, company_id):
    """
    Build the template context for a single company, with its articles.

    Responds with 404 (through get_object_or_404) when no Company has
    the given primary key.

    Context keys:
      * company -- the Company instance
      * articles -- the company's articles with kind "i" and
        publish=True, newest date first
      * filename -- transliterated ``name_uk`` (lowercased, stripped,
        spaces turned into underscores, newlines removed) when
        ``is_cyr`` accepts the name, the primary key otherwise
      * feedback_form_override -- FeedbackForm whose "person" field is
        pre-filled with the company name
    """
    company = get_object_or_404(Company, pk=company_id)

    published = company.articles.filter(kind="i", publish=True)
    articles = published.order_by("-date")

    name_uk = company.name_uk
    if is_cyr(name_uk):
        slug = name_uk.lower().strip()
        slug = slug.replace(" ", "_").replace("\n", "")
        filename = translit(slug)
    else:
        filename = company.pk

    feedback_form = FeedbackForm(initial={"person": unicode(company.name)})

    return {
        "company": company,
        "articles": articles,
        "filename": filename,
        "feedback_form_override": feedback_form,
    }
def person_details(request, person_id):
    """
    Build the template context for a single person, including scoring.

    Responds with 404 (through get_object_or_404) when no Person has the
    given primary key.

    Context keys:
      * person, query (always an empty string), all_declarations,
        charts_data
      * scoring_score -- sum of rule weight * rule scale over the
        person's flags
      * scoring_flags -- the flags ordered by descending rule weight,
        then pk
      * country_connections_titles -- Person2Country._relationships_explained
      * articles -- articles with kind "i" and publish=True, newest first
      * filename -- transliterated "last first patronymic" (lowercased,
        stripped, spaces turned into underscores, newlines removed) when
        ``is_cyr`` accepts the full name, the primary key otherwise
      * feedback_form_override -- FeedbackForm whose "person" field is
        pre-filled with the person's text representation
    """
    person = get_object_or_404(Person, pk=person_id)
    flags = person.flags.select_related("rule").order_by("rule__id")

    declarations = person.get_declarations()
    charts = person.get_charts_data()

    weighted = flags.values_list("rule__weight", "rule__scale")
    score = sum(weight * scale for weight, scale in weighted)
    ranked_flags = flags.order_by("-rule__weight", "pk").nocache()

    published = person.articles.filter(kind="i", publish=True)

    context = {
        "person": person,
        "query": "",
        "all_declarations": declarations,
        "charts_data": charts,
        "scoring_score": score,
        "scoring_flags": ranked_flags,
        "country_connections_titles": Person2Country._relationships_explained,
        "articles": published.order_by("-date"),
    }

    name_parts = (
        person.last_name_uk,
        person.first_name_uk,
        person.patronymic_uk,
    )
    full_name = "%s %s %s" % name_parts

    if not is_cyr(full_name):
        context["filename"] = person.pk
    else:
        slug = full_name.lower().strip()
        slug = slug.replace(" ", "_").replace("\n", "")
        context["filename"] = translit(slug)

    context["feedback_form_override"] = FeedbackForm(
        initial={"person": unicode(person)})

    return context
def person_details(request, person_id):
    """
    Build the template context for a single person.

    Responds with 404 (through get_object_or_404) when no Person has the
    given primary key.

    Context keys:
      * person -- the Person instance
      * query -- always an empty string
      * all_declarations -- result of ``person.get_declarations()``
      * filename -- transliterated "last first patronymic" (lowercased,
        stripped, spaces turned into underscores, newlines removed) when
        ``is_cyr`` accepts the full name, the primary key otherwise
      * feedback_form_override -- FeedbackForm whose "person" field is
        pre-filled with the person's text representation
    """
    person = get_object_or_404(Person, pk=person_id)
    declarations = person.get_declarations()

    name_parts = (
        person.last_name_uk,
        person.first_name_uk,
        person.patronymic_uk,
    )
    full_name = "%s %s %s" % name_parts

    if not is_cyr(full_name):
        filename = person.pk
    else:
        slug = full_name.lower().strip()
        slug = slug.replace(" ", "_").replace("\n", "")
        filename = translit(slug)

    feedback_form = FeedbackForm(initial={"person": unicode(person)})

    return {
        "person": person,
        "query": "",
        "all_declarations": declarations,
        "filename": filename,
        "feedback_form_override": feedback_form,
    }
def handle(self, *args, **options):
    """
    Search Wikidata for every Person and store the result as a WikiMatch.

    Lookup order for each person:
      1. the full name ("uk") and the English full name ("en");
      2. if nothing was found, every non-empty line of
         ``also_known_as_uk`` and ``also_known_as_en``;
      3. if still nothing was found, the non-empty lines of ``names``;
         without the "deep_search" option only the lines accepted by
         ``is_cyr`` are tried.

    Details of the found ids are fetched with ``self.fetch_details`` and
    saved to the WikiMatch of the person, which is created when it does
    not exist yet. Counters are written to stdout at the end.
    """
    people = Person.objects.all()

    created_matches = 0
    updated_matches = 0
    not_found_shallow_matches = 0
    not_found_matches = 0

    with tqdm.tqdm(total=people.count()) as progress:
        for person in people.nocache().iterator():
            progress.update(1)

            ids = self.search_by_name(person.full_name, ["uk"])
            ids |= self.search_by_name(person.full_name_en, ["en"])

            if not ids:
                not_found_shallow_matches += 1

                for attr in ("also_known_as_uk", "also_known_as_en"):
                    aliases = getattr(person, attr)
                    if not aliases:
                        continue

                    for alias in filter(None, aliases.split("\n")):
                        ids |= self.search_by_name(alias, None)

                if not ids:
                    for variant in filter(None, person.names.split("\n")):
                        # Trying just russian translations if deep search is off
                        if not (options["deep_search"] or is_cyr(variant)):
                            continue

                        ids |= self.search_by_name(variant, None)
                        sleep(0.1)

            if not ids:
                not_found_matches += 1
                continue

            details = self.fetch_details(ids)

            position = "{} @ {}".format(
                getattr(person, "last_job_title", ""),
                getattr(person, "last_workplace", ""),
            )

            match, created = WikiMatch.objects.get_or_create(
                person_id=person.id,
                defaults={
                    "pep_name": person.full_name,
                    "pep_position": position,
                    "matched_json": details
                },
            )

            if created:
                created_matches += 1
            else:
                # An existing match only gets its json refreshed
                updated_matches += 1
                match.matched_json = details
                match.save()

    summary = "Created: {}\nUpdated: {}\nNot found by name: {}\nNot found at all: {}"
    self.stdout.write(
        summary.format(created_matches, updated_matches,
                       not_found_shallow_matches, not_found_matches))
def transliterate(self, person_last_name, person_first_name,
                  person_patronymic):
    """
    Build a set of transliterated spellings of a person's name.

    Two groups of names are processed:
      * the name as given, transliterated with every table of
        ALL_UKRAINIAN when ``is_cyr`` accepts it;
      * every combination of the translations of its parts found in
        ``self.ru_translations``, transliterated with every table of
        ALL_RUSSIAN unless ``is_ukr`` accepts the combination.

    For both groups extra variants are produced from
    ``self.special_replacements`` (regex substitutions applied to the
    full name) and ``self.special_cases`` (replacement of a whole name
    part).

    :param person_last_name: last name
    :param person_first_name: first name
    :param person_patronymic: patronymic
    :return: set with all transliterations plus the translated names
        themselves (as returned by ``self.get_name``)
    """
    first_names = []
    last_names = []
    patronymics = []

    # Name parts are kept as (last, first, patronymic) tuples
    original = [(person_last_name, person_first_name, person_patronymic)]

    result = set()

    # For every part: use the dictionary translations when the lowercased
    # part is a known term and is_cyr accepts it, otherwise keep the part
    # unchanged and pass it to add_for_translation
    # (presumably to record it as missing from the dictionary -- confirm).
    if (person_first_name.lower() in self.ru_translations and
            is_cyr(person_first_name)):
        first_names = self.ru_translations[person_first_name.lower()]
    else:
        first_names = [person_first_name]
        self.add_for_translation(person_first_name)

    if (person_last_name.lower() in self.ru_translations and
            is_cyr(person_last_name)):
        last_names = self.ru_translations[person_last_name.lower()]
    else:
        last_names = [person_last_name]
        self.add_for_translation(person_last_name)

    if (person_patronymic.lower() in self.ru_translations and
            is_cyr(person_patronymic)):
        patronymics = self.ru_translations[person_patronymic.lower()]
    else:
        patronymics = [person_patronymic]
        self.add_for_translation(person_patronymic)

    # Every combination of the translated parts
    translated = [
        (l, f, p)
        for f in first_names
        for p in patronymics
        for l in last_names
    ]

    for n in original:
        name = self.get_name(n)

        if is_cyr(name):
            for ua_table in ALL_UKRAINIAN:
                result.add(translit(name, ua_table))

            # Regex based variants, matched against the full name
            # ignoring case
            for sc_rex, replacements in self.special_replacements.items():
                if re.search(sc_rex, name, flags=re.I | re.U):
                    for repl in replacements:
                        optional_n = re.sub(sc_rex, repl, name, flags=re.I | re.U)
                        result.add(translit(title(optional_n), UkrainianKMU))

            # n is a tuple, so this matches only when a whole name part
            # is equal to sc
            for sc, replacements in self.special_cases.items():
                if sc in n:
                    for repl in replacements:
                        optional_n = self.replace_item(n, sc, repl)
                        result.add(translit(self.get_name(optional_n), UkrainianKMU))

    for n in translated:
        name = self.get_name(n)

        if not is_ukr(name):
            for ru_table in ALL_RUSSIAN:
                result.add(translit(name, ru_table))

            # Same two kinds of variants as above, transliterated with
            # the RussianInternationalPassport table
            for sc_rex, replacements in self.special_replacements.items():
                if re.search(sc_rex, name, flags=re.I | re.U):
                    for repl in replacements:
                        optional_n = re.sub(sc_rex, repl, name, flags=re.I | re.U)
                        result.add(translit(title(optional_n), RussianInternationalPassport))

            for sc, replacements in self.special_cases.items():
                if sc in n:
                    for repl in replacements:
                        optional_n = self.replace_item(n, sc, repl)
                        result.add(translit(
                            self.get_name(optional_n), RussianInternationalPassport)
                        )

    # The translated names themselves are part of the output as well
    return result | set(map(self.get_name, translated))