Пример #1
0
    def translate(self, phrase, just_transliterate=False):
        """
        Produce a translation record for ``phrase``.

        Resolution order:

        1. a phrase for which ``is_cyr`` is false is returned as is;
        2. with ``just_transliterate`` set, only ``translit`` is applied;
        3. ``self.inner_dict`` is probed with the key from ``self.get_id``
           and then with the key from ``self.get_loose_id``;
        4. ``self.auto_translate`` is tried;
        5. otherwise the phrase is transliterated with quality 1 and, when
           ``self.store_unseen`` is truthy, the stripped phrase is recorded
           in ``self.unseen``.

        :param phrase: text to translate
        :param just_transliterate: skip dictionary lookups and transliterate
        :returns: an entry of ``self.inner_dict``, a truthy result of
            ``self.auto_translate``, or a dict with the keys ``term``,
            ``translation``, ``source`` and ``quality``
        """
        if not is_cyr(phrase):
            return {
                "term": phrase,
                "translation": phrase,
                "source": "not_required",
                "quality": 10,
            }

        if just_transliterate:
            return {
                "term": phrase,
                "translation": translit(phrase),
                "source": "translit",
                "quality": 10,
            }

        # Exact key first, loose key second; the loose key is only computed
        # when the exact one misses.
        for make_key in (self.get_id, self.get_loose_id):
            key = make_key(phrase)
            if key in self.inner_dict:
                return self.inner_dict[key]

        automatic = self.auto_translate(phrase)
        if automatic:
            return automatic

        stripped = phrase.strip()
        if stripped and self.store_unseen:
            self.unseen.update([stripped])

        return {
            "term": phrase,
            "translation": translit(phrase),
            "source": "translit",
            "quality": 1,
        }
Пример #2
0
def tranliterate_all_the_things(name):
    """
    Romanise a string that may mix Ukrainian and Russian letters.

    The text goes through ``translit`` twice: first without an explicit
    table, then with ``RussianInternationalPassport`` applied to the result
    of the first pass.

    >>> tranliterate_all_the_things("bэdtєd і тыква")
    'bedtied i tykva'
    """
    first_pass = translit(name)
    return translit(first_pass, RussianInternationalPassport)
Пример #3
0
    def get_nodes(self):
        """
        Build a graph description of this object and its related entities.

        Family members and owned/related companies are collected from the
        ``source`` of every declaration that is neither of doc type
        "Форма змін" nor flagged ``exclude``. Each of them becomes a node
        linked to the ``root`` node; node ids are SHA-256 hex digests of the
        original (untransliterated) name. When the active language is "en",
        the root and person labels are transliterated; company labels are
        left as they are.

        :returns: dict with a ``nodes`` list and an ``edges`` list
        """
        in_english = get_language() == "en"

        def make_node(node_id, label, classes):
            return {"data": {"id": node_id, "label": label}, "classes": classes}

        def edge_from_root(node_id):
            return {"data": {"source": "root", "target": node_id}}

        def digest(text):
            return hashlib.sha256(text.encode("utf-8")).hexdigest()

        root_label = translit(self.name) if in_english else self.name

        persons = set()
        companies = set()
        declarations = self.declarations.exclude(
            doc_type="Форма змін").exclude(exclude=True)

        for declaration in declarations:
            related = declaration.source["related_entities"]
            persons |= set(related["people"]["family"])
            companies |= set(related["companies"]["owned"])
            companies |= set(related["companies"]["related"])

        nodes = [make_node("root", root_label, ["root", "person"])]
        edges = []

        for person in persons:
            # The id is derived from the original spelling, before any
            # transliteration of the label.
            person_id = digest(person)
            person_label = translit(person) if in_english else person
            nodes.append(make_node(person_id, person_label, ["person"]))
            edges.append(edge_from_root(person_id))

        for company in companies:
            company_id = digest(company)
            nodes.append(make_node(company_id, company, ["company"]))
            edges.append(edge_from_root(company_id))

        return {"nodes": nodes, "edges": edges}
Пример #4
0
    def transliterate(self, person_last_name, person_first_name,
                      person_patronymic):
        """
        Generate spelling variants for a person's full name.

        Each name part is replaced by its entries from
        ``self.ru_translations`` when the lower-cased part is a key there and
        ``is_cyr`` accepts it; otherwise the part is used unchanged. Every
        combination of those parts forms a "translated" full name.

        The result contains:

        * the original full name transliterated with every table in
          ``ALL_UKRAINIAN`` (only when ``is_cyr`` accepts it);
        * every translated full name that ``is_ukr`` rejects, transliterated
          with every table in ``ALL_RUSSIAN``;
        * the translated full names themselves.

        :returns: set of name variants
        """
        def join_parts(last, first, patronymic):
            # Only a single pass of double-space squeezing, as before.
            joined = "{} {} {}".format(last, first, patronymic)
            return joined.strip().replace("  ", " ")

        def options_for(part):
            if part.lower() in self.ru_translations and is_cyr(part):
                return self.ru_translations[part.lower()]
            return [part]

        original_name = join_parts(person_last_name, person_first_name,
                                   person_patronymic)

        first_names = options_for(person_first_name)
        last_names = options_for(person_last_name)
        patronymics = options_for(person_patronymic)

        translated = [
            join_parts(last, first, patronymic)
            for first in first_names
            for patronymic in patronymics
            for last in last_names
        ]

        result = set()

        if is_cyr(original_name):
            # TODO: also replace double ж, х, ц, ч, ш with single chars
            result.update(
                translit(original_name, ua_table) for ua_table in ALL_UKRAINIAN
            )

        for full_name in translated:
            if is_ukr(full_name):
                continue
            result.update(
                translit(full_name, ru_table) for ru_table in ALL_RUSSIAN
            )

        return result | set(translated)
Пример #5
0
def conv_link(value):
    """
    Build a lower-case, URL-quoted anchor from ``value``.

    The value is passed through ``translit`` and then through
    ``translit_ru(..., 'ru', reversed=True)``. Spaces become hyphens,
    apostrophes are dropped, and the result is quoted with ``quote_plus``
    and lower-cased.

    :param value: source text
    :returns: the anchor string
    """
    anchor = translit(value)
    anchor = translit_ru(anchor, 'ru', reversed=True)
    # One replace per character class; the previous per-character loop
    # re-scanned the whole string for every space or apostrophe it met.
    anchor = anchor.replace(' ', '-').replace('\'', '')
    # Lower-casing happens after quoting, so percent-escapes end up
    # lower-case too.
    return quote_plus(anchor).lower()
Пример #6
0
def company_details(request, company_id):
    """
    Assemble the template context for a single company.

    ``filename`` is the transliterated, lower-cased Ukrainian name with
    spaces turned into underscores and newlines removed when ``is_cyr``
    accepts that name; otherwise it is the company's primary key.

    :param request: current request (not used here)
    :param company_id: primary key of the company; a miss raises 404
    :returns: context dict
    """
    company = get_object_or_404(Company, pk=company_id)

    if is_cyr(company.name_uk):
        normalized = company.name_uk.lower().strip()
        normalized = normalized.replace(" ", "_").replace("\n", "")
        filename = translit(normalized)
    else:
        filename = company.pk

    feedback_form = FeedbackForm(initial={"person": unicode(company.name)})

    return {
        "company": company,
        "filename": filename,
        "feedback_form_override": feedback_form,
    }
Пример #7
0
def _get_unique_slug(text, Model):
    """
    Return a slug for ``text`` that no ``Model`` row uses yet.

    The text is transliterated with the ``RussianSimple`` table and
    slugified. If that slug is taken, ``-1``, ``-2``, ... is appended until
    a free value is found. The base is trimmed where necessary so that the
    suffixed candidate never exceeds ``settings.SLUG_MAX_LENGTH``.

    :param text: source text for the slug
    :param Model: model class with a ``slug`` field, queried via ``objects``
    :returns: a slug not currently present in ``Model``
    """
    max_length = settings.SLUG_MAX_LENGTH
    slug = slugify(translit(text, RussianSimple))

    if len(slug) > max_length:
        # Kept from the original: over-long slugs are cut one character
        # short of the limit.
        slug = slug[:max_length - 1]

    unique_slug = slug
    num = 1

    while Model.objects.filter(slug=unique_slug).exists():
        suffix = '-{}'.format(num)
        # Leave room for the suffix; previously it was appended to the
        # already-truncated slug and could push the result past the limit.
        base = slug[:max(max_length - len(suffix), 0)]
        unique_slug = base + suffix
        num += 1

    return unique_slug
Пример #8
0
def company_details(request, company_id):
    """
    Assemble the template context for a single company, with its articles.

    ``articles`` holds the company's published articles of kind "i", newest
    first. ``filename`` is the transliterated, lower-cased Ukrainian name
    with spaces turned into underscores and newlines removed when ``is_cyr``
    accepts that name; otherwise it is the company's primary key.

    :param request: current request (not used here)
    :param company_id: primary key of the company; a miss raises 404
    :returns: context dict
    """
    company = get_object_or_404(Company, pk=company_id)
    articles = company.articles.filter(kind="i", publish=True)
    articles = articles.order_by("-date")

    if is_cyr(company.name_uk):
        normalized = company.name_uk.lower().strip()
        normalized = normalized.replace(" ", "_").replace("\n", "")
        filename = translit(normalized)
    else:
        filename = company.pk

    feedback_form = FeedbackForm(initial={"person": unicode(company.name)})

    return {
        "company": company,
        "articles": articles,
        "filename": filename,
        "feedback_form_override": feedback_form,
    }
Пример #9
0
def person_details(request, person_id):
    """
    Assemble the template context for a single person.

    Besides the person, the context carries the declarations, chart data,
    scoring information and published articles of kind "i".
    ``scoring_score`` is the sum of ``rule__weight * rule__scale`` over the
    person's flags. ``filename`` is the transliterated, lower-cased
    "last first patronymic" Ukrainian name with spaces turned into
    underscores when ``is_cyr`` accepts that name; otherwise it is the
    person's primary key.

    :param request: current request (not used here)
    :param person_id: primary key of the person; a miss raises 404
    :returns: context dict
    """
    person = get_object_or_404(Person, pk=person_id)
    flags_qs = person.flags.select_related("rule").order_by("rule__id")

    all_declarations = person.get_declarations()
    charts_data = person.get_charts_data()

    weight_scale_pairs = flags_qs.values_list("rule__weight", "rule__scale")
    scoring_score = sum(weight * scale for weight, scale in weight_scale_pairs)
    scoring_flags = flags_qs.order_by("-rule__weight", "pk").nocache()

    articles = person.articles.filter(kind="i", publish=True)
    articles = articles.order_by("-date")

    # %-formatting is kept on purpose: the snippet targets Python 2
    # (``unicode`` below), where it copes with unicode field values.
    name_parts = (
        person.last_name_uk,
        person.first_name_uk,
        person.patronymic_uk,
    )
    full_name = "%s %s %s" % name_parts

    if is_cyr(full_name):
        normalized = full_name.lower().strip()
        normalized = normalized.replace(" ", "_").replace("\n", "")
        filename = translit(normalized)
    else:
        filename = person.pk

    feedback_form = FeedbackForm(initial={"person": unicode(person)})

    return {
        "person": person,
        "query": "",
        "all_declarations": all_declarations,
        "charts_data": charts_data,
        "scoring_score": scoring_score,
        "scoring_flags": scoring_flags,
        "country_connections_titles": Person2Country._relationships_explained,
        "articles": articles,
        "filename": filename,
        "feedback_form_override": feedback_form,
    }
Пример #10
0
def person_details(request, person_id):
    """
    Assemble the template context for a single person.

    ``filename`` is the transliterated, lower-cased "last first patronymic"
    Ukrainian name with spaces turned into underscores and newlines removed
    when ``is_cyr`` accepts that name; otherwise it is the person's primary
    key.

    :param request: current request (not used here)
    :param person_id: primary key of the person; a miss raises 404
    :returns: context dict
    """
    person = get_object_or_404(Person, pk=person_id)
    all_declarations = person.get_declarations()

    # %-formatting is kept on purpose: the snippet targets Python 2
    # (``unicode`` below), where it copes with unicode field values.
    name_parts = (
        person.last_name_uk,
        person.first_name_uk,
        person.patronymic_uk,
    )
    full_name = "%s %s %s" % name_parts

    if is_cyr(full_name):
        normalized = full_name.lower().strip()
        normalized = normalized.replace(" ", "_").replace("\n", "")
        filename = translit(normalized)
    else:
        filename = person.pk

    feedback_form = FeedbackForm(initial={"person": unicode(person)})

    return {
        "person": person,
        "query": "",
        "all_declarations": all_declarations,
        "filename": filename,
        "feedback_form_override": feedback_form,
    }
Пример #11
0
    def transform_company(self, company):
        """
        Apply the pre-processing, categorisation and parsing steps to one
        record from the company registry.

        This is a generator. It yields:

        * a single row with company data only, when the preprocessor returns
          no founders;
        * otherwise one row per founder record. Records classified as
          beneficial owners are parsed and yielded first; the remaining
          ("true") founders are yielded in a second pass and are parsed as
          owners only if some beneficial-owner record was a reference.

        :param company: One record from the input file
        :type company: dict
        :returns: generator of result rows
        :rtype: generator of dict
        """

        assert (
            self.preprocessor and
            self.beneficiary_categorizer and self.parser
        )

        base_rec = {
            "Company name": company["name"],
            "Company name EN": translit(company["name"]),
            "Company short name": company["short_name"],
            "Company short name EN": translit(company["short_name"]),
            "Company number": company["edrpou"],
            "Company address": company["location"],
            "Company head": company["head"],
            "Company head EN": translit(company["head"]),
            "Company profile": company["company_profile"],
            "Company status": company["status"],
            "Is beneficial owner": False,
            "BO is absent": False,
            "Has reference": False,
            "Was dereferenced": False,
        }

        founders = self.preprocessor.process_founders(company)

        if not founders:
            # Nothing to expand: emit the bare company row and stop, instead
            # of falling through to loops that would fail on a falsy
            # non-iterable such as None.
            yield base_rec
            return

        had_references = False
        true_founders = []

        # First pass: records classified as beneficial owners.
        for founder in founders:
            rec = base_rec.copy()

            rec["Raw founder record"] = founder

            if self.beneficiary_categorizer.classify(founder):
                rec["Is beneficial owner"] = True
                owner = self.parser.parse_founders_record(founder, include_stats=True)
                owner["Name EN"] = list(map(translit, owner["Name"]))

                owner["BO is absent"] = self.beneficiary_categorizer.is_absent(founder)

                # No parsed name and not marked absent: the record may merely
                # point at the founders list.
                if not owner["Name"] and not owner["BO is absent"]:
                    if self.beneficiary_categorizer.is_reference(founder):
                        owner["Has reference"] = True
                        had_references = True

                rec.update(owner)

                yield rec
            else:
                # Deferred until it is known whether any reference was seen.
                true_founders.append(founder)

        # Second pass: if any beneficial-owner record referred to the
        # founders, the founder records are parsed as owners too.
        for founder in true_founders:
            rec = base_rec.copy()

            rec["Raw founder record"] = founder

            if had_references:
                rec["Is beneficial owner"] = True
                owner = self.parser.parse_founders_record(founder, include_stats=True)
                owner["Name EN"] = list(map(translit, owner["Name"]))

                owner["Was dereferenced"] = True
                rec.update(owner)

            yield rec
Пример #12
0
def autocomplete_suggestions(name):
    """
    Return autocomplete variants for ``name`` as a set.

    The set holds ``title(name)`` and its transliteration with the
    ``UkrainianKMU`` table.
    """
    titled = title(name)
    return {titled, translit(titled, UkrainianKMU)}
Пример #13
0
def get_all_unique(text):
    """
    Transliterate ``text`` with every table in ``ALL_UKRAINIAN``.

    :returns: list of the distinct results, in no particular order
    """
    distinct = {translit(text, table) for table in ALL_UKRAINIAN}
    return list(distinct)
Пример #14
0
def translit_to_en(value):
    """Return ``value`` passed through ``translit`` without an explicit table."""
    transliterated = translit(value)
    return transliterated
Пример #15
0
    def to_dict(self):
        """
        Serialise the object into a plain dict.

        On top of the model fields the dict carries related persons,
        countries and companies, status and date information, and the
        ``name_suggest`` entries. Suggestions are built from the Ukrainian
        names, their transliterations with every table in ``ALL_UKRAINIAN``
        and the English names: each name is split on spaces and every word
        is rotated to the front once. When ``edrpou`` is set, its chunks and
        padded/stripped forms are added as well.

        :returns: dict representation; ``_id`` mirrors ``id``
        """
        exported_fields = [
            "id",
            "name_uk",
            "short_name_uk",
            "name_en",
            "short_name_en",
            "state_company",
            "edrpou",
            "wiki",
            "city",
            "street",
            "other_founders",
            "other_recipient",
            "other_owners",
            "other_managers",
            "bank_name",
            "also_known_as",
        ]
        d = model_to_dict(self, fields=exported_fields)

        d["related_persons"] = [
            link.to_person_dict()
            for link in self.from_persons.prefetch_related("from_person")
        ]

        d["related_countries"] = [
            link.to_dict()
            for link in self.from_countries.prefetch_related("to_country")
        ]

        outgoing_companies = [
            link.to_dict()
            for link in self.to_companies.prefetch_related("to_company")
        ]
        incoming_companies = [
            link.to_dict_reverse()
            for link in self.from_companies.prefetch_related("from_company")
        ]
        d["related_companies"] = outgoing_companies + incoming_companies

        d["status"] = self.get_status_display()
        d["status_en"] = translate_into(self.get_status_display())
        d["founded"] = self.founded_human
        d["category"] = self.category
        d["closed"] = self.closed_on_human
        d["last_modified"] = self.last_modified

        # Start from the Ukrainian names, then add their transliterations.
        names = set([
            d["name_uk"],
            d["short_name_uk"],
        ])

        # Iterate over a snapshot: the set grows while variants are added.
        for base_name in list(names):
            names.update(
                translit(base_name, ua_table) for ua_table in ALL_UKRAINIAN
            )

        names.add(d["name_en"])
        names.add(d["short_name_en"])

        suggestions = []

        for candidate in names:
            if not candidate:
                continue

            words = [
                word.strip("'\",.-“”«»") for word in candidate.split(" ")
            ]

            # One variant per word, with that word moved to the front.
            for position, leading in enumerate(words):
                rotated = [leading] + words[:position] + words[position + 1:]
                suggestions.append(" ".join(rotated))

        if self.edrpou:
            # Split the code into alternating letter and non-letter runs.
            raw_parts = re.split("([a-z]+)", self.edrpou, flags=re.IGNORECASE)
            edrpou_chunks = list(filter(None, map(unicode.strip, raw_parts)))

            suggestions.extend(edrpou_chunks)
            suggestions.append(self.edrpou.lstrip("0"))

            if self.edrpou.isdigit():
                suggestions.append(self.edrpou.rjust(8, "0"))

            d["code_chunks"] = edrpou_chunks

        d["name_suggest"] = [{"input": x} for x in set(suggestions)]

        d["name_suggest_output"] = d["short_name_uk"] or d["name_uk"]
        d["name_suggest_output_en"] = d["short_name_en"] or d["name_en"]

        d["_id"] = d["id"]

        return d
Пример #16
0
 def _generate_slug(self):
     """
     Set ``self.slug`` from the transliterated title plus a timestamp.

     The title is transliterated and slugified (unicode allowed). The
     current local time in ``ctime`` form is slugified as well and appended
     after a hyphen.
     """
     value = translit(self.title)
     slug_candidate = slugify(value, allow_unicode=True)
     # ctime() looks like "Mon Jan  1 12:00:00 2024"; appended raw, its
     # spaces and colons made the stored value an invalid slug.
     # NOTE(review): assumes ``slugify`` is Django's, which strips such
     # characters and lower-cases the text - confirm.
     stamp = slugify(datetime.datetime.now().ctime())
     self.slug = f'{slug_candidate}-{stamp}'
Пример #17
0
 def _generate_slug(self):
     """Set ``self.slug`` to the slugified (unicode allowed) transliteration of ``self.name``."""
     self.slug = slugify(translit(self.name), allow_unicode=True)
Пример #18
0
def from_cyrillic_to_eng(text: str):
    """
    Transliterate ``text`` after normalising it.

    Spaces are replaced with underscores and the text is lower-cased before
    it is handed to ``translit``.
    """
    normalized = text.replace(' ', '_').lower()
    return translit(normalized)
Пример #19
0
    def transliterate(self, person_last_name, person_first_name,
                      person_patronymic):
        """
        Generate spelling variants for a person's full name.

        Each name part is replaced by its entries from
        ``self.ru_translations`` when the lower-cased part is a key there and
        ``is_cyr`` accepts it; otherwise the part is used unchanged and passed
        to ``self.add_for_translation``. Every combination of the resulting
        parts forms a "translated" name tuple.

        The result contains:

        * for the original name, when ``is_cyr`` accepts it: its
          transliteration with every ``ALL_UKRAINIAN`` table, plus the
          special-replacement and special-case variants transliterated with
          ``UkrainianKMU``;
        * for every translated name: its transliteration with every
          ``ALL_RUSSIAN`` table when ``is_ukr`` rejects it, plus the
          special-replacement and special-case variants transliterated with
          ``RussianInternationalPassport``;
        * the translated names themselves, rendered with ``self.get_name``.

        :returns: set of name variants
        """
        result = set()

        def options_for(part):
            # Parts without a known translation are queued for translation.
            if part.lower() in self.ru_translations and is_cyr(part):
                return self.ru_translations[part.lower()]
            self.add_for_translation(part)
            return [part]

        def add_special_variants(parts, name, table):
            # Regex-driven replacements work on the rendered name...
            for sc_rex, replacements in self.special_replacements.items():
                if not re.search(sc_rex, name, flags=re.I | re.U):
                    continue
                for repl in replacements:
                    optional_n = re.sub(sc_rex, repl, name, flags=re.I | re.U)
                    result.add(translit(title(optional_n), table))

            # ...while special cases swap whole items of the name tuple.
            for sc, replacements in self.special_cases.items():
                if sc not in parts:
                    continue
                for repl in replacements:
                    optional_parts = self.replace_item(parts, sc, repl)
                    result.add(translit(self.get_name(optional_parts), table))

        first_names = options_for(person_first_name)
        last_names = options_for(person_last_name)
        patronymics = options_for(person_patronymic)

        translated = [
            (last, first, patronymic)
            for first in first_names
            for patronymic in patronymics
            for last in last_names
        ]

        original_parts = (person_last_name, person_first_name, person_patronymic)
        original_name = self.get_name(original_parts)

        if is_cyr(original_name):
            for ua_table in ALL_UKRAINIAN:
                result.add(translit(original_name, ua_table))

            add_special_variants(original_parts, original_name, UkrainianKMU)

        for parts in translated:
            name = self.get_name(parts)

            if not is_ukr(name):
                for ru_table in ALL_RUSSIAN:
                    result.add(translit(name, ru_table))

            # Unlike the original name, translated names get their special
            # variants regardless of the alphabet check.
            add_special_variants(parts, name, RussianInternationalPassport)

        return result | set(map(self.get_name, translated))
Пример #20
0
from translitua import translit

# Minimal usage demo: transliterate a Ukrainian phrase with ``translit``
# (no explicit table is passed) and print the result.
a = translit('Тіні забутих предків')
print(a)