Exemplo n.º 1
0
def corr_gem_cutting(text, search_result):
    """
    Make the modifiers of a gem phrase agree with the phrase's last word.

    Group 1 of ``search_result`` is split on whitespace; its last word is the
    head noun. If the head is listed in ``body_parts`` the call is delegated
    to ``corr_item_body_parts(text)``. Otherwise every preceding word that has
    an entry in ``make_adjective`` or is itself an adjective is inflected to
    the head's gender, and the whole match (group 0) is replaced in ``text``.

    >>> corr_gem_cutting("(бриолетовый восковые опалы)")
    '(бриолетовые восковые опалы)'
    >>> corr_gem_cutting("большой шерлы")
    'большие шерлы'
    """
    tokens = search_result.group(1).split()
    head = tokens[-1]
    if head in body_parts:
        return corr_item_body_parts(text)

    gender = get_gender(head, {"NOUN", "nomn"})

    def agree(token):
        # Nouns with a known adjective form are converted first, then inflected
        if token in make_adjective:
            return inflect_adjective(make_adjective[token], gender)
        if is_adjective(token):
            return inflect_adjective(token, gender)
        # Anything else is passed through untouched
        return token

    phrase = [agree(token) for token in tokens[:-1]]
    phrase.append(head)

    return text.replace(search_result.group(0), " ".join(phrase))
Exemplo n.º 2
0
def corr_settlement(_, search_result):
    """
    Rebuild a settlement title so that its adjectives agree with the settlement noun.

    Groups read from ``search_result``: 1 = adjective(s), possibly blank;
    2 = settlement type; 3 = settlement name.

    Returns the corrected, capitalized title, or ``None`` when the leading
    word is "Покинуть" or "Разрушить" (no replacement is produced for those).

    >>> corr_settlement(" человеческий крепость Belrokalle")
    'Человеческая крепость Belrokalle'
    >>> corr_settlement(" эльфийский лесное убежище Etathuatha")
    'Эльфийское лесное убежище Etathuatha'
    >>> corr_settlement(" дварфийский горный город КилрудОстач")
    'Дварфийский горный город Килрудостач'
    >>> corr_settlement(" лесное убежище Cinilidisa")
    'Лесное убежище Cinilidisa'
    """
    adjective = search_result.group(1).strip()
    settlement = search_result.group(2)
    name = search_result.group(3)

    if not adjective:
        return "{} {}".format(settlement.capitalize(), name.capitalize())

    if adjective in {"Покинуть", "Разрушить"}:
        return None

    gender = get_main_word_gender(settlement)

    # inflect_adjective can return None; fall back to the original word for
    # each word separately so a multi-word adjective never feeds None to join()
    agreed = []
    for word in adjective.split(" "):
        inflected = inflect_adjective(word, gender)
        agreed.append(word if inflected is None else inflected)

    return "{} {} {}".format(" ".join(agreed).capitalize(), settlement, name.capitalize())
Exemplo n.º 3
0
def corr_adjective_relief(text, search_result):
    """
    Make relief adjectives agree in gender with the plant/object they describe.

    Groups read from ``search_result``: 1 = leading adjective, 2 = object
    (one or more words). The result is always returned through
    ``str.capitalize()``.

    >>> corr_adjective_relief("Заснеженный Густой овсяница")
    'Заснеженная густая овсяница'
    >>> corr_adjective_relief("Густой мюленбергия")
    'Густая мюленбергия'
    >>> corr_adjective_relief("Густой морошка")
    'Густая морошка'
    >>> corr_adjective_relief("Заснеженный Густой куропаточья трава")
    'Заснеженная густая куропаточья трава'
    """
    adjective = search_result.group(1)
    obj = search_result.group(2)

    if " " in obj:
        words = obj.split(" ")
        if is_adjective(words[0]):
            # The gender comes from the last word of the object phrase
            gender = get_gender(words[-1])
            # First the object's own leading adjective, then the outer adjective
            for original in (words[0], adjective):
                inflected = inflect_adjective(original, gender, "nomn")
                # Same guard as the single-word branch: a failed inflection
                # leaves the text alone instead of passing None to replace()
                if inflected:
                    text = text.replace(original, inflected)
    else:
        gender = get_gender(obj)
        new_word = inflect_adjective(adjective, gender, "nomn")
        if new_word:
            text = "{} {}".format(new_word, obj)

    return text.capitalize()
Exemplo n.º 4
0
def corr_jewelers_shop(_, search_result):
    """
    Fix the grammatical case of the material in jeweler's workshop job names.

    Group 1 of ``search_result`` is the action, group 2 the material phrase.
    For "Огранить" the phrase is put into the accusative case; for any other
    action each word (except "из") is put into the ablative case. A trailing
    " с" on the action is dropped and the result is capitalized.

    >>> corr_jewelers_shop("Огранить из необработанного адамантина")
    'Огранить необработанный адамантин'
    >>> corr_jewelers_shop("Инкрустировать Предметы обстановки с из необработанного адамантина")
    'Инкрустировать предметы обстановки необработанным адамантином'
    >>> corr_jewelers_shop("Огранить из фарфора")
    'Огранить фарфор'
    """
    action = search_result.group(1)
    tokens = search_result.group(2).split()

    if action != "Огранить":
        # Instrumental/ablative case ('incrust with smth'); the preposition is dropped
        tokens = [custom_parse(token)[0].inflect({"ablt"}).word for token in tokens if token != "из"]
    else:
        # Accusative case
        known = None
        if tokens[0] == "из":
            # After 'из' the words are known to be in the genitive case
            tokens = tokens[1:]
            known = {"gent"}
        noun = tokens[-1]
        gender = get_gender(noun, known_tags=known)
        tokens = [inflect_adjective(token, gender, "accs", animated=False) for token in tokens[:-1]]
        candidates = [variant for variant in custom_parse(noun) if {gender, "inan"} in variant.tag]
        # 'адамантина' is special-cased and does not go through the parser result
        noun = "адамантин" if noun == "адамантина" else candidates[0].inflect({"accs"}).word
        tokens.append(noun)

    if action.endswith(" с"):
        action = action[:-2]
    return (action + " " + " ".join(tokens)).capitalize()
Exemplo n.º 5
0
def corr_craft_glass(text, search_result):  # TODO: Combine into single crafting-related function
    """
    Rephrase a glass-crafting job name matched by ``search_result``.

    Groups read: 1 = verb, 2 = leading words, 3 = material, 4 = product
    (split on whitespace; may yield no words).

    >>> corr_craft_glass("Делать грубый зелёное стекло")
    'Варить грубое зелёное стекло'
    >>> corr_craft_glass("Делать гигантский хрусталь лезвие топора")
    'Делать гигантское лезвие топора из хрусталя'
    """
    material = search_result.group(3)
    material_gender = get_gender(material)
    words = search_result.group(2).split()
    product = search_result.group(4).split()
    verb = search_result.group(1)
    if not product:
        # No product words: the verb becomes "Варить" and every leading word is
        # inflected to the material's gender (accusative, inanimate)
        verb = "Варить"
        adjectives = (inflect_adjective(adj, material_gender, "accs", animated=False) for adj in words)
        result = "{} {} {}".format(verb, " ".join(adjectives), material)
    else:
        # Index of the first leading word found in the listed set,
        # or len(words) when none of them is present
        index = next(
            (i for i, item in enumerate(words) if item in {"грубое", "зелёное", "прозрачное", "грубый"}), len(words)
        )
        # Words before that index are treated as adjectives of the product
        product_adjectives = words[:index]
        if any_in_tag({"NOUN", "nomn"}, custom_parse(product[0])):
            # The first product word is a nominative noun: it sets the gender
            # and is itself put into the accusative case
            product_gender = get_gender(product[0])
            product[0] = inflect_noun(product[0], case="accs")
        else:
            # Otherwise the last product word sets the gender; the words before it
            # join the adjective list and only the last word is kept as the product
            product_gender = get_gender(product[-1])
            product_adjectives += product[:-1]
            product = [inflect_noun(product[-1], case="accs")]

        product_adjectives = [
            inflect_adjective(adj, product_gender, case="accs", animated=False) for adj in product_adjectives
        ]
        # NOTE(review): the visible source ends here without a return statement;
        # the remainder of the function is presumably missing from this excerpt
Exemplo n.º 6
0
def corr_relief(_, search_result):
    """
    Rewrite a "<material> <object>" relief label as "<object> из <material>".

    Groups read from ``search_result``: 1 = everything before the object,
    2 = the object. Saplings ("деревце") get the tree name in parentheses
    instead. The result is always returned through ``str.capitalize()``.

    >>> corr_relief("Мёртвый клён деревце")
    'Мёртвое деревце (клён)'
    >>> corr_relief("Глинистый суглинок Стена")
    'Стена из глинистого суглинка'
    >>> corr_relief("кремень подъем")
    'Подъем из кремня'
    """

    group1 = search_result.group(1)
    obj = search_result.group(2)
    if obj == "деревце":
        if group1.split(" ")[0] == "Мёртвый":
            # Words between the leading "Мёртвый" and the trailing object form the
            # tree name; join them with spaces so multi-word names stay readable
            tree_name = " ".join(search_result.group(0).split(" ")[1:-1])
            text = "Мёртвое деревце ({})".format(tree_name)
        else:
            text = "Деревце ({})".format(group1)
        return text.capitalize()

    if " " in group1:
        words = group1.split(" ")
        first_words = []
        gender = get_main_word_gender(obj)

        # Collect the leading state adjectives; they describe the object itself,
        # so they are inflected to the object's gender when that is known
        for word in words:
            if word not in {"Заснеженный", "Неотесанный", "Влажный"}:
                break
            new_word = inflect_adjective(word, gender) if gender is not None else None
            # Keep the original word when the gender is unknown or inflection fails
            first_words.append(new_word or word)

        words = words[len(first_words):]

        if words[0] == "из":
            # Already phrased as "из <material>"; only the preposition is dropped here
            words = words[1:]
        else:
            words = to_genitive_case_list(words)

        if not first_words:
            text = "{} из {}".format(obj, " ".join(words))
        else:
            text = "{} {} из {}".format(" ".join(first_words), obj, " ".join(words))
    else:
        material = group1
        text = "{} из {}".format(obj, to_genitive_case(material))

    # NOTE(review): removes a stray " иза" fragment; presumably an artifact of
    # the genitive conversion — confirm against to_genitive_case
    if "иза" in text:
        text = text.replace(" иза", "")
    return text.capitalize()
Exemplo n.º 7
0
def corr_craft_general(text, search_result):
    """
    Put a generic crafting job name ("<verb> <material words> <product>") into proper Russian.

    Group 1 of ``search_result`` is the verb, group 2 the remaining words. The
    product is inflected to the accusative case; the material is rendered
    either as "из <genitive noun>" or as adjectives agreeing with the product.
    The match (group 0) is replaced in ``text`` and the result capitalized.

    >>> corr_craft_general("Изготовить камень дверь")
    'Изготовить каменную дверь'
    >>> corr_craft_general("Делать деревянный ловушка для животных")
    'Делать деревянную ловушку для животных'
    >>> corr_craft_general("Украшать кость")
    'Украшать кость'
    >>> corr_craft_general("Делать деревянный изделия")
    'Делать деревянные изделия'
    """
    verb = search_result.group(1)
    tokens = search_result.group(2).split()
    product = None
    if len(tokens) <= 1:
        # A single word is the product itself; there is no material part
        product, tokens = tokens[0], []
    else:
        # The product starts at the first nominative noun (after the first word)
        # that is not a material with a known adjective form
        for split_at, candidate in enumerate(tokens[1:], start=1):
            if any_in_tag({"NOUN", "nomn"}, custom_parse(candidate)) and candidate not in make_adjective:
                product = " ".join(tokens[split_at:])
                tokens = tokens[:split_at]
                break

    product_gender = get_main_word_gender(product)

    if " " in product:
        product = inflect_collocation(product, {"accs"})
    else:
        number = "plur" if product_gender == "plur" else "sing"
        product = inflect_noun(product, "accs", orig_form={number, "inan"})
        assert product is not None

    if not tokens:
        result = "{} {}".format(verb, product)
    elif len(tokens) == 1 and tokens[0] not in make_adjective and not is_adjective(tokens[0]):
        # A lone noun without an adjective form: рог -> (из) рога
        material = inflect_noun(tokens[0], "gent", orig_form={"nomn", "inan"})
        assert material is not None
        result = "{} {} из {}".format(verb, product, material)
    else:
        adjectives = []
        for token in tokens:
            if token in make_adjective:
                adjectives.append(make_adjective[token])
            elif is_adjective(token):
                adjectives.append(token)
            else:
                adjectives.append(None)
        assert all(adj is not None for adj in adjectives)
        agreed = [inflect_adjective(adj, product_gender, "accs", animated=False) for adj in adjectives]
        result = "{} {} {}".format(verb, " ".join(agreed), product)

    return text.replace(search_result.group(0), result).capitalize()
Exemplo n.º 8
0
def corr_wooden_logs(text, search_result):
    """
    Replace "<group 1> <wood> <group 3>" with a plural wood adjective plus group 3.

    The lookup key is "из " + group 2. When ``make_adjective`` has no entry
    for it, the match is reduced to "<group 1> <group 2>" instead.

    >>> corr_wooden_logs('древесина дуба брёвна')
    'дубовые брёвна'
    """
    matched = search_result.group(0)
    wood_key = "из " + search_result.group(2)

    if wood_key not in make_adjective:
        # No adjective form known, e.g. "древесина акации"
        return text.replace(matched, search_result.group(1) + " " + search_result.group(2))

    # Adjective form known, e.g. "берёзовые брёвна"
    wood_adjective = inflect_adjective(make_adjective[wood_key], "plur")
    return text.replace(matched, wood_adjective + " " + search_result.group(3))
Exemplo n.º 9
0
def corr_clothiers_shop(_, search_result):
    """
    Rephrase clothier's workshop job names ("<verb> <material> <product>").

    Groups read from ``search_result``: 1 = verb, 2 = material, 3 = product.
    Returns ``None`` when there is no product, so the caller leaves the text
    as is (e.g. 'Ткать шёлк').

    >>> corr_clothiers_shop("Делать ткань роба")
    'Шить робу из ткани'
    >>> corr_clothiers_shop("Делать шёлк роба")
    'Шить шёлковую робу'
    >>> corr_clothiers_shop("Изготовить ткань мешок")
    'Шить мешок из ткани'
    >>> corr_clothiers_shop("Вышивать кожа изображение")
    'Вышивать изображение на коже'
    >>> corr_clothiers_shop("Делать пряжа рубаха")
    'Вязать рубаху из пряжи'
    >>> corr_clothiers_shop("Делать ткань верёвка")
    'Вить верёвку из ткани'
    """
    verb = search_result.group(1)
    material = search_result.group(2)
    product = search_result.group(3).strip()

    if not product:
        return None

    if verb == "Вышивать":
        if material == "пряжа":
            # вязать <product> из пряжи
            verb, preposition, material = "Вязать", "из", "пряжи"
        else:
            # вышивать <product> на <material>
            preposition = "на"
            material = inflect_noun(material, case="loct", orig_form={"nomn"})
        return "{} {} {} {}".format(verb, product, preposition, material)

    substituted_verb, of_material = cloth_subst[material]
    if product not in {"щит", "баклер"}:
        # For shields and bucklers the original 'Делать'/'Изготовить' is kept
        verb = substituted_verb
    if product == "верёвка":
        verb = "Вить"

    product_accus = inflect_noun(product, case="accs", orig_form={"nomn"})

    if material not in make_adjective:
        return "{} {} {}".format(verb, product_accus, of_material)  # {Шить} {робу} {из ткани}

    # The material has an adjective form: "шёлк" -> "шёлковый"
    gender = get_gender(product, {"nomn"})
    material_adj = inflect_adjective(make_adjective[material], gender, "accs", animated=False)
    return "{} {} {}".format(verb, material_adj, product_accus)  # {Шить} {шёлковую} {робу}
0
def corr_weapon_trap_parts(text, search_result):
    """
    Make the adjective and material of a weapon-trap component agree with the item.

    Group 1 of ``search_result`` is the leading adjective phrase, group 2 holds
    the material followed by the item. If the first two words of group 2 have
    an entry in ``make_adjective`` the material becomes an adjective placed
    before the item; otherwise the first three words are kept as the material
    and moved after the item.

    >>> corr_weapon_trap_parts('гигантский из меди лезвия топоров')
    'гигантские медные лезвия топоров'
    >>> corr_weapon_trap_parts('большой зазубренный из берёзы диски')
    'большие зазубренные берёзовые диски'
    """
    adj = search_result.group(1)
    tokens = search_result.group(2).split()

    short_material = " ".join(tokens[:2])
    has_adjective_form = short_material in make_adjective
    material_length = 2 if has_adjective_form else 3

    item = " ".join(tokens[material_length:])
    gender = get_main_word_gender(item)

    if has_adjective_form:
        leading = inflect_as_adjective(adj, gender)
        material_adj = inflect_adjective(make_adjective[short_material], gender)
        phrase = "{} {} {}".format(leading, material_adj, item)
    else:
        assert gender is not None
        leading = inflect_as_adjective(adj, gender)
        long_material = " ".join(tokens[:3])
        phrase = "{} {} {}".format(leading, item, long_material)

    return text.replace(search_result.group(0), phrase)
Exemplo n.º 11
0
def corr_container(text, _):
    """
    Rewrite "<contents> <container> (<material>)" as "<container> <contents in genitive> (<material>)".

    The text is searched with ``re_container`` (the second argument is ignored).
    Groups read: 1 = contents, 2 = container, 3 = material (may be empty).
    The matched part of ``text`` is replaced by the capitalized rewrite and the
    resulting text is returned.

    >>> corr_container('(дварфийское пиво бочка (из ольхи))')
    '(Бочка дварфийского пива (ольховая))'
    >>> corr_container("(дварфийское вино бочка (из клёна) <#8>)")
    '(Бочка дварфийского вина (кленовая) <#8>)'
    >>> corr_container("(Семя бочка (из лумбанга) <#10>)")
    '(Бочка семян (лумбанговая) <#10>)'
    """
    search_result = re_container.search(text)
    initial_string = search_result.group(0)
    containment = search_result.group(1)
    # Some contents have a fixed replacement that is applied before any inflection
    if containment in replace_containment:
        containment = replace_containment[containment]
    if containment.endswith("кровь"):
        words = containment.split()
        if words[0] in possessive_adjectives:
            # Swap the first word for its possessive adjective, then inflect the whole phrase
            words[0] = possessive_adjectives[words[0]]
            words = to_genitive_case_list(words)
        else:
            # Move the last word ("...кровь") to the front in genitive, followed by the rest in genitive
            words = [to_genitive_case_single_noun(words[-1])] + list(to_genitive_case_list(words[:-1]))
        containment = " ".join(words)
    elif containment.startswith("из "):
        containment = containment[3:]  # Words after 'из' are already in genitive case
    elif containment in {"слитков/блоков", "специй"}:
        pass  # Already in genitive case
    elif containment.startswith("семена"):
        # Only the first word is inflected; the remaining words are kept as they are
        words = containment.split()
        words[0] = to_genitive_case(words[0])
        containment = " ".join(words)
    else:
        containment = to_genitive_case(containment)
    container = search_result.group(2)
    of_material = search_result.group(3)
    if not of_material:
        replacement_string = container + " " + containment
    elif (
        # `and` binds tighter than `or`: (single-word adjective) or (known key) or (known key after
        # dropping the first three characters — presumably the "из " prefix)
        " " not in of_material
        and is_adjective(of_material)
        or of_material in make_adjective
        or of_material[3:] in make_adjective
    ):
        if " " not in of_material and is_adjective(of_material):
            adjective = of_material
        elif of_material in make_adjective:
            adjective = make_adjective[of_material]
        elif of_material[3:] in make_adjective:
            adjective = make_adjective[of_material[3:]]
        else:
            # Not expected to be reached: the enclosing condition requires one of the tests above to hold
            adjective = None
        # The material adjective must agree with the container's gender
        gender = get_gender(container, {"nomn"})
        adjective = inflect_adjective(adjective, gender)
        replacement_string = "{} {} ({})".format(container, containment, adjective)
    else:
        words = of_material.split()
        material = None
        if of_material.startswith("из ") or len(of_material) <= 2:
            # Already phrased with 'из', or too short to process: keep as is
            material = of_material
        elif (
            len(words) >= 2
            and words[-2] == "из"
            and (words[-1] in materials or any(mat.startswith(words[-1]) for mat in materials))
        ):
            # Try to fix truncated material names, eg. '(ямный краситель мешок (гигантский пещерный паук из шёл'
            if words[-1] not in materials:  # Fix partial material name eg. 'шерст', 'шёлк'
                candidates = [mat for mat in materials if mat.startswith(words[-1])]
                if len(candidates) == 1:
                    words[-1] = candidates[0]
                else:
                    material = of_material  # Partial name is not recognized (too short)

            if not material:
                # "из <material>" goes first, followed by the preceding words in genitive
                material = " ".join(words[-2:] + list(to_genitive_case_list(words[:-2])))
        else:
            gen_case = list(to_genitive_case_list(of_material.split()))
            # Use the genitive form only if every word could be converted
            if None not in gen_case:
                material = "из " + " ".join(gen_case)
            else:
                material = of_material
        replacement_string = "{} {} ({}".format(container, containment, material)
        # Close the parenthesis only if the matched text was itself closed
        if initial_string[-1] == ")":
            replacement_string += ")"
    text = text.replace(initial_string, replacement_string.capitalize())
    return text
Exemplo n.º 12
0
def corr_forge(_, search_result):
    """
    Rephrase forge job names of the form "<verb> из <material> <object>".

    Group 1 of ``search_result`` is the verb, group 2 the "из <material> <object>"
    words (at least three, starting with "из"; both are enforced by asserts).
    The object is put into the accusative case; the material becomes an
    adjective before the object when ``make_adjective`` knows it, otherwise it
    is appended after the object unchanged. The verb "Кузница" is replaced by
    "Ковать". The result is capitalized.

    >>> corr_forge("Ковать из меди болты")
    'Ковать медные болты'
    >>> corr_forge("Кузница из железа Наконечники стрел баллисты")
    'Ковать железные наконечники стрел баллисты'
    >>> corr_forge("Делать из адамантина Колчан")
    'Делать адамантиновый колчан'
    """
    verb = search_result.group(1)
    words = search_result.group(2).split()
    assert len(words) >= 3

    assert words[0] == "из"
    # Second word is adjective in gent case
    second_is_adjf_in_gent = any_in_tag({"ADJF", "gent"}, custom_parse(words[1]))
    # Third word is noun in gent case
    third_is_noun_in_gent = any_in_tag({"NOUN", "gent"}, custom_parse(words[2]))
    if second_is_adjf_in_gent and third_is_noun_in_gent:
        # Two-word material: "из <adjective> <noun>"
        of_material = words[:3]
        obj = words[3:]
    else:
        # Second word is noun in gent case
        assert any_in_tag({"NOUN", "gent"}, custom_parse(words[1]))
        of_material = words[:2]
        obj = words[2:]

    of_material = " ".join(of_material)
    noun_index = None
    parse = None
    gender = None

    if len(obj) == 1:
        # Single-word object: it is the noun itself
        noun_index = 0
        parse = custom_parse(obj[noun_index])
        noun = filter_noun(parse)
        gender = get_gender(obj[noun_index], known_tags={"nomn"})
        if not any_in_tag({"accs"}, noun):
            obj[0] = noun[0].inflect({"accs"}).word
    else:
        # Multi-word object: inflect words up to and including the first noun
        for i, word in enumerate(obj):
            parse = custom_parse(word)
            noun = filter_noun(parse)
            if noun:
                noun_index = i
                gender = get_gender(obj[noun_index])
                obj[i] = noun[0].inflect({"accs"}).word
                break  # Words after the 'item' must be left in genitive case
            # NOTE(review): a plain string is passed here while the call above passes a set —
            # confirm any_in_tag accepts both
            elif not any_in_tag("accs", parse):
                obj[i] = parse[0].inflect({"accs"}).word

    assert parse is not None
    # `parse` is the parse of the last word examined above; if none of its variants is
    # accusative, the word at noun_index is overwritten with parse[0] inflected to accusative.
    # NOTE(review): noun_index stays None when the loop found no noun — verify that cannot happen
    if not any_in_tag("accs", parse):
        obj[noun_index] = parse[0].inflect({"accs"}).word

    if verb == "Кузница":
        verb = "Ковать"

    if of_material in make_adjective:
        # Material has an adjective form: place it before the object, agreeing in gender
        assert gender is not None
        material = inflect_adjective(make_adjective[of_material], gender, "accs", animated=False)
        text = verb + " " + material + " " + " ".join(obj)
    else:
        # No adjective form: keep "из <material>" after the object
        text = verb + " " + " ".join(obj) + " " + of_material

    return text.capitalize()
Exemplo n.º 13
0
        replacement_string = " ".join(adjs) + " " + material
    # elif (words[2] not in corr_item_general_except and len(words) > 3 and
    elif (
        len(words) > 3
        and any_in_tag({"gent"}, custom_parse(words[1]))
        and any_in_tag({"NOUN", "gent"}, custom_parse(words[2]))  # The second word is in genitive
    ):  # The third word is a noun in genitive
        # Complex case, eg. "из висмутовой бронзы"
        of_material = " ".join(words[:3])
        words = words[3:]
        if len(words) == 1:
            first_part = words[0]
        else:
            obj = words[-1]
            gender = get_gender(obj, "NOUN")
            adjs = (inflect_adjective(adj, gender) or adj for adj in words[:-1])
            first_part = "{} {}".format(" ".join(adjs), obj)
        replacement_string = first_part + " " + of_material
    elif any_in_tag({"NOUN", "gent"}, custom_parse(words[1])) and words[1] != "древесины":
        # Simple case, eg. "из бронзы"
        of_material = " ".join(words[:2])
        words = words[2:]
        item = words[-1]

        for word in words:
            if any_in_tag({"NOUN", "nomn"}, custom_parse(word)):
                item = word
                break

        if of_material in make_adjective:
            gender = get_gender(item, {"nomn"})