def corr_gem_cutting(text, search_result):
    """
    Make the words preceding the last word of the matched phrase agree with it.

    Group 1 of ``search_result`` is split on whitespace and its last word is treated
    as the head noun. If that word is listed in ``body_parts`` the whole text is
    handed over to ``corr_item_body_parts``. Otherwise every preceding word that is
    a key of ``make_adjective`` or is recognised by ``is_adjective`` is passed
    through ``inflect_adjective`` with the gender obtained for the head noun, and
    the matched fragment (group 0) is replaced in ``text``.

    >>> corr_gem_cutting("(бриолетовый восковые опалы)")
    '(бриолетовые восковые опалы)'
    >>> corr_gem_cutting("большой шерлы")
    'большие шерлы'
    """
    phrase = search_result.group(1).split()
    noun = phrase[-1]
    if noun in body_parts:
        return corr_item_body_parts(text)

    gender = get_gender(noun, {"NOUN", "nomn"})

    def agree(word):
        # Material nouns are first mapped to their adjective form, then inflected.
        if word in make_adjective:
            return inflect_adjective(make_adjective[word], gender)
        if is_adjective(word):
            return inflect_adjective(word, gender)
        # Anything else is kept untouched.
        return word

    corrected = [agree(word) for word in phrase[:-1]] + [noun]
    return text.replace(search_result.group(0), " ".join(corrected))
def corr_settlement(_, search_result):
    """
    Build a capitalized settlement title from the matched groups.

    Group 1 is the (possibly empty) adjective part, group 2 the settlement type and
    group 3 the settlement name. The adjective words are inflected with the gender
    of the settlement's main word; the name is always passed through
    ``str.capitalize``.

    Returns ``None`` when the adjective part is "Покинуть" or "Разрушить"
    (presumably so that the caller leaves such text alone -- confirm against the
    caller).

    A word for which ``inflect_adjective`` returns ``None`` is kept in its original
    form. This fallback is applied per word, so a multi-word adjective part no
    longer raises ``TypeError`` inside ``" ".join`` when one word cannot be inflected.

    >>> corr_settlement(" человеческий крепость Belrokalle")
    'Человеческая крепость Belrokalle'
    >>> corr_settlement(" эльфийский лесное убежище Etathuatha")
    'Эльфийское лесное убежище Etathuatha'
    >>> corr_settlement(" дварфийский горный город КилрудОстач")
    'Дварфийский горный город Килрудостач'
    >>> corr_settlement(" лесное убежище Cinilidisa")
    'Лесное убежище Cinilidisa'
    """
    adjective = search_result.group(1).strip()
    settlement = search_result.group(2)
    name = search_result.group(3)

    if not adjective:
        # No adjective: only the capitalization needs fixing.
        return "{} {}".format(settlement.capitalize(), name.capitalize())

    if adjective in {"Покинуть", "Разрушить"}:
        return None

    gender = get_main_word_gender(settlement)

    inflected_words = []
    for word in adjective.split(" "):
        inflected = inflect_adjective(word, gender)
        # Keep the original word when it cannot be inflected.
        inflected_words.append(word if inflected is None else inflected)
    adjective_2 = " ".join(inflected_words)

    return "{} {} {}".format(adjective_2.capitalize(), settlement, name.capitalize())
def corr_adjective_relief(text, search_result):
    """
    Make the adjective (group 1) agree with the relief object (group 2).

    * Multi-word object whose first word is an adjective: both that word and the
      group-1 adjective are inflected (nominative) with the gender of the object's
      last word and replaced inside ``text``.
    * Multi-word object whose first word is not an adjective: ``text`` is left as is.
    * Single-word object: ``text`` becomes "<inflected adjective> <object>".

    Whenever ``inflect_adjective`` yields nothing for a word, that word is left
    unchanged. The single-word branch always had this guard; the multi-word branch
    now has it too, instead of passing ``None`` to ``str.replace``.

    The result is always passed through ``str.capitalize``.

    >>> corr_adjective_relief("Заснеженный Густой овсяница")
    'Заснеженная густая овсяница'
    >>> corr_adjective_relief("Густой мюленбергия")
    'Густая мюленбергия'
    >>> corr_adjective_relief("Густой морошка")
    'Густая морошка'
    >>> corr_adjective_relief("Заснеженный Густой куропаточья трава")
    'Заснеженная густая куропаточья трава'
    """
    adjective = search_result.group(1)
    obj = search_result.group(2)

    if " " in obj:
        words = obj.split(" ")
        if is_adjective(words[0]):
            gender = get_gender(words[-1])
            # First the object's own adjective, then the leading one (same order as before).
            for old_word in (words[0], adjective):
                new_word = inflect_adjective(old_word, gender, "nomn")
                if new_word:
                    text = text.replace(old_word, new_word)
    else:
        gender = get_gender(obj)
        new_word = inflect_adjective(adjective, gender, "nomn")
        if new_word:
            text = "{} {}".format(new_word, obj)

    return text.capitalize()
def corr_jewelers_shop(_, search_result):
    """
    Fix the grammar of jeweler's workshop jobs.

    Group 1 is the action, group 2 the words that follow it.

    * Action "Огранить": the last word is the item and is put into accusative case
      (the literal "адамантина" is special-cased to "адамантин"); the words before
      it are inflected as accusative inanimate adjectives. A leading "из" is dropped.
    * Any other action: every word except "из" is put into ablative
      (instrumental) case, and a trailing " с" is removed from the action.

    The result is passed through ``str.capitalize``.

    >>> corr_jewelers_shop("Огранить из необработанного адамантина")
    'Огранить необработанный адамантин'
    >>> corr_jewelers_shop("Инкрустировать Предметы обстановки с из необработанного адамантина")
    'Инкрустировать предметы обстановки необработанным адамантином'
    >>> corr_jewelers_shop("Огранить из фарфора")
    'Огранить фарфор'
    """
    action = search_result.group(1)
    tokens = search_result.group(2).split()

    if action == "Огранить":
        # Accusative case
        known_tags = None
        if tokens[0] == "из":
            # After "из" the words come in genitive case; pass that hint to get_gender.
            tokens = tokens[1:]
            known_tags = {"gent"}

        gem = tokens[-1]
        gender = get_gender(gem, known_tags=known_tags)
        result_words = [inflect_adjective(word, gender, "accs", animated=False) for word in tokens[:-1]]
        candidates = [variant for variant in custom_parse(gem) if {gender, "inan"} in variant.tag]

        if gem == "адамантина":
            gem = "адамантин"
        else:
            gem = candidates[0].inflect({"accs"}).word
        result_words.append(gem)
    else:
        # Instrumental/ablative case ('incrust with smth')
        result_words = []
        for word in tokens:
            if word != "из":
                result_words.append(custom_parse(word)[0].inflect({"ablt"}).word)

    if action.endswith(" с"):
        action = action[:-2]

    return (action + " " + " ".join(result_words)).capitalize()
def corr_craft_glass(text, search_result):  # TODO: Combine into single crafting-related function
    """
    Fix the grammar of glass crafting jobs.

    Groups read from ``search_result``: 1 - verb, 2 - adjectives,
    3 - material, 4 - product (may be empty).

    >>> corr_craft_glass("Делать грубый зелёное стекло")
    'Варить грубое зелёное стекло'
    >>> corr_craft_glass("Делать гигантский хрусталь лезвие топора")
    'Делать гигантское лезвие топора из хрусталя'
    """
    material = search_result.group(3)
    material_gender = get_gender(material)
    words = search_result.group(2).split()
    product = search_result.group(4).split()
    verb = search_result.group(1)
    if not product:
        # No product: the job produces the material itself, so the verb is replaced
        # and all adjectives are made to agree with the material.
        verb = "Варить"
        adjectives = (inflect_adjective(adj, material_gender, "accs", animated=False) for adj in words)
        result = "{} {} {}".format(verb, " ".join(adjectives), material)
    else:
        # Index of the first adjective from the listed set; everything before it is
        # treated as an adjective of the product. Defaults to "all words".
        index = next(
            (i for i, item in enumerate(words) if item in {"грубое", "зелёное", "прозрачное", "грубый"}), len(words)
        )
        product_adjectives = words[:index]
        if any_in_tag({"NOUN", "nomn"}, custom_parse(product[0])):
            # The first product word is a nominative noun: it is the head word.
            product_gender = get_gender(product[0])
            product[0] = inflect_noun(product[0], case="accs")
        else:
            # Otherwise the last product word is the head; the preceding words are
            # handled as additional adjectives.
            product_gender = get_gender(product[-1])
            product_adjectives += product[:-1]
            product = [inflect_noun(product[-1], case="accs")]
        product_adjectives = [
            inflect_adjective(adj, product_gender, case="accs", animated=False) for adj in product_adjectives
        ]
        # NOTE(review): the definition appears to end here in the reviewed excerpt --
        # no `result` is built for this branch and no return statement is visible.
        # Confirm against the full file.
def corr_relief(_, search_result):
    """
    Reorder a "<description> <relief object>" phrase.

    Group 1 is the description (material, optionally preceded by adjectives),
    group 2 is the object.

    * Object "деревце" (sapling): the result is "Деревце (<species>)", or
      "Мёртвое деревце (<species>)" when the description starts with "Мёртвый".
      The species is every word between the leading adjective and the trailing
      object; multi-word species names are joined with spaces.
    * Otherwise the result is "<object> из <material in genitive case>". Leading
      words from a fixed set ("Заснеженный", "Неотесанный", "Влажный") are inflected
      to agree with the object and placed in front of it.

    The result is passed through ``str.capitalize``.

    >>> corr_relief("Мёртвый клён деревце")
    'Мёртвое деревце (клён)'
    >>> corr_relief("Глинистый суглинок Стена")
    'Стена из глинистого суглинка'
    >>> corr_relief("кремень подъем")
    'Подъем из кремня'
    """
    group1 = search_result.group(1)
    obj = search_result.group(2)

    if obj == "деревце":
        if group1.split(" ")[0] == "Мёртвый":
            # Join with a space (not an empty string) so that multi-word species
            # names keep their word boundaries.
            species = " ".join(search_result.group(0).split(" ")[1:-1])
            text = "Мёртвое деревце ({})".format(species)
        else:
            text = "Деревце ({})".format(group1)
        return text.capitalize()

    if " " in group1:
        words = group1.split(" ")
        first_words = []
        gender = get_main_word_gender(obj)

        # Collect the leading state adjectives, inflected to agree with the object.
        for word in words:
            if word not in {"Заснеженный", "Неотесанный", "Влажный"}:
                break
            new_word = inflect_adjective(word, gender) if gender is not None else None
            # Fall back to the original word when no gender or no inflection is available.
            first_words.append(new_word or word)

        words = words[len(first_words):]
        if words[0] == "из":
            # Words after 'из' are used as they are.
            words = words[1:]
        else:
            words = to_genitive_case_list(words)

        if not first_words:
            text = "{} из {}".format(obj, " ".join(words))
        else:
            text = "{} {} из {}".format(" ".join(first_words), obj, " ".join(words))
    else:
        material = group1
        text = "{} из {}".format(obj, to_genitive_case(material))

    if "иза" in text:
        text = text.replace(" иза", "")
    return text.capitalize()
def corr_craft_general(text, search_result):
    """
    Fix the grammar of a generic crafting job "<verb> <material/adjectives> <product>".

    Group 1 is the verb, group 2 the remaining words. The product starts at the
    first word (after the very first one) that parses as a nominative noun and is
    not a key of ``make_adjective``; a single remaining word is the product itself.
    The product is put into accusative case. The words before it become either
    "из <material in genitive>" (one word that is neither an adjective nor a key of
    ``make_adjective``) or adjectives agreeing with the product.

    The matched fragment is replaced in ``text`` and the whole text is passed
    through ``str.capitalize``.

    >>> corr_craft_general("Изготовить камень дверь")
    'Изготовить каменную дверь'
    >>> corr_craft_general("Делать деревянный ловушка для животных")
    'Делать деревянную ловушку для животных'
    >>> corr_craft_general("Украшать кость")
    'Украшать кость'
    >>> corr_craft_general("Делать деревянный изделия")
    'Делать деревянные изделия'
    """
    verb = search_result.group(1)
    words = search_result.group(2).split()

    product = None
    if len(words) <= 1:
        product = words[0]
        words = []
    else:
        # Position of the first word (skipping index 0) that starts the product.
        split_at = next(
            (
                i
                for i, word in enumerate(words[1:], 1)
                if any_in_tag({"NOUN", "nomn"}, custom_parse(word)) and word not in make_adjective
            ),
            None,
        )
        if split_at is not None:
            product = " ".join(words[split_at:])
            words = words[:split_at]

    product_gender = get_main_word_gender(product)

    if " " in product:
        product = inflect_collocation(product, {"accs"})
    else:
        number = "plur" if product_gender == "plur" else "sing"
        product = inflect_noun(product, "accs", orig_form={number, "inan"})
        assert product is not None

    if not words:
        result = "{} {}".format(verb, product)
    elif len(words) == 1 and words[0] not in make_adjective and not is_adjective(words[0]):
        # A bare material noun, e.g. рог -> (из) рога
        material = inflect_noun(words[0], "gent", orig_form={"nomn", "inan"})
        assert material is not None
        result = "{} {} из {}".format(verb, product, material)
    else:
        adjectives = []
        for word in words:
            if word in make_adjective:
                adjectives.append(make_adjective[word])
            elif is_adjective(word):
                adjectives.append(word)
            else:
                adjectives.append(None)
        assert all(adj is not None for adj in adjectives)

        adjectives = [inflect_adjective(adj, product_gender, "accs", animated=False) for adj in adjectives]
        result = "{} {} {}".format(verb, " ".join(adjectives), product)

    return text.replace(search_result.group(0), result).capitalize()
def corr_wooden_logs(text, search_result):
    """
    Turn "<group 1> <wood> <group 3>" into "<wood adjective> <group 3>".

    When "из <group 2>" is a key of ``make_adjective``, the mapped adjective is
    inflected with "plur" and put in front of group 3. Otherwise the matched
    fragment is replaced with "<group 1> <group 2>" only.

    >>> corr_wooden_logs('древесина дуба брёвна')
    'дубовые брёвна'
    """
    matched = search_result.group(0)
    wood = search_result.group(2)
    key = "из " + wood

    if key not in make_adjective:
        # No adjective known for this wood, e.g. "древесина акации"
        return text.replace(matched, search_result.group(1) + " " + wood)

    # Adjective form is available, e.g. "берёзовые брёвна"
    adjective = inflect_adjective(make_adjective[key], "plur")
    return text.replace(matched, adjective + " " + search_result.group(3))
def corr_clothiers_shop(_, search_result):
    """
    Fix the grammar of clothier's shop jobs.

    Group 1 is the verb, group 2 the material, group 3 the product.

    * Empty product: returns ``None`` (the text is left as is, eg. 'Ткать шёлк').
    * Verb "Вышивать": "<verb> <product> на <material in locative case>", except
      for the material "пряжа", which yields "Вязать <product> из пряжи".
    * Otherwise verb and "из ..." phrase are looked up in ``cloth_subst`` by the
      material (the verb is kept for "щит"/"баклер" and forced to "Вить" for
      "верёвка"); the product is put into accusative case and the material is
      rendered as an adjective when ``make_adjective`` knows it.

    >>> corr_clothiers_shop("Делать ткань роба")
    'Шить робу из ткани'
    >>> corr_clothiers_shop("Делать шёлк роба")
    'Шить шёлковую робу'
    >>> corr_clothiers_shop("Изготовить ткань мешок")
    'Шить мешок из ткани'
    >>> corr_clothiers_shop("Вышивать кожа изображение")
    'Вышивать изображение на коже'
    >>> corr_clothiers_shop("Делать пряжа рубаха")
    'Вязать рубаху из пряжи'
    >>> corr_clothiers_shop("Делать ткань верёвка")
    'Вить верёвку из ткани'
    """
    verb = search_result.group(1)
    material = search_result.group(2)
    product = search_result.group(3).strip()

    if not product:
        # Nothing to reorder, eg. 'Ткать шёлк'
        return None

    if verb == "Вышивать":
        if material == "пряжа":
            # "Вязать <product> из пряжи"
            return "{} {} {} {}".format("Вязать", product, "из", "пряжи")
        # "Вышивать <product> на <material>"
        material_loct = inflect_noun(material, case="loct", orig_form={"nomn"})
        return "{} {} {} {}".format(verb, product, "на", material_loct)

    substituted_verb, of_material = cloth_subst[material]
    if product not in {"щит", "баклер"}:
        # For shields the original verb ('Делать'/'Изготовить') is kept.
        verb = substituted_verb
    if product == "верёвка":
        verb = "Вить"

    product_accus = inflect_noun(product, case="accs", orig_form={"nomn"})

    if material not in make_adjective:
        # e.g. {Шить} {робу} {из ткани}
        return "{} {} {}".format(verb, product_accus, of_material)

    # e.g. "шёлк" -> "шёлковый": {Шить} {шёлковую} {робу}
    gender = get_gender(product, {"nomn"})
    material_adj = inflect_adjective(make_adjective[material], gender, "accs", animated=False)
    return "{} {} {}".format(verb, material_adj, product_accus)
def corr_weapon_trap_parts(text, search_result):
    """
    Fix adjective agreement and material placement for weapon trap components.

    Group 1 is the leading adjective part, group 2 is "<material phrase> <object>".
    If the first two words of group 2 form a key of ``make_adjective``, the
    material becomes an adjective placed before the object. Otherwise the first
    three words are taken as the material phrase and moved after the object.
    In both cases group 1 is inflected with the gender of the object's main word.

    >>> corr_weapon_trap_parts('гигантский из меди лезвия топоров')
    'гигантские медные лезвия топоров'
    >>> corr_weapon_trap_parts('большой зазубренный из берёзы диски')
    'большие зазубренные берёзовые диски'
    """
    adj = search_result.group(1)
    words = search_result.group(2).split()

    short_material = " ".join(words[:2])
    if short_material in make_adjective:
        obj = " ".join(words[2:])
        gender = get_main_word_gender(obj)
        new_adj = inflect_as_adjective(adj, gender)
        material_adj = inflect_adjective(make_adjective[short_material], gender)
        replacement = "{} {} {}".format(new_adj, material_adj, obj)
    else:
        long_material = " ".join(words[:3])
        obj = " ".join(words[3:])
        gender = get_main_word_gender(obj)
        assert gender is not None
        new_adj = inflect_as_adjective(adj, gender)
        replacement = "{} {} {}".format(new_adj, obj, long_material)

    return text.replace(search_result.group(0), replacement)
def corr_container(text, _):
    """
    Reorder "<contents> <container> (<material>)" into "<Container> <contents> (<material>)".

    The second argument is ignored: ``re_container`` is searched in ``text`` again.
    Groups used: 1 - contents, 2 - container, 3 - material part inside the
    parentheses (may be empty). The contents are put into genitive case, the
    material becomes an adjective agreeing with the container when possible, and
    the rebuilt fragment is capitalized and substituted for the matched one.

    >>> corr_container('(дварфийское пиво бочка (из ольхи))')
    '(Бочка дварфийского пива (ольховая))'
    >>> corr_container("(дварфийское вино бочка (из клёна) <#8>)")
    '(Бочка дварфийского вина (кленовая) <#8>)'
    >>> corr_container("(Семя бочка (из лумбанга) <#10>)")
    '(Бочка семян (лумбанговая) <#10>)'
    """
    search_result = re_container.search(text)
    initial_string = search_result.group(0)
    containment = search_result.group(1)

    # --- Contents: apply explicit substitutions first, then convert to genitive case ---
    if containment in replace_containment:
        containment = replace_containment[containment]
    if containment.endswith("кровь"):
        words = containment.split()
        if words[0] in possessive_adjectives:
            # Replace the first word with its possessive adjective and inflect the whole phrase.
            words[0] = possessive_adjectives[words[0]]
            words = to_genitive_case_list(words)
        else:
            # Put the last word ("... кровь") first, followed by the remaining words in genitive case.
            words = [to_genitive_case_single_noun(words[-1])] + list(to_genitive_case_list(words[:-1]))
        containment = " ".join(words)
    elif containment.startswith("из "):
        containment = containment[3:]  # Words after 'из' are already in genitive case
    elif containment in {"слитков/блоков", "специй"}:
        pass  # Already in genitive case
    elif containment.startswith("семена"):
        # Only the first word is inflected; the rest of the phrase is kept.
        words = containment.split()
        words[0] = to_genitive_case(words[0])
        containment = " ".join(words)
    else:
        containment = to_genitive_case(containment)

    container = search_result.group(2)
    of_material = search_result.group(3)

    # --- Material part ---
    if not of_material:
        replacement_string = container + " " + containment
    elif (
        " " not in of_material
        and is_adjective(of_material)
        or of_material in make_adjective
        or of_material[3:] in make_adjective
    ):
        # The material can be expressed as an adjective agreeing with the container.
        if " " not in of_material and is_adjective(of_material):
            adjective = of_material
        elif of_material in make_adjective:
            adjective = make_adjective[of_material]
        elif of_material[3:] in make_adjective:
            adjective = make_adjective[of_material[3:]]
        else:
            # Not reachable given the enclosing condition; kept as a safe default.
            adjective = None
        gender = get_gender(container, {"nomn"})
        adjective = inflect_adjective(adjective, gender)
        replacement_string = "{} {} ({})".format(container, containment, adjective)
    else:
        words = of_material.split()
        material = None
        if of_material.startswith("из ") or len(of_material) <= 2:
            # Already in "из <material>" form, or too short to analyse.
            material = of_material
        elif (
            len(words) >= 2
            and words[-2] == "из"
            and (words[-1] in materials or any(mat.startswith(words[-1]) for mat in materials))
        ):
            # Try to fix truncated material names, eg. '(ямный краситель мешок (гигантский пещерный паук из шёл'
            if words[-1] not in materials:
                # Fix partial material name eg. 'шерст', 'шёлк'
                candidates = [mat for mat in materials if mat.startswith(words[-1])]
                if len(candidates) == 1:
                    words[-1] = candidates[0]
                else:
                    material = of_material  # Partial name is not recognized (too short)
            if not material:
                # "из <material>" goes first, followed by the preceding words in genitive case.
                material = " ".join(words[-2:] + list(to_genitive_case_list(words[:-2])))
        else:
            gen_case = list(to_genitive_case_list(of_material.split()))
            if None not in gen_case:
                material = "из " + " ".join(gen_case)
            else:
                # At least one word could not be converted; keep the original text.
                material = of_material
        replacement_string = "{} {} ({}".format(container, containment, material)
        # Close the parenthesis only if the matched fragment was closed
        # (it is missing when the source string was truncated).
        if initial_string[-1] == ")":
            replacement_string += ")"

    text = text.replace(initial_string, replacement_string.capitalize())
    return text
def corr_forge(_, search_result):
    """
    Fix the grammar of forge jobs of the form "<verb> из <material> <object>".

    Group 1 is the verb, group 2 is "из <material> <object words>". The object is
    put into accusative case. When "из <material>" is a key of ``make_adjective``
    the material is rendered as an adjective before the object, otherwise the
    "из <material>" phrase is appended after the object. The verb "Кузница" is
    replaced with "Ковать". The result is passed through ``str.capitalize``.

    Input shape is checked with ``assert`` statements only.

    >>> corr_forge("Ковать из меди болты")
    'Ковать медные болты'
    >>> corr_forge("Кузница из железа Наконечники стрел баллисты")
    'Ковать железные наконечники стрел баллисты'
    >>> corr_forge("Делать из адамантина Колчан")
    'Делать адамантиновый колчан'
    """
    verb = search_result.group(1)
    words = search_result.group(2).split()
    assert len(words) >= 3
    assert words[0] == "из"
    # Second word is an adjective in gent case
    second_is_adjf_in_gent = any_in_tag({"ADJF", "gent"}, custom_parse(words[1]))
    # Third word is noun in gent case
    third_is_noun_in_gent = any_in_tag({"NOUN", "gent"}, custom_parse(words[2]))
    if second_is_adjf_in_gent and third_is_noun_in_gent:
        # Three-word material phrase: "из <adjective> <noun>"
        of_material = words[:3]
        obj = words[3:]
    else:
        # Second word is noun in gent case
        assert any_in_tag({"NOUN", "gent"}, custom_parse(words[1]))
        of_material = words[:2]
        obj = words[2:]
    of_material = " ".join(of_material)
    noun_index = None
    parse = None
    gender = None
    if len(obj) == 1:
        noun_index = 0
        parse = custom_parse(obj[noun_index])
        noun = filter_noun(parse)
        gender = get_gender(obj[noun_index], known_tags={"nomn"})
        if not any_in_tag({"accs"}, noun):
            obj[0] = noun[0].inflect({"accs"}).word
    else:
        # Inflect words up to and including the first noun; stop there.
        for i, word in enumerate(obj):
            parse = custom_parse(word)
            noun = filter_noun(parse)
            if noun:
                noun_index = i
                gender = get_gender(obj[noun_index])
                obj[i] = noun[0].inflect({"accs"}).word
                break  # Words after the 'item' must be left in genitive case
            elif not any_in_tag("accs", parse):
                obj[i] = parse[0].inflect({"accs"}).word
    assert parse is not None
    # NOTE(review): `parse` holds the parse of the last word examined above; if no
    # noun was found in the loop, `noun_index` is still None here -- confirm that
    # the inputs guarantee a noun.
    if not any_in_tag("accs", parse):
        obj[noun_index] = parse[0].inflect({"accs"}).word
    if verb == "Кузница":
        verb = "Ковать"
    if of_material in make_adjective:
        assert gender is not None
        material = inflect_adjective(make_adjective[of_material], gender, "accs", animated=False)
        text = verb + " " + material + " " + " ".join(obj)
    else:
        text = verb + " " + " ".join(obj) + " " + of_material
    return text.capitalize()
replacement_string = " ".join(adjs) + " " + material # elif (words[2] not in corr_item_general_except and len(words) > 3 and elif ( len(words) > 3 and any_in_tag({"gent"}, custom_parse(words[1])) and any_in_tag({"NOUN", "gent"}, custom_parse(words[2])) # The second word is in genitive ): # The third word is a noun in genitive # Complex case, eg. "из висмутовой бронзы" of_material = " ".join(words[:3]) words = words[3:] if len(words) == 1: first_part = words[0] else: obj = words[-1] gender = get_gender(obj, "NOUN") adjs = (inflect_adjective(adj, gender) or adj for adj in words[:-1]) first_part = "{} {}".format(" ".join(adjs), obj) replacement_string = first_part + " " + of_material elif any_in_tag({"NOUN", "gent"}, custom_parse(words[1])) and words[1] != "древесины": # Simple case, eg. "из бронзы" of_material = " ".join(words[:2]) words = words[2:] item = words[-1] for word in words: if any_in_tag({"NOUN", "nomn"}, custom_parse(word)): item = word break if of_material in make_adjective: gender = get_gender(item, {"nomn"})