def _remove_unsupported(aot_tag):
    """Strip the grammemes treated as unsupported from an AOT tag.

    The tag is split with ``aot.split_tag`` into a part of speech and a
    mutable set of grammemes, a fixed group of grammemes is removed from
    that set, and the tag is reassembled with ``aot.join_tag``.

    For tags whose part of speech is 'С' and that carry 'аббр', every key
    of ``aot.CASES`` is removed as well.
    """
    pos, grammemes = aot.split_tag(aot_tag)

    # Removed regardless of the part of speech.
    always_dropped = ('од', 'но', '2', 'имя', 'фам', 'лок', 'кач', 'разг')
    grammemes.difference_update(always_dropped)

    # NOTE: a rule that discarded 'дст' when pos == 'ДЕЕПРИЧАСТИЕ' is
    # disabled (it was commented out) and is therefore not applied here.

    if pos == 'С' and 'аббр' in grammemes:
        grammemes.difference_update(aot.CASES.keys())

    return aot.join_tag(pos, grammemes)
def to_aot(dialog_tag):
    """Convert ``dialog_tag`` into a comma-separated AOT tag string.

    The tag is split with ``aot.split_tag``.  The part of speech is looked
    up in ``POS_INV`` (a missing key raises ``KeyError``), and every
    grammeme that has an entry in ``GRAMINFO_MAP_INV`` is translated;
    grammemes without an entry are silently skipped.

    For the "V" part of speech the result is refined by the first of
    "inf", "partcp", "ger" found among the grammemes.
    """
    pos, info = aot.split_tag(dialog_tag)

    # Kept lazy: translation only happens while the result is being joined.
    translated = (
        GRAMINFO_MAP_INV[gram] for gram in info if gram in GRAMINFO_MAP_INV
    )

    aot_pos = POS_INV[pos]
    if pos == "V":
        # Order matters: the first marker present decides the label.
        verb_forms = (
            ("inf", "ИНФИНИТИВ"),
            ("partcp", "ПРИЧАСТИЕ"),
            ("ger", "ДЕЕПРИЧАСТИЕ"),
        )
        for marker, label in verb_forms:
            if marker in info:
                aot_pos = label
                break

    parts = [aot_pos]
    parts.extend(translated)
    return ",".join(parts)
def to_aot(dialog_tag, word=None):
    """Convert ``dialog_tag`` into a comma-separated AOT tag string.

    ``word`` is accepted but not used by this function.

    The part of speech returned by ``aot.split_tag`` is mapped through
    ``POS_INV`` (``KeyError`` if it is missing).  When it is 'V', the label
    is replaced according to the first of 'inf', 'partcp', 'ger' present
    among the grammemes.  Grammemes are translated through
    ``GRAMINFO_MAP_INV``; those without an entry are dropped.
    """
    pos, info = aot.split_tag(dialog_tag)

    # Generator on purpose: nothing is translated until the final join.
    mapped = (
        GRAMINFO_MAP_INV[item] for item in info if item in GRAMINFO_MAP_INV
    )

    label = POS_INV[pos]
    if pos == 'V':
        # Checked in priority order; only the first hit is applied.
        overrides = [
            ('inf', 'ИНФИНИТИВ'),
            ('partcp', 'ПРИЧАСТИЕ'),
            ('ger', 'ДЕЕПРИЧАСТИЕ'),
        ]
        hits = [name for marker, name in overrides if marker in info]
        if hits:
            label = hits[0]

    return ",".join(itertools.chain((label,), mapped))
def _remove_unsupported(aot_tag):
    """Return ``aot_tag`` without the grammemes treated as unsupported.

    Splits the tag via ``aot.split_tag``, removes a fixed group of
    grammemes from the resulting set, additionally removes all keys of
    ``aot.CASES`` when the part of speech is 'С' and 'аббр' is present,
    and joins the pieces back together with ``aot.join_tag``.
    """
    pos, info = aot.split_tag(aot_tag)

    for unsupported in ('од', 'но', '2', 'имя', 'фам', 'лок', 'кач', 'разг'):
        info.discard(unsupported)

    # NOTE: discarding 'дст' for pos == 'ДЕЕПРИЧАСТИЕ' is a disabled
    # (commented-out) rule and is not performed.

    is_abbreviated_c = pos == 'С' and 'аббр' in info
    if is_abbreviated_c:
        info.difference_update(aot.CASES.keys())

    return aot.join_tag(pos, info)
def to_aot(dialog_tag):
    """Build the AOT form of ``dialog_tag`` as a comma-separated string.

    The first field is the part of speech from ``POS_INV`` (``KeyError``
    when absent), replaced for 'V' tags by 'ИНФИНИТИВ', 'ПРИЧАСТИЕ' or
    'ДЕЕПРИЧАСТИЕ' when 'inf', 'partcp' or 'ger' (checked in that order)
    is among the grammemes.  The remaining fields are the grammemes that
    have a translation in ``GRAMINFO_MAP_INV``.
    """
    pos, info = aot.split_tag(dialog_tag)

    # Lazy on purpose; consumed only by the join below.
    new_form = (
        GRAMINFO_MAP_INV[tag] for tag in info if tag in GRAMINFO_MAP_INV
    )

    new_pos = POS_INV[pos]
    if pos != 'V':
        return ",".join(itertools.chain([new_pos], new_form))

    # Verb tags: the first matching marker wins.
    if 'inf' in info:
        new_pos = 'ИНФИНИТИВ'
    elif 'partcp' in info:
        new_pos = 'ПРИЧАСТИЕ'
    elif 'ger' in info:
        new_pos = 'ДЕЕПРИЧАСТИЕ'
    return ",".join(itertools.chain([new_pos], new_form))
def from_aot(aot_tag):
    """Convert an AOT tag into a comma-separated tag in the target notation.

    The output starts with ``POS[pos]`` (``KeyError`` if the part of
    speech is unknown), followed by at most one marker derived from the
    AOT part of speech ('partcp', 'inf' or 'ger'), followed by the
    grammemes that have a translation in ``GRAMINFO_MAP``.

    'дст' and 'стр' are kept only for 'ПРИЧАСТИЕ' / 'КР_ПРИЧАСТИЕ' tags;
    for every other part of speech they are removed before translation.
    """
    pos, info = aot.split_tag(aot_tag)

    markers = []
    if pos in ('ПРИЧАСТИЕ', 'КР_ПРИЧАСТИЕ'):
        markers.append('partcp')
    else:
        info.difference_update(('дст', 'стр'))
        if pos == 'ИНФИНИТИВ':
            markers.append('inf')
        elif pos == 'ДЕЕПРИЧАСТИЕ':
            markers.append('ger')

    # Untranslatable grammemes are skipped rather than reported.
    translated = (GRAMINFO_MAP[gram] for gram in info if gram in GRAMINFO_MAP)

    fields = [POS[pos]]
    fields.extend(markers)
    fields.extend(translated)
    return ",".join(fields)
def from_aot(aot_tag):
    """Translate ``aot_tag`` into a comma-separated tag string.

    Fields, in order: ``POS[pos]``; an optional marker ("partcp" for
    "ПРИЧАСТИЕ"/"КР_ПРИЧАСТИЕ", "inf" for "ИНФИНИТИВ", "ger" for
    "ДЕЕПРИЧАСТИЕ"); every grammeme that ``GRAMINFO_MAP`` can translate.
    Outside the two participle parts of speech, "дст" and "стр" are
    dropped first.  An unknown part of speech raises ``KeyError``.
    """
    pos, info = aot.split_tag(aot_tag)

    # Part of speech -> extra marker emitted right after the POS field.
    marker_by_pos = {
        "ПРИЧАСТИЕ": "partcp",
        "КР_ПРИЧАСТИЕ": "partcp",
        "ИНФИНИТИВ": "inf",
        "ДЕЕПРИЧАСТИЕ": "ger",
    }
    marker = marker_by_pos.get(pos)

    if marker != "partcp":
        info.discard("дст")
        info.discard("стр")

    extra = [] if marker is None else [marker]
    translated = (GRAMINFO_MAP[key] for key in info if key in GRAMINFO_MAP)
    return ",".join(itertools.chain([POS[pos]], extra, translated))
def from_aot(aot_tag, word=None):
    """Translate ``aot_tag`` into a comma-separated tag string.

    ``word`` is accepted but not used by this function.

    The result consists of ``POS[pos]`` (``KeyError`` for an unknown part
    of speech), then at most one of 'partcp' / 'inf' / 'ger' chosen from
    the AOT part of speech, then the grammemes translated through
    ``GRAMINFO_MAP`` (grammemes without an entry are omitted).  'дст' and
    'стр' survive only on 'ПРИЧАСТИЕ' and 'КР_ПРИЧАСТИЕ' tags.
    """
    pos, info = aot.split_tag(aot_tag)
    is_participle = pos in ('ПРИЧАСТИЕ', 'КР_ПРИЧАСТИЕ')

    if not is_participle:
        for grammeme in ('дст', 'стр'):
            info.discard(grammeme)

    if is_participle:
        extra = ['partcp']
    elif pos == 'ИНФИНИТИВ':
        extra = ['inf']
    elif pos == 'ДЕЕПРИЧАСТИЕ':
        extra = ['ger']
    else:
        extra = []

    # Lazy translation; evaluated while joining.
    converted = (GRAMINFO_MAP[name] for name in info if name in GRAMINFO_MAP)
    return ",".join(itertools.chain([POS[pos]], extra, converted))
def _remove_unsupported_aot(tag):
    """Split ``tag`` and drop the grammemes treated as unsupported.

    Unlike a string-returning normaliser, this returns the
    ``(pos, grammemes)`` pair produced by ``aot.split_tag`` after
    filtering, not a re-joined tag.

    Removed grammemes:
      * always: 'од', 'но', 'указат';
      * for 'ПРЕДЛ': every key of ``aot.CASES``;
      * for 'ЧИСЛ': 'ед' and 'мн';
      * for 'ДЕЕПРИЧАСТИЕ' and 'КР_ПРИЧАСТИЕ': every key of ``aot.TENSES``.
    """
    pos, grammemes = aot.split_tag(tag)

    grammemes.difference_update(('од', 'но', 'указат'))

    # The part-of-speech checks are mutually exclusive string comparisons.
    if pos == 'ПРЕДЛ':
        grammemes.difference_update(aot.CASES.keys())
    elif pos == 'ЧИСЛ':
        grammemes.difference_update(('ед', 'мн'))
    elif pos in ('ДЕЕПРИЧАСТИЕ', 'КР_ПРИЧАСТИЕ'):
        grammemes.difference_update(aot.TENSES.keys())

    return pos, grammemes
def _remove_unsupported(tag):
    """Normalise an AOT tag by removing or folding unsupported distinctions.

    Steps, in order:
      1. the 'КР_ПРИЧАСТИЕ' and 'КР_ПРИЛ' parts of speech are replaced by
         'ПРИЧАСТИЕ' and 'П' respectively;
      2. the grammeme 'буд' is replaced by 'нст';
      3. 'од', 'но', '2' and 'имя' are removed;
      4. 'дст' and 'стр' are removed unless the (already folded) part of
         speech is 'ПРИЧАСТИЕ'.

    The result is rebuilt with ``aot.join_tag``.
    """
    pos, grammemes = aot.split_tag(tag)

    # Fold the 'КР_' labels into their base part-of-speech labels.
    folded = {'КР_ПРИЧАСТИЕ': 'ПРИЧАСТИЕ', 'КР_ПРИЛ': 'П'}
    pos = folded.get(pos, pos)

    if 'буд' in grammemes:
        grammemes.remove('буд')
        grammemes.add('нст')

    grammemes.difference_update(('од', 'но', '2', 'имя'))

    if pos != 'ПРИЧАСТИЕ':
        grammemes.difference_update(('дст', 'стр'))

    return aot.join_tag(pos, grammemes)
def _remove_unsupported(tag):
    """Return ``tag`` with unsupported distinctions removed or folded.

    'КР_ПРИЧАСТИЕ' becomes 'ПРИЧАСТИЕ' and 'КР_ПРИЛ' becomes 'П'; 'буд' is
    rewritten to 'нст'; 'од', 'но', '2' and 'имя' are dropped; 'дст' and
    'стр' are dropped for every part of speech except 'ПРИЧАСТИЕ' (checked
    after the folding above).  The tag is split with ``aot.split_tag`` and
    rebuilt with ``aot.join_tag``.
    """
    pos, info = aot.split_tag(tag)

    if pos == 'КР_ПРИЧАСТИЕ':
        pos = 'ПРИЧАСТИЕ'
    elif pos == 'КР_ПРИЛ':
        pos = 'П'

    if 'буд' in info:
        info.discard('буд')
        info.add('нст')

    for grammeme in ('од', 'но', '2', 'имя'):
        info.discard(grammeme)

    keeps_dst_str = pos == 'ПРИЧАСТИЕ'
    if not keeps_dst_str:
        info.discard('дст')
        info.discard('стр')

    return aot.join_tag(pos, info)
def assertTagEqual(self, converted, gold):
    """Assert that ``converted`` matches the reference tag ``gold``.

    ``gold`` is parsed with ``aot.split_tag_raw`` and ``converted`` with
    ``aot.split_tag``.  The parts of speech must be equal and the grammeme
    collections must satisfy ``_gram_info_match``.

    Raises:
        AssertionError: on either mismatch; the message is the
            ``(converted, gold)`` pair in both cases, so a part-of-speech
            failure is as easy to diagnose as a grammeme failure.
    """
    pos_gold, info_gold = aot.split_tag_raw(gold)
    pos_got, info_got = aot.split_tag(converted)

    # Both checks report the full tags, not just the fragment that differed.
    assert pos_gold == pos_got, (converted, gold)
    assert _gram_info_match(info_gold, info_got), (converted, gold)
def test_to_aot(self, word, open_tag, aot_tag):
    """Check the 'opencorpora-ext' -> 'aot' conversion of ``open_tag``.

    Both the converted tag and the expected ``aot_tag`` are passed through
    ``_remove_unsupported`` and ``aot.split_tag`` before being compared.
    ``word`` is not used in the body (presumably it is supplied by the
    test parametrisation for readability -- TODO confirm).
    """
    converted = converters.convert(open_tag, 'opencorpora-ext', 'aot')

    actual = aot.split_tag(_remove_unsupported(converted))
    expected = aot.split_tag(_remove_unsupported(aot_tag))

    assert actual == expected
def test_to_aot(self, word, open_tag, aot_tag):
    """Converting ``open_tag`` to 'aot' must yield a tag equal to ``aot_tag``.

    Equality is checked on the output of ``aot.split_tag`` after both tags
    have gone through ``_remove_unsupported``.  ``word`` is unused here.
    """
    converted = converters.convert(open_tag, 'opencorpora-ext', 'aot')

    def normalised(tag):
        # Same normalisation for the produced and the reference tag.
        return aot.split_tag(_remove_unsupported(tag))

    assert normalised(converted) == normalised(aot_tag)