def handle_possession(ud_tag: UdTag) -> UmFeat:
    """Derive the possession feature from the ``[psor]`` parts of a UD tag.

    Only the parts of ``ud_tag`` whose text contains ``[psor]`` are examined.

    Returns:
        ``EMPTY_FEAT`` when there is no ``[psor]`` part; ``UmFeat("PSSD")``
        when the text ``None`` occurs anywhere in the ``[psor]`` parts;
        otherwise ``PSS`` followed by the person digit and the number letter,
        either of which is omitted when the tag does not specify it.

    Raises:
        AssertionError: if there are more than two ``[psor]`` parts, or if the
            text ``Number`` / ``Person`` occurs in the ``[psor]`` parts
            without one of the recognised values listed below.
    """
    possessor = [feat for feat in ud_tag if "[psor]" in feat]
    if not possessor:
        return EMPTY_FEAT

    # The list does not change from here on, so its string form is computed once.
    flattened = str(possessor)
    if "None" in flattened:
        return UmFeat("PSSD")

    try:
        assert len(possessor) <= 2
    except AssertionError:
        # Show the offending parts before letting the failure propagate.
        print(possessor)
        raise

    def pick(attribute, table):
        """Return the code of the first table entry present in the parts."""
        for feature, code in table:
            if feature in possessor:
                return code
        # No recognised value: the attribute must then be absent altogether.
        assert attribute not in flattened
        return ""

    # Number is resolved before person, so its sanity check fires first.
    number = pick(
        "Number",
        (
            ("Number[psor]=Plur", "P"),
            ("Number[psor]=Sing", "S"),
            ("Number[psor]=Dual", "D"),
        ),
    )
    person = pick(
        "Person",
        (
            ("Person[psor]=1", "1"),
            ("Person[psor]=2", "2"),
            ("Person[psor]=3", "3"),
        ),
    )
    return UmFeat(f"PSS{person}{number}")
def ud2um(ud_tag: UdTag) -> UmTag:
    """Convert a UD tag into a UniMorph tag.

    The result starts with the feature from ``handle_possession`` and the
    features from ``handle_arguments``, followed by one converted feature per
    part of ``ud_tag``:

    * a part without a comma is passed to ``process_tag`` as is;
    * a part with a comma is split into its key and comma-separated values,
      each ``key=value`` pair is converted on its own, results equal to
      ``"_"`` are discarded, and the survivors are joined with ``/`` inside
      braces (``EMPTY_FEAT`` is used when none survive).

    Features whose string form is ``"_"`` are then removed; if nothing is
    left the tag consists of ``EMPTY_FEAT`` alone. The remaining features are
    joined with ``;``.

    Raises:
        ValueError: if a comma-containing part does not split into exactly
            two pieces on ``=``.
    """
    # Possession is evaluated first, then the argument features, in that order.
    features: List[UmFeat] = [handle_possession(ud_tag), *handle_arguments(ud_tag)]

    for part in ud_tag:
        if "," in part:
            key, joined_values = part.split("=")
            converted = [
                process_tag(UdFeat(f"{key}={value}"))
                for value in joined_values.split(",")
            ]
            alternatives = [feat for feat in converted if feat != "_"]
            if alternatives:
                features.append(UmFeat("{" + "/".join(alternatives) + "}"))
            else:
                features.append(EMPTY_FEAT)
        else:
            features.append(process_tag(part))

    meaningful = [feat for feat in features if str(feat) != "_"]
    if not meaningful:
        meaningful = [EMPTY_FEAT]
    return UmTag(";".join(meaningful))
import re from collections import defaultdict from typing import List, Set from languages import languages from utils import CoNLLRow, UdFeat, UdTag, UmFeat, UmTag, ud2um_mapping EMPTY_FEAT = UmFeat("_") def handle_arguments(ud: UdTag) -> List[UmFeat]: def handle_argument(parts): parts = str(parts) if "[psed]" in parts or "[gram]" in parts: return "_" if "[erg]" in parts: kind = "ER" elif "[dat]" in parts: kind = "DA" elif "[abs]" in parts: kind = "AB" else: print(parts) raise AssertionError if "=Plur" in parts: number = "P" elif "=Sing" in parts: number = "S" elif "=Dual" in parts: