Пример #1
0
def handle_possession(ud_tag: UdTag) -> UmFeat:
    """Build the UM possession feature from the ``[psor]`` features of a UD tag.

    Args:
        ud_tag: Iterable of UD feature strings such as ``"Number[psor]=Sing"``.

    Returns:
        ``EMPTY_FEAT`` when the tag carries no ``[psor]`` feature; ``PSSD``
        when any ``[psor]`` feature contains the text ``None``; otherwise
        ``PSS`` followed by the possessor's person digit and number letter
        (either of which may be absent).

    Raises:
        AssertionError: If more than two ``[psor]`` features are present, or
            if a possessor ``Number``/``Person`` feature carries a value that
            is not handled here.
    """
    psor_parts = [p for p in ud_tag if "[psor]" in p]
    if not psor_parts:
        return EMPTY_FEAT

    # Textual view of all possessor features, used for the substring checks.
    psor_text = str(psor_parts)
    if "None" in psor_text:
        return UmFeat("PSSD")

    # The sanity checks below raise AssertionError explicitly instead of using
    # ``assert`` so that they are not stripped when Python runs with ``-O``;
    # the exception type seen by callers is unchanged.
    if len(psor_parts) > 2:
        print(psor_parts)  # kept: existing stdout diagnostic
        raise AssertionError(
            f"expected at most two [psor] features, got {psor_parts}")

    # First matching value wins, in the order Plur, Sing, Dual.
    number = ""
    for value, code in (("Plur", "P"), ("Sing", "S"), ("Dual", "D")):
        if f"Number[psor]={value}" in psor_parts:
            number = code
            break
    else:
        if "Number" in psor_text:
            raise AssertionError(
                f"unrecognised possessor number in {psor_parts}")

    # First matching value wins, in the order 1, 2, 3.
    person = ""
    for digit in ("1", "2", "3"):
        if f"Person[psor]={digit}" in psor_parts:
            person = digit
            break
    else:
        if "Person" in psor_text:
            raise AssertionError(
                f"unrecognised possessor person in {psor_parts}")

    return UmFeat(f"PSS{person}{number}")
Пример #2
0
def ud2um(ud_tag: UdTag) -> UmTag:
    """Convert a UD tag into a UM tag string.

    The possession feature and the argument features are emitted first,
    followed by the conversion of every individual UD feature. A feature with
    several comma-separated values is converted value by value and rendered
    as ``{a/b}``. Placeholder (``_``) results are dropped; if nothing else
    remains the result is the placeholder alone.

    Args:
        ud_tag: Iterable of UD feature strings.

    Returns:
        The UM features joined with ``;``.
    """
    collected: List[UmFeat] = [handle_possession(ud_tag)]
    collected += handle_arguments(ud_tag)

    for feature in ud_tag:
        if "," not in feature:
            # Single-valued feature: convert it directly.
            collected.append(process_tag(feature))
            continue

        # Multi-valued feature: convert each value on its own.
        name, joined_values = feature.split("=")
        converted = [
            process_tag(UdFeat(f"{name}={value}"))
            for value in joined_values.split(",")
        ]
        alternatives = [c for c in converted if c != "_"]
        if alternatives:
            collected.append(UmFeat("{" + "/".join(alternatives) + "}"))
        else:
            collected.append(EMPTY_FEAT)

    meaningful = [feat for feat in collected if str(feat) != "_"]
    if not meaningful:
        meaningful = [EMPTY_FEAT]
    return UmTag(";".join(meaningful))
Пример #3
0
import re
from collections import defaultdict
from typing import List, Set

from languages import languages
from utils import CoNLLRow, UdFeat, UdTag, UmFeat, UmTag, ud2um_mapping

# Placeholder UM feature ("_") standing in for "no feature".
EMPTY_FEAT = UmFeat("_")


def handle_arguments(ud: UdTag) -> List[UmFeat]:
    def handle_argument(parts):
        parts = str(parts)
        if "[psed]" in parts or "[gram]" in parts:
            return "_"

        if "[erg]" in parts:
            kind = "ER"
        elif "[dat]" in parts:
            kind = "DA"
        elif "[abs]" in parts:
            kind = "AB"
        else:
            print(parts)
            raise AssertionError

        if "=Plur" in parts:
            number = "P"
        elif "=Sing" in parts:
            number = "S"
        elif "=Dual" in parts: