Пример #1
0
def test_pseudoprojectivity(EN):
    """Check the PseudoProjectivity helpers and the projectivize round trip.

    Covers, in order: label decomposition and the decoration check, lifting
    an arc, finding the smallest non-projective arc, projectivizing two
    non-projective trees, and restoring heads and labels with
    ``deprojectivize`` (including two cases with misleading decorations).

    Args:
        EN: Fixture passed through unchanged to ``deprojectivize``
            (presumably the English pipeline -- confirm against the fixture).
    """
    # Head lists: entry i is the head index of token i. In both labelled
    # fixtures the token labelled "root" is its own head.
    tree = [1, 2, 2]
    nonproj_tree = [1, 2, 2, 4, 5, 2, 7, 4, 2]
    labels = ["det", "nsubj", "root", "det", "dobj", "aux", "nsubj", "acl", "punct"]
    nonproj_tree2 = [9, 1, 3, 1, 5, 6, 9, 8, 6, 1, 6, 12, 13, 10, 1]
    labels2 = [
        "advmod",
        "root",
        "det",
        "nsubj",
        "advmod",
        "det",
        "dobj",
        "det",
        "nmod",
        "aux",
        "nmod",
        "advmod",
        "det",
        "amod",
        "punct",
    ]

    # A decorated label splits on "||"; an undecorated one yields an empty
    # second component.
    assert PseudoProjectivity.decompose("X||Y") == ("X", "Y")
    assert PseudoProjectivity.decompose("X") == ("X", "")

    # Plain truthiness asserts instead of comparing with `== True/False`.
    assert PseudoProjectivity.is_decorated("X||Y")
    assert not PseudoProjectivity.is_decorated("X")

    # _lift works in place: token 0 is expected to move from head 1 to head 2.
    PseudoProjectivity._lift(0, tree)
    assert tree == [2, 2, 2]

    np_arc = PseudoProjectivity._get_smallest_nonproj_arc(nonproj_tree)
    assert np_arc == 7

    np_arc = PseudoProjectivity._get_smallest_nonproj_arc(nonproj_tree2)
    assert np_arc == 10

    # Round trip 1: token 7 is expected to move from head 4 to head 5 and to
    # carry the label of its former head ("dobj") as decoration.
    proj_heads, deco_labels = PseudoProjectivity.projectivize(nonproj_tree, labels)
    assert proj_heads == [1, 2, 2, 4, 5, 2, 7, 5, 2]
    assert deco_labels == [
        "det",
        "nsubj",
        "root",
        "det",
        "dobj",
        "aux",
        "nsubj",
        "acl||dobj",
        "punct",
    ]
    deproj_heads, undeco_labels = deprojectivize(proj_heads, deco_labels, EN)
    assert deproj_heads == nonproj_tree
    assert undeco_labels == labels

    # Round trip 2: tokens 0 and 10 are expected to be re-attached and
    # decorated with the labels of their former heads ("aux" and "dobj").
    proj_heads, deco_labels = PseudoProjectivity.projectivize(nonproj_tree2, labels2)
    assert proj_heads == [1, 1, 3, 1, 5, 6, 9, 8, 6, 1, 9, 12, 13, 10, 1]
    assert deco_labels == [
        "advmod||aux",
        "root",
        "det",
        "nsubj",
        "advmod",
        "det",
        "dobj",
        "det",
        "nmod",
        "aux",
        "nmod||dobj",
        "advmod",
        "det",
        "amod",
        "punct",
    ]
    deproj_heads, undeco_labels = deprojectivize(proj_heads, deco_labels, EN)
    assert deproj_heads == nonproj_tree2
    assert undeco_labels == labels2

    # if decoration is wrong such that there is no head with the desired label
    # the structure is kept and the label is undecorated
    proj_heads = [1, 2, 2, 4, 5, 2, 7, 5, 2]
    deco_labels = [
        "det",
        "nsubj",
        "root",
        "det",
        "dobj",
        "aux",
        "nsubj",
        "acl||iobj",
        "punct",
    ]
    deproj_heads, undeco_labels = deprojectivize(proj_heads, deco_labels, EN)
    assert deproj_heads == proj_heads
    assert undeco_labels == [
        "det",
        "nsubj",
        "root",
        "det",
        "dobj",
        "aux",
        "nsubj",
        "acl",
        "punct",
    ]

    # if there are two potential new heads, the first one is chosen even if it's wrong
    # (token 3 is relabelled "aux" here, so token 0 is expected to attach to
    # it instead of to token 9).
    proj_heads = [1, 1, 3, 1, 5, 6, 9, 8, 6, 1, 9, 12, 13, 10, 1]
    deco_labels = [
        "advmod||aux",
        "root",
        "det",
        "aux",
        "advmod",
        "det",
        "dobj",
        "det",
        "nmod",
        "aux",
        "nmod||dobj",
        "advmod",
        "det",
        "amod",
        "punct",
    ]
    deproj_heads, undeco_labels = deprojectivize(proj_heads, deco_labels, EN)
    assert deproj_heads == [3, 1, 3, 1, 5, 6, 9, 8, 6, 1, 6, 12, 13, 10, 1]
    assert undeco_labels == [
        "advmod",
        "root",
        "det",
        "aux",
        "advmod",
        "det",
        "dobj",
        "det",
        "nmod",
        "aux",
        "nmod",
        "advmod",
        "det",
        "amod",
        "punct",
    ]
Пример #2
0
def test_pseudoprojectivity(EN):
    """Check the PseudoProjectivity helpers and the projectivize round trip.

    The test walks through label decomposition, the decoration check, arc
    lifting, detection of the smallest non-projective arc, and finally the
    projectivize -> deprojectivize round trip on two non-projective trees,
    followed by two cases where the decoration is misleading.

    Args:
        EN: Fixture handed unchanged to ``deprojectivize`` (presumably the
            English pipeline -- confirm against the fixture definition).
    """
    # Entry i of a head list is the head index of token i; the token labelled
    # 'root' is its own head in both labelled fixtures.
    tree = [1, 2, 2]
    nonproj_tree = [1, 2, 2, 4, 5, 2, 7, 4, 2]
    labels = [
        'det', 'nsubj', 'root', 'det', 'dobj', 'aux', 'nsubj', 'acl', 'punct'
    ]
    nonproj_tree2 = [9, 1, 3, 1, 5, 6, 9, 8, 6, 1, 6, 12, 13, 10, 1]
    labels2 = [
        'advmod', 'root', 'det', 'nsubj', 'advmod', 'det', 'dobj', 'det',
        'nmod', 'aux', 'nmod', 'advmod', 'det', 'amod', 'punct'
    ]

    # Decorated labels split on '||'; undecorated ones get an empty suffix.
    assert PseudoProjectivity.decompose('X||Y') == ('X', 'Y')
    assert PseudoProjectivity.decompose('X') == ('X', '')

    # Truthiness asserts replace the non-idiomatic `== True` / `== False`.
    assert PseudoProjectivity.is_decorated('X||Y')
    assert not PseudoProjectivity.is_decorated('X')

    # _lift modifies the head list in place (token 0: head 1 -> head 2).
    PseudoProjectivity._lift(0, tree)
    assert tree == [2, 2, 2]

    np_arc = PseudoProjectivity._get_smallest_nonproj_arc(nonproj_tree)
    assert np_arc == 7

    np_arc = PseudoProjectivity._get_smallest_nonproj_arc(nonproj_tree2)
    assert np_arc == 10

    # First round trip: token 7 is expected to be re-attached (head 4 -> 5)
    # and decorated with its former head's label, 'dobj'.
    proj_heads, deco_labels = PseudoProjectivity.projectivize(
        nonproj_tree, labels)
    assert proj_heads == [1, 2, 2, 4, 5, 2, 7, 5, 2]
    assert deco_labels == [
        'det', 'nsubj', 'root', 'det', 'dobj', 'aux', 'nsubj', 'acl||dobj',
        'punct'
    ]
    deproj_heads, undeco_labels = deprojectivize(proj_heads, deco_labels, EN)
    assert deproj_heads == nonproj_tree
    assert undeco_labels == labels

    # Second round trip: tokens 0 and 10 are expected to be re-attached and
    # decorated with 'aux' and 'dobj' respectively.
    proj_heads, deco_labels = PseudoProjectivity.projectivize(
        nonproj_tree2, labels2)
    assert proj_heads == [1, 1, 3, 1, 5, 6, 9, 8, 6, 1, 9, 12, 13, 10, 1]
    assert deco_labels == [
        'advmod||aux', 'root', 'det', 'nsubj', 'advmod', 'det', 'dobj', 'det',
        'nmod', 'aux', 'nmod||dobj', 'advmod', 'det', 'amod', 'punct'
    ]
    deproj_heads, undeco_labels = deprojectivize(proj_heads, deco_labels, EN)
    assert deproj_heads == nonproj_tree2
    assert undeco_labels == labels2

    # if decoration is wrong such that there is no head with the desired label
    # the structure is kept and the label is undecorated
    proj_heads = [1, 2, 2, 4, 5, 2, 7, 5, 2]
    deco_labels = [
        'det', 'nsubj', 'root', 'det', 'dobj', 'aux', 'nsubj', 'acl||iobj',
        'punct'
    ]
    deproj_heads, undeco_labels = deprojectivize(proj_heads, deco_labels, EN)
    assert deproj_heads == proj_heads
    assert undeco_labels == [
        'det', 'nsubj', 'root', 'det', 'dobj', 'aux', 'nsubj', 'acl', 'punct'
    ]

    # if there are two potential new heads, the first one is chosen even if it's wrong
    # (token 3 carries 'aux' in this fixture, so token 0 is expected to end up
    # under token 3 rather than token 9).
    proj_heads = [1, 1, 3, 1, 5, 6, 9, 8, 6, 1, 9, 12, 13, 10, 1]
    deco_labels = [
        'advmod||aux', 'root', 'det', 'aux', 'advmod', 'det', 'dobj', 'det',
        'nmod', 'aux', 'nmod||dobj', 'advmod', 'det', 'amod', 'punct'
    ]
    deproj_heads, undeco_labels = deprojectivize(proj_heads, deco_labels, EN)
    assert deproj_heads == [3, 1, 3, 1, 5, 6, 9, 8, 6, 1, 6, 12, 13, 10, 1]
    assert undeco_labels == [
        'advmod', 'root', 'det', 'aux', 'advmod', 'det', 'dobj', 'det', 'nmod',
        'aux', 'nmod', 'advmod', 'det', 'amod', 'punct'
    ]