def test_pseudoprojectivity(EN):
    """Exercise PseudoProjectivity and the projectivize/deprojectivize round trip.

    Covers, in order: the label helpers (``decompose``, ``is_decorated``),
    ``_lift``, ``_get_smallest_nonproj_arc``, a projectivize -> deprojectivize
    round trip on two non-projective trees, and two cases where the decorated
    labels handed to ``deprojectivize`` are deliberately misleading.

    Args:
        EN: fixture forwarded unchanged to ``deprojectivize``
            (presumably the English pipeline; confirm against conftest).

    NOTE(review): this file defines ``test_pseudoprojectivity`` a second time
    further down; that later definition rebinds the name, so only one of the
    two copies is actually collected.
    """
    # Head lists; entry i is presumably the index of token i's head (TODO confirm).
    tree = [1, 2, 2]
    nonproj_tree = [1, 2, 2, 4, 5, 2, 7, 4, 2]
    labels = ["det", "nsubj", "root", "det", "dobj", "aux", "nsubj", "acl", "punct"]
    nonproj_tree2 = [9, 1, 3, 1, 5, 6, 9, 8, 6, 1, 6, 12, 13, 10, 1]
    labels2 = [
        "advmod", "root", "det", "nsubj", "advmod",
        "det", "dobj", "det", "nmod", "aux",
        "nmod", "advmod", "det", "amod", "punct",
    ]

    # "X||Y" splits into its two parts; an undecorated label yields an empty
    # second part.
    assert PseudoProjectivity.decompose("X||Y") == ("X", "Y")
    assert PseudoProjectivity.decompose("X") == ("X", "")
    # Plain truthiness checks rather than `== True` / `== False` (PEP 8, E712).
    assert PseudoProjectivity.is_decorated("X||Y")
    assert not PseudoProjectivity.is_decorated("X")

    # _lift works in place: the list passed in is the one inspected afterwards.
    PseudoProjectivity._lift(0, tree)
    assert tree == [2, 2, 2]

    assert PseudoProjectivity._get_smallest_nonproj_arc(nonproj_tree) == 7
    assert PseudoProjectivity._get_smallest_nonproj_arc(nonproj_tree2) == 10

    # Round trip, first tree: one head is changed and its label gets decorated.
    proj_heads, deco_labels = PseudoProjectivity.projectivize(nonproj_tree, labels)
    assert proj_heads == [1, 2, 2, 4, 5, 2, 7, 5, 2]
    assert deco_labels == [
        "det", "nsubj", "root", "det", "dobj", "aux", "nsubj", "acl||dobj", "punct",
    ]
    deproj_heads, undeco_labels = deprojectivize(proj_heads, deco_labels, EN)
    assert deproj_heads == nonproj_tree
    assert undeco_labels == labels

    # Round trip, second tree: two heads are changed.
    proj_heads, deco_labels = PseudoProjectivity.projectivize(nonproj_tree2, labels2)
    assert proj_heads == [1, 1, 3, 1, 5, 6, 9, 8, 6, 1, 9, 12, 13, 10, 1]
    assert deco_labels == [
        "advmod||aux", "root", "det", "nsubj", "advmod",
        "det", "dobj", "det", "nmod", "aux",
        "nmod||dobj", "advmod", "det", "amod", "punct",
    ]
    deproj_heads, undeco_labels = deprojectivize(proj_heads, deco_labels, EN)
    assert deproj_heads == nonproj_tree2
    assert undeco_labels == labels2

    # if decoration is wrong such that there is no head with the desired label
    # the structure is kept and the label is undecorated
    proj_heads = [1, 2, 2, 4, 5, 2, 7, 5, 2]
    # Snapshot the input so the equality below cannot pass vacuously should
    # the helper ever modify the list it is given.
    expected_heads = list(proj_heads)
    deco_labels = [
        "det", "nsubj", "root", "det", "dobj", "aux", "nsubj", "acl||iobj", "punct",
    ]
    deproj_heads, undeco_labels = deprojectivize(proj_heads, deco_labels, EN)
    assert deproj_heads == expected_heads
    assert undeco_labels == [
        "det", "nsubj", "root", "det", "dobj", "aux", "nsubj", "acl", "punct",
    ]

    # if there are two potential new heads, the first one is chosen even if
    # it's wrong
    proj_heads = [1, 1, 3, 1, 5, 6, 9, 8, 6, 1, 9, 12, 13, 10, 1]
    deco_labels = [
        "advmod||aux", "root", "det", "aux", "advmod",
        "det", "dobj", "det", "nmod", "aux",
        "nmod||dobj", "advmod", "det", "amod", "punct",
    ]
    deproj_heads, undeco_labels = deprojectivize(proj_heads, deco_labels, EN)
    assert deproj_heads == [3, 1, 3, 1, 5, 6, 9, 8, 6, 1, 6, 12, 13, 10, 1]
    assert undeco_labels == [
        "advmod", "root", "det", "aux", "advmod",
        "det", "dobj", "det", "nmod", "aux",
        "nmod", "advmod", "det", "amod", "punct",
    ]
def test_pseudoprojectivity(EN):
    """Check PseudoProjectivity helpers and projectivize/deprojectivize.

    The test walks through the label helpers (``decompose``,
    ``is_decorated``), ``_lift``, ``_get_smallest_nonproj_arc``, then
    projectivizes two non-projective trees and verifies that
    ``deprojectivize`` restores them. It finishes with two inputs whose
    decorated labels are deliberately misleading.

    Args:
        EN: fixture forwarded unchanged to ``deprojectivize``
            (presumably the English pipeline; confirm against conftest).

    NOTE(review): an earlier definition of ``test_pseudoprojectivity`` exists
    in this file; this later definition rebinds the name, so the earlier copy
    is never collected.
    """
    # Head lists; entry i is presumably the index of token i's head (TODO confirm).
    tree = [1, 2, 2]
    nonproj_tree = [1, 2, 2, 4, 5, 2, 7, 4, 2]
    labels = [
        'det', 'nsubj', 'root', 'det', 'dobj', 'aux', 'nsubj', 'acl', 'punct'
    ]
    nonproj_tree2 = [9, 1, 3, 1, 5, 6, 9, 8, 6, 1, 6, 12, 13, 10, 1]
    labels2 = [
        'advmod', 'root', 'det', 'nsubj', 'advmod', 'det', 'dobj', 'det',
        'nmod', 'aux', 'nmod', 'advmod', 'det', 'amod', 'punct'
    ]

    # 'X||Y' splits into its two parts; an undecorated label yields an empty
    # second part.
    assert PseudoProjectivity.decompose('X||Y') == ('X', 'Y')
    assert PseudoProjectivity.decompose('X') == ('X', '')
    # Plain truthiness checks rather than `== True` / `== False` (PEP 8, E712).
    assert PseudoProjectivity.is_decorated('X||Y')
    assert not PseudoProjectivity.is_decorated('X')

    # _lift works in place: the list passed in is the one inspected afterwards.
    PseudoProjectivity._lift(0, tree)
    assert tree == [2, 2, 2]

    assert PseudoProjectivity._get_smallest_nonproj_arc(nonproj_tree) == 7
    assert PseudoProjectivity._get_smallest_nonproj_arc(nonproj_tree2) == 10

    # Round trip, first tree: one head is changed and its label gets decorated.
    proj_heads, deco_labels = PseudoProjectivity.projectivize(
        nonproj_tree, labels)
    assert proj_heads == [1, 2, 2, 4, 5, 2, 7, 5, 2]
    assert deco_labels == [
        'det', 'nsubj', 'root', 'det', 'dobj', 'aux', 'nsubj', 'acl||dobj',
        'punct'
    ]
    deproj_heads, undeco_labels = deprojectivize(proj_heads, deco_labels, EN)
    assert deproj_heads == nonproj_tree
    assert undeco_labels == labels

    # Round trip, second tree: two heads are changed.
    proj_heads, deco_labels = PseudoProjectivity.projectivize(
        nonproj_tree2, labels2)
    assert proj_heads == [1, 1, 3, 1, 5, 6, 9, 8, 6, 1, 9, 12, 13, 10, 1]
    assert deco_labels == [
        'advmod||aux', 'root', 'det', 'nsubj', 'advmod', 'det', 'dobj', 'det',
        'nmod', 'aux', 'nmod||dobj', 'advmod', 'det', 'amod', 'punct'
    ]
    deproj_heads, undeco_labels = deprojectivize(proj_heads, deco_labels, EN)
    assert deproj_heads == nonproj_tree2
    assert undeco_labels == labels2

    # if decoration is wrong such that there is no head with the desired label
    # the structure is kept and the label is undecorated
    proj_heads = [1, 2, 2, 4, 5, 2, 7, 5, 2]
    # Snapshot the input so the equality below cannot pass vacuously should
    # the helper ever modify the list it is given.
    heads_before = list(proj_heads)
    deco_labels = [
        'det', 'nsubj', 'root', 'det', 'dobj', 'aux', 'nsubj', 'acl||iobj',
        'punct'
    ]
    deproj_heads, undeco_labels = deprojectivize(proj_heads, deco_labels, EN)
    assert deproj_heads == heads_before
    assert undeco_labels == [
        'det', 'nsubj', 'root', 'det', 'dobj', 'aux', 'nsubj', 'acl', 'punct'
    ]

    # if there are two potential new heads, the first one is chosen even if
    # it's wrong
    proj_heads = [1, 1, 3, 1, 5, 6, 9, 8, 6, 1, 9, 12, 13, 10, 1]
    deco_labels = [
        'advmod||aux', 'root', 'det', 'aux', 'advmod', 'det', 'dobj', 'det',
        'nmod', 'aux', 'nmod||dobj', 'advmod', 'det', 'amod', 'punct'
    ]
    deproj_heads, undeco_labels = deprojectivize(proj_heads, deco_labels, EN)
    assert deproj_heads == [3, 1, 3, 1, 5, 6, 9, 8, 6, 1, 6, 12, 13, 10, 1]
    assert undeco_labels == [
        'advmod', 'root', 'det', 'aux', 'advmod', 'det', 'dobj', 'det', 'nmod',
        'aux', 'nmod', 'advmod', 'det', 'amod', 'punct'
    ]