Exemplo n.º 1
0
def test_punctuation_verylow(discont_tree, cont_tree):
    """Check transform.punctuation_verylow on both sample trees.

    For each tree the first terminal and the terminal at index 7 are
    rewritten to "(" and ")" before the transform runs. Afterwards the
    test verifies that:

    * the parent the last terminal had *before* the transform equals the
      tree returned by the transform,
    * the children of the first terminal's parent are unchanged,
    * the last terminal now has the same parent as the one before it,
    * the preorder label sequence matches the expected list.
    """
    cases = (
        (discont_tree,
         [u'VROOT', u'S', u'VP', u'SBAR', u'VP', u'WP',
          u'VB', u'?', u'IN', u'NP', u'NNP', u'VB',
          u'NNP', u'VB', u'NNP']),
        (cont_tree,
         [u'VROOT', u'S', u'WP', u'VB', u'NNP',
          u'VP', u'VB', u'NNP', u'SBAR', u'IN',
          u'NP', u'NNP', u'VP', u'VB', u'?']),
    )
    for tree, expected_labels in cases:
        leaves = trees.terminals(tree)
        first_leaf = leaves[0]
        last_leaf = leaves[-1]
        # Snapshot the attachment state before the transform.
        siblings_before = first_leaf.parent.children
        last_parent_before = last_leaf.parent
        first_leaf.data['word'] = "("
        leaves[7].data['word'] = ")"
        transformed = transform.punctuation_verylow(tree)
        siblings_after = first_leaf.parent.children
        last_parent_after = last_leaf.parent
        assert last_parent_before == transformed
        assert siblings_before == siblings_after
        assert last_parent_after == leaves[-2].parent
        assert expected_labels == [visited.data['label'] for visited in
                                   trees.preorder(transformed)]
Exemplo n.º 2
0
def test_ptb_delete_traces(cont_tree):
    """Check transform.ptb_delete_traces.

    The second-to-last terminal is relabelled "-NONE-"; after the
    transform the tree must have exactly one terminal fewer.
    """
    leaves_before = trees.terminals(cont_tree)
    trace_leaf = leaves_before[-2]
    trace_leaf.data['label'] = "-NONE-"
    pruned = transform.ptb_delete_traces(cont_tree)
    leaves_after = trees.terminals(pruned)
    assert len(leaves_after) == len(leaves_before) - 1
Exemplo n.º 3
0
def test_delete_terminal(discont_tree, cont_tree):
    """Check trees.delete_terminal on both sample trees.

    The first terminal is removed from each tree. The call must return
    the removed terminal's former parent, the remaining words must be
    testdata.WORDS without its first entry, and the preorder traversal
    must contain exactly one node fewer than before.
    """
    for tree in (discont_tree, cont_tree):
        size_before = len(list(trees.preorder(tree)))
        victim = trees.terminals(tree)[0]
        victim_parent = victim.parent
        returned = trees.delete_terminal(tree, victim)
        remaining_words = [leaf.data['word']
                           for leaf in trees.terminals(tree)]
        size_after = len(list(trees.preorder(tree)))
        assert returned == victim_parent
        assert remaining_words == testdata.WORDS[1:]
        assert size_after == size_before - 1
Exemplo n.º 4
0
def test_dominance(discont_tree, cont_tree):
    """Check trees.dominance for the first terminal of both sample trees.

    The labels of the nodes yielded by trees.dominance are compared with
    the reference sequences stored in testdata.
    """
    def labels_from_first_leaf(tree):
        # Labels of everything trees.dominance yields for the first leaf.
        first_leaf = trees.terminals(tree)[0]
        return [dominating.data['label']
                for dominating in trees.dominance(first_leaf)]

    discont_labels = labels_from_first_leaf(discont_tree)
    cont_labels = labels_from_first_leaf(cont_tree)
    assert discont_labels == testdata.DISCONT_DOM_FIRST
    assert cont_labels == testdata.CONT_DOM_FIRST
Exemplo n.º 5
0
def test_discont_general(discont_tree):
    """General checks on the discontinuous sample tree.

    Nodes, labels, terminals and words are collected first; then the
    tree is head-marked and binarized, and the 'left' and 'rightd'
    orders produced by treeanalysis.disco_order are compared with the
    reference data. The collections gathered before the transforms are
    checked last.
    """
    tree = discont_tree
    # Snapshot taken *before* head marking and binarization.
    all_nodes = list(trees.preorder(tree))
    all_labels = [visited.data['label'] for visited in all_nodes]
    leaves = trees.terminals(tree)
    leaf_words = [leaf.data['word'] for leaf in leaves]
    unordered_leaves = trees.unordered_terminals(tree)
    unordered_words = [leaf.data['word'] for leaf in unordered_leaves]

    tree = transform.binarize(transform.negra_mark_heads(tree))
    order_left = [visited.data['num'] for visited
                  in treeanalysis.disco_order(tree, 'left')]
    order_rightd = [visited.data['num'] for visited
                    in treeanalysis.disco_order(tree, 'rightd')]

    assert order_left == testdata.DISCONT_LEFT_REORDER
    assert order_rightd == testdata.DISCONT_RIGHTD_REORDER
    assert all('num' in leaf.data for leaf in leaves)
    assert all(leaf in unordered_leaves for leaf in leaves)
    assert len(leaves) == 9
    assert len(unordered_leaves) == 9
    assert len(all_nodes) == 15
    assert all_labels == testdata.DISCONT_LABELS_PREORDER
    assert leaf_words == testdata.WORDS
    assert set(unordered_words) == set(testdata.WORDS)
Exemplo n.º 6
0
def test_punctuation_root(discont_tree, cont_tree):
    """Check transform.punctuation_root on both sample trees.

    One terminal per tree is turned into a comma. After the transform
    its former parent must have one child fewer and the comma must be
    attached to the tree returned by the transform.
    """
    # (tree, index of the terminal to turn into ",",
    #  number of children its parent has before the transform)
    cases = (
        (discont_tree, 0, 2),
        (cont_tree, 3, 3),
    )
    for tree, comma_index, child_count in cases:
        comma = trees.terminals(tree)[comma_index]
        comma.data['word'] = ","
        former_parent = comma.parent
        assert len(trees.children(former_parent)) == child_count
        new_root = transform.punctuation_root(tree)
        assert len(trees.children(former_parent)) == child_count - 1
        assert comma.parent == new_root
Exemplo n.º 7
0
def test_lca(discont_tree, cont_tree):
    """Check trees.lca on the first two terminals of both sample trees.

    The first two terminals must be 'Who' and 'did', and their lowest
    common ancestor must be the first child of the root, labelled 'S'.
    """
    d_first, d_second = trees.terminals(discont_tree)[:2]
    c_first, c_second = trees.terminals(cont_tree)[:2]
    d_top = trees.children(discont_tree)[0]
    c_top = trees.children(cont_tree)[0]
    d_ancestor = trees.lca(d_first, d_second)
    c_ancestor = trees.lca(c_first, c_second)
    assert d_first.data['word'] == 'Who'
    assert c_first.data['word'] == 'Who'
    assert d_second.data['word'] == 'did'
    assert c_second.data['word'] == 'did'
    assert d_top.data['label'] == 'S'
    assert c_top.data['label'] == 'S'
    assert d_top == d_ancestor
    assert c_top == c_ancestor
Exemplo n.º 8
0
def test_punctuation_symetrify(discont_tree, cont_tree):
    """Check transform.punctuation_symetrify on both sample trees.

    Three punctuation terminals (two double quotes tagged ``$(`` and a
    comma tagged ``$,``) are inserted from a temporary terminal file.
    The tree is then root-attached, symmetrified and numbered with
    treeoutput.compute_export_numbering. Finally the 'num' values of the
    parents of the inserted terminals are compared with the expected
    numbers (504 for both quotes, 503 for the comma).
    """
    terminal_lines = [
        '1\t3\t"\t$(\n',
        '1\t5\t"\t$(\n',
        '1\t8\t,\t$,\n',
    ]
    for tree in (discont_tree, cont_tree):
        old_terms = trees.terminals(tree)
        # The context manager closes (and thereby deletes) the temporary
        # file deterministically instead of leaving it to the garbage
        # collector.
        with tempfile.NamedTemporaryFile(mode='w') as temp:
            temp.writelines(terminal_lines)
            # Flush so the content is on disk when the file is re-opened
            # by name inside insert_terminals.
            temp.flush()
            tree = transform.insert_terminals(tree,
                                              terminalfile=temp.name,
                                              quiet=True)
        new_terms = trees.terminals(tree)
        assert len(old_terms) == len(new_terms) - 3
        tree = transform.root_attach(tree)
        tree = transform.punctuation_symetrify(tree)
        treeoutput.compute_export_numbering(tree)
        assert new_terms[2].parent.data['num'] == 504
        assert new_terms[4].parent.data['num'] == 504
        assert new_terms[7].parent.data['num'] == 503
Exemplo n.º 9
0
def test_boyd(discont_tree):
    """Check transform.boyd_split on the discontinuous sample tree.

    The tree is root-attached and head-marked before the split. The
    resulting preorder labels are compared with the reference data, and
    the words must be unchanged.
    """
    split_tree = transform.boyd_split(
        transform.negra_mark_heads(
            transform.root_attach(discont_tree)))
    preorder_labels = [visited.data['label']
                       for visited in trees.preorder(split_tree)]
    ordered_words = [leaf.data['word']
                     for leaf in trees.terminals(split_tree)]
    unordered_words = [leaf.data['word']
                       for leaf in trees.unordered_terminals(split_tree)]
    assert preorder_labels == testdata.DISCONT_LABELSBOYD_PREORDER
    assert ordered_words == testdata.WORDS
    assert set(unordered_words) == set(testdata.WORDS)
Exemplo n.º 10
0
def test_root_attach(discont_tree):
    """Check transform.root_attach on the discontinuous sample tree.

    After root attachment the preorder labels and the words must match
    the reference data. Calling transform.boyd_split on the result
    (without head marking in between) is expected to raise ValueError.
    """
    attached = transform.root_attach(discont_tree)
    preorder_labels = [visited.data['label']
                       for visited in trees.preorder(attached)]
    ordered_words = [leaf.data['word']
                     for leaf in trees.terminals(attached)]
    unordered_words = [leaf.data['word']
                       for leaf in trees.unordered_terminals(attached)]
    assert preorder_labels == testdata.DISCONT_LABELS_PREORDER
    assert ordered_words == testdata.WORDS
    assert set(unordered_words) == set(testdata.WORDS)
    with pytest.raises(ValueError):
        transform.boyd_split(attached)
Exemplo n.º 11
0
def test_cont_general(cont_tree):
    """General checks on the continuous sample tree.

    Verifies the terminal and node counts, that every terminal carries a
    'num' entry and also appears among the unordered terminals, and that
    labels and words match the reference data.
    """
    leaves = trees.terminals(cont_tree)
    unordered_leaves = trees.unordered_terminals(cont_tree)
    all_nodes = list(trees.preorder(cont_tree))
    all_labels = [visited.data['label'] for visited in all_nodes]
    leaf_words = [leaf.data['word'] for leaf in leaves]
    unordered_words = [leaf.data['word'] for leaf in unordered_leaves]
    assert all('num' in leaf.data for leaf in leaves)
    assert all(leaf in unordered_leaves for leaf in leaves)
    assert len(leaves) == 9
    assert len(unordered_leaves) == 9
    assert len(all_nodes) == 15
    assert all_labels == testdata.CONT_LABELS_PREORDER
    assert leaf_words == testdata.WORDS
    assert set(unordered_words) == set(testdata.WORDS)
Exemplo n.º 12
0
def cont_tree(request):
    """Load continuous tree samples.

    ``request.param`` is indexed as a triple: ``[0]`` is a callable that
    creates a tree reader from a file name, an encoding and keyword
    options; ``[1]`` is the text written to the temporary input file;
    ``[2]`` is the mapping of keyword options for the reader.

    The sample text is written to a temporary file which is removed
    again by a finalizer once the requesting test is finished.

    Returns the first tree produced by the reader. If every terminal
    carries trees.DEFAULT_LABEL, the labels are replaced with the tags
    from testdata.POS.
    """
    with tempfile.NamedTemporaryFile(mode='w', delete=False) as temp:
        tempfile_name = temp.name
        temp.write(request.param[1])
    # Leaving the ``with`` block closes the file and flushes the content.

    def fin():
        os.remove(tempfile_name)
    # Register the cleanup right away: the file was created with
    # delete=False and would otherwise be left behind, also when the
    # reader below raises.
    request.addfinalizer(fin)

    # Work on a copy so the shared parametrization data is not mutated.
    reader_options = dict(request.param[2], quiet=True)
    reader = request.param[0](tempfile_name, 'utf8', **reader_options)
    tree = next(reader)
    # 'fix' POS tags for brackets_emptypos mode
    terms = trees.terminals(tree)
    if all(term.data['label'] == trees.DEFAULT_LABEL for term in terms):
        for term, pos in zip(terms, testdata.POS):
            term.data['label'] = pos
    return tree
Exemplo n.º 13
0
def test_insert_terminal(discont_tree, cont_tree):
    """Check transform.insert_terminals on both sample trees.

    Two terminal files are used per tree:

    * one in which the line for position 6 occurs twice; inserting from
      it is expected to raise ValueError,
    * the same file without the repeated line; inserting from it must
      add exactly two terminals, 'Test1' at index 1 and 'Test2' at
      index 5 of the terminal sequence, with the POS tags 'PosTest1'
      and 'PosTest2'.
    """
    valid_lines = [
        '1\t0\tTest1\tPosTest1\n',
        '1\t2\tTest1\tPosTest1\n',
        '1\t6\tTest2\tPosTest2\n',
        '1\t100\tTest2\tPosTest2\n',
    ]
    # Identical to valid_lines except that the position-6 line is repeated.
    duplicate_lines = [
        '1\t0\tTest1\tPosTest1\n',
        '1\t2\tTest1\tPosTest1\n',
        '1\t6\tTest2\tPosTest2\n',
        '1\t6\tTest2\tPosTest2\n',
        '1\t100\tTest2\tPosTest2\n',
    ]
    for tree in (discont_tree, cont_tree):
        # Context managers close (and thereby delete) the temporary
        # files deterministically instead of relying on garbage
        # collection.
        with tempfile.NamedTemporaryFile(mode='w') as temp:
            temp.writelines(duplicate_lines)
            # Flush so the content is on disk when the file is re-opened
            # by name inside insert_terminals.
            temp.flush()
            with pytest.raises(ValueError):
                transform.insert_terminals(tree,
                                           terminalfile=temp.name,
                                           quiet=True)

        old_terms = trees.terminals(tree)
        with tempfile.NamedTemporaryFile(mode='w') as temp:
            temp.writelines(valid_lines)
            temp.flush()
            tree = transform.insert_terminals(tree,
                                              terminalfile=temp.name,
                                              quiet=True)
        new_terms = trees.terminals(tree)
        assert len(old_terms) == len(new_terms) - 2

        gold_words = list(testdata.WORDS)
        gold_words[1:1] = ['Test1']
        gold_words[5:5] = ['Test2']
        out_words = [term.data['word'] for term in new_terms]
        assert gold_words == out_words

        gold_pos = list(testdata.POS)
        gold_pos[1:1] = ['PosTest1']
        gold_pos[5:5] = ['PosTest2']
        out_pos = [term.data['label'] for term in new_terms]
        assert gold_pos == out_pos