def test_read_sd_corpus_multiple_extra_space():
    """Read two sentences of Stanford Dependencies padded with blank lines.

    The fixture is intentionally not ``strip()``-ed before ``splitlines()``,
    so the reader is handed the empty lines that surround the two
    dependency groups as well as the dependencies themselves.
    """
    # NOTE(review): the line layout of this literal was reconstructed (one
    # dependency per line, blank lines around each sentence). The exact
    # number of blank lines in the original fixture should be confirmed.
    sample_deps = '''

det(burrito-2, A-1)
root(ROOT-0, burrito-2)
prep_with(burrito-2, beans-4)
prep_with(burrito-2, chicken-7)
conj_negcc(beans-4, chicken-7)
punct(burrito-2, .-8)


nsubj(cooks-2, Ed-1)
nsubj(sells-4, Ed-1)
root(ROOT-0, cooks-2)
conj_and(cooks-2, sells-4)
dobj(cooks-2, burritos-5)
prep_with(burritos-5, beans-7)
prep_with(burritos-5, rice-10)
conj_negcc(beans-7, rice-10)
punct(cooks-2, .-11)

'''.splitlines()
    trees = [trees_sd.tree4, trees_sd.tree5]
    corpus = Corpus.from_stanford_dependencies(sample_deps, trees)

    # One sentence per dependency group, each rendered as expected.
    assert len(corpus) == 2
    assert stringify_sentence(corpus[0]) == trees_sd.tree4_out_CCprocessed
    assert stringify_sentence(corpus[1]) == trees_sd.tree5_out_CCprocessed
def test_read_sd_corpus_multiple_extra_space():
    """Check that surplus blank lines do not disturb a two-sentence read.

    ``sample_deps`` keeps its leading, separating and trailing empty lines
    (no ``strip()``), and the resulting corpus must still contain exactly
    the two expected sentences.
    """
    # NOTE(review): this literal's line breaks were reconstructed -- one
    # dependency per line with blank-line padding. Confirm the intended
    # number of blank lines against the original fixture.
    sample_deps = '''

det(burrito-2, A-1)
root(ROOT-0, burrito-2)
prep_with(burrito-2, beans-4)
prep_with(burrito-2, chicken-7)
conj_negcc(beans-4, chicken-7)
punct(burrito-2, .-8)


nsubj(cooks-2, Ed-1)
nsubj(sells-4, Ed-1)
root(ROOT-0, cooks-2)
conj_and(cooks-2, sells-4)
dobj(cooks-2, burritos-5)
prep_with(burritos-5, beans-7)
prep_with(burritos-5, rice-10)
conj_negcc(beans-7, rice-10)
punct(cooks-2, .-11)

'''.splitlines()
    corpus = Corpus.from_stanford_dependencies(
        sample_deps,
        [trees_sd.tree4, trees_sd.tree5],
    )

    assert len(corpus) == 2
    first_sentence = stringify_sentence(corpus[0])
    second_sentence = stringify_sentence(corpus[1])
    assert first_sentence == trees_sd.tree4_out_CCprocessed
    assert second_sentence == trees_sd.tree5_out_CCprocessed
def test_conll_readwrite_corpus_multiple_sentences():
    """Round-trip two CoNLL sentences separated by one empty line."""
    # Input: lines of the first example, an empty separator line, then the
    # lines of the second example.
    conll_lines = conll_example.splitlines()
    conll_lines.append('')
    conll_lines.extend(conll_example2.splitlines())
    corpus = Corpus.from_conll(conll_lines)

    first = conll_example.strip()
    second = conll_example2.strip()

    assert len(corpus) == 2
    # Whole-corpus output: sentences joined by a blank line, plus a final
    # newline.
    assert corpus.as_conll() == '\n\n'.join([first, second]) + '\n'
    # Per-sentence output carries no trailing newline.
    assert corpus[0].as_conll() == first
    assert corpus[1].as_conll() == second
def test_read_sd_corpus_single():
    """Read one sentence of Stanford Dependencies into a one-item corpus."""
    # One dependency per line; strip() removes the newlines that frame the
    # literal so splitlines() yields only the six dependency lines.
    sample_deps = '''
det(burrito-2, A-1)
root(ROOT-0, burrito-2)
prep_with(burrito-2, beans-4)
prep_with(burrito-2, chicken-7)
conj_negcc(beans-4, chicken-7)
punct(burrito-2, .-8)
'''.strip().splitlines()
    corpus = Corpus.from_stanford_dependencies(sample_deps, [trees_sd.tree4])

    assert len(corpus) == 1
    assert stringify_sentence(corpus[0]) == trees_sd.tree4_out_CCprocessed
def test_read_sd_corpus_single():
    """Parse a single-sentence dependency listing and check its rendering."""
    # The literal holds one dependency per line; the surrounding newlines
    # are stripped before the text is split into lines.
    sample_deps = """
det(burrito-2, A-1)
root(ROOT-0, burrito-2)
prep_with(burrito-2, beans-4)
prep_with(burrito-2, chicken-7)
conj_negcc(beans-4, chicken-7)
punct(burrito-2, .-8)
""".strip().splitlines()
    trees = [tree4]
    corpus = Corpus.from_stanford_dependencies(sample_deps, trees)

    assert len(corpus) == 1
    only_sentence = corpus[0]
    assert stringify_sentence(only_sentence) == tree4_out_CCprocessed
def test_conll_readwrite_corpus_empty():
    """An empty list of CoNLL lines gives an empty corpus and empty output."""
    no_lines = []
    empty_corpus = Corpus.from_conll(no_lines)

    assert len(empty_corpus) == 0
    serialized = empty_corpus.as_conll()
    assert serialized == ''
def test_conll_readwrite_corpus():
    """Round-trip a single CoNLL sentence through ``Corpus``."""
    conll_lines = conll_example.splitlines()
    corpus = Corpus.from_conll(conll_lines)

    assert len(corpus) == 1
    # Corpus-level output is the stripped example plus one trailing newline.
    expected = conll_example.strip() + '\n'
    assert corpus.as_conll() == expected
def test_conll_readwrite_corpus():
    """Reading then writing the CoNLL example reproduces its stripped text."""
    conll_lines = conll_example.splitlines()
    corpus = Corpus.from_conll(conll_lines)

    # This variant expects no trailing newline on the serialised corpus.
    expected = conll_example.strip()
    assert corpus.as_conll() == expected