def test_write_conll(spacy_doc):
    """Check the overall shape of the text produced by ``export.doc_to_conll``.

    Three properties of the output are verified:

    * exactly two ``# sent_id N`` header lines (single-digit ``N``) appear;
    * every token line -- a line starting with digits followed by
      whitespace -- contains nine tabs, i.e. ten tab-separated columns;
    * every token line consists solely of the index followed by
      whitespace-separated fields drawn from word characters and
      ``= . $ -``.

    Args:
        spacy_doc: Document handed to ``export.doc_to_conll``; presumably a
            pytest fixture supplying a parsed spaCy doc (not visible here).
    """
    result = export.doc_to_conll(spacy_doc)

    sent_headers = re.findall(r"^# sent_id \d$", result, flags=re.MULTILINE)
    assert len(sent_headers) == 2

    # Split and filter once; both per-line checks look at the same token lines.
    token_line_start = re.compile(r"^\d+\s")
    token_lines = [
        line for line in result.split("\n") if token_line_start.search(line)
    ]

    for line in token_lines:
        assert line.count("\t") == 9, "unexpected column count: {!r}".format(line)

    token_line_body = re.compile(r"\d+\s([\w=\.\$\-]+\s?)+")
    for line in token_lines:
        # ``search`` returns None when nothing valid follows the index; guard
        # it so the test fails with a clear assertion instead of raising
        # AttributeError from ``None.group()``.  ``search`` + equality is kept
        # (rather than ``fullmatch``) because the nested quantifiers could
        # backtrack exponentially when forced to match the whole line.
        match = token_line_body.search(line)
        assert match is not None and match.group() == line, (
            "malformed token line: {!r}".format(line)
        )
def test_write_conll(self):
    """Compare ``export.doc_to_conll(self.spacy_doc)`` with the exact expected text."""
    # One literal per output line; adjacent literals are concatenated by the
    # parser, so ``want`` is a single string identical to the flat form.
    want = (
        "# sent_id 1\n"
        "1\tI\ti\tPRON\tPRP\t_\t4\tnsubj\t_\t_\n"
        "2\twould\twould\tVERB\tMD\t_\t4\taux\t_\t_\n"
        "3\thave\thave\tVERB\tVB\t_\t4\taux\t_\t_\n"
        "4\tlived\tlive\tVERB\tVBN\t_\t0\troot\t_\t_\n"
        "5\tin\tin\tADP\tIN\t_\t4\tprep\t_\t_\n"
        "6\tpeace\tpeace\tNOUN\tNN\t_\t5\tpobj\t_\tSpaceAfter=No\n"
        "7\t.\t.\tPUNCT\t.\t_\t4\tpunct\t_\t_\n"
        "\n"
        "# sent_id 2\n"
        "1\tBut\tbut\tCONJ\tCC\t_\t4\tcc\t_\t_\n"
        "2\tmy\tmy\tADJ\tPRP$\t_\t3\tposs\t_\t_\n"
        "3\tenemies\tenemy\tNOUN\tNNS\t_\t4\tnsubj\t_\t_\n"
        "4\tbrought\tbring\tVERB\tVBD\t_\t0\troot\t_\t_\n"
        "5\tme\tme\tPRON\tPRP\t_\t4\tdative\t_\t_\n"
        "6\twar\twar\tNOUN\tNN\t_\t4\tdobj\t_\tSpaceAfter=No\n"
        "7\t.\t.\tPUNCT\t.\t_\t4\tpunct\t_\tSpaceAfter=No\n"
    )
    got = export.doc_to_conll(self.spacy_doc)
    self.assertEqual(got, want)