def test_parser_load_none(self):
    """
    Parse corpus-2 with graph_loading=LOAD_NONE and check that every
    graph of every pair is a GraphStub, and that every bank has class
    SparseGraphBank and length 3.
    """
    corpus = PGCParser().parse("data/corpus-2.pgc", graph_loading=LOAD_NONE)

    for pair in corpus:
        # graphs are expected to be placeholders only
        for stub in pair._graphs:
            self.assertTrue(isinstance(stub, GraphStub))

        # the class must match exactly (no subclass) and the length is fixed
        for graphbank in pair._banks:
            self.assertEqual(graphbank.__class__, SparseGraphBank)
            self.assertEqual(len(graphbank), 3)
def test_missing_graphbank(self):
    """
    Overwrite the text of the element found by "//file" with "XXX",
    write the tree to the temporary file, and expect parsing that file
    to raise DaesoError.
    """
    # NOTE(review): presumably "XXX" names a graphbank file that does not
    # exist -- confirm against the corpus format
    self.tree.find("//file").text = "XXX"
    self.tree.write(self.tmpfn)

    self.assertRaises(DaesoError, PGCParser().parse, self.tmpfn)
def test_merge_2(self):
    """
    Extending the fixture corpus with corpus-2 should give a corpus of
    length 6 whose generated tree has 4 children under the element
    found by "//graphbanks"; the generated file must parse again.
    """
    other_corpus = PGCParser().parse("data/corpus-2.pgc")
    self.pg_corpus.extend(other_corpus)
    self.assertEqual(len(self.pg_corpus), 6)

    generated = self.generator.generate(
        self.pg_corpus,
        outf=self.tmpfn,
        pprint=True,
        abs_path=True
    )
    self.assertEqual(len(generated.find("//graphbanks")), 4)

    # _can_be_parsed returns the corpus length, so this also requires a
    # non-empty result
    reparsed = self._can_be_parsed(self.tmpfn)
    self.assertTrue(reparsed)
def test_unresolved_graphbanks(self):
    """
    Set "from_bank_id" of the element found by "//graph_pair" to "XXX",
    write the tree to the temporary file, and expect DaesoError from
    parsing it under each of LOAD_ALL, LOAD_SPARSE and LOAD_NONE.
    """
    self.tree.find("//graph_pair").set("from_bank_id", "XXX")
    self.tree.write(self.tmpfn)

    loading_modes = (LOAD_ALL, LOAD_SPARSE, LOAD_NONE)
    for mode in loading_modes:
        # a fresh parser for every mode
        self.assertRaises(
            DaesoError,
            PGCParser().parse,
            self.tmpfn,
            graph_loading=mode
        )
def test_unresolved_graphs(self):
    """
    Set "from_graph_id" of the element found by "//graph_pair" to "XXX",
    write the tree to the temporary file, and expect DaesoError from
    parsing it under LOAD_ALL and LOAD_SPARSE.
    """
    self.tree.find("//graph_pair").set("from_graph_id", "XXX")
    self.tree.write(self.tmpfn)

    # LOAD_NONE is left out on purpose: the bad graph id will pass
    # unnoticed in that mode
    loading_modes = (LOAD_ALL, LOAD_SPARSE)
    for mode in loading_modes:
        self.assertRaises(
            DaesoError,
            PGCParser().parse,
            self.tmpfn,
            graph_loading=mode
        )
def test_parser_node_pairs(self):
    """
    Check that the alignments of the item at index 1 of corpus-2 are
    exactly the seven expected (Pair, relation) entries.
    """
    corpus = PGCParser().parse("data/corpus-2.pgc")

    expected = [
        (Pair(source, target), relation)
        for source, target, relation in (
            ("4", "4", "equals"),
            ("8", "11", "equals"),
            ("5", "5", "equals"),
            ("11", "10", "intersects"),
            ("19", "8", "intersects"),
            ("1", "1", "restates"),
            ("0", "0", "restates"),
        )
    ]

    found = corpus[1].alignments()
    self.assertEqual(len(found), len(expected))

    # cross off each alignment that was read; list.remove raises
    # ValueError for an entry that is not expected
    for alignment in found:
        expected.remove(alignment)

    # nothing expected may be left over
    self.assertFalse(expected)
def test_parser_node_pairs(self):
    """
    Verify that every node pair of the second item in corpus-2 is read:
    the alignments must match the expected list one-to-one.
    """
    loader = PGCParser()
    corpus = loader.parse("data/corpus-2.pgc")

    unmatched = []
    unmatched.append((Pair("4", "4"), "equals"))
    unmatched.append((Pair("8", "11"), "equals"))
    unmatched.append((Pair("5", "5"), "equals"))
    unmatched.append((Pair("11", "10"), "intersects"))
    unmatched.append((Pair("19", "8"), "intersects"))
    unmatched.append((Pair("1", "1"), "restates"))
    unmatched.append((Pair("0", "0"), "restates"))

    second_item = corpus[1]
    alignments_read = second_item.alignments()

    # same number of entries on both sides ...
    expected_count = len(unmatched)
    self.assertEqual(len(alignments_read), expected_count)

    # ... and each one read consumes exactly one expected entry
    for entry in alignments_read:
        unmatched.remove(entry)
    self.assertFalse(unmatched)
def _can_be_parsed(self, corpus_filename):
    """
    Parse corpus_filename with a fresh PGCParser and return the length
    of the resulting corpus.

    The result is truthy only for a non-empty corpus; any exception
    raised by the parser propagates to the caller.
    """
    return len(PGCParser().parse(corpus_filename))
def setUp(self):
    """
    Prepare the fixture: parse corpus-1 into self.pg_corpus, create a
    PGCGenerator in self.generator, and reserve a temporary file whose
    path is stored in self.tmpfn.
    """
    import os

    parser = PGCParser()
    self.pg_corpus = parser.parse("data/corpus-1.pgc")
    self.generator = PGCGenerator()

    # tempfile.NamedTemporaryFile().name only keeps the path of a file
    # that is deleted as soon as the unreferenced object is collected,
    # leaving a name another process may claim.  mkstemp creates the
    # file and leaves it in place, so the path stays reserved.
    handle, self.tmpfn = tempfile.mkstemp()
    os.close(handle)

    def remove_tmpfn(path=self.tmpfn):
        # tolerate a file that was already removed (e.g. by tearDown)
        try:
            os.remove(path)
        except OSError:
            pass

    self.addCleanup(remove_tmpfn)