class Aligner(object):
    """
    the Algraeph application model

    Keeps the parallel graph corpus that is being edited together with the
    current selection (graph pair, its two graphs, and a pair of nodes).
    State changes are announced by calling send() with the originating
    method and a signal name (send is defined outside this class; presumably
    a publish/subscribe dispatcher -- confirm against its definition).
    """

    def __init__(self):
        """ Start with an empty corpus and nothing selected """
        self._corpus = ParallelGraphCorpus()    # the domain model
        self._changed = False                   # unsaved changes?
        self._filename = None
        self._graph_pair = None
        self._graph_pair_index = None
        self._graphs = Pair(None, None)
        self._nodes = Pair(None, None)
        # the special relation which stands for "no relation"
        self._no_relation = "none"
        self._co_node_selection = False

    # ------------------------------------------------------------------------------
    # Corpus
    # ------------------------------------------------------------------------------

    def open_corpus(self, filename):
        """
        Read the corpus in file `filename` and make it the current corpus

        Raises AlgraephException if the corpus that was read is empty.
        Errors raised while reading are not caught here.
        """
        send(self.open_corpus, "statusDescription",
             "Loading corpus %s ..." % filename)

        # May raise errors such as IOErrors, not an xml file, corrupt format,
        # etc. Use of relax_gb_paths allows graphbank files to be located in
        # the same directory as the corpus file instead of the location
        # specified in the <file> element
        corpus = ParallelGraphCorpus()
        corpus.read(inf=filename, relax_gb_paths=True)

        if not corpus:
            raise AlgraephException(
                "Parallel graph corpus contains no alignments")

        # only replace the current state once reading has succeeded
        self._corpus = corpus
        self._filename = filename
        self._changed = False

        send(self.open_corpus, "statusDescription")
        send(self.open_corpus, "newCorpus")
        send(self.open_corpus, "newCorpusName")

        self.goto_graph_pair(0)
        # implies send("newGraphPair"), and sets self._graph_pair,
        # self._graph_pair_index, self._graphs and self._nodes

    def save_corpus(self, filename=None):
        """
        Write the corpus to file

        If `filename` is given, it becomes the new corpus filename;
        otherwise the corpus is written to the current filename.
        """
        if filename:
            self._filename = filename
            send(self.save_corpus, "newCorpusName")

        send(self.save_corpus, "statusDescription",
             "Saving corpus %s ..." % self._filename)
        self._corpus.write(self._filename, pprint=True)
        self._changed = False
        send(self.save_corpus, "statusDescription")

    def get_corpus_len(self):
        """ returns the number of graph pairs in the corpus """
        return len(self._corpus)

    def get_corpus_filename(self):
        """ returns the filename of the corpus, or None if there is none """
        return self._filename

    def get_corpus_dir(self):
        """
        returns the directory part of the corpus filename, or None if
        dirname() cannot handle the current filename (e.g. it is None)
        """
        try:
            return dirname(self._filename)
        except (AttributeError, TypeError):
            return None

    def corpus_changed(self):
        """ returns True if the corpus has unsaved changes """
        return self._changed

    # ------------------------------------------------------------------------------
    # Treebanks
    # ------------------------------------------------------------------------------

    def get_graphbanks_format(self):
        """ returns the format of the source graphbank of the first graph pair """
        # The ParallelGraphCorpus class in principle supports graphbanks in
        # different formats, although untested for the time being. Formats are
        # therefore stored as a property of the graphbanks, but there is no
        # global format defined as a property of the corpus. So getting "the
        # graphbanks format" is not straightforward. We will make the
        # assumption that all graphbanks are in the same format, and therefore
        # it is sufficient to look at any graphbank linked to an arbitrary
        # graph pair.
        return self._corpus[0].get_source_bank().get_format()

    # ------------------------------------------------------------------------------
    # Graphs (GraphPair and DaesoGraph)
    # ------------------------------------------------------------------------------

    def get_graph_pair(self):
        """ returns the current graph pair, or None if there is none """
        return self._graph_pair

    def goto_prev_graph_pair(self):
        """ Make the graph pair before the current one the current pair """
        self.goto_graph_pair(self._graph_pair_index - 1)

    def goto_next_graph_pair(self):
        """ Make the graph pair after the current one the current pair """
        self.goto_graph_pair(self._graph_pair_index + 1)

    def goto_graph_pair(self, index):
        """
        Make the graph pair at position `index` the current pair and clear
        the node selection. An index outside the corpus is silently ignored.
        """
        # don't use try-except here, because negative index is allowed for list
        if 0 <= index < len(self._corpus):
            self._graph_pair = self._corpus[index]
            self._graph_pair_index = index
            self._graphs = self._graph_pair.get_graphs()
            self._nodes = Pair(None, None)
            send(self.goto_graph_pair, "newGraphPair.viz")
            send(self.goto_graph_pair, "newGraphPair.gui")

    def get_from_graph(self):
        """ returns the source graph of the current graph pair """
        return self._graphs.source

    def get_to_graph(self):
        """ returns the target graph of the current graph pair """
        return self._graphs.target

    def get_from_graph_tokens(self):
        """ returns the token string of the source graph """
        return self._graphs.source.get_graph_token_string()

    def get_to_graph_tokens(self):
        """ returns the token string of the target graph """
        return self._graphs.target.get_graph_token_string()

    def get_graph_pair_counter(self):
        """ returns a tuple (position of current graph pair, corpus size) """
        # counting starts from 1
        return (self._graph_pair_index + 1, len(self._corpus))

    # ------------------------------------------------------------------------------
    # Nodes
    # ------------------------------------------------------------------------------

    def co_node_selection_mode(self, state=False):
        """
        Switch co-selection on or off. When on, selecting a node on one side
        also selects the node aligned to it on the other side.
        """
        self._co_node_selection = state

    def set_from_node(self, node=None):
        """ Select `node` as the 'from' node (None clears the selection) """
        self._nodes.source = node

        if self._co_node_selection:
            self._nodes.target = self.get_aligned_to_node()

        send(self.set_from_node, "newNodeSelect.viz")
        send(self.set_from_node, "newNodeSelect.gui")

    def set_to_node(self, node=None):
        """ Select `node` as the 'to' node (None clears the selection) """
        self._nodes.target = node

        if self._co_node_selection:
            self._nodes.source = self.get_aligned_from_node()

        send(self.set_to_node, "newNodeSelect.viz")
        send(self.set_to_node, "newNodeSelect.gui")

    def get_from_node(self):
        """ returns the selected 'from' node """
        return self._nodes.source

    def get_to_node(self):
        """ returns the selected 'to' node """
        return self._nodes.target

    def nodes_are_selected(self):
        """ returns True if both members of the node selection are true """
        return all(self._nodes)

    def get_from_node_tokens(self):
        """ returns the token string of the selected 'from' node, or "" """
        return (
            self._graphs.source.get_node_token_string(self._nodes.source)
            or "")

    def get_to_node_tokens(self):
        """ returns the token string of the selected 'to' node, or "" """
        return (
            self._graphs.target.get_node_token_string(self._nodes.target)
            or "")

    # ------------------------------------------------------------------------------
    # Alignment
    # ------------------------------------------------------------------------------

    def get_relation_set(self):
        """
        returns a list of relations, starting with the special "no relation"
        followed by the relations of the corpus
        """
        try:
            return [self._no_relation] + self._corpus.get_relations()
        except TypeError:
            # get_relations() returned something that cannot be added to a
            # list (presumably None when no relations are defined)
            return [self._no_relation]

    def get_node_pair_relation(self):
        """
        returns the relation between the selected nodes, or the special
        "no relation" if get_align() returns nothing
        """
        return self._graph_pair.get_align(self._nodes) or self._no_relation

    def set_node_pair_relation(self, relation):
        """
        Align the selected nodes with `relation`, or delete their alignment
        if `relation` is the special "no relation". Does nothing unless both
        a 'from' and a 'to' node are selected.
        """
        if self.nodes_are_selected():
            if relation != self._no_relation:
                self._graph_pair.add_align(self._nodes, relation)
            else:
                self._graph_pair.del_align(self._nodes)

            self._changed = True
            send(self.set_node_pair_relation, "newRelation.viz")
            send(self.set_node_pair_relation, "newRelation.gui")

    def get_aligned_to_node(self):
        """
        Get 'to' node aligned to the selected 'from' node
        """
        return self._graph_pair.get_aligned_target_node(self._nodes.source)

    def get_aligned_from_node(self):
        """
        Get 'from' node aligned to the selected 'to' node
        """
        return self._graph_pair.get_aligned_source_node(self._nodes.target)

    def get_auto_fold_equal_nodes(self):
        """
        Get lists of non-terminal 'from' and 'to' nodes aligned with an
        'equals' relation
        """
        # ignoring terminals, so the list may be of unequal size
        from_nodes = []
        to_nodes = []

        for (nodes, rel) in self._graph_pair.alignments_iter():
            if rel == "equals":
                if self._graphs.source.node_is_non_terminal(nodes.source):
                    from_nodes.append(nodes.source)
                if self._graphs.target.node_is_non_terminal(nodes.target):
                    to_nodes.append(nodes.target)

        return from_nodes, to_nodes

    #------------------------------------------------------------------------------
    # Comments
    #------------------------------------------------------------------------------

    def get_comment(self):
        """
        returns the comment on the current graph pair, or "" if there is no
        current graph pair, no comment element, or the comment has no text
        """
        try:
            # an empty <comment/> element has text None
            return self._graph_pair.get_meta_data().find("comment").text or ""
        except AttributeError:
            # no graph pair, no meta data, or no comment element
            return ""

    def set_comment(self, text):
        """
        Set the comment on the current graph pair to `text`

        If `text` contains only whitespace, an existing comment element is
        removed instead. The corpus is marked as changed in either case.
        """
        meta_data_elem = self._graph_pair.get_meta_data()
        comment_elem = meta_data_elem.find("comment")

        if text.strip():
            if comment_elem is None:
                comment_elem = SubElement(meta_data_elem, "comment")
            comment_elem.text = text
        elif comment_elem is not None:
            # Explicit test against None: an element without children is
            # false in a boolean context, so a plain truth test would never
            # remove the comment element.
            meta_data_elem.remove(comment_elem)

        self._changed = True
class Test_ParallelGraphCorpus(unittest.TestCase):
    # Unit tests for the container behaviour of ParallelGraphCorpus.
    # The expected sizes below depend on the contents of the files
    # data/corpus-1.pgc and data/corpus-2.pgc.

    def setUp(self):
        # a fresh corpus for every test, because many tests modify it
        self.pgc1 = ParallelGraphCorpus(inf="data/corpus-1.pgc")

    def test__init(self):
        """
        init from another corpus
        """
        ParallelGraphCorpus(self.pgc1, self.pgc1.get_relations())

    def test__add__(self):
        """
        corpus + other
        """
        pgc2 = ParallelGraphCorpus(inf="data/corpus-2.pgc")
        pgc3 = self.pgc1 + pgc2
        self.assertEqual(len(pgc3), len(self.pgc1) + len(pgc2))

    def test__deepcopy__(self):
        """
        copy.deepcopy(corpus)
        """
        pgc2 = copy.deepcopy(self.pgc1)
        self.assertTrue(isinstance(pgc2, ParallelGraphCorpus))
        self.assertFalse(self.pgc1._relations is pgc2._relations)
        self.assertFalse(self.pgc1._meta_data is pgc2._meta_data)

        for gp1, gp2 in zip(self.pgc1, pgc2):
            self.assertFalse(gp1 is gp2)
            # however, graphbanks and graphs are still shared
            self.assertTrue(gp1._banks is gp2._banks)
            self.assertTrue(gp1._graphs is gp2._graphs)

    def test__delitem__(self):
        """
        del corpus[1]
        """
        pg = self.pgc1[0]
        del self.pgc1[0]
        self.assertFalse(pg in self.pgc1)

    def test__delslice__(self):
        """
        del [:1]
        """
        pg = self.pgc1[0]
        del self.pgc1[:1]
        self.assertFalse(pg in self.pgc1)
        del self.pgc1[:]
        self.assertEqual(len(self.pgc1), 0)

    def test__eq__(self):
        # equal to itself, to a shallow copy, and to a deep copy
        self.assertEqual(self.pgc1, self.pgc1)
        pgc2 = self.pgc1[:]
        self.assertEqual(self.pgc1, pgc2)
        pgc2 = copy.deepcopy(self.pgc1)
        self.assertEqual(self.pgc1, pgc2)

    def test__getitem__(self):
        self.assertTrue(isinstance(self.pgc1[0], GraphPair))

    def test__getslice__(self):
        # or shallow copy
        # NOTE(review): the slice [1:1:1] looks empty, in which case the
        # loop below never runs -- confirm this is intended
        pgc2 = self.pgc1[1:1:1]
        self.assertTrue(isinstance(pgc2, ParallelGraphCorpus))
        self.assertTrue(self.pgc1._relations is pgc2._relations)
        self.assertTrue(self.pgc1._meta_data is pgc2._meta_data)

        for gp1, gp2 in zip(self.pgc1, pgc2):
            self.assertTrue(gp1 is gp2)

    def test__iadd__(self):
        # assertEqual instead of the deprecated alias assertEquals
        self.pgc1 += self.pgc1
        self.assertEqual(len(self.pgc1), 6)
        pgc2 = ParallelGraphCorpus(inf="data/corpus-2.pgc")
        pgc2 += self.pgc1
        self.assertEqual(len(pgc2), 9)

    def test__repr__(self):
        self.assertTrue(repr(self.pgc1))

    def test__str__(self):
        self.assertTrue(str(self.pgc1))

    def test__setitem__(self):
        self.pgc1[0] = self.pgc1[-1]
        self.assertEqual(self.pgc1[0], self.pgc1[-1])
        # NOTE(review): __setitem__ is called without a value here, so the
        # TypeError may come from the missing argument rather than from a
        # type check on the item -- confirm this is what should be tested
        self.assertRaises(TypeError,
                          ParallelGraphCorpus.__setitem__,
                          self.pgc1,
                          1)

    def test__setslice__(self):
        pgc2 = ParallelGraphCorpus(inf="data/corpus-2.pgc")
        self.pgc1[-1:] = pgc2[:2]
        self.assertEqual(len(self.pgc1), 4)
        # a slice of items that are not graph pairs must be rejected
        self.assertRaises(TypeError,
                          ParallelGraphCorpus.__setslice__,
                          self.pgc1,
                          1,
                          1,
                          ["x"])

    def test_append(self):
        pgc2 = ParallelGraphCorpus(inf="data/corpus-2.pgc")
        self.pgc1.append(pgc2[2])
        self.assertEqual(len(self.pgc1), 4)
        # NOTE(review): this checks __setslice__, exactly as in
        # test__setslice__; presumably append() with an invalid item was
        # meant -- confirm before changing
        self.assertRaises(TypeError,
                          ParallelGraphCorpus.__setslice__,
                          self.pgc1,
                          1,
                          1,
                          ["x"])

    def test_clear(self):
        self.pgc1.clear()
        self.assertFalse(self.pgc1)
        self.assertTrue(isinstance(self.pgc1, ParallelGraphCorpus))

    def test_extend(self):
        pgc2 = ParallelGraphCorpus(inf="data/corpus-2.pgc")
        self.pgc1.extend(iter(pgc2))
        self.assertEqual(len(self.pgc1), 6)

    def test_purge(self):
        # adding graph pairs with identical graphbanks
        pgc1 = ParallelGraphCorpus(inf="data/corpus-1.pgc")
        pgc1 += pgc1
        graphbanks_before = pgc1._graphbanks()
        self.assertEqual(len(graphbanks_before), 2)
        pgc1.purge()
        graphbanks_after = pgc1._graphbanks()
        self.assertEqual(graphbanks_before, graphbanks_after)

        # adding graph pairs with equal graphbanks
        pgc1 = ParallelGraphCorpus(inf="data/corpus-1.pgc")
        pgc2 = ParallelGraphCorpus(inf="data/corpus-1.pgc")
        pgc1 += pgc2
        graphbanks_before = pgc1._graphbanks()
        self.assertEqual(len(graphbanks_before), 4)
        pgc1.purge()
        graphbanks_after = pgc1._graphbanks()
        self.assertEqual(len(graphbanks_after), 2)

        # adding graph pairs with different graphbanks
        pgc1 = ParallelGraphCorpus(inf="data/corpus-1.pgc")
        pgc2 = ParallelGraphCorpus(inf="data/corpus-2.pgc")
        pgc1 += pgc2
        graphbanks_before = pgc1._graphbanks()
        self.assertEqual(len(graphbanks_before), 4)
        pgc1.purge()
        graphbanks_after = pgc1._graphbanks()
        self.assertEqual(graphbanks_before, graphbanks_after)

        # removing graphpairs and thus dependencies on graphbanks
        del pgc1[:]
        graphbanks = pgc1._graphbanks()
        self.assertEqual(len(graphbanks), 0)

    def test__graph_banks(self):
        graphbanks = self.pgc1._graphbanks()
        self.assertEqual(len(graphbanks), 2)

        for gb in graphbanks:
            self.assertTrue(isinstance(gb, GraphBank))

    def test_annotator(self):
        self.assertFalse(self.pgc1.get_annotator())
        self.pgc1.set_annotator("AA")
        self.assertEqual(self.pgc1.get_annotator(), "AA")
        # by default a new annotator is appended to the existing one
        self.pgc1.set_annotator("BB")
        self.assertEqual(self.pgc1.get_annotator(), "AA + BB")
        self.pgc1.set_annotator("CC", append=False)
        self.assertEqual(self.pgc1.get_annotator(), "CC")
class Aligner(object):
    """
    the Algraeph application model

    Keeps the parallel graph corpus that is being edited together with the
    current selection (graph pair, its two graphs, and a pair of nodes).
    State changes are announced by calling send() with the originating
    method and a signal name (send is defined outside this class; presumably
    a publish/subscribe dispatcher -- confirm against its definition).
    """

    def __init__(self):
        """ Start with an empty corpus and nothing selected """
        self._corpus = ParallelGraphCorpus()    # the domain model
        self._changed = False                   # unsaved changes?
        self._filename = None
        self._graph_pair = None
        self._graph_pair_index = None
        self._graphs = Pair(None, None)
        self._nodes = Pair(None, None)
        # the special relation which stands for "no relation"
        self._no_relation = "none"
        self._co_node_selection = False

    # ------------------------------------------------------------------------------
    # Corpus
    # ------------------------------------------------------------------------------

    def open_corpus(self, filename):
        """
        Read the corpus in file `filename` and make it the current corpus

        Raises AlgraephException if the corpus that was read is empty.
        Errors raised while reading are not caught here.
        """
        send(self.open_corpus, "statusDescription",
             "Loading corpus %s ..." % filename)

        # May raise errors such as IOErrors, not an xml file, corrupt format,
        # etc. Use of relax_gb_paths allows graphbank files to be located in
        # the same directory as the corpus file instead of the location
        # specified in the <file> element
        corpus = ParallelGraphCorpus()
        corpus.read(inf=filename, relax_gb_paths=True)

        if not corpus:
            raise AlgraephException(
                "Parallel graph corpus contains no alignments")

        # only replace the current state once reading has succeeded
        self._corpus = corpus
        self._filename = filename
        self._changed = False

        send(self.open_corpus, "statusDescription")
        send(self.open_corpus, "newCorpus")
        send(self.open_corpus, "newCorpusName")

        self.goto_graph_pair(0)
        # implies send("newGraphPair"), and sets self._graph_pair,
        # self._graph_pair_index, self._graphs and self._nodes

    def save_corpus(self, filename=None):
        """
        Write the corpus to file

        If `filename` is given, it becomes the new corpus filename;
        otherwise the corpus is written to the current filename.
        """
        if filename:
            self._filename = filename
            send(self.save_corpus, "newCorpusName")

        send(self.save_corpus, "statusDescription",
             "Saving corpus %s ..." % self._filename)
        self._corpus.write(self._filename, pprint=True)
        self._changed = False
        send(self.save_corpus, "statusDescription")

    def get_corpus_len(self):
        """ returns the number of graph pairs in the corpus """
        return len(self._corpus)

    def get_corpus_filename(self):
        """ returns the filename of the corpus, or None if there is none """
        return self._filename

    def get_corpus_dir(self):
        """
        returns the directory part of the corpus filename, or None if
        dirname() cannot handle the current filename (e.g. it is None)
        """
        try:
            return dirname(self._filename)
        except (AttributeError, TypeError):
            return None

    def corpus_changed(self):
        """ returns True if the corpus has unsaved changes """
        return self._changed

    # ------------------------------------------------------------------------------
    # Treebanks
    # ------------------------------------------------------------------------------

    def get_graphbanks_format(self):
        """ returns the format of the source graphbank of the first graph pair """
        # The ParallelGraphCorpus class in principle supports graphbanks in
        # different formats, although untested for the time being. Formats are
        # therefore stored as a property of the graphbanks, but there is no
        # global format defined as a property of the corpus. So getting "the
        # graphbanks format" is not straightforward. We will make the
        # assumption that all graphbanks are in the same format, and therefore
        # it is sufficient to look at any graphbank linked to an arbitrary
        # graph pair.
        return self._corpus[0].get_source_bank().get_format()

    # ------------------------------------------------------------------------------
    # Graphs (GraphPair and DaesoGraph)
    # ------------------------------------------------------------------------------

    def get_graph_pair(self):
        """ returns the current graph pair, or None if there is none """
        return self._graph_pair

    def goto_prev_graph_pair(self):
        """ Make the graph pair before the current one the current pair """
        self.goto_graph_pair(self._graph_pair_index - 1)

    def goto_next_graph_pair(self):
        """ Make the graph pair after the current one the current pair """
        self.goto_graph_pair(self._graph_pair_index + 1)

    def goto_graph_pair(self, index):
        """
        Make the graph pair at position `index` the current pair and clear
        the node selection. An index outside the corpus is silently ignored.
        """
        # don't use try-except here, because negative index is allowed for list
        if 0 <= index < len(self._corpus):
            self._graph_pair = self._corpus[index]
            self._graph_pair_index = index
            self._graphs = self._graph_pair.get_graphs()
            self._nodes = Pair(None, None)
            send(self.goto_graph_pair, "newGraphPair.viz")
            send(self.goto_graph_pair, "newGraphPair.gui")

    def get_from_graph(self):
        """ returns the source graph of the current graph pair """
        return self._graphs.source

    def get_to_graph(self):
        """ returns the target graph of the current graph pair """
        return self._graphs.target

    def get_from_graph_tokens(self):
        """ returns the token string of the source graph """
        return self._graphs.source.get_graph_token_string()

    def get_to_graph_tokens(self):
        """ returns the token string of the target graph """
        return self._graphs.target.get_graph_token_string()

    def get_graph_pair_counter(self):
        """ returns a tuple (position of current graph pair, corpus size) """
        # counting starts from 1
        return (self._graph_pair_index + 1, len(self._corpus))

    # ------------------------------------------------------------------------------
    # Nodes
    # ------------------------------------------------------------------------------

    def co_node_selection_mode(self, state=False):
        """
        Switch co-selection on or off. When on, selecting a node on one side
        also selects the node aligned to it on the other side.
        """
        self._co_node_selection = state

    def set_from_node(self, node=None):
        """ Select `node` as the 'from' node (None clears the selection) """
        self._nodes.source = node

        if self._co_node_selection:
            self._nodes.target = self.get_aligned_to_node()

        send(self.set_from_node, "newNodeSelect.viz")
        send(self.set_from_node, "newNodeSelect.gui")

    def set_to_node(self, node=None):
        """ Select `node` as the 'to' node (None clears the selection) """
        self._nodes.target = node

        if self._co_node_selection:
            self._nodes.source = self.get_aligned_from_node()

        send(self.set_to_node, "newNodeSelect.viz")
        send(self.set_to_node, "newNodeSelect.gui")

    def get_from_node(self):
        """ returns the selected 'from' node """
        return self._nodes.source

    def get_to_node(self):
        """ returns the selected 'to' node """
        return self._nodes.target

    def nodes_are_selected(self):
        """ returns True if both members of the node selection are true """
        return all(self._nodes)

    def get_from_node_tokens(self):
        """ returns the token string of the selected 'from' node, or "" """
        return (
            self._graphs.source.get_node_token_string(self._nodes.source)
            or "")

    def get_to_node_tokens(self):
        """ returns the token string of the selected 'to' node, or "" """
        return (
            self._graphs.target.get_node_token_string(self._nodes.target)
            or "")

    # ------------------------------------------------------------------------------
    # Alignment
    # ------------------------------------------------------------------------------

    def get_relation_set(self):
        """
        returns a list of relations, starting with the special "no relation"
        followed by the relations of the corpus
        """
        try:
            return [self._no_relation] + self._corpus.get_relations()
        except TypeError:
            # get_relations() returned something that cannot be added to a
            # list (presumably None when no relations are defined)
            return [self._no_relation]

    def get_node_pair_relation(self):
        """
        returns the relation between the selected nodes, or the special
        "no relation" if get_align() returns nothing
        """
        return self._graph_pair.get_align(self._nodes) or self._no_relation

    def set_node_pair_relation(self, relation):
        """
        Align the selected nodes with `relation`, or delete their alignment
        if `relation` is the special "no relation". Does nothing unless both
        a 'from' and a 'to' node are selected.
        """
        if self.nodes_are_selected():
            if relation != self._no_relation:
                self._graph_pair.add_align(self._nodes, relation)
            else:
                self._graph_pair.del_align(self._nodes)

            self._changed = True
            send(self.set_node_pair_relation, "newRelation.viz")
            send(self.set_node_pair_relation, "newRelation.gui")

    def get_aligned_to_node(self):
        """
        Get 'to' node aligned to the selected 'from' node
        """
        return self._graph_pair.get_aligned_target_node(self._nodes.source)

    def get_aligned_from_node(self):
        """
        Get 'from' node aligned to the selected 'to' node
        """
        return self._graph_pair.get_aligned_source_node(self._nodes.target)

    def get_auto_fold_equal_nodes(self):
        """
        Get lists of non-terminal 'from' and 'to' nodes aligned with an
        'equals' relation
        """
        # ignoring terminals, so the list may be of unequal size
        from_nodes = []
        to_nodes = []

        for (nodes, rel) in self._graph_pair.alignments_iter():
            if rel == "equals":
                if self._graphs.source.node_is_non_terminal(nodes.source):
                    from_nodes.append(nodes.source)
                if self._graphs.target.node_is_non_terminal(nodes.target):
                    to_nodes.append(nodes.target)

        return from_nodes, to_nodes

    #------------------------------------------------------------------------------
    # Comments
    #------------------------------------------------------------------------------

    def get_comment(self):
        """
        returns the comment on the current graph pair, or "" if there is no
        current graph pair, no comment element, or the comment has no text
        """
        try:
            # an empty <comment/> element has text None
            return self._graph_pair.get_meta_data().find("comment").text or ""
        except AttributeError:
            # no graph pair, no meta data, or no comment element
            return ""

    def set_comment(self, text):
        """
        Set the comment on the current graph pair to `text`

        If `text` contains only whitespace, an existing comment element is
        removed instead. The corpus is marked as changed in either case.
        """
        meta_data_elem = self._graph_pair.get_meta_data()
        comment_elem = meta_data_elem.find("comment")

        if text.strip():
            if comment_elem is None:
                comment_elem = SubElement(meta_data_elem, "comment")
            comment_elem.text = text
        elif comment_elem is not None:
            # Explicit test against None: an element without children is
            # false in a boolean context, so a plain truth test would never
            # remove the comment element.
            meta_data_elem.remove(comment_elem)

        self._changed = True