def test_build_arrg_graph_rst_conceptnnet_io(self): self._setup_link_parse_tree_189() aspects_per_edu = [ (559, [u'phone']), # test added manually (560, [u'apple']), (561, []), (562, [u'store clerk', u'apple', u'teenager', u'advice']), (563, []), ] aspects_graph_builder = Aspect2AspectGraph(aspects_per_edu) rules_extractor = EDUTreeRulesExtractor() rules = rules_extractor.extract(self.link_tree, [559, 560, 562], 1) documents_info = { 189: { 'EDUs': [559, 560, 561, 562, 563], 'accepted_edus': [559, 560, 561, 562, 563], 'aspect_concepts': { 189: { 'conceptnet_io': { u'apple': [ { 'end': u'apple', 'end-lang': u'en', 'relation': u'IsA', 'start': u'object', 'start-lang': u'en', 'weight': 2.8284271 }, { 'end': u'stuff', 'end-lang': u'en', 'relation': u'Synonym', 'start': u'apple', 'start-lang': u'en', 'weight': 2.8284271 }, ] } } }, 'sentiment': { 559: 1, 560: -1, 5562: 1 }, # the rest of document info is skipped } } graph, page_rank = aspects_graph_builder.build(rules, documents_info, True) self.assertEqual(len(rules), 1) self.assertGreaterEqual(len(graph.nodes()), 4) self.assertGreaterEqual(len(graph.edges()), 3) attrib = nx.get_edge_attributes(graph, 'relation_type') self.assertEqual( attrib, { (u'object', u'apple', 0): u'IsA', (u'apple', u'stuff', 0): u'Synonym', (u'apple', u'phone', 0): 'Elaboration' })
def test_build_exemplary_arrg_graph_sample_tree_189_multiaspects(self): self._setup_link_parse_tree_189() aspects_per_edu = [ (559, [u'test', u'test2']), # test added manually (560, [u'thing', u'test2']), (561, []), (562, [u'store clerk', u'apple', u'teenager', u'advice']), (563, []) ] aspects_graph_builder = Aspect2AspectGraph( aspects_per_edu, with_cycles_between_aspects=True) rules_extractor = EDUTreeRulesExtractor() rules = rules_extractor.extract(self.link_tree, [559, 560, 562], 1) graph, page_rank = aspects_graph_builder.build(rules, conceptnet_io=False) self.assertEqual(len(rules), 1) self.assertEqual(len(graph.nodes()), 3) self.assertEqual(len(graph.edges()), 4) attrib = nx.get_edge_attributes(graph, 'relation_type') self.assertEqual( attrib, { (u'thing', u'test', 0): 'Elaboration', (u'test2', u'test2', 0): 'Elaboration', (u'test2', u'test', 0): 'Elaboration', (u'thing', u'test2', 0): 'Elaboration' })
def test_multi_aspects_per_edu(self): self._multi_aspect_per_edu_tree() rules_extractor = EDUTreeRulesExtractor( only_hierarchical_relations=False) rules = rules_extractor.extract(self.discourse_tree, [559, 560, 561, 562, 563], doc_id=1) expected_rules = { 1: [ EDURelation(edu1=560, edu2=559, relation_type='Elaboration', gerani=0.63), EDURelation(edu1=561, edu2=559, relation_type='Elaboration', gerani=0.53), EDURelation(edu1=562, edu2=559, relation_type='same-unit', gerani=0.3), EDURelation(edu1=563, edu2=559, relation_type='same-unit', gerani=0.2), EDURelation(edu1=561, edu2=560, relation_type='Elaboration', gerani=0.75), EDURelation(edu1=562, edu2=560, relation_type='same-unit', gerani=0.4), EDURelation(edu1=563, edu2=560, relation_type='same-unit', gerani=0.3), EDURelation(edu1=562, edu2=561, relation_type='same-unit', gerani=0.5), EDURelation(edu1=563, edu2=561, relation_type='same-unit', gerani=0.4), EDURelation(edu1=563, edu2=562, relation_type='Elaboration', gerani=0.75) ] } self.assertEqual(rules, expected_rules)
def test_tree_parsing_and_get_rules_all(self): rules_extractor = EDUTreeRulesExtractor( weight_type=['gerani'], only_hierarchical_relations=False) rules = rules_extractor.extract(self.discourse_tree, [513, 514, 515, 516, 517], 1) expected_rules = { 1: [ EDURelation(edu1=514, edu2=513, relation_type='Elaboration', gerani=0.8), EDURelation(edu1=515, edu2=513, relation_type='same-unit', gerani=0.42), EDURelation(edu1=516, edu2=513, relation_type='same-unit', gerani=0.33), EDURelation(edu1=517, edu2=513, relation_type='same-unit', gerani=0.25), EDURelation(edu1=515, edu2=514, relation_type='same-unit', gerani=0.5), EDURelation(edu1=516, edu2=514, relation_type='same-unit', gerani=0.42), EDURelation(edu1=517, edu2=514, relation_type='same-unit', gerani=0.33), EDURelation(edu1=516, edu2=515, relation_type='Elaboration', gerani=0.6), EDURelation(edu1=517, edu2=515, relation_type='Elaboration', gerani=0.52), EDURelation(edu1=517, edu2=516, relation_type='Joint', gerani=0.7) ] } self.assertEqual(rules, expected_rules)
def test_bfs_for_several_aspects_in_one_edu(self): rules_extractor = EDUTreeRulesExtractor( weight_type=['gerani'], only_hierarchical_relations=True) rules = rules_extractor.extract(self.discourse_tree, [513, 514, 515, 516, 517], 1) expected_rules = { 1: [ EDURelation(edu1=514, edu2=513, relation_type='Elaboration', gerani=0.8), EDURelation(edu1=516, edu2=515, relation_type='Elaboration', gerani=0.6), EDURelation(edu1=517, edu2=515, relation_type='Elaboration', gerani=0.52) ] } self.assertEqual(rules, expected_rules)
def extract_rules(discourse_tree: nltk.Tree) -> List: rules_extractor = EDUTreeRulesExtractor(tree=discourse_tree) return rules_extractor.extract()
class AspectExtractionTest(unittest.TestCase): def _load_tree(self, discourse_tree: str) -> Tree: return Tree.fromstring( discourse_tree, leaf_pattern=settings.DISCOURSE_TREE_LEAF_PATTERN, remove_empty_top_bracketing=True) def _with_simple_discourse_tree(self): self.discourse_tree = self._load_tree( settings.SAMPLE_TREE_177.open('r').read()) def _with_celery_text_discourse_tree(self): self.discourse_tree = self._load_tree( settings.SAMPLE_TREE_1.open('r').read()) def _multi_aspect_per_edu_tree(self): self.discourse_tree = self._load_tree( settings.SAMPLE_TREE_189.open('r').read()) def test_load_serialized_tree(self): self._with_simple_discourse_tree() self.rules_extractor = EDUTreeRulesExtractor( self.discourse_tree, only_hierarchical_relations=True) rules = self.rules_extractor.extract() self.assertEqual(len(rules), 5) def test_extract_rules_from_simple_celery_discourse_tree_hierarchical_only_relations( self): self._with_celery_text_discourse_tree() self.rules_extractor = EDUTreeRulesExtractor( self.discourse_tree, only_hierarchical_relations=False) rules = self.rules_extractor.extract() self.assertEqual(len(rules), 6) def test_tree_parsing_and_get_rules_hierarchical(self): rules_extractor = EDUTreeRulesExtractor( weight_type=['gerani'], only_hierarchical_relations=True) rules = rules_extractor.extract(self.discourse_tree, [513, 514, 515, 516, 517], 1) expected_rules = { 1: [ EDURelation(edu1=514, edu2=513, relation_type='Elaboration', gerani=0.8), EDURelation(edu1=516, edu2=515, relation_type='Elaboration', gerani=0.6), EDURelation(edu1=517, edu2=515, relation_type='Elaboration', gerani=0.52) ] } self.assertEqual(rules, expected_rules) def test_tree_parsing_and_get_rules_all(self): rules_extractor = EDUTreeRulesExtractor( weight_type=['gerani'], only_hierarchical_relations=False) rules = rules_extractor.extract(self.discourse_tree, [513, 514, 515, 516, 517], 1) expected_rules = { 1: [ EDURelation(edu1=514, edu2=513, relation_type='Elaboration', gerani=0.8), EDURelation(edu1=515, edu2=513, relation_type='same-unit', gerani=0.42), EDURelation(edu1=516, edu2=513, relation_type='same-unit', gerani=0.33), EDURelation(edu1=517, edu2=513, relation_type='same-unit', gerani=0.25), EDURelation(edu1=515, edu2=514, relation_type='same-unit', gerani=0.5), EDURelation(edu1=516, edu2=514, relation_type='same-unit', gerani=0.42), EDURelation(edu1=517, edu2=514, relation_type='same-unit', gerani=0.33), EDURelation(edu1=516, edu2=515, relation_type='Elaboration', gerani=0.6), EDURelation(edu1=517, edu2=515, relation_type='Elaboration', gerani=0.52), EDURelation(edu1=517, edu2=516, relation_type='Joint', gerani=0.7) ] } self.assertEqual(rules, expected_rules) def test_get_nucleus_and_satellite(self): nucleus_satellite_pairs = { 'same-unit[N][N]': ('same-unit', 'N', 'N'), 'Elaboration[N][S]': ('Elaboration', 'N', 'S'), 'Joint[N][N]': ('Joint', 'N', 'N') } for rel, ns in nucleus_satellite_pairs.iteritems(): self.assertEqual( self.rules_extractor.get_nucleus_satellite_and_relation_type( rel), ns) def test_check_if_hierarchical_rst_relation(self): to_check_hierarchicality = { ('N', 'N'): False, ('N', 'S'): True, ('S', 'N'): True, } for (rel_1, rel_2), expected in to_check_hierarchicality.iteritems(): self.assertEqual( self.rules_extractor.check_hierarchical_rst_relation( rel_1, rel_2), expected) def test_bfs_for_several_aspects_in_one_edu(self): rules_extractor = EDUTreeRulesExtractor( weight_type=['gerani'], only_hierarchical_relations=True) rules = rules_extractor.extract(self.discourse_tree, [513, 514, 515, 516, 517], 1) expected_rules = { 1: [ EDURelation(edu1=514, edu2=513, relation_type='Elaboration', gerani=0.8), EDURelation(edu1=516, edu2=515, relation_type='Elaboration', gerani=0.6), EDURelation(edu1=517, edu2=515, relation_type='Elaboration', gerani=0.52) ] } self.assertEqual(rules, expected_rules) def test_multi_aspects_per_edu(self): self._multi_aspect_per_edu_tree() rules_extractor = EDUTreeRulesExtractor( only_hierarchical_relations=False) rules = rules_extractor.extract(self.discourse_tree, [559, 560, 561, 562, 563], doc_id=1) expected_rules = { 1: [ EDURelation(edu1=560, edu2=559, relation_type='Elaboration', gerani=0.63), EDURelation(edu1=561, edu2=559, relation_type='Elaboration', gerani=0.53), EDURelation(edu1=562, edu2=559, relation_type='same-unit', gerani=0.3), EDURelation(edu1=563, edu2=559, relation_type='same-unit', gerani=0.2), EDURelation(edu1=561, edu2=560, relation_type='Elaboration', gerani=0.75), EDURelation(edu1=562, edu2=560, relation_type='same-unit', gerani=0.4), EDURelation(edu1=563, edu2=560, relation_type='same-unit', gerani=0.3), EDURelation(edu1=562, edu2=561, relation_type='same-unit', gerani=0.5), EDURelation(edu1=563, edu2=561, relation_type='same-unit', gerani=0.4), EDURelation(edu1=563, edu2=562, relation_type='Elaboration', gerani=0.75) ] } self.assertEqual(rules, expected_rules)