def test_labeled_nodes(self): ''' Test labeled nodes. Test case from Emily M. Bender. ''' search = ''' # macros @ SBJ /SBJ/; @ VP /VP/; @ VB /VB/; @ VPoB /V[PB]/; @ OBJ /OBJ/; # 1 svo S < @SBJ=s < (@VP=v < (@VB $.. @OBJ)) : =s .. =v''' sent1 = ParentedTree.fromstring( '(S (NP-SBJ I) (VP (VB eat) (NP-OBJ (NNS apples))))') sent2 = ParentedTree.fromstring( '(S (VP (VB eat) (NP-OBJ (NNS apples))) (NP-SBJ I))') search_firsthalf = (search.split('\n\n')[0] + 'S < @SBJ < (@VP < (@VB $.. @OBJ))') search_rewrite = 'S < (/.*SBJ/ $.. (/VP/ < (/VB/ $.. /.*OBJ/)))' self.assertTrue(list(tgrep.tgrep_positions(search_firsthalf, [sent1]))[0]) self.assertTrue(list(tgrep.tgrep_positions(search, [sent1]))[0]) self.assertTrue(list(tgrep.tgrep_positions(search_rewrite, [sent1]))[0]) self.assertEqual(list(tgrep.tgrep_positions(search, [sent1])), list(tgrep.tgrep_positions(search_rewrite, [sent1]))) self.assertTrue(list(tgrep.tgrep_positions(search_firsthalf, [sent2]))[0]) self.assertFalse(list(tgrep.tgrep_positions(search, [sent2]))[0]) self.assertFalse(list(tgrep.tgrep_positions(search_rewrite, [sent2]))[0]) self.assertEqual(list(tgrep.tgrep_positions(search, [sent2])), list(tgrep.tgrep_positions(search_rewrite, [sent2])))
def test_node_nocase(self): ''' Test selecting nodes using case insensitive node names. ''' tree = ParentedTree.fromstring('(S (n x) (N x))') self.assertEqual(list(tgrep.tgrep_positions('"N"', [tree])), [[(1,)]]) self.assertEqual(list(tgrep.tgrep_positions('i@"N"', [tree])), [[(0,), (1,)]])
def test_use_macros(self): ''' Test defining and using tgrep2 macros. ''' tree = ParentedTree.fromstring( '(VP (VB sold) (NP (DET the) ' '(NN heiress)) (NP (NN deed) (PREP to) ' '(NP (DET the) (NN school) (NN house))))' ) self.assertEqual( list( tgrep.tgrep_positions( '@ NP /^NP/;\n@ NN /^NN/;\n@NP !< @NP !$.. @NN', [tree] ) ), [[(1,), (2, 2)]], ) # use undefined macro @CNP self.assertRaises( tgrep.TgrepException, list, tgrep.tgrep_positions( '@ NP /^NP/;\n@ NN /^NN/;\n@CNP !< @NP !$.. @NN', [tree] ), )
def test_node_printing(self): '''Test that the tgrep print operator ' is properly ignored.''' tree = ParentedTree.fromstring('(S (n x) (N x))') self.assertEqual(list(tgrep.tgrep_positions('N', [tree])), list(tgrep.tgrep_positions('\'N', [tree]))) self.assertEqual(list(tgrep.tgrep_positions('/[Nn]/', [tree])), list(tgrep.tgrep_positions('\'/[Nn]/', [tree])))
def test_node_noleaves(self): ''' Test node name matching with the search_leaves flag set to False. ''' tree = ParentedTree.fromstring('(S (A (T x)) (B (N x)))') self.assertEqual(list(tgrep.tgrep_positions('x', [tree])), [[(0, 0, 0), (1, 0, 0)]]) self.assertEqual(list(tgrep.tgrep_positions('x', [tree], False)), [[]])
def test_node_quoted(self): ''' Test selecting nodes using quoted node names. ''' tree = ParentedTree.fromstring('(N ("N" x) (N" x) ("\\" x))') self.assertEqual(list(tgrep.tgrep_positions('"N"', [tree])), [[()]]) self.assertEqual(list(tgrep.tgrep_positions('"\\"N\\""', [tree])), [[(0,)]]) self.assertEqual(list(tgrep.tgrep_positions('"N\\""', [tree])), [[(1,)]]) self.assertEqual(list(tgrep.tgrep_positions('"\\"\\\\\\""', [tree])), [[(2,)]])
def test_rel_sister_nodes(self): ''' Test matching sister nodes in a tree. ''' tree = ParentedTree.fromstring('(S (A x) (B x) (C x))') self.assertEqual(list(tgrep.tgrep_positions('* $. B', [tree])), [[(0,)]]) self.assertEqual(list(tgrep.tgrep_positions('* $.. B', [tree])), [[(0,)]]) self.assertEqual(list(tgrep.tgrep_positions('* $, B', [tree])), [[(2,)]]) self.assertEqual(list(tgrep.tgrep_positions('* $,, B', [tree])), [[(2,)]]) self.assertEqual(list(tgrep.tgrep_positions('* $ B', [tree])), [[(0,), (2,)]])
def test_rel_sister_nodes(self): """ Test matching sister nodes in a tree. """ tree = ParentedTree.fromstring("(S (A x) (B x) (C x))") self.assertEqual(list(tgrep.tgrep_positions("* $. B", [tree])), [[(0,)]]) self.assertEqual(list(tgrep.tgrep_positions("* $.. B", [tree])), [[(0,)]]) self.assertEqual(list(tgrep.tgrep_positions("* $, B", [tree])), [[(2,)]]) self.assertEqual(list(tgrep.tgrep_positions("* $,, B", [tree])), [[(2,)]]) self.assertEqual(list(tgrep.tgrep_positions("* $ B", [tree])), [[(0,), (2,)]])
def test_node_regex_2(self): ''' Test regex matching on nodes. ''' tree = ParentedTree.fromstring('(S (SBJ x) (SBJ1 x) (NP-SBJ x))') self.assertEqual(list(tgrep.tgrep_positions('/^SBJ/', [tree])), [[(0,), (1,)]]) # This is a regular expression that matches any node whose # name includes SBJ, including NP-SBJ: self.assertEqual(list(tgrep.tgrep_positions('/SBJ/', [tree])), [[(0,), (1,), (2,)]])
def test_node_regex_2(self): ''' Test regex matching on nodes. ''' tree = ParentedTree.fromstring('(S (SBJ x) (SBJ1 x) (NP-SBJ x))') self.assertEqual(list(tgrep.tgrep_positions('/^SBJ/', [tree])), [[(0, ), (1, )]]) # This is a regular expression that matches any node whose # name includes SBJ, including NP-SBJ: self.assertEqual(list(tgrep.tgrep_positions('/SBJ/', [tree])), [[(0, ), (1, ), (2, )]])
def test_node_printing(self): """Test that the tgrep print operator ' is properly ignored.""" tree = ParentedTree.fromstring("(S (n x) (N x))") self.assertEqual( list(tgrep.tgrep_positions("N", [tree])), list(tgrep.tgrep_positions("'N", [tree])), ) self.assertEqual( list(tgrep.tgrep_positions("/[Nn]/", [tree])), list(tgrep.tgrep_positions("'/[Nn]/", [tree])), )
def test_trailing_semicolon(self): """ Test that semicolons at the end of a tgrep2 search string won't cause a parse failure. """ tree = ParentedTree.fromstring("(S (NP (DT the) (JJ big) (NN dog)) " "(VP bit) (NP (DT a) (NN cat)))") self.assertEqual(list(tgrep.tgrep_positions("NN", [tree])), [[(0, 2), (2, 1)]]) self.assertEqual(list(tgrep.tgrep_positions("NN;", [tree])), [[(0, 2), (2, 1)]]) self.assertEqual(list(tgrep.tgrep_positions("NN;;", [tree])), [[(0, 2), (2, 1)]])
def test_trailing_semicolon(self): ''' Test that semicolons at the end of a tgrep2 search string won't cause a parse failure. ''' tree = ParentedTree.fromstring( '(S (NP (DT the) (JJ big) (NN dog)) ' '(VP bit) (NP (DT a) (NN cat)))' ) self.assertEqual(list(tgrep.tgrep_positions('NN', [tree])), [[(0, 2), (2, 1)]]) self.assertEqual(list(tgrep.tgrep_positions('NN;', [tree])), [[(0, 2), (2, 1)]]) self.assertEqual( list(tgrep.tgrep_positions('NN;;', [tree])), [[(0, 2), (2, 1)]] )
def test_trailing_semicolon(self): ''' Test that semicolons at the end of a tgrep2 search string won't cause a parse failure. ''' tree = ParentedTree.fromstring('(S (NP (DT the) (JJ big) (NN dog)) ' '(VP bit) (NP (DT a) (NN cat)))') self.assertEqual(list(tgrep.tgrep_positions('NN', [tree])), [[(0, 2), (2, 1)]]) self.assertEqual(list(tgrep.tgrep_positions('NN;', [tree])), [[(0, 2), (2, 1)]]) self.assertEqual(list(tgrep.tgrep_positions('NN;;', [tree])), [[(0, 2), (2, 1)]])
def test_multiple_conjs(self): ''' Test that multiple (3 or more) conjunctions of node relations are handled properly. ''' sent = ParentedTree.fromstring( '((A (B b) (C c)) (A (B b) (C c) (D d)))') # search = '(A < B < C < D)' # search_tworels = '(A < B < C)' self.assertEqual(list(tgrep.tgrep_positions('(A < B < C < D)', [sent])), [[(1,)]]) self.assertEqual(list(tgrep.tgrep_positions('(A < B < C)', [sent])), [[(0,), (1,)]])
def test_multiple_conjs(self): ''' Test that multiple (3 or more) conjunctions of node relations are handled properly. ''' sent = ParentedTree.fromstring( '((A (B b) (C c)) (A (B b) (C c) (D d)))') # search = '(A < B < C < D)' # search_tworels = '(A < B < C)' self.assertEqual( list(tgrep.tgrep_positions('(A < B < C < D)', [sent])), [[(1, )]]) self.assertEqual(list(tgrep.tgrep_positions('(A < B < C)', [sent])), [[(0, ), (1, )]])
def test_node_simple(self): ''' Test a simple use of tgrep for finding nodes matching a given pattern. ''' tree = ParentedTree.fromstring('(S (NP (DT the) (JJ big) (NN dog)) ' '(VP bit) (NP (DT a) (NN cat)))') self.assertEqual(list(tgrep.tgrep_positions('NN', [tree])), [[(0, 2), (2, 1)]]) self.assertEqual(list(tgrep.tgrep_nodes('NN', [tree])), [[tree[0, 2], tree[2, 1]]]) self.assertEqual(list(tgrep.tgrep_positions('NN|JJ', [tree])), [[(0, 1), (0, 2), (2, 1)]])
def test_node_encoding(self): ''' Test that tgrep search strings handles bytes and strs the same way. ''' tree = ParentedTree.fromstring('(S (NP (DT the) (JJ big) (NN dog)) ' '(VP bit) (NP (DT a) (NN cat)))') self.assertEqual(list(tgrep.tgrep_positions(b('NN'), [tree])), list(tgrep.tgrep_positions('NN', [tree]))) self.assertEqual(list(tgrep.tgrep_nodes(b('NN'), [tree])), list(tgrep.tgrep_nodes('NN', [tree]))) self.assertEqual(list(tgrep.tgrep_positions(b('NN|JJ'), [tree])), list(tgrep.tgrep_positions('NN|JJ', [tree])))
def test_rel_precedence(self): ''' Test matching nodes based on precedence relations. ''' tree = ParentedTree.fromstring('(S (NP (NP (PP x)) (NP (AP x)))' ' (VP (AP (X (PP x)) (Y (AP x))))' ' (NP (RC (NP (AP x)))))') self.assertEqual(list(tgrep.tgrep_positions('* . X', [tree])), [[(0,), (0, 1), (0, 1, 0)]]) self.assertEqual(list(tgrep.tgrep_positions('* . Y', [tree])), [[(1, 0, 0), (1, 0, 0, 0)]]) self.assertEqual(list(tgrep.tgrep_positions('* .. X', [tree])), [[(0,), (0, 0), (0, 0, 0), (0, 1), (0, 1, 0)]]) self.assertEqual(list(tgrep.tgrep_positions('* .. Y', [tree])), [[(0,), (0, 0), (0, 0, 0), (0, 1), (0, 1, 0), (1, 0, 0), (1, 0, 0, 0)]]) self.assertEqual(list(tgrep.tgrep_positions('* , X', [tree])), [[(1, 0, 1), (1, 0, 1, 0)]]) self.assertEqual(list(tgrep.tgrep_positions('* , Y', [tree])), [[(2,), (2, 0), (2, 0, 0), (2, 0, 0, 0)]]) self.assertEqual(list(tgrep.tgrep_positions('* ,, X', [tree])), [[(1, 0, 1), (1, 0, 1, 0), (2,), (2, 0), (2, 0, 0), (2, 0, 0, 0)]]) self.assertEqual(list(tgrep.tgrep_positions('* ,, Y', [tree])), [[(2,), (2, 0), (2, 0, 0), (2, 0, 0, 0)]])
def test_rel_precedence(self): ''' Test matching nodes based on precedence relations. ''' tree = ParentedTree.fromstring('(S (NP (NP (PP x)) (NP (AP x)))' ' (VP (AP (X (PP x)) (Y (AP x))))' ' (NP (RC (NP (AP x)))))') self.assertEqual(list(tgrep.tgrep_positions('* . X', [tree])), [[(0, ), (0, 1), (0, 1, 0)]]) self.assertEqual(list(tgrep.tgrep_positions('* . Y', [tree])), [[(1, 0, 0), (1, 0, 0, 0)]]) self.assertEqual(list(tgrep.tgrep_positions('* .. X', [tree])), [[(0, ), (0, 0), (0, 0, 0), (0, 1), (0, 1, 0)]]) self.assertEqual(list(tgrep.tgrep_positions('* .. Y', [tree])), [[(0, ), (0, 0), (0, 0, 0), (0, 1), (0, 1, 0), (1, 0, 0), (1, 0, 0, 0)]]) self.assertEqual(list(tgrep.tgrep_positions('* , X', [tree])), [[(1, 0, 1), (1, 0, 1, 0)]]) self.assertEqual(list(tgrep.tgrep_positions('* , Y', [tree])), [[(2, ), (2, 0), (2, 0, 0), (2, 0, 0, 0)]]) self.assertEqual(list(tgrep.tgrep_positions('* ,, X', [tree])), [[(1, 0, 1), (1, 0, 1, 0), (2, ), (2, 0), (2, 0, 0), (2, 0, 0, 0)]]) self.assertEqual(list(tgrep.tgrep_positions('* ,, Y', [tree])), [[(2, ), (2, 0), (2, 0, 0), (2, 0, 0, 0)]])
def test_rel_precedence(self): """ Test matching nodes based on precedence relations. """ tree = ParentedTree.fromstring("(S (NP (NP (PP x)) (NP (AP x)))" " (VP (AP (X (PP x)) (Y (AP x))))" " (NP (RC (NP (AP x)))))") self.assertEqual(list(tgrep.tgrep_positions("* . X", [tree])), [[(0, ), (0, 1), (0, 1, 0)]]) self.assertEqual(list(tgrep.tgrep_positions("* . Y", [tree])), [[(1, 0, 0), (1, 0, 0, 0)]]) self.assertEqual( list(tgrep.tgrep_positions("* .. X", [tree])), [[(0, ), (0, 0), (0, 0, 0), (0, 1), (0, 1, 0)]], ) self.assertEqual( list(tgrep.tgrep_positions("* .. Y", [tree])), [[(0, ), (0, 0), (0, 0, 0), (0, 1), (0, 1, 0), (1, 0, 0), (1, 0, 0, 0)]], ) self.assertEqual(list(tgrep.tgrep_positions("* , X", [tree])), [[(1, 0, 1), (1, 0, 1, 0)]]) self.assertEqual( list(tgrep.tgrep_positions("* , Y", [tree])), [[(2, ), (2, 0), (2, 0, 0), (2, 0, 0, 0)]], ) self.assertEqual( list(tgrep.tgrep_positions("* ,, X", [tree])), [[(1, 0, 1), (1, 0, 1, 0), (2, ), (2, 0), (2, 0, 0), (2, 0, 0, 0)]], ) self.assertEqual( list(tgrep.tgrep_positions("* ,, Y", [tree])), [[(2, ), (2, 0), (2, 0, 0), (2, 0, 0, 0)]], )
def test_node_simple(self): ''' Test a simple use of tgrep for finding nodes matching a given pattern. ''' tree = ParentedTree.fromstring( '(S (NP (DT the) (JJ big) (NN dog)) ' '(VP bit) (NP (DT a) (NN cat)))') self.assertEqual(list(tgrep.tgrep_positions('NN', [tree])), [[(0,2), (2,1)]]) self.assertEqual(list(tgrep.tgrep_nodes('NN', [tree])), [[tree[0,2], tree[2,1]]]) self.assertEqual(list(tgrep.tgrep_positions('NN|JJ', [tree])), [[(0, 1), (0, 2), (2, 1)]])
def test_node_encoding(self): ''' Test that tgrep search strings handles bytes and strs the same way. ''' tree = ParentedTree.fromstring( '(S (NP (DT the) (JJ big) (NN dog)) ' '(VP bit) (NP (DT a) (NN cat)))') self.assertEqual(list(tgrep.tgrep_positions(b('NN'), [tree])), list(tgrep.tgrep_positions('NN', [tree]))) self.assertEqual(list(tgrep.tgrep_nodes(b('NN'), [tree])), list(tgrep.tgrep_nodes('NN', [tree]))) self.assertEqual(list(tgrep.tgrep_positions(b('NN|JJ'), [tree])), list(tgrep.tgrep_positions('NN|JJ', [tree])))
def test_bad_operator(self): ''' Test error handling of undefined tgrep operators. ''' tree = ParentedTree.fromstring('(S (A (T x)) (B (N x)))') self.assertRaises(tgrep.TgrepException, list, tgrep.tgrep_positions('* >>> S', [tree]))
def test_bad_operator(self): """ Test error handling of undefined tgrep operators. """ tree = ParentedTree.fromstring("(S (A (T x)) (B (N x)))") self.assertRaises(tgrep.TgrepException, list, tgrep.tgrep_positions("* >>> S", [tree]))
def test_bad_operator(self): ''' Test error handling of undefined tgrep operators. ''' tree = ParentedTree.fromstring('(S (A (T x)) (B (N x)))') self.assertRaises( tgrep.TgrepException, list, tgrep.tgrep_positions('* >>> S', [tree]) )
def test_node_regex(self): ''' Test regex matching on nodes. ''' tree = ParentedTree.fromstring('(S (NP-SBJ x) (NP x) (NNP x) (VP x))') # This is a regular expression that matches any node whose # name starts with NP, including NP-SBJ: self.assertEqual(list(tgrep.tgrep_positions('/^NP/', [tree])), [[(0,), (1,)]])
def test_node_encoding(self): """ Test that tgrep search strings handles bytes and strs the same way. """ tree = ParentedTree.fromstring("(S (NP (DT the) (JJ big) (NN dog)) " "(VP bit) (NP (DT a) (NN cat)))") self.assertEqual( list(tgrep.tgrep_positions(b"NN", [tree])), list(tgrep.tgrep_positions(b"NN", [tree])), ) self.assertEqual( list(tgrep.tgrep_nodes(b"NN", [tree])), list(tgrep.tgrep_nodes("NN", [tree])), ) self.assertEqual( list(tgrep.tgrep_positions(b"NN|JJ", [tree])), list(tgrep.tgrep_positions("NN|JJ", [tree])), )
def test_comments(self): ''' Test that comments are correctly filtered out of tgrep search strings. ''' tree = ParentedTree.fromstring('(S (NN x) (NP x) (NN x))') search1 = ''' @ NP /^NP/; @ NN /^NN/; @NN ''' self.assertEqual(list(tgrep.tgrep_positions(search1, [tree])), [[(0,), (2,)]]) search2 = ''' # macros @ NP /^NP/; @ NN /^NN/; # search string @NN ''' self.assertEqual(list(tgrep.tgrep_positions(search2, [tree])), [[(0,), (2,)]])
def test_comments(self): """ Test that comments are correctly filtered out of tgrep search strings. """ tree = ParentedTree.fromstring("(S (NN x) (NP x) (NN x))") search1 = """ @ NP /^NP/; @ NN /^NN/; @NN """ self.assertEqual(list(tgrep.tgrep_positions(search1, [tree])), [[(0,), (2,)]]) search2 = """ # macros @ NP /^NP/; @ NN /^NN/; # search string @NN """ self.assertEqual(list(tgrep.tgrep_positions(search2, [tree])), [[(0,), (2,)]])
def test_node_tree_position(self): """ Test matching on nodes based on NLTK tree position. """ tree = ParentedTree.fromstring("(S (NP-SBJ x) (NP x) (NNP x) (VP x))") # test all tree positions that are not leaves leaf_positions = {tree.leaf_treeposition(x) for x in range(len(tree.leaves()))} tree_positions = [x for x in tree.treepositions() if x not in leaf_positions] for position in tree_positions: node_id = f"N{position}" tgrep_positions = list(tgrep.tgrep_positions(node_id, [tree])) self.assertEqual(len(tgrep_positions[0]), 1) self.assertEqual(tgrep_positions[0][0], position)
def test_node_tree_position(self): ''' Test matching on nodes based on NLTK tree position. ''' tree = ParentedTree.fromstring('(S (NP-SBJ x) (NP x) (NNP x) (VP x))') # test all tree positions that are not leaves leaf_positions = set( tree.leaf_treeposition(x) for x in range(len(tree.leaves())) ) tree_positions = [x for x in tree.treepositions() if x not in leaf_positions] for position in tree_positions: node_id = 'N{0}'.format(position) tgrep_positions = list(tgrep.tgrep_positions(node_id, [tree])) self.assertEqual(len(tgrep_positions[0]), 1) self.assertEqual(tgrep_positions[0][0], position)
def test_node_tree_position(self): ''' Test matching on nodes based on NLTK tree position. ''' tree = ParentedTree.fromstring('(S (NP-SBJ x) (NP x) (NNP x) (VP x))') # test all tree positions that are not leaves leaf_positions = set([tree.leaf_treeposition(x) for x in range(len(tree.leaves()))]) tree_positions = [x for x in tree.treepositions() if x not in leaf_positions] for position in tree_positions: node_id = 'N{0}'.format(position) tgrep_positions = list(tgrep.tgrep_positions(node_id, [tree])) self.assertEqual(len(tgrep_positions[0]), 1) self.assertEqual(tgrep_positions[0][0], position)
def tests_rel_dominance(self): ''' Test matching nodes based on dominance relations. ''' tree = ParentedTree.fromstring('(S (A (T x)) (B (N x)))') self.assertEqual(list(tgrep.tgrep_positions('* < T', [tree])), [[(0,)]]) self.assertEqual(list(tgrep.tgrep_positions('* < T > S', [tree])), [[(0,)]]) self.assertEqual(list(tgrep.tgrep_positions('* !< T', [tree])), [[(), (0, 0), (0, 0, 0), (1,), (1, 0), (1, 0, 0)]]) self.assertEqual(list(tgrep.tgrep_positions('* !< T > S', [tree])), [[(1,)]]) self.assertEqual(list(tgrep.tgrep_positions('* > A', [tree])), [[(0, 0)]]) self.assertEqual(list(tgrep.tgrep_positions('* > B', [tree])), [[(1, 0)]]) self.assertEqual(list(tgrep.tgrep_positions('* !> B', [tree])), [[(), (0,), (0, 0), (0, 0, 0), (1,), (1, 0, 0)]]) self.assertEqual(list(tgrep.tgrep_positions('* !> B >> S', [tree])), [[(0,), (0, 0), (1,)]]) self.assertEqual(list(tgrep.tgrep_positions('* >> S', [tree])), [[(0,), (0, 0), (1,), (1, 0)]]) self.assertEqual(list(tgrep.tgrep_positions('* >>, S', [tree])), [[(0,), (0, 0)]]) self.assertEqual(list(tgrep.tgrep_positions('* >>\' S', [tree])), [[(1,), (1, 0)]]) # Known issue: #self.assertEqual(list(tgrep.tgrep_positions('* !>> S', [tree])), # [[()]]) self.assertEqual(list(tgrep.tgrep_positions('* << T', [tree])), [[(), (0,)]]) self.assertEqual(list(tgrep.tgrep_positions('* <<\' T', [tree])), [[(0,)]]) self.assertEqual(list(tgrep.tgrep_positions('* <<1 N', [tree])), [[(1,)]]) self.assertEqual(list(tgrep.tgrep_positions('* !<< T', [tree])), [[(0, 0), (0, 0, 0), (1,), (1, 0), (1, 0, 0)]]) tree = ParentedTree.fromstring('(S (A (T x)) (B (T x) (N x )))') self.assertEqual(list(tgrep.tgrep_positions('* <: T', [tree])), [[(0,)]]) self.assertEqual(list(tgrep.tgrep_positions('* < T', [tree])), [[(0,), (1,)]]) self.assertEqual(list(tgrep.tgrep_positions('* !<: T', [tree])), [[(), (0, 0), (0, 0, 0), (1,), (1, 0), (1, 0, 0), (1, 1), (1, 1, 0)]]) self.assertEqual(list(tgrep.tgrep_positions('* !<: T > S', [tree])), [[(1,)]]) tree = ParentedTree.fromstring('(S (T (A x) (B x)) (T (C x)))') self.assertEqual(list(tgrep.tgrep_positions('* >: T', [tree])), [[(1, 0)]]) self.assertEqual(list(tgrep.tgrep_positions('* !>: T', [tree])), [[(), (0,), (0, 0), (0, 0, 0), (0, 1), (0, 1, 0), (1,), (1, 0, 0)]]) tree = ParentedTree.fromstring('(S (A (B (C (D (E (T x))))))' ' (A (B (C (D (E (T x))) (N x)))))') self.assertEqual(list(tgrep.tgrep_positions('* <<: T', [tree])), [[(0,), (0, 0), (0, 0, 0), (0, 0, 0, 0), (0, 0, 0, 0, 0), (1, 0, 0, 0), (1, 0, 0, 0, 0)]]) self.assertEqual(list(tgrep.tgrep_positions('* >>: A', [tree])), [[(0, 0), (0, 0, 0), (0, 0, 0, 0), (0, 0, 0, 0, 0), (0, 0, 0, 0, 0, 0), (1, 0), (1, 0, 0)]])
def test_examples(self): ''' Test the Basic Examples from the TGrep2 manual. ''' tree = ParentedTree.fromstring('(S (NP (AP x)) (NP (PP x)))') # This matches any NP node that immediately dominates a PP: self.assertEqual(list(tgrep.tgrep_positions('NP < PP', [tree])), [[(1, )]]) tree = ParentedTree.fromstring('(S (NP x) (VP x) (NP (PP x)) (VP x))') # This matches an NP that dominates a PP and is immediately # followed by a VP: self.assertEqual(list(tgrep.tgrep_positions('NP << PP . VP', [tree])), [[(2, )]]) tree = ParentedTree.fromstring('(S (NP (AP x)) (NP (PP x)) ' '(NP (DET x) (NN x)) (VP x))') # This matches an NP that dominates a PP or is immediately # followed by a VP: self.assertEqual( list(tgrep.tgrep_positions('NP << PP | . VP', [tree])), [[(1, ), (2, )]]) tree = ParentedTree.fromstring('(S (NP (NP (PP x)) (NP (AP x)))' ' (VP (AP (NP (PP x)) (NP (AP x))))' ' (NP (RC (NP (AP x)))))') # This matches an NP that does not dominate a PP. Also, the NP # must either have a parent that is an NP or be dominated by a # VP: self.assertEqual( list(tgrep.tgrep_positions('NP !<< PP [> NP | >> VP]', [tree])), [[(0, 1), (1, 0, 1)]]) tree = ParentedTree.fromstring('(S (NP (AP (PP x) (VP x))) ' '(NP (AP (PP x) (NP x))) (NP x))') # This matches an NP that dominates a PP which itself is # immediately followed by a VP. Note the use of parentheses to # group ". VP" with the PP rather than with the NP: self.assertEqual( list(tgrep.tgrep_positions('NP << (PP . VP)', [tree])), [[(0, )]]) tree = ParentedTree.fromstring( '(S (NP (DET a) (NN cat) (PP (IN on) (NP x)))' ' (NP (DET a) (NN cat) (PP (IN on) (NP x)) (PP x))' ' (NP x))') # This matches an NP whose last child is a PP that begins with # the preposition "on": self.assertEqual( list(tgrep.tgrep_positions('NP <\' (PP <, (IN < on))', [tree])), [[(0, )]]) tree = ParentedTree.fromstring( '(S (S (C x) (A (B x))) (S (C x) (A x)) ' '(S (D x) (A (B x))))') # The following pattern matches an S which has a child A and # another child that is a C and that the A has a child B: self.assertEqual( list(tgrep.tgrep_positions('S < (A < B) < C', [tree])), [[(0, )]]) tree = ParentedTree.fromstring( '(S (S (A (B x) (C x))) (S (S (C x) (A (B x)))))') # However, this pattern means that S has child A and that A # has children B and C: self.assertEqual( list(tgrep.tgrep_positions('S < ((A < B) < C)', [tree])), [[(0, )]]) # It is equivalent to this: self.assertEqual( list(tgrep.tgrep_positions('S < (A < B < C)', [tree])), [[(0, )]])
def tests_rel_indexed_children(self): ''' Test matching nodes based on their index in their parent node. ''' tree = ParentedTree.fromstring('(S (A x) (B x) (C x))') self.assertEqual(list(tgrep.tgrep_positions('* >, S', [tree])), [[(0,)]]) self.assertEqual(list(tgrep.tgrep_positions('* >1 S', [tree])), [[(0,)]]) self.assertEqual(list(tgrep.tgrep_positions('* >2 S', [tree])), [[(1,)]]) self.assertEqual(list(tgrep.tgrep_positions('* >3 S', [tree])), [[(2,)]]) self.assertEqual(list(tgrep.tgrep_positions('* >\' S', [tree])), [[(2,)]]) self.assertEqual(list(tgrep.tgrep_positions('* >-1 S', [tree])), [[(2,)]]) self.assertEqual(list(tgrep.tgrep_positions('* >-2 S', [tree])), [[(1,)]]) self.assertEqual(list(tgrep.tgrep_positions('* >-3 S', [tree])), [[(0,)]]) tree = ParentedTree.fromstring( '(S (D (A x) (B x) (C x)) (E (B x) (C x) (A x)) ' '(F (C x) (A x) (B x)))') self.assertEqual(list(tgrep.tgrep_positions('* <, A', [tree])), [[(0,)]]) self.assertEqual(list(tgrep.tgrep_positions('* <1 A', [tree])), [[(0,)]]) self.assertEqual(list(tgrep.tgrep_positions('* <2 A', [tree])), [[(2,)]]) self.assertEqual(list(tgrep.tgrep_positions('* <3 A', [tree])), [[(1,)]]) self.assertEqual(list(tgrep.tgrep_positions('* <\' A', [tree])), [[(1,)]]) self.assertEqual(list(tgrep.tgrep_positions('* <-1 A', [tree])), [[(1,)]]) self.assertEqual(list(tgrep.tgrep_positions('* <-2 A', [tree])), [[(2,)]]) self.assertEqual(list(tgrep.tgrep_positions('* <-3 A', [tree])), [[(0,)]])
def tests_rel_indexed_children(self): ''' Test matching nodes based on their index in their parent node. ''' tree = ParentedTree.fromstring('(S (A x) (B x) (C x))') self.assertEqual(list(tgrep.tgrep_positions('* >, S', [tree])), [[(0, )]]) self.assertEqual(list(tgrep.tgrep_positions('* >1 S', [tree])), [[(0, )]]) self.assertEqual(list(tgrep.tgrep_positions('* >2 S', [tree])), [[(1, )]]) self.assertEqual(list(tgrep.tgrep_positions('* >3 S', [tree])), [[(2, )]]) self.assertEqual(list(tgrep.tgrep_positions('* >\' S', [tree])), [[(2, )]]) self.assertEqual(list(tgrep.tgrep_positions('* >-1 S', [tree])), [[(2, )]]) self.assertEqual(list(tgrep.tgrep_positions('* >-2 S', [tree])), [[(1, )]]) self.assertEqual(list(tgrep.tgrep_positions('* >-3 S', [tree])), [[(0, )]]) tree = ParentedTree.fromstring( '(S (D (A x) (B x) (C x)) (E (B x) (C x) (A x)) ' '(F (C x) (A x) (B x)))') self.assertEqual(list(tgrep.tgrep_positions('* <, A', [tree])), [[(0, )]]) self.assertEqual(list(tgrep.tgrep_positions('* <1 A', [tree])), [[(0, )]]) self.assertEqual(list(tgrep.tgrep_positions('* <2 A', [tree])), [[(2, )]]) self.assertEqual(list(tgrep.tgrep_positions('* <3 A', [tree])), [[(1, )]]) self.assertEqual(list(tgrep.tgrep_positions('* <\' A', [tree])), [[(1, )]]) self.assertEqual(list(tgrep.tgrep_positions('* <-1 A', [tree])), [[(1, )]]) self.assertEqual(list(tgrep.tgrep_positions('* <-2 A', [tree])), [[(2, )]]) self.assertEqual(list(tgrep.tgrep_positions('* <-3 A', [tree])), [[(0, )]])
def test_examples(self): ''' Test the Basic Examples from the TGrep2 manual. ''' tree = ParentedTree.fromstring('(S (NP (AP x)) (NP (PP x)))') # This matches any NP node that immediately dominates a PP: self.assertEqual(list(tgrep.tgrep_positions('NP < PP', [tree])), [[(1,)]]) tree = ParentedTree.fromstring('(S (NP x) (VP x) (NP (PP x)) (VP x))') # This matches an NP that dominates a PP and is immediately # followed by a VP: self.assertEqual(list(tgrep.tgrep_positions('NP << PP . VP', [tree])), [[(2,)]]) tree = ParentedTree.fromstring('(S (NP (AP x)) (NP (PP x)) ' '(NP (DET x) (NN x)) (VP x))') # This matches an NP that dominates a PP or is immediately # followed by a VP: self.assertEqual(list(tgrep.tgrep_positions('NP << PP | . VP', [tree])), [[(1,), (2,)]]) tree = ParentedTree.fromstring('(S (NP (NP (PP x)) (NP (AP x)))' ' (VP (AP (NP (PP x)) (NP (AP x))))' ' (NP (RC (NP (AP x)))))') # This matches an NP that does not dominate a PP. Also, the NP # must either have a parent that is an NP or be dominated by a # VP: self.assertEqual(list(tgrep.tgrep_positions( 'NP !<< PP [> NP | >> VP]', [tree])), [[(0, 1), (1, 0, 1)]]) tree = ParentedTree.fromstring('(S (NP (AP (PP x) (VP x))) ' '(NP (AP (PP x) (NP x))) (NP x))') # This matches an NP that dominates a PP which itself is # immediately followed by a VP. Note the use of parentheses to # group ". VP" with the PP rather than with the NP: self.assertEqual(list(tgrep.tgrep_positions('NP << (PP . VP)', [tree])), [[(0,)]]) tree = ParentedTree.fromstring( '(S (NP (DET a) (NN cat) (PP (IN on) (NP x)))' ' (NP (DET a) (NN cat) (PP (IN on) (NP x)) (PP x))' ' (NP x))') # This matches an NP whose last child is a PP that begins with # the preposition "on": self.assertEqual(list(tgrep.tgrep_positions( 'NP <\' (PP <, (IN < on))', [tree])), [[(0,)]]) tree = ParentedTree.fromstring( '(S (S (C x) (A (B x))) (S (C x) (A x)) ' '(S (D x) (A (B x))))') # The following pattern matches an S which has a child A and # another child that is a C and that the A has a child B: self.assertEqual(list(tgrep.tgrep_positions('S < (A < B) < C', [tree])), [[(0,)]]) tree = ParentedTree.fromstring( '(S (S (A (B x) (C x))) (S (S (C x) (A (B x)))))') # However, this pattern means that S has child A and that A # has children B and C: self.assertEqual(list(tgrep.tgrep_positions('S < ((A < B) < C)', [tree])), [[(0,)]]) # It is equivalent to this: self.assertEqual(list(tgrep.tgrep_positions('S < (A < B < C)', [tree])), [[(0,)]])
def tests_rel_dominance(self): ''' Test matching nodes based on dominance relations. ''' tree = ParentedTree.fromstring('(S (A (T x)) (B (N x)))') self.assertEqual(list(tgrep.tgrep_positions('* < T', [tree])), [[(0, )]]) self.assertEqual(list(tgrep.tgrep_positions('* < T > S', [tree])), [[(0, )]]) self.assertEqual(list(tgrep.tgrep_positions('* !< T', [tree])), [[(), (0, 0), (0, 0, 0), (1, ), (1, 0), (1, 0, 0)]]) self.assertEqual(list(tgrep.tgrep_positions('* !< T > S', [tree])), [[(1, )]]) self.assertEqual(list(tgrep.tgrep_positions('* > A', [tree])), [[(0, 0)]]) self.assertEqual(list(tgrep.tgrep_positions('* > B', [tree])), [[(1, 0)]]) self.assertEqual(list(tgrep.tgrep_positions('* !> B', [tree])), [[(), (0, ), (0, 0), (0, 0, 0), (1, ), (1, 0, 0)]]) self.assertEqual(list(tgrep.tgrep_positions('* !> B >> S', [tree])), [[(0, ), (0, 0), (1, )]]) self.assertEqual(list(tgrep.tgrep_positions('* >> S', [tree])), [[(0, ), (0, 0), (1, ), (1, 0)]]) self.assertEqual(list(tgrep.tgrep_positions('* >>, S', [tree])), [[(0, ), (0, 0)]]) self.assertEqual(list(tgrep.tgrep_positions('* >>\' S', [tree])), [[(1, ), (1, 0)]]) # Known issue: #self.assertEqual(list(tgrep.tgrep_positions('* !>> S', [tree])), # [[()]]) self.assertEqual(list(tgrep.tgrep_positions('* << T', [tree])), [[(), (0, )]]) self.assertEqual(list(tgrep.tgrep_positions('* <<\' T', [tree])), [[(0, )]]) self.assertEqual(list(tgrep.tgrep_positions('* <<1 N', [tree])), [[(1, )]]) self.assertEqual(list(tgrep.tgrep_positions('* !<< T', [tree])), [[(0, 0), (0, 0, 0), (1, ), (1, 0), (1, 0, 0)]]) tree = ParentedTree.fromstring('(S (A (T x)) (B (T x) (N x )))') self.assertEqual(list(tgrep.tgrep_positions('* <: T', [tree])), [[(0, )]]) self.assertEqual(list(tgrep.tgrep_positions('* < T', [tree])), [[(0, ), (1, )]]) self.assertEqual(list(tgrep.tgrep_positions('* !<: T', [tree])), [[(), (0, 0), (0, 0, 0), (1, ), (1, 0), (1, 0, 0), (1, 1), (1, 1, 0)]]) self.assertEqual(list(tgrep.tgrep_positions('* !<: T > S', [tree])), [[(1, )]]) tree = ParentedTree.fromstring('(S (T (A x) (B x)) (T (C x)))') self.assertEqual(list(tgrep.tgrep_positions('* >: T', [tree])), [[(1, 0)]]) self.assertEqual(list(tgrep.tgrep_positions('* !>: T', [tree])), [[(), (0, ), (0, 0), (0, 0, 0), (0, 1), (0, 1, 0), (1, ), (1, 0, 0)]]) tree = ParentedTree.fromstring('(S (A (B (C (D (E (T x))))))' ' (A (B (C (D (E (T x))) (N x)))))') self.assertEqual(list(tgrep.tgrep_positions('* <<: T', [tree])), [[(0, ), (0, 0), (0, 0, 0), (0, 0, 0, 0), (0, 0, 0, 0, 0), (1, 0, 0, 0), (1, 0, 0, 0, 0)]]) self.assertEqual(list(tgrep.tgrep_positions('* >>: A', [tree])), [[(0, 0), (0, 0, 0), (0, 0, 0, 0), (0, 0, 0, 0, 0), (0, 0, 0, 0, 0, 0), (1, 0), (1, 0, 0)]])
def tests_rel_dominance(self): """ Test matching nodes based on dominance relations. """ tree = ParentedTree.fromstring("(S (A (T x)) (B (N x)))") self.assertEqual(list(tgrep.tgrep_positions("* < T", [tree])), [[(0, )]]) self.assertEqual(list(tgrep.tgrep_positions("* < T > S", [tree])), [[(0, )]]) self.assertEqual( list(tgrep.tgrep_positions("* !< T", [tree])), [[(), (0, 0), (0, 0, 0), (1, ), (1, 0), (1, 0, 0)]], ) self.assertEqual(list(tgrep.tgrep_positions("* !< T > S", [tree])), [[(1, )]]) self.assertEqual(list(tgrep.tgrep_positions("* > A", [tree])), [[(0, 0)]]) self.assertEqual(list(tgrep.tgrep_positions("* > B", [tree])), [[(1, 0)]]) self.assertEqual( list(tgrep.tgrep_positions("* !> B", [tree])), [[(), (0, ), (0, 0), (0, 0, 0), (1, ), (1, 0, 0)]], ) self.assertEqual(list(tgrep.tgrep_positions("* !> B >> S", [tree])), [[(0, ), (0, 0), (1, )]]) self.assertEqual( list(tgrep.tgrep_positions("* >> S", [tree])), [[(0, ), (0, 0), (1, ), (1, 0)]], ) self.assertEqual(list(tgrep.tgrep_positions("* >>, S", [tree])), [[(0, ), (0, 0)]]) self.assertEqual(list(tgrep.tgrep_positions("* >>' S", [tree])), [[(1, ), (1, 0)]]) # Known issue: # self.assertEqual(list(tgrep.tgrep_positions('* !>> S', [tree])), # [[()]]) self.assertEqual(list(tgrep.tgrep_positions("* << T", [tree])), [[(), (0, )]]) self.assertEqual(list(tgrep.tgrep_positions("* <<' T", [tree])), [[(0, )]]) self.assertEqual(list(tgrep.tgrep_positions("* <<1 N", [tree])), [[(1, )]]) self.assertEqual( list(tgrep.tgrep_positions("* !<< T", [tree])), [[(0, 0), (0, 0, 0), (1, ), (1, 0), (1, 0, 0)]], ) tree = ParentedTree.fromstring("(S (A (T x)) (B (T x) (N x )))") self.assertEqual(list(tgrep.tgrep_positions("* <: T", [tree])), [[(0, )]]) self.assertEqual(list(tgrep.tgrep_positions("* < T", [tree])), [[(0, ), (1, )]]) self.assertEqual( list(tgrep.tgrep_positions("* !<: T", [tree])), [[(), (0, 0), (0, 0, 0), (1, ), (1, 0), (1, 0, 0), (1, 1), (1, 1, 0)]], ) self.assertEqual(list(tgrep.tgrep_positions("* !<: T > S", [tree])), [[(1, )]]) tree = ParentedTree.fromstring("(S (T (A x) (B x)) (T (C x)))") self.assertEqual(list(tgrep.tgrep_positions("* >: T", [tree])), [[(1, 0)]]) self.assertEqual( list(tgrep.tgrep_positions("* !>: T", [tree])), [[(), (0, ), (0, 0), (0, 0, 0), (0, 1), (0, 1, 0), (1, ), (1, 0, 0)]], ) tree = ParentedTree.fromstring("(S (A (B (C (D (E (T x))))))" " (A (B (C (D (E (T x))) (N x)))))") self.assertEqual( list(tgrep.tgrep_positions("* <<: T", [tree])), [[ (0, ), (0, 0), (0, 0, 0), (0, 0, 0, 0), (0, 0, 0, 0, 0), (1, 0, 0, 0), (1, 0, 0, 0, 0), ]], ) self.assertEqual( list(tgrep.tgrep_positions("* >>: A", [tree])), [[ (0, 0), (0, 0, 0), (0, 0, 0, 0), (0, 0, 0, 0, 0), (0, 0, 0, 0, 0, 0), (1, 0), (1, 0, 0), ]], )
def tests_rel_indexed_children(self): """ Test matching nodes based on their index in their parent node. """ tree = ParentedTree.fromstring("(S (A x) (B x) (C x))") self.assertEqual(list(tgrep.tgrep_positions("* >, S", [tree])), [[(0, )]]) self.assertEqual(list(tgrep.tgrep_positions("* >1 S", [tree])), [[(0, )]]) self.assertEqual(list(tgrep.tgrep_positions("* >2 S", [tree])), [[(1, )]]) self.assertEqual(list(tgrep.tgrep_positions("* >3 S", [tree])), [[(2, )]]) self.assertEqual(list(tgrep.tgrep_positions("* >' S", [tree])), [[(2, )]]) self.assertEqual(list(tgrep.tgrep_positions("* >-1 S", [tree])), [[(2, )]]) self.assertEqual(list(tgrep.tgrep_positions("* >-2 S", [tree])), [[(1, )]]) self.assertEqual(list(tgrep.tgrep_positions("* >-3 S", [tree])), [[(0, )]]) tree = ParentedTree.fromstring( "(S (D (A x) (B x) (C x)) (E (B x) (C x) (A x)) " "(F (C x) (A x) (B x)))") self.assertEqual(list(tgrep.tgrep_positions("* <, A", [tree])), [[(0, )]]) self.assertEqual(list(tgrep.tgrep_positions("* <1 A", [tree])), [[(0, )]]) self.assertEqual(list(tgrep.tgrep_positions("* <2 A", [tree])), [[(2, )]]) self.assertEqual(list(tgrep.tgrep_positions("* <3 A", [tree])), [[(1, )]]) self.assertEqual(list(tgrep.tgrep_positions("* <' A", [tree])), [[(1, )]]) self.assertEqual(list(tgrep.tgrep_positions("* <-1 A", [tree])), [[(1, )]]) self.assertEqual(list(tgrep.tgrep_positions("* <-2 A", [tree])), [[(2, )]]) self.assertEqual(list(tgrep.tgrep_positions("* <-3 A", [tree])), [[(0, )]])