示例#1
0
def ObtainWRTG(weighted_tree_pair, print_result=True):
  """
  Build the wRTG that explains one weighted source/target tree pair.

  The pair is a 3-tuple (source tree string, target tree string or None,
  weight). The source (and the target, when given) is parsed and made
  immutable, then handed to the global transducer. The weights of the RTG
  are not estimated here.

  Returns:
    (wrtg, weighted_tree_pair) when the resulting wRTG has productions,
    otherwise (None, None).

  When print_result is true, a one-character progress marker is written to
  stderr: 'o' on success and 'x' on failure, upper-cased when no target
  tree was supplied.

  Global variables used here (bad practice, but needed for parallelization):
    * transducer
    * feat_inst
    * model_class
    * GetScoreOfDerivation
    * CombineScoresOfDerivations
  """
  src_str, trg_str, _pair_weight = weighted_tree_pair
  intree = immutable(tree_or_string(src_str))
  if trg_str is None:
    outtree = None
  else:
    outtree = immutable(tree_or_string(trg_str))
  wrtg = transducer.Transduce(intree, outtree, convert_to_prob=False)
  sys.stdout.flush()

  # A wRTG without productions means the pair could not be explained.
  explained = bool(wrtg.P)
  if explained:
    wrtg.ScoreDerivation = GetScoreOfDerivation
    wrtg.CombineDerivationScores = CombineScoresOfDerivations
    if feat_inst:
      feat_inst.SetContext({'src_tree' : src_str})
    model_class.populate_wrtg_feats(wrtg, feat_inst)

  if print_result:
    marker = 'o' if explained else 'x'
    if outtree is None:
      marker = marker.upper()
    print(marker, end='', file=sys.stderr)

  if explained:
    return (wrtg, weighted_tree_pair)
  return (None, None)
示例#2
0
 def test_Preterminal(self):
     """Producing '(B D E)' -> 'U' from state 'q' yields the matching rule's production."""
     src = immutable(tree_or_string('(B D E)'))
     trg = immutable(tree_or_string('U'))
     productions, _ = self.transducer.Produce(src, trg, 'q', (), ())
     # Rule whose LHS covers the whole input tree and whose RHS is the leaf 'U'.
     rule = XTRule('q',
                   tree_or_string('(B ?x0| ?x1|)'),
                   tree_or_string('U'),
                   {},
                   1.0)
     expected = Production(('q', (), ()), RHS(rule), rule.weight)
     self.assertIn(expected, productions)
示例#3
0
    def test_Nonterminal(self):
        """Produce() on the nested tree pair yields exactly the eight expected productions."""
        src = immutable(tree_or_string('(A (B D E) (C F G))'))
        trg = immutable(tree_or_string('(A (R (T V W) U) (S X))'))
        productions, _ = self.transducer.Produce(src, trg, 'q', (), ())

        # One row per expected production:
        #   (production LHS, index into self.rules, RHS non-terminals).
        # A row with None as third field builds its RHS from the rule alone.
        table = [
            (('q', (), ()), 0,
             [('q', (0, ), (0, 1)), ('q', (1, ), (0, 0))]),
            (('q', (0,), (0, 1)), 1, None),
            (('q', (1,), (0, 0)), 2,
             [('q', (1, 0), (0, 0, 0)), ('q', (1, 1), (0, 0, 1))]),
            (('q', (1,), (0, 0)), 3,
             [('q', (1, 0), (0, 0, 1)), ('q', (1, 1), (0, 0, 0))]),
            (('q', (1, 0), (0, 0, 0)), 4, None),
            (('q', (1, 1), (0, 0, 1)), 7, None),
            (('q', (1, 0), (0, 0, 1)), 5, None),
            (('q', (1, 1), (0, 0, 0)), 6, None),
        ]

        expected_productions = []
        for lhs, rule_index, non_terminals in table:
            rule = self.rules[rule_index]
            if non_terminals is None:
                deriv_rhs = RHS(rule)
            else:
                deriv_rhs = RHS(rule, non_terminals)
            expected_productions.append(
                Production(lhs, deriv_rhs, rule.weight))

        self.assertEqual(len(expected_productions), len(productions))
        for expected in expected_productions:
            self.assertIn(expected, productions)
示例#4
0
 def test_NonterminalPreterminalIdentity(self):
     """
     Using the Identity back-off, the state of the parent rule
     is applied to the paths of the variables in the RHS.
     However, the states of the paths of the variables in the RHS
     should be more specific: "copy" and "hypernym".
     """
     source = tree_or_string('(NP (DT the) (NN dog))')
     rules = [
         XTRule('q',
                tree_or_string('(NP ?x0|DT ?x1|NN)'),
                tree_or_string('(NPP ?x0|DTT ?x1|NNN)'),
                {(0, ): 'q', (1, ): 'q'},
                1.0),
         XTRule('q',
                tree_or_string('(DT ?x0|)'),
                tree_or_string('(DTT ?x0|)'),
                {(0, ): 'copy'},
                1.0),
         XTRule('copy', tree_or_string('the'), tree_or_string('the'),
                {}, 1.0),
         XTRule('hypernym', tree_or_string('dog'),
                tree_or_string('canine'), {}, 1.0),
     ]
     # Identity is the only back-off available to the transducer here.
     transducer = xT('q', rules, [Identity()])
     wrtg = transducer.Transduce(source, None)
     outtrees = []
     for tree, _ in wrtg.GenerateNBestTrees():
         outtrees.append(tree)
     expected = immutable(tree_or_string('(NPP (DTT the) (NN canine))'))
     self.assertIn(expected, outtrees)
示例#5
0
 def test_Nonterminal(self):
     """Produce() on the nested tree pair reports every expected non-terminal."""
     src = immutable(tree_or_string('(A (B D E) (C F G))'))
     trg = immutable(tree_or_string('(A (R (T V W) U) (S X))'))
     productions, non_terminals = self.transducer.Produce(
         src, trg, 'q', (), ())
     # Each expected non-terminal is a 4-tuple whose last field is ''.
     expected_non_terminals = [
         ('q', (), (), ''),
         ('q', (0, ), (0, 1), ''),
         ('q', (1, ), (0, 0), ''),
         ('q', (1, 0), (0, 0, 0), ''),
         ('q', (1, 1), (0, 0, 1), ''),
         ('q', (1, 0), (0, 0, 1), ''),
         ('q', (1, 1), (0, 0, 0), ''),
     ]
     for expected in expected_non_terminals:
         self.assertIn(expected, non_terminals)
示例#6
0
文件: wrtg.py 项目: ct-clmsn/t2t-qa
 def GenerateNBestTrees(self, max_derivations = 50, direction = 'target'):
   """
   Return the distinct trees found among the first derivations, heaviest first.

   At most `max_derivations` derivations are consumed from
   self.GenerateTrees(direction). Derivations that yield the same tree
   (compared after immutable()) have their weights added together.

   Args:
     max_derivations: upper bound on the number of derivations consumed.
     direction: forwarded unchanged to self.GenerateTrees.

   Returns:
     A list of (immutable tree, accumulated weight) tuples sorted by
     decreasing accumulated weight.
   """
   accumulated_tree_weight = defaultdict(float)
   for i, (tree, weight) in enumerate(self.GenerateTrees(direction)):
     # `i` is zero-based, so `>=` caps the loop at exactly max_derivations
     # derivations; `>` would let one extra derivation through.
     if i >= max_derivations:
       break
     accumulated_tree_weight[immutable(tree)] += float(weight)
   return sorted(accumulated_tree_weight.items(),
                 key=lambda item: item[1], reverse=True)
示例#7
0
文件: wrtg.py 项目: ct-clmsn/t2t-qa
 def GenerateNBestTreesMax(self, max_derivations = 50, direction = 'target'):
   """
   Lazily yield each distinct tree once, with the weight of its first derivation.

   At most `max_derivations` derivations are consumed from
   self.GenerateTrees(direction). A derivation whose tree (compared after
   immutable()) was already seen is skipped.

   Args:
     max_derivations: upper bound on the number of derivations consumed
       (not on the number of trees yielded).
     direction: forwarded unchanged to self.GenerateTrees.

   Yields:
     (tree, weight) pairs exactly as produced by self.GenerateTrees.
   """
   tree_to_weight = {}
   for i, (tree, weight) in enumerate(self.GenerateTrees(direction)):
     # `i` is zero-based, so `>=` caps the loop at exactly max_derivations
     # derivations; `>` would let one extra derivation through.
     if i >= max_derivations:
       break
     tree_immutable = immutable(tree)
     current_weight = float(weight)
     if tree_immutable in tree_to_weight:
       # Sanity check: a repeated tree must not outweigh its first
       # occurrence. Presumably GenerateTrees yields derivations in
       # non-increasing weight order -- TODO confirm.
       assert tree_to_weight[tree_immutable] >= current_weight
       continue
     tree_to_weight[tree_immutable] = current_weight
     yield tree, weight
示例#8
0
 def test_PreterminalIdentity(self):
     """With Identity and LexicalSimilarity back-offs, '(NN dog)' can become '(NN perro)'."""
     source = tree_or_string('(NN dog)')
     # The only explicit rule rewrites the leaf 'dog' into 'perro'.
     rules = [
         XTRule('q', tree_or_string('dog'), tree_or_string('perro'), {},
                1.0),
     ]
     transducer = xT('q', rules, [Identity(), LexicalSimilarity()])
     wrtg = transducer.Transduce(source, None)
     outtrees = []
     for tree, _ in wrtg.GenerateNBestTrees():
         outtrees.append(tree)
     expected = immutable(tree_or_string('(NN perro)'))
     self.assertIn(expected, outtrees)
示例#9
0
 def test_PreterminalUnseenTerminalEqual(self):
     """With the LexicalSimilarity back-off, the leaf 'dog' (absent from the rules) is copied under JJ."""
     source = tree_or_string('(NN dog)')
     rules = [
         XTRule('q',
                tree_or_string('(NN ?x0|)'),
                tree_or_string('(JJ ?x0|)'),
                {(0, ): 'copy'},
                1.0),
         # The only 'copy' rule mentions 'italian', not 'dog'.
         XTRule('copy', tree_or_string('italian'),
                tree_or_string('italian'), {}, 1.0),
     ]
     transducer = xT('q', rules, [LexicalSimilarity()])
     wrtg = transducer.Transduce(source, None)
     outtrees = []
     for tree, _ in wrtg.GenerateNBestTrees():
         outtrees.append(tree)
     expected = immutable(tree_or_string('(JJ dog)'))
     self.assertIn(expected, outtrees)
示例#10
0
文件: wrtg.py 项目: ct-clmsn/t2t-qa
 def GenerateNBestTreesMax_(self, max_derivations = 50, direction = 'target'):
   """
   Return the distinct trees among the first derivations, heaviest first.

   At most `max_derivations` derivations are consumed from
   self.GenerateTrees(direction). Each distinct tree (compared after
   immutable()) keeps the weight of its first derivation; later
   derivations of the same tree are skipped.

   Args:
     max_derivations: upper bound on the number of derivations consumed.
     direction: forwarded unchanged to self.GenerateTrees.

   Returns:
     A list of (immutable tree, weight) tuples sorted by decreasing weight.
   """
   tree_to_weight = {}
   for i, (tree, weight) in enumerate(self.GenerateTrees(direction)):
     # `i` is zero-based, so `>=` caps the loop at exactly max_derivations
     # derivations; `>` would let one extra derivation through.
     if i >= max_derivations:
       break
     tree_immutable = immutable(tree)
     current_weight = float(weight)
     if tree_immutable in tree_to_weight:
       # Sanity check: a repeated tree must not outweigh its first
       # occurrence. Presumably GenerateTrees yields derivations in
       # non-increasing weight order -- TODO confirm.
       assert tree_to_weight[tree_immutable] >= current_weight
       continue
     tree_to_weight[tree_immutable] = current_weight
   return sorted(tree_to_weight.items(),
                 key=lambda item: item[1], reverse=True)
示例#11
0
 def test_NonConsumingLHSAvoidsInfiniteRTG(self):
     """Rules whose LHS is a bare variable still let '(NN dog)' reach '(JJ canine)'."""
     source = tree_or_string('(NN dog)')
     rules = [
         # This rule stays in state 'q' while its LHS is only a variable.
         XTRule('q',
                tree_or_string('?x0|NN'),
                tree_or_string('(NN ?x0|)'),
                {(0, ): 'q'},
                0.9),
         XTRule('q',
                tree_or_string('?x0|NN'),
                tree_or_string('(JJ ?x0|)'),
                {(0, ): 't'},
                0.9),
         XTRule('t', tree_or_string('(NN dog)'),
                tree_or_string('canine'), {}, 1.0),
     ]
     transducer = xT('q', rules)
     wrtg = transducer.Transduce(source, None)
     outtrees = []
     for tree, _ in wrtg.GenerateNBestTrees():
         outtrees.append(tree)
     expected = immutable(tree_or_string('(JJ canine)'))
     self.assertIn(expected, outtrees)
示例#12
0
    def test_OnlySourceDifferentVarTypes(self):
        """Without a target tree, Produce() keeps rule0 and rule1 but not rule2."""
        rule0 = XTRule('q',
                       tree_or_string('(A ?x0|AA)'),
                       tree_or_string('(a ?x0|aa)'),
                       {(0, ): 't'},
                       1.0)
        rule1 = XTRule('t', tree_or_string('(AA AAA)'),
                       tree_or_string('(aa aaa)'), {}, 1.0)
        # Same LHS as rule1, but the RHS root is 'bb' instead of 'aa'.
        rule2 = XTRule('t', tree_or_string('(AA AAA)'),
                       tree_or_string('(bb bbb)'), {}, 1.0)
        self.transducer = xT('q', [rule0, rule1, rule2])
        source = immutable(tree_or_string('(A (AA AAA))'))
        productions, _ = self.transducer.Produce(source, None, 'q', (), ())

        self.assertEqual(2, len(productions))
        used_rules = [production.rhs.rule for production in productions]
        self.assertIn(rule0, used_rules)
        self.assertIn(rule1, used_rules)
        self.assertNotIn(rule2, used_rules)
示例#13
0
 def test_PreterminalIdentityUnseenTerminalSimilar(self):
     """
     Using the Identity back-off, the state of the parent rule
     is applied to the path of the variable in the RHS.
     However, the state of the path of the variable in the RHS
     should be more specific: "hypernym".
     """
     source = tree_or_string('(NN dog)')
     # The only rule is a 'hypernym' rule over a different word.
     rules = [
         XTRule('hypernym', tree_or_string('italian'),
                tree_or_string('european'), {}, 1.0),
     ]
     transducer = xT('q', rules, [Identity(), LexicalSimilarity()])
     wrtg = transducer.Transduce(source, None)
     outtrees = []
     for tree, _ in wrtg.GenerateNBestTrees():
         outtrees.append(tree)
     expected = immutable(tree_or_string('(NN canine)'))
     self.assertIn(expected, outtrees)
示例#14
0
 def test_NonterminalIdentityNoBackoff(self):
     """With an empty back-off list and no rule for NP, the NP-rooted output must not appear."""
     source = tree_or_string('(NP (DT the) (NN dog))')
     rules = [
         XTRule('q',
                tree_or_string('(DT ?x0|)'),
                tree_or_string('(DTT ?x0|)'),
                {(0, ): 'copy'},
                1.0),
         XTRule('copy', tree_or_string('the'), tree_or_string('the'),
                {}, 1.0),
         XTRule('q',
                tree_or_string('(NN ?x0|)'),
                tree_or_string('(NNN ?x0|)'),
                {(0, ): 'hypernym'},
                1.0),
         XTRule('hypernym', tree_or_string('dog'),
                tree_or_string('canine'), {}, 1.0),
     ]
     # No back-offs are supplied to the transducer.
     transducer = xT('q', rules, [])
     wrtg = transducer.Transduce(source, None)
     outtrees = []
     for tree, _ in wrtg.GenerateNBestTrees():
         outtrees.append(tree)
     unexpected = immutable(
         tree_or_string('(NP (DTT the) (NNN canine))'))
     self.assertNotIn(unexpected, outtrees)
示例#15
0
 def test_NonterminalUnseenTerminalEqualAndSimilar(self):
     """With the LexicalSimilarity back-off, 'the' is kept and 'dog' becomes 'canine'."""
     source = tree_or_string('(NP (DT the) (NN dog))')
     rules = [
         XTRule('q',
                tree_or_string('(NP (DT ?x0|) (NN ?x1|))'),
                tree_or_string('(NP (DT ?x0|) (NN ?x1|))'),
                {(0, 0): 'copy', (1, 0): 'hypernym'},
                1.0),
         XTRule('copy', tree_or_string('the'), tree_or_string('the'),
                {}, 1.0),
         # The only 'hypernym' rule is about 'italian', not 'dog'.
         XTRule('hypernym', tree_or_string('italian'),
                tree_or_string('european'), {}, 1.0),
     ]
     transducer = xT('q', rules, [LexicalSimilarity()])
     wrtg = transducer.Transduce(source, None)
     outtrees = []
     for tree, _ in wrtg.GenerateNBestTrees():
         outtrees.append(tree)
     expected = immutable(
         tree_or_string('(NP (DT the) (NN canine))'))
     self.assertIn(expected, outtrees)
示例#16
0
 def test_PreterminalEmptyRHSfail(self):
     """Producing '(B D E)' -> 'Z' from state 'q' must yield no productions."""
     src = immutable(tree_or_string('(B D E)'))
     trg = immutable(tree_or_string('Z'))
     productions, _ = self.transducer.Produce(src, trg, 'q', (), ())
     self.assertEqual(0, len(productions))