Exemplo n.º 1
0
def _get_rule_penalty(rule, config, examples, verbose=False):
    """Computes a penalty for `rule` from match counts over `examples`.

    Each example is a `(source_str, target_str)` pair. Both strings are split
    on whitespace and checked with `rule_utils.rhs_can_maybe_derive` against
    `rule.source` and `rule.target` respectively. Three counts are kept:
    examples matching the source, examples matching the target, and examples
    matching both.

    Args:
        rule: Object exposing `source` and `target` attributes.
        config: Mapping read for the optional `"sample_size"` key and the
            required `"source_given_target_coef"` and
            `"target_given_source_coef"` keys.
        examples: Iterable of `(source_str, target_str)` string pairs.
        verbose: If true, prints the rule and the three match counts.

    Returns:
        `0.0` (after printing a notice) when no example matches both sides.
        Otherwise the sum of `-coef * log2(ratio)` over the two ratios
        joint/target-count and joint/source-count, weighted by the two
        coefficients from `config`.

    Raises:
        ValueError: If the joint count is non-zero while one of the single-side
            counts is zero.
    """
    # TODO(petershaw): This could potentially be more efficient by pre-indexing
    # the examples in a data structure such as a Trie.
    # TODO(petershaw): Could also consider sub-sampling the dataset for the
    # purpose of computing these correlations.

    # A falsy value (the default, 0) disables the early exit below.
    joint_match_limit = config.get("sample_size", 0)

    source_hits = 0
    target_hits = 0
    joint_hits = 0
    for source_str, target_str in examples:
        source_tokens = tuple(source_str.split())
        target_tokens = tuple(target_str.split())
        source_matches = rule_utils.rhs_can_maybe_derive(
            rule.source, source_tokens)
        target_matches = rule_utils.rhs_can_maybe_derive(
            rule.target, target_tokens)

        if source_matches:
            source_hits += 1
        if target_matches:
            target_hits += 1
        if source_matches and target_matches:
            joint_hits += 1

        # Stop scanning once enough joint matches have been collected.
        if joint_match_limit and joint_hits >= joint_match_limit:
            break

    if not joint_hits:
        print("Rule did not match any examples.")
        # TODO(petershaw): Raise instead?
        return 0.0

    # Every joint hit also incremented both single-side counters, so these
    # checks are purely defensive guards for the divisions further down.
    for counter_name, counter_value in (
            ("num_examples_match_source", source_hits),
            ("num_examples_match_target", target_hits)):
        if not counter_value:
            raise ValueError("%s: %s" % (counter_name, counter_value))

    if verbose:
        print("_get_rule_cost: %s" % rule)
        print("num_examples_match_source: %s" % source_hits)
        print("num_examples_match_target: %s" % target_hits)
        print("num_examples_match_source_and_target: %s" % joint_hits)

    weighted_ratios = (
        ("source_given_target_coef", float(joint_hits) / target_hits),
        ("target_given_source_coef", float(joint_hits) / source_hits),
    )
    penalty = 0.0
    for coef_key, ratio in weighted_ratios:
        penalty -= config[coef_key] * math.log2(ratio)
    return penalty
Exemplo n.º 2
0
 def test_rhs_can_maybe_derive_1(self):
     """Checks rhs_can_maybe_derive on the sources and targets of two rules.

     Both rules are parsed with `qcfg_rule.rule_from_string`; the shorter
     rule's side is passed first and the longer rule's side second.
     """
     longer_rule = qcfg_rule.rule_from_string(
         "who is NT_1 ' s boss ? ### ( Yield :output ( FindManager :recipient ( NT_1 ) ) )"
     )
     shorter_rule = qcfg_rule.rule_from_string(
         "who is NT_1 ? ### ( Yield :output ( NT_1 ) )")

     # Source sides are checked first, then target sides.
     source_result = rule_utils.rhs_can_maybe_derive(shorter_rule.source,
                                                     longer_rule.source)
     self.assertTrue(source_result)

     target_result = rule_utils.rhs_can_maybe_derive(shorter_rule.target,
                                                     longer_rule.target)
     self.assertTrue(target_result)
Exemplo n.º 3
0
def _find_relevant_rules(current_rules, candidate_rule):
    """Selects the rules that pass `rhs_can_maybe_derive` on both sides.

    A rule from `current_rules` is kept when
    `rule_utils.rhs_can_maybe_derive` is truthy for
    `(candidate_rule.source, rule.source)` and for
    `(candidate_rule.target, rule.target)`. The target check is only
    evaluated when the source check succeeds.

    Args:
        current_rules: Iterable of objects with `source` and `target`.
        candidate_rule: Object with `source` and `target`.

    Returns:
        A new list of the kept rules, in iteration order.
    """
    # TODO(petershaw): This can potentially be made more efficient by
    # pre-indexing rules in a data structure such as a Trie.
    return [
        existing_rule for existing_rule in current_rules
        if rule_utils.rhs_can_maybe_derive(candidate_rule.source,
                                           existing_rule.source)
        and rule_utils.rhs_can_maybe_derive(candidate_rule.target,
                                            existing_rule.target)
    ]
Exemplo n.º 4
0
def _find_possible_candidates(rule_to_split, other_rules, config):
    """Collects `(unifier, other_rule)` pairs for splitting `rule_to_split`.

    Every rule in `other_rules` that is not equal to `rule_to_split` and that
    passes `rule_utils.rhs_can_maybe_derive` on both its source and its target
    (other rule first, `rule_to_split` second) is handed to
    `unification_utils.get_rule_unifiers`. Each unifier it yields is paired
    with that rule.

    Args:
        rule_to_split: Object with `source` and `target` attributes.
        other_rules: Iterable of objects with `source` and `target`.
        config: Passed through unchanged to `get_rule_unifiers`.

    Returns:
        A set of `(unifier, other_rule)` tuples; empty when nothing qualifies.
    """
    pairs = set()
    for partner in other_rules:
        # A rule is never a candidate for splitting itself.
        if partner == rule_to_split:
            continue
        if not rule_utils.rhs_can_maybe_derive(partner.source,
                                               rule_to_split.source):
            continue
        if not rule_utils.rhs_can_maybe_derive(partner.target,
                                               rule_to_split.target):
            continue
        found_unifiers = unification_utils.get_rule_unifiers(
            rule_to_split, partner, config)
        pairs.update({(unifier, partner) for unifier in found_unifiers})
    return pairs
Exemplo n.º 5
0
 def test_rhs_can_maybe_derive_8(self):
     """Expects a falsy result for `foo NT_1 NT_2 bar` vs. a five-token goal."""
     pattern_tokens = "foo NT_1 NT_2 bar".split()
     goal_tokens = "foo foo bar buz buz".split()
     outcome = rule_utils.rhs_can_maybe_derive(tuple(pattern_tokens),
                                               tuple(goal_tokens))
     self.assertFalse(outcome)
Exemplo n.º 6
0
 def test_rhs_can_maybe_derive_7(self):
     """Expects a falsy result for `turn right NT_1` vs. `turn left turn right`."""
     pattern_tokens = "turn right NT_1".split()
     goal_tokens = "turn left turn right".split()
     outcome = rule_utils.rhs_can_maybe_derive(tuple(pattern_tokens),
                                               tuple(goal_tokens))
     self.assertFalse(outcome)
Exemplo n.º 7
0
 def test_rhs_can_maybe_derive_4(self):
     """Expects a truthy result for the single token `foo` vs. `bar foo bar`."""
     pattern_tokens = "foo".split()
     goal_tokens = "bar foo bar".split()
     outcome = rule_utils.rhs_can_maybe_derive(tuple(pattern_tokens),
                                               tuple(goal_tokens))
     self.assertTrue(outcome)
Exemplo n.º 8
0
 def test_rhs_can_maybe_derive_false_2(self):
     """Expects a falsy result for `NT_1 named NT_2` vs. `NT_1 foo`."""
     pattern_tokens = "NT_1 named NT_2".split()
     goal_tokens = "NT_1 foo".split()
     outcome = rule_utils.rhs_can_maybe_derive(tuple(pattern_tokens),
                                               tuple(goal_tokens))
     self.assertFalse(outcome)
Exemplo n.º 9
0
 def test_rhs_can_maybe_derive_true_5(self):
     """Expects a truthy result for `I_TURN_RIGHT NT_1` vs. a seven-token goal."""
     pattern_tokens = "I_TURN_RIGHT NT_1".split()
     goal_text = "I_TURN_RIGHT I_RUN I_TURN_RIGHT I_RUN I_TURN_RIGHT I_RUN I_RUN"
     goal_tokens = goal_text.split()
     outcome = rule_utils.rhs_can_maybe_derive(tuple(pattern_tokens),
                                               tuple(goal_tokens))
     self.assertTrue(outcome)
Exemplo n.º 10
0
 def test_rhs_can_maybe_derive_true_4(self):
     """Expects a truthy result for `NT_1 right` vs. `run after run right thrice`."""
     pattern_tokens = "NT_1 right".split()
     goal_tokens = "run after run right thrice".split()
     outcome = rule_utils.rhs_can_maybe_derive(tuple(pattern_tokens),
                                               tuple(goal_tokens))
     self.assertTrue(outcome)
Exemplo n.º 11
0
 def test_rhs_can_maybe_derive_true_2(self):
     """Expects a truthy result for `foo foo NT_1` vs. `foo foo bar NT_1`."""
     pattern_tokens = "foo foo NT_1".split()
     goal_tokens = "foo foo bar NT_1".split()
     outcome = rule_utils.rhs_can_maybe_derive(tuple(pattern_tokens),
                                               tuple(goal_tokens))
     self.assertTrue(outcome)
Exemplo n.º 12
0
 def test_rhs_can_maybe_derive_true_1(self):
     """Expects a truthy result for `NT_1 named NT_2` vs. a longer goal.

     The goal is `NT_1 have a major city named NT_2`.
     """
     pattern_tokens = "NT_1 named NT_2".split()
     goal_tokens = "NT_1 have a major city named NT_2".split()
     outcome = rule_utils.rhs_can_maybe_derive(tuple(pattern_tokens),
                                               tuple(goal_tokens))
     self.assertTrue(outcome)
Exemplo n.º 13
0
 def test_rhs_can_maybe_derive_10(self):
     """Expects a truthy result for `NT_1 foo bar` vs. `NT_1 foo xyz foo bar`."""
     pattern_tokens = "NT_1 foo bar".split()
     goal_tokens = "NT_1 foo xyz foo bar".split()
     outcome = rule_utils.rhs_can_maybe_derive(tuple(pattern_tokens),
                                               tuple(goal_tokens))
     self.assertTrue(outcome)
Exemplo n.º 14
0
 def test_rhs_can_maybe_derive_9(self):
     """Expects a truthy result for `NT_1 ( capital )` vs. a nested goal.

     The goal is `answer ( loc_1 ( smallest ( capital ) ) )`.
     """
     pattern_tokens = "NT_1 ( capital )".split()
     goal_tokens = "answer ( loc_1 ( smallest ( capital ) ) )".split()
     outcome = rule_utils.rhs_can_maybe_derive(tuple(pattern_tokens),
                                               tuple(goal_tokens))
     self.assertTrue(outcome)