Example #1
    def test_learning_from_denotation(self):
        arithmetic_grammar = Grammar(self.arithmetic_rules)
        arithmetic_examples = self.two_parse_examples + self.one_parse_examples

        from executor import Executor

        arithmetic_model = Model(
            grammar=arithmetic_grammar,
            feature_fn=Parse.operator_precedence_features,
            weights=defaultdict(float),  # Initialize with all weights at zero
            executor=Executor.execute)

        # Train based on correct/incorrect denotation
        from metrics import DenotationAccuracyMetric

        b_trn, b_tst, a_trn, a_tst = arithmetic_model.train_test(
            train_examples=arithmetic_examples[:13],
            test_examples=arithmetic_examples[13:],
            training_metric=DenotationAccuracyMetric(),
            seed=1)

        # BEFORE SGD
        self.assertEqual(b_trn['semantics accuracy'], 10)
        self.assertEqual(b_tst['denotation accuracy'], 4)

        # AFTER SGD
        self.assertEqual(a_trn['semantics accuracy'], 12)  # Improvement
        self.assertEqual(a_trn['denotation accuracy'], 13)  # Improvement
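
For orientation: a denotation-accuracy metric effectively scores 1 for an example when the top-ranked parse evaluates to the target denotation. The sketch below is a conceptual illustration only, not the actual DenotationAccuracyMetric imported above; the example.denotation and parse.denotation attributes are inferred from the surrounding tests.

# Conceptual sketch, not the library's implementation.
class SketchDenotationAccuracyMetric:
    name = 'denotation accuracy'

    def evaluate(self, example, parses):
        # Score 1 iff the top-ranked parse evaluates to the target denotation.
        if not parses:
            return 0
        return 1 if parses[0].denotation == example.denotation else 0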
Example #2
    def test_learning_from_many_denotations(self):
        """
        Large number of examples are used for training.
        Last 4 arithmetic_examples are used for testing.
        b_trn: performance metrics on training set before training
        a_trn: performance metrics on training set after training

        denotation accuracy: # of examples where denotation of parse at position 
        0 was correct
        """
        arithmetic_grammar = Grammar(self.arithmetic_rules)
        arithmetic_examples = self.two_parse_examples + self.one_parse_examples

        from executor import Executor

        arithmetic_model = Model(
            grammar=arithmetic_grammar,
            feature_fn=Parse.operator_precedence_features,
            weights=defaultdict(float),  # Initialize with all weights at zero
            executor=Executor.execute)

        from metrics import DenotationAccuracyMetric
        from arithmetic import arithmetic_dev_examples

        b_trn, b_tst, a_trn, a_tst = arithmetic_model.train_test(
            train_examples=arithmetic_dev_examples,
            test_examples=arithmetic_examples[13:],
            training_metric=DenotationAccuracyMetric(),
            seed=1)

        # BEFORE SGD
        self.assertEqual(b_trn['denotation accuracy'], 64)

        # AFTER SGD
        self.assertEqual(a_trn['denotation accuracy'], 92)  # Improvement
Example #3
    def test_feature_function(self):
        from experiment import evaluate_model
        from metrics import denotation_match_metrics
        from scoring import Model
        from geo880 import geo880_train_examples

        rules = (self.rules_optionals + self.rules_collection_entity +
                 self.rules_types + self.rules_relations +
                 self.rules_intersection + self.rules_superlatives +
                 self.rules_reverse_joins)

        grammar = Unit3Grammar(rules=rules, annotators=self.annotators)

        def empty_denotation_feature(parse):
            features = defaultdict(float)
            if parse.denotation == ():
                features['empty_denotation'] += 1.0
            return features

        weights = {'empty_denotation': -1.0}

        model = Model(grammar=grammar,
                      feature_fn=empty_denotation_feature,
                      weights=weights,
                      executor=self.geobase.executor().execute)
        metric_values = evaluate_model(model=model,
                                       examples=geo880_train_examples,
                                       metrics=denotation_match_metrics(),
                                       print_examples=False)
        self.assertEqual(235, metric_values['denotation accuracy'])
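
Here the single weight of -1.0 on empty_denotation pushes parses with empty denotations down the ranking. Conceptually, a linear model scores a parse as the dot product of its feature vector with the weight vector; a minimal sketch, not the actual scoring code used by Model above:

# Minimal sketch of linear scoring; not the library's implementation.
def linear_score(parse, feature_fn, weights):
    # Dot product of the parse's features with the weight vector.
    return sum(weights.get(f, 0.0) * value
               for f, value in feature_fn(parse).items())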
Example #4
    def test_evaluate_grammar_with_reverse_joins(self):
        from experiment import sample_wins_and_losses
        from geoquery import GeoQueryDomain
        from metrics import DenotationOracleAccuracyMetric
        from scoring import Model

        rules = (self.rules_optionals + self.rules_collection_entity +
                 self.rules_types + self.rules_relations +
                 self.rules_intersection + self.rules_superlatives +
                 self.rules_reverse_joins)

        grammar = Unit3Grammar(rules=rules, annotators=self.annotators)
        model = Model(grammar=grammar,
                      executor=self.geobase.executor().execute)
        metric = DenotationOracleAccuracyMetric()

        # If printing=True, prints a sampling of wins (correct semantics in
        # first parse) and losses on the dataset.
        metric_values = sample_wins_and_losses(domain=self.domain,
                                               model=model,
                                               metric=metric,
                                               seed=1,
                                               printing=False)
        self.assertEqual(11562, metric_values['number of parses'])
        self.assertEqual(152, metric_values['denotation accuracy'])
Example #5
    def test_evaluation_with_scoring(self):
        """
        Evaluate the grammar on all examples, collecting metrics:
        
        semantics oracle accuracy: # of examples where one parse or the other was
        correct.

        semantics accuracy: # of examples where parse at position 0 was correct.
        """
        arithmetic_grammar = Grammar(self.arithmetic_rules)

        from executor import Executor

        arithmetic_model = Model(grammar=arithmetic_grammar,
                                 feature_fn=Parse.operator_precedence_features,
                                 weights=self.weights,
                                 executor=Executor.execute)

        from experiment import evaluate_model

        metrics = evaluate_model(model=arithmetic_model,
                                 examples=self.one_parse_examples +
                                 self.two_parse_examples)
        self.assertEqual(metrics['semantics oracle accuracy'], 17)
        self.assertEqual(metrics['semantics accuracy'], 16)
Example #6
    def test_learning_from_semantics(self):
        """
        First 13 examples are used for training.
        Last 4 examples are used for testing.
        b_trn: performance metrics on training set before training
        b_tst: performance metrics on test set before training
        a_trn: performance metrics on training set after training
        a_tst: performance metrics on test set after training

        semantics accuracy: # of examples where the parse at position 0 was
        correct.
        denotation accuracy: # of examples where the denotation of the parse
        at position 0 was correct.
        """
        arithmetic_grammar = Grammar(self.arithmetic_rules)
        arithmetic_examples = self.two_parse_examples + self.one_parse_examples

        from executor import Executor

        arithmetic_model = Model(
            grammar=arithmetic_grammar,
            feature_fn=Parse.operator_precedence_features,
            weights=defaultdict(float),  # Initialize with all weights at zero
            executor=Executor.execute)

        # Train based on correct/incorrect semantics
        from metrics import SemanticsAccuracyMetric

        b_trn, b_tst, a_trn, a_tst = arithmetic_model.train_test(
            train_examples=arithmetic_examples[:13],
            test_examples=arithmetic_examples[13:],
            training_metric=SemanticsAccuracyMetric(),
            seed=1)

        # BEFORE SGD
        self.assertEqual(b_trn['semantics accuracy'], 10)
        self.assertEqual(b_trn['denotation accuracy'], 11)
        self.assertEqual(b_tst['semantics accuracy'], 4)
        self.assertEqual(b_tst['denotation accuracy'], 4)

        # AFTER SGD
        self.assertEqual(a_trn['semantics accuracy'], 13)  # Improvement
        self.assertEqual(a_trn['denotation accuracy'], 13)  # Improvement
        self.assertEqual(a_tst['semantics accuracy'], 4)
        self.assertEqual(a_tst['denotation accuracy'], 4)
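
The train_test call used throughout these tests follows a before/after protocol: evaluate on both splits, run SGD against the training metric, then evaluate again. A rough outline; the evaluate and train method names here are assumptions, not the library's verified API.

# Rough outline of the train/test protocol; method names are assumptions.
def train_test_outline(model, train_examples, test_examples,
                       training_metric, seed=None):
    b_trn = model.evaluate(train_examples)   # before training, train split
    b_tst = model.evaluate(test_examples)    # before training, test split
    model.train(train_examples, training_metric, seed=seed)  # SGD updates
    a_trn = model.evaluate(train_examples)   # after training, train split
    a_tst = model.evaluate(test_examples)    # after training, test split
    return b_trn, b_tst, a_trn, a_tst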
Example #7
def evaluate_grammar(grammar=None,
                     executor=None,
                     examples=[],
                     examples_label=None,
                     metrics=standard_metrics(),
                     print_examples=True):
    return evaluate_model(model=Model(grammar=grammar, executor=executor),
                          examples=examples,
                          metrics=metrics,
                          print_examples=print_examples)
Example #8
    def evaluate(self,
                 executor=None,
                 examples=[],
                 examples_label=None,
                 metrics=standard_metrics(),
                 print_examples=False):
        return Model(grammar=self,
                     executor=executor).evaluate(examples=examples,
                                                 metrics=metrics,
                                                 print_examples=print_examples)
Example #9
def special_geo_evaluate(grammar=None, feature_fn=geo_domain.features):
    # Build the model by hand so that we can see all the pieces:
    geo_mod = Model(grammar=grammar,
                    feature_fn=feature_fn,
                    weights=geo_domain.weights(),
                    executor=geo_domain.execute)
    # This can be done with less fuss using experiment.train_test_for_domain,
    # but we want full access to the model, metrics, etc.
    train_test(model=geo_mod,
               train_examples=geo_domain.train_examples(),
               test_examples=geo_domain.test_examples(),
               metrics=geo_domain.metrics(),
               training_metric=geo_domain.training_metric(),
               seed=0,
               print_examples=False)
Example #10
    def test_training_data4(self):
        from experiment import sample_wins_and_losses
        from metrics import SemanticsOracleAccuracyMetric
        from scoring import Model
        from travel import TravelDomain
        from geonames import GeoNamesAnnotator

        domain = TravelDomain()
        rules = (self.rules_travel + self.rules_travel_locations +
                 self.rules_travel_modes + self.rules_travel_triggers +
                 self.rules_request_types + self.rules_optionals)
        grammar = Unit2Grammar(
            rules=rules,
            annotators=[GeoNamesAnnotator(live_requests=False)])
        model = Model(grammar=grammar)
        metric = SemanticsOracleAccuracyMetric()

        # If printing=True, prints a sampling of wins (correct semantics in 
        # first parse) and losses on the dataset.
        metric_values = sample_wins_and_losses(domain=domain,
                                               model=model,
                                               metric=metric,
                                               seed=31,
                                               printing=False)
Example #11
    def test_evaluate_simple_grammar(self):
        from experiment import sample_wins_and_losses
        from metrics import DenotationOracleAccuracyMetric
        from scoring import Model

        rules = self.rules_optionals + self.rules_collection_entity
        grammar = Unit2Grammar(rules=rules, annotators=self.annotators)
        model = Model(grammar=grammar,
                      executor=self.geobase.executor().execute)
        metric = DenotationOracleAccuracyMetric()

        # If printing=True, prints a sampling of wins (correct semantics in
        # first parse) and losses on the dataset.
        metric_values = sample_wins_and_losses(domain=self.domain,
                                               model=model,
                                               metric=metric,
                                               seed=1,
                                               printing=False)
        self.assertEqual(17, metric_values['number of parses'])
Example #12
    def test_evaluate_model(self):
        from experiment import evaluate_model
        from metrics import denotation_match_metrics
        from scoring import Model
        from geo880 import geo880_train_examples

        rules = (self.rules_optionals + self.rules_collection_entity +
                 self.rules_types + self.rules_relations +
                 self.rules_intersection + self.rules_superlatives +
                 self.rules_reverse_joins)

        grammar = Unit3Grammar(rules=rules, annotators=self.annotators)
        model = Model(grammar=grammar,
                      executor=self.geobase.executor().execute)
        # Set print_examples=True to inspect examples such as 'what state has
        # the shortest river?'.
        evaluate_model(model=model,
                       examples=geo880_train_examples[:10],
                       metrics=denotation_match_metrics(),
                       print_examples=False)
Example #13
def learn_lexical_semantics(domain, seed=None):
    from parsing import Grammar
    print('#' * 80)
    print('Learn lexical semantics experiment for domain: %s\n'
          % domain.__class__.__name__)
    original_grammar = domain.grammar()
    expanded_rules = cartesian_product_of_lexical_rules(domain.rules())
    grammar = Grammar(rules=expanded_rules,
                      annotators=original_grammar.annotators,
                      start_symbol=original_grammar.start_symbol)
    model = Model(grammar=grammar,
                  feature_fn=domain.features,
                  weights=domain.weights,
                  executor=domain.execute)
    train_test(model=model,
               train_examples=domain.train_examples(),
               test_examples=domain.test_examples(),
               metrics=domain.metrics(),
               training_metric=domain.training_metric(),
               seed=seed,
               print_examples=False)
Example #14
def clone_model(model):
    return Model(
        grammar=model.grammar,
        feature_fn=model.feature_fn,
        weights=defaultdict(float),  # Zero the weights.
        executor=model.executor)
Example #15
    def model(self):
        return Model(grammar=self.grammar(),
                     feature_fn=self.features,
                     weights=self.weights(),
                     executor=self.execute)
Example #16
travel_domain = TravelDomain()
travel_grammar = travel_domain.grammar()


def basic_feature_function(parse):
    """Features for the rule used for the root node and its children"""
    features = defaultdict(float)
    features[str(parse.rule)] += 1.0
    for child in parse.children:
        features[str(child.rule)] += 1.0
    return features


# This code evaluates the current grammar:
train_test(model=Model(grammar=travel_grammar,
                       feature_fn=basic_feature_function),
           train_examples=travel_train_examples,
           test_examples=travel_test_examples,
           print_examples=False)

# ### Question 4
#
# With the default travel grammar, many of the errors on training examples occur because the origin
# isn't marked by "from". You might have noticed that "directions New York to Philadelphia"
# is not handled properly in our opening example. Other examples include
# "transatlantic cruise southampton to tampa",
# "fly boston to myrtle beach spirit airlines", and
# "distance usa to peru". __Your tasks__: (i) extend the grammar with a single rule to handle examples
# like these, and run another evaluation using this expanded grammar (submit your completion
# of the following starter code); (ii) in 1–2 sentences,
# summarize what happened to the post-training performance metrics when this rule was added.
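
One possible shape for such a rule, sketched under assumptions: the Grammar keyword arguments follow the pattern in Example #13, while the Rule import, the $FromLocation category, and the semantic function are guesses. This is not the official starter code.

from parsing import Grammar, Rule  # the Rule import is an assumption

# Sketch only: let a bare $Location act as an origin, so that utterances like
# "directions New York to Philadelphia" can parse without "from".
unmarked_origin_rule = Rule('$FromLocation', '$Location',
                            lambda sems: ('from', sems[0]))  # semantics guessed

extended_grammar = Grammar(
    rules=travel_domain.rules() + [unmarked_origin_rule],
    annotators=travel_grammar.annotators,
    start_symbol=travel_grammar.start_symbol)

train_test(model=Model(grammar=extended_grammar,
                       feature_fn=basic_feature_function),
           train_examples=travel_train_examples,
           test_examples=travel_test_examples,
           print_examples=False)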