def test_learning_from_denotation(self):
    """
    First 13 examples are used for training.
    Last 4 examples are used for testing.
    Training optimizes denotation accuracy (rather than semantics accuracy).
    """
    arithmetic_grammar = Grammar(self.arithmetic_rules)
    arithmetic_examples = self.two_parse_examples + self.one_parse_examples
    from executor import Executor
    arithmetic_model = Model(
        grammar=arithmetic_grammar,
        feature_fn=Parse.operator_precedence_features,
        weights=defaultdict(float),  # Initialize with all weights at zero
        executor=Executor.execute)
    # Train based on correct/incorrect denotation
    from metrics import DenotationAccuracyMetric
    b_trn, b_tst, a_trn, a_tst = arithmetic_model.train_test(
        train_examples=arithmetic_examples[:13],
        test_examples=arithmetic_examples[13:],
        training_metric=DenotationAccuracyMetric(),
        seed=1)
    # BEFORE SGD
    self.assertEqual(b_trn['semantics accuracy'], 10)
    self.assertEqual(b_tst['denotation accuracy'], 4)
    # AFTER SGD
    self.assertEqual(a_trn['semantics accuracy'], 12)   # Improvement
    self.assertEqual(a_trn['denotation accuracy'], 13)  # Improvement
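
# For orientation, a minimal sketch of what a denotation-based training metric
# plausibly computes, per the docstrings in the tests below: an example scores
# 1 when the denotation of the parse ranked at position 0 matches the example's
# target denotation. The attribute names (parses[0].denotation,
# example.denotation) are illustrative assumptions, not the actual metrics.py
# API:
#
#     class DenotationAccuracyMetricSketch:
#         def evaluate(self, example, parses):
#             if not parses:
#                 return 0
#             return 1 if parses[0].denotation == example.denotation else 0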
def test_learning_from_many_denotations(self):
    """
    A large set of examples (arithmetic_dev_examples) is used for training.
    The last 4 arithmetic_examples are used for testing.
    b_trn: performance metrics on training set before training
    a_trn: performance metrics on training set after training
    denotation accuracy: # of examples where the denotation of the parse at
        position 0 was correct
    """
    arithmetic_grammar = Grammar(self.arithmetic_rules)
    arithmetic_examples = self.two_parse_examples + self.one_parse_examples
    from executor import Executor
    arithmetic_model = Model(
        grammar=arithmetic_grammar,
        feature_fn=Parse.operator_precedence_features,
        weights=defaultdict(float),  # Initialize with all weights at zero
        executor=Executor.execute)
    from metrics import DenotationAccuracyMetric
    from arithmetic import arithmetic_dev_examples
    b_trn, b_tst, a_trn, a_tst = arithmetic_model.train_test(
        train_examples=arithmetic_dev_examples,
        test_examples=arithmetic_examples[13:],
        training_metric=DenotationAccuracyMetric(),
        seed=1)
    # BEFORE SGD
    self.assertEqual(b_trn['denotation accuracy'], 64)
    # AFTER SGD
    self.assertEqual(a_trn['denotation accuracy'], 92)  # Improvement
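
# Note that the reported accuracies are raw counts of examples, not fractions:
# 64 and 92 above are counts over arithmetic_dev_examples, while the 13s in the
# surrounding tests are counts over a 13-example training split.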
def test_learning_from_semantics(self):
    """
    First 13 examples are used for training.
    Last 4 examples are used for testing.
    b_trn: performance metrics on training set before training
    b_tst: performance metrics on test set before training
    a_trn: performance metrics on training set after training
    a_tst: performance metrics on test set after training
    semantics accuracy: # of examples where the parse at position 0 was correct
    denotation accuracy: # of examples where the denotation of the parse at
        position 0 was correct
    """
    arithmetic_grammar = Grammar(self.arithmetic_rules)
    arithmetic_examples = self.two_parse_examples + self.one_parse_examples
    from executor import Executor
    arithmetic_model = Model(
        grammar=arithmetic_grammar,
        feature_fn=Parse.operator_precedence_features,
        weights=defaultdict(float),  # Initialize with all weights at zero
        executor=Executor.execute)
    # Train based on correct/incorrect semantics
    from metrics import SemanticsAccuracyMetric
    b_trn, b_tst, a_trn, a_tst = arithmetic_model.train_test(
        train_examples=arithmetic_examples[:13],
        test_examples=arithmetic_examples[13:],
        training_metric=SemanticsAccuracyMetric(),
        seed=1)
    # BEFORE SGD
    self.assertEqual(b_trn['semantics accuracy'], 10)
    self.assertEqual(b_trn['denotation accuracy'], 11)
    self.assertEqual(b_tst['semantics accuracy'], 4)
    self.assertEqual(b_tst['denotation accuracy'], 4)
    # AFTER SGD
    self.assertEqual(a_trn['semantics accuracy'], 13)   # Improvement
    self.assertEqual(a_trn['denotation accuracy'], 13)  # Improvement
    self.assertEqual(a_tst['semantics accuracy'], 4)
    self.assertEqual(a_tst['denotation accuracy'], 4)
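
# The semantics-based counterpart, sketched under the same assumptions as the
# denotation sketch above: an example scores 1 when the semantics of the parse
# at position 0 matches the example's target semantics (attribute names are
# illustrative, not the actual metrics.py API):
#
#     class SemanticsAccuracyMetricSketch:
#         def evaluate(self, example, parses):
#             if not parses:
#                 return 0
#             return 1 if parses[0].semantics == example.semantics else 0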