def test_evaluate():
    """Check ``Grammar.evaluate`` on two roll expressions.

    For each query the returned total must equal the expected value, and
    recomputing the expression from the returned rolls must give the same
    number.
    """
    dice_grammar = Grammar()

    # Case 1: parenthesised sum scaled by two.
    first_query = "(3d1 + 5) * 2"
    first_expected = 16
    first_total, first_rolls = dice_grammar.evaluate(first_query)
    print(first_total)
    print(first_rolls)
    assert first_total == expected_value(first_expected) if False else first_total == first_expected
    recomputed_first = (sum(map(int, first_rolls)) + 5) * 2
    assert recomputed_first == first_expected

    # Case 2: plain sum without parentheses or scaling.
    second_query = "3d1 + 2"
    second_expected = 5
    second_total, second_rolls = dice_grammar.evaluate(second_query)
    assert second_total == second_expected
    recomputed_second = sum(map(int, second_rolls)) + 2
    assert recomputed_second == second_expected
def test_evaluation(self):
    """Run the arithmetic grammar over every example and check its metrics.

    Two metrics are verified:

    * ``semantics oracle accuracy`` -- number of examples where one parse
      or the other was correct.
    * ``semantics accuracy`` -- number of examples where the parse at
      position 0 was correct.
    """
    grammar = Grammar(self.arithmetic_rules)
    from executor import Executor

    expected_example_count = 17
    expected_top_parse_correct = 14

    all_examples = self.one_parse_examples + self.two_parse_examples
    self.assertEqual(expected_example_count, len(all_examples))

    results = grammar.evaluate(
        executor=Executor.execute,
        examples=all_examples,
        print_examples=False,
    )

    # Every example yielded at least one correct parse.
    self.assertEqual(
        results['semantics oracle accuracy'], expected_example_count
    )
    # For three examples the parse at position 0 was not the correct one.
    self.assertEqual(
        results['semantics accuracy'], expected_top_parse_correct
    )