def setUp(self):
    """Prepare the fixture: a new ChainModel plus handles to its parts.

    Stores on ``self`` the model, its ``act_a`` / ``act_b`` attributes and
    the states returned by ``get_state_by_id`` for ids 1, 2 and 3.
    """
    model = ChainModel()
    self.model = model

    # Actions are read straight off the model.
    self.act_a = model.act_a
    self.act_b = model.act_b

    # States are looked up by id, in ascending order.
    lookup = model.get_state_by_id
    self.s1 = lookup(1)
    self.s2 = lookup(2)
    self.s3 = lookup(3)
class HypothesisTest(unittest.TestCase):
    """Tests for hypotheses drawn by ``Hypothesis`` over a ``ChainModel``."""

    def setUp(self):
        """Create a fresh ChainModel and cache its actions and states 1-3."""
        self.model = ChainModel()
        self.act_a = self.model.act_a
        self.act_b = self.model.act_b
        self.s1 = self.model.get_state_by_id(1)
        self.s2 = self.model.get_state_by_id(2)
        self.s3 = self.model.get_state_by_id(3)

    def test_init_hypothesis(self):
        """Check the reward table and s1/act_a transitions of an initial draw.

        Every (state, action) pair must report the ``(u0, std0)`` pair passed
        to ``draw_init_hypothesis``; the transition values from s1 under
        act_a must each be positive and together sum to 1.
        """
        u0 = 1
        std0 = 1
        hypothesis = Hypothesis.draw_init_hypothesis(self.model, u0, std0)

        # Visit each state/action pair exactly once instead of enumerating
        # them by hand (the hand-written list checked (s1, act_a) twice).
        for state in (self.s1, self.s2, self.s3):
            for action in (self.act_a, self.act_b):
                self.assertEqual(
                    (u0, std0), hypothesis.get_reward_table(state, action))

        total = 0
        for next_state in self.model.get_next_states(self.s1):
            p = hypothesis.get_transition(self.s1, self.act_a, next_state)
            total += p
            self.assertGreater(p, 0)
        # The sum is only meaningful once every next state has been added.
        self.assertAlmostEqual(total, 1)

    def test_draw_hypothesis(self):
        """Check a hypothesis drawn from a Keeper fed 1000 identical samples.

        All samples are s1 --act_a--> s2 with reward 1.4, so the drawn
        transition to s2 and the drawn reward are expected to land near 1
        and 1.4 respectively, while every next state keeps a positive value.
        """
        keepr = Keeper()
        for _ in range(1000):
            keepr.update_reward_and_transition(
                self.s1, self.act_a, self.s2, 1.4)
        hypothesis = Hypothesis.draw_hypothesis(self.model, keepr)

        for next_state in self.model.get_next_states(self.s1):
            self.assertGreater(
                hypothesis.get_transition(self.s1, self.act_a, next_state), 0)

        # Compared to one decimal place only -- presumably because the
        # hypothesis is a random draw (TODO confirm against Hypothesis).
        places = 1
        self.assertAlmostEqual(
            hypothesis.get_transition(self.s1, self.act_a, self.s2),
            1, places)
        self.assertAlmostEqual(
            hypothesis.get_reward(self.s1, self.act_a), 1.4, places)
class KeeperTest(unittest.TestCase):
    """Tests for the reward and transition statistics tracked by ``Keeper``."""

    def setUp(self):
        """Create an empty Keeper plus a ChainModel's actions and states 1-3."""
        self.keepr = Keeper()
        self.model = ChainModel()
        self.act_a = self.model.act_a
        self.act_b = self.model.act_b
        self.s1 = self.model.get_state_by_id(1)
        self.s2 = self.model.get_state_by_id(2)
        self.s3 = self.model.get_state_by_id(3)

    def test_sums(self):
        """Reward sums and sums of squares accumulate per (state, action)."""
        self.keepr.update_reward_sums(self.s1, self.act_a, 2)
        self.keepr.update_reward_sums(self.s1, self.act_a, 10)
        self.keepr.update_reward_sums(self.s2, self.act_b, 8)
        self.keepr.update_reward_sums(self.s2, self.act_b, 7)

        # 2 + 10, 8 + 7, and an untouched pair.
        self.assertEqual(self.keepr.get_sum_reward(self.s1, self.act_a), 12)
        self.assertEqual(self.keepr.get_sum_reward(self.s2, self.act_b), 15)
        self.assertEqual(self.keepr.get_sum_reward(self.s1, self.act_b), 0)

        # 4 + 100, 64 + 49, and an untouched pair.
        self.assertEqual(
            self.keepr.get_sum_reward_squares(self.s1, self.act_a), 104)
        self.assertEqual(
            self.keepr.get_sum_reward_squares(self.s2, self.act_b), 113)
        self.assertEqual(
            self.keepr.get_sum_reward_squares(self.s1, self.act_b), 0)

    def test_reward_model(self):
        """Per-transition rewards and per-(state, action) reward variance."""
        self.keepr.update_reward_and_transition(
            self.s1, self.act_a, self.s2, 2)
        self.keepr.update_reward_and_transition(
            self.s1, self.act_a, self.s2, 3)
        self.keepr.update_reward_and_transition(
            self.s2, self.act_b, self.s2, 1)

        self.assertEqual(
            self.keepr.get_reward(self.s1, self.act_a, self.s2), 2.5, "This")
        self.assertEqual(
            self.keepr.get_reward(self.s2, self.act_b, self.s2), 1.0)
        self.assertEqual(
            self.keepr.get_reward(self.s1, self.act_a, self.s3), 0)

        # The variance is a computed float; algebraically equal formulas can
        # differ in the last bits, so compare with a tolerance (7 places by
        # default) rather than demanding bit-exact equality.
        self.keepr.update_reward_and_transition(
            self.s1, self.act_a, self.s3, 1)
        self.assertAlmostEqual(
            2.0 / 3, self.keepr.get_var_reward(self.s1, self.act_a))

        self.keepr.update_reward_and_transition(
            self.s1, self.act_a, self.s3, 1)
        self.assertAlmostEqual(
            0.6875, self.keepr.get_var_reward(self.s1, self.act_a))

    def test_transition_model(self):
        """Visit counts and transition values after four recorded moves."""
        self.keepr.update_transition(self.s1, self.act_a, self.s2)
        self.keepr.update_transition(self.s1, self.act_a, self.s2)
        self.keepr.update_transition(self.s1, self.act_a, self.s3)
        self.keepr.update_transition(self.s1, self.act_b, self.s2)

        # Counts are integers, so exact comparison is appropriate.
        self.assertEqual(self.keepr.get_visit_count(self.s1), 4)
        self.assertEqual(self.keepr.get_visit_count(self.s1, self.act_a), 3)
        self.assertEqual(self.keepr.get_visit_count(self.s1, self.act_b), 1)

        # 2/3 and 1/3 have no exact binary representation: use a tolerance.
        self.assertAlmostEqual(
            self.keepr.get_transition(self.s1, self.act_a, self.s2), 2.0 / 3)
        self.assertAlmostEqual(
            self.keepr.get_transition(self.s1, self.act_a, self.s3), 1.0 / 3)
        self.assertEqual(
            self.keepr.get_transition(self.s1, self.act_b, self.s2), 1.0)
        self.assertEqual(
            self.keepr.get_transition(self.s1, self.act_b, self.s3), 0)
def parse_model(s):
    """Map a short model code to a freshly constructed model.

    'cm' gives a ChainModel, 'scm' a SlipperyChainModel and 'lm' a
    LoopModel. Any other value is handed to ``invalid()``; if that call
    returns, this function returns None.
    """
    if s == 'cm':
        return ChainModel()
    if s == 'scm':
        return SlipperyChainModel()
    if s == 'lm':
        return LoopModel()

    # No known code matched.
    invalid()
class HypothesisTest(unittest.TestCase):
    """Tests for hypotheses drawn by ``Hypothesis`` over a ``ChainModel``."""

    def setUp(self):
        """Create a fresh ChainModel and cache its actions and states 1-3."""
        self.model = ChainModel()
        self.act_a = self.model.act_a
        self.act_b = self.model.act_b
        self.s1 = self.model.get_state_by_id(1)
        self.s2 = self.model.get_state_by_id(2)
        self.s3 = self.model.get_state_by_id(3)

    def test_init_hypothesis(self):
        """Check the reward table and s1/act_a transitions of an initial draw.

        Every (state, action) pair must report the ``(u0, std0)`` pair passed
        to ``draw_init_hypothesis``; the transition values from s1 under
        act_a must each be positive and together sum to 1.
        """
        u0 = 1
        std0 = 1
        hypothesis = Hypothesis.draw_init_hypothesis(self.model, u0, std0)

        # Visit each state/action pair exactly once instead of enumerating
        # them by hand (the hand-written list checked (s1, act_a) twice).
        for state in (self.s1, self.s2, self.s3):
            for action in (self.act_a, self.act_b):
                self.assertEqual(
                    (u0, std0), hypothesis.get_reward_table(state, action))

        total = 0
        for next_state in self.model.get_next_states(self.s1):
            p = hypothesis.get_transition(self.s1, self.act_a, next_state)
            total += p
            self.assertGreater(p, 0)
        # The sum is only meaningful once every next state has been added.
        self.assertAlmostEqual(total, 1)

    def test_draw_hypothesis(self):
        """Check a hypothesis drawn from a Keeper fed 1000 identical samples.

        All samples are s1 --act_a--> s2 with reward 1.4, so the drawn
        transition to s2 and the drawn reward are expected to land near 1
        and 1.4 respectively, while every next state keeps a positive value.
        """
        keepr = Keeper()
        for _ in range(1000):
            keepr.update_reward_and_transition(
                self.s1, self.act_a, self.s2, 1.4)
        hypothesis = Hypothesis.draw_hypothesis(self.model, keepr)

        for next_state in self.model.get_next_states(self.s1):
            self.assertGreater(
                hypothesis.get_transition(self.s1, self.act_a, next_state), 0)

        # Compared to one decimal place only -- presumably because the
        # hypothesis is a random draw (TODO confirm against Hypothesis).
        places = 1
        self.assertAlmostEqual(
            hypothesis.get_transition(self.s1, self.act_a, self.s2),
            1, places)
        self.assertAlmostEqual(
            hypothesis.get_reward(self.s1, self.act_a), 1.4, places)
def get_model(model_name):
    """Return a newly constructed model for the given name.

    Recognised names are "Chain", "SlipperyChain", "Chain2", "Loop",
    "LoopDeadEnd", "LoopDiffTrans" and "SpecialLoop"; each call builds a
    new instance of the corresponding model class.

    Raises:
        ValueError: if ``model_name`` is not one of the recognised names.
            ValueError is a subclass of Exception, so callers that catch
            Exception keep working.
    """
    if model_name == "Chain":
        return ChainModel()
    elif model_name == "SlipperyChain":
        return SlipperyChainModel()
    elif model_name == "Chain2":
        return ChainModel2()
    elif model_name == "Loop":
        return LoopModel()
    elif model_name == "LoopDeadEnd":
        return LoopModelDeadEnd()
    elif model_name == "LoopDiffTrans":
        return LoopModelDiffTrans()
    elif model_name == "SpecialLoop":
        return SpecialLoopModel()
    else:
        # Format instead of concatenating: a non-string name (e.g. None)
        # must still produce the "not found" error rather than a TypeError
        # raised while building the message.
        raise ValueError("{} not found".format(model_name))
class KeeperTest(unittest.TestCase):
    """Tests for the reward and transition statistics tracked by ``Keeper``."""

    def setUp(self):
        """Create an empty Keeper plus a ChainModel's actions and states 1-3."""
        self.keepr = Keeper()
        self.model = ChainModel()
        self.act_a = self.model.act_a
        self.act_b = self.model.act_b
        self.s1 = self.model.get_state_by_id(1)
        self.s2 = self.model.get_state_by_id(2)
        self.s3 = self.model.get_state_by_id(3)

    def test_sums(self):
        """Reward sums and sums of squares accumulate per (state, action)."""
        self.keepr.update_reward_sums(self.s1, self.act_a, 2)
        self.keepr.update_reward_sums(self.s1, self.act_a, 10)
        self.keepr.update_reward_sums(self.s2, self.act_b, 8)
        self.keepr.update_reward_sums(self.s2, self.act_b, 7)

        # 2 + 10, 8 + 7, and an untouched pair.
        self.assertEqual(self.keepr.get_sum_reward(self.s1, self.act_a), 12)
        self.assertEqual(self.keepr.get_sum_reward(self.s2, self.act_b), 15)
        self.assertEqual(self.keepr.get_sum_reward(self.s1, self.act_b), 0)

        # 4 + 100, 64 + 49, and an untouched pair.
        self.assertEqual(
            self.keepr.get_sum_reward_squares(self.s1, self.act_a), 104)
        self.assertEqual(
            self.keepr.get_sum_reward_squares(self.s2, self.act_b), 113)
        self.assertEqual(
            self.keepr.get_sum_reward_squares(self.s1, self.act_b), 0)

    def test_reward_model(self):
        """Per-transition rewards and per-(state, action) reward variance."""
        self.keepr.update_reward_and_transition(
            self.s1, self.act_a, self.s2, 2)
        self.keepr.update_reward_and_transition(
            self.s1, self.act_a, self.s2, 3)
        self.keepr.update_reward_and_transition(
            self.s2, self.act_b, self.s2, 1)

        self.assertEqual(
            self.keepr.get_reward(self.s1, self.act_a, self.s2), 2.5, "This")
        self.assertEqual(
            self.keepr.get_reward(self.s2, self.act_b, self.s2), 1.0)
        self.assertEqual(
            self.keepr.get_reward(self.s1, self.act_a, self.s3), 0)

        # The variance is a computed float; algebraically equal formulas can
        # differ in the last bits, so compare with a tolerance (7 places by
        # default) rather than demanding bit-exact equality.
        self.keepr.update_reward_and_transition(
            self.s1, self.act_a, self.s3, 1)
        self.assertAlmostEqual(
            2.0 / 3, self.keepr.get_var_reward(self.s1, self.act_a))

        self.keepr.update_reward_and_transition(
            self.s1, self.act_a, self.s3, 1)
        self.assertAlmostEqual(
            0.6875, self.keepr.get_var_reward(self.s1, self.act_a))

    def test_transition_model(self):
        """Visit counts and transition values after four recorded moves."""
        self.keepr.update_transition(self.s1, self.act_a, self.s2)
        self.keepr.update_transition(self.s1, self.act_a, self.s2)
        self.keepr.update_transition(self.s1, self.act_a, self.s3)
        self.keepr.update_transition(self.s1, self.act_b, self.s2)

        # Counts are integers, so exact comparison is appropriate.
        self.assertEqual(self.keepr.get_visit_count(self.s1), 4)
        self.assertEqual(self.keepr.get_visit_count(self.s1, self.act_a), 3)
        self.assertEqual(self.keepr.get_visit_count(self.s1, self.act_b), 1)

        # 2/3 and 1/3 have no exact binary representation: use a tolerance.
        self.assertAlmostEqual(
            self.keepr.get_transition(self.s1, self.act_a, self.s2), 2.0 / 3)
        self.assertAlmostEqual(
            self.keepr.get_transition(self.s1, self.act_a, self.s3), 1.0 / 3)
        self.assertEqual(
            self.keepr.get_transition(self.s1, self.act_b, self.s2), 1.0)
        self.assertEqual(
            self.keepr.get_transition(self.s1, self.act_b, self.s3), 0)