def test_state_duplicate_add():
    g = StateGenerator('name', 'reward')
    s0 = g.generate_state(name='s0', reward=1)
    s1 = g.generate_state(name='s1', reward=-1)
    m = MDPModel('Test')
    m.add_states([s0, s1])
    with pytest.raises(StateAlreadyPresent):
        m.add_states([s0])
def test_state_not_present():
    g = StateGenerator('name', 'reward')
    s0 = g.generate_state(name='s0', reward=1)
    s1 = g.generate_state(name='s1', reward=-1)
    s2 = g.generate_state(name='s2', reward=0)
    s3 = g.generate_state(name='s3', reward=0)
    m = MDPModel('Test')
    m.add_states([s0, s2])
    with pytest.raises(StateNotPresent):
        list(m.get_states([s1.id, s3.id]))
    assert type(next(m.get_states(s0.id))) == State
def test_init_state_initialization():
    s = StateGenerator('name', 'reward')
    s0 = s.generate_state(name='s0', reward=1)
    s1 = s.generate_state(name='s1', reward=-1)
    p = {s0: 0.5, s1: 0.45}
    m = MDPModel('Test')
    m.add_states([s0])
    with pytest.raises(StateNotPresent):
        m.add_init_states(p)
    m.add_states([s1])
    with pytest.raises(ProbabilityError):
        m.add_init_states(p)
    p[s1] = 0.5
    m.add_init_states(p)
def _create_mdp(self, path, size, dim):
    self.path = path

    def _fill_reward(x):
        if x == size - 1:
            return 1
        else:
            return 0

    state_generator = StateGenerator('name', 'reward')
    action_generator = ActionGenerator('name')
    states = [
        state_generator.generate_state(name="s" + str(x), reward=_fill_reward(x))
        for x in range(size)
    ]
    actions = [
        action_generator.generate_action(name="A{}".format(name))
        for name in range(dim)
    ]
    mdp = MDPModel('MdpEnvLinStatic') \
        .add_states(states) \
        .add_actions(actions) \
        .add_init_states({states[0]: 1}) \
        .add_final_states([states[size - 1]], 1000)

    # At each interior state, draw two distinct actions at random: one moves
    # left, one moves right, and every other action self-loops.
    for i in range(size - 2):
        actions_r = np.random.choice(dim, 2, replace=False)
        for action in range(dim):
            if action not in actions_r:
                mdp.add_transition(states[i + 1], actions[action], {states[i + 1]: 1})
            elif action == actions_r[0]:
                mdp.add_transition(states[i + 1], actions[actions_r[0]], {states[i]: 1})
            elif action == actions_r[1]:
                mdp.add_transition(states[i + 1], actions[actions_r[1]], {states[i + 2]: 1})

    # At the start state, one randomly drawn action moves right; the rest
    # self-loop.
    actions_r = np.random.choice(dim, 1, replace=False)
    for action in range(dim):
        if action not in actions_r:
            mdp.add_transition(states[0], actions[action], {states[0]: 1})
        else:
            mdp.add_transition(states[0], actions[action], {states[1]: 1})

    mdp.finalize()
    # Visualize the MDP:
    # mdp.visualize(file="{0}/{1}".format(path, self.__class__.__name__))
    return mdp
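
# Standalone illustration (not part of the env above; `dim` here is an
# example value): np.random.choice with replace=False draws two *distinct*
# action indices, which is why exactly one action moves left, one moves
# right, and the remaining dim - 2 actions self-loop at each interior state.
import numpy as np

dim = 4
left_idx, right_idx = np.random.choice(dim, 2, replace=False)
assert left_idx != right_idx
self_loops = [a for a in range(dim) if a not in (left_idx, right_idx)]
assert len(self_loops) == dim - 2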
def test_initialize():
    m = MDPModel('Test')
    with pytest.raises(InitStateNotSet):
        m.finalize()
    s = StateGenerator('name', 'reward')
    s0 = s.generate_state(name='s0', reward=1)
    s1 = s.generate_state(name='s1', reward=1)
    a = ActionGenerator('name', 'reward')
    a0 = a.generate_action(name='a0', reward=1)
    a1 = a.generate_action(name='a1', reward=1)
    p = {s0: 0.5, s1: 0.5}
    m.add_states([s0])
    m.add_actions([a0])
    m.add_init_states({s0: 1})
    with pytest.raises(MDPModelNotFinalized):
        m.initialize()
    m.finalize()
    m.initialize()
def test_faulty_transition_entry():
    s = StateGenerator('name', 'reward')
    s0 = s.generate_state(name='s0', reward=1)
    s1 = s.generate_state(name='s1', reward=-1)
    s2 = s.generate_state(name='s2', reward=0)
    s3 = s.generate_state(name='s3', reward=0)
    a = ActionGenerator('name', 'reward')
    a0 = a.generate_action(name='a0', reward=1)
    a1 = a.generate_action(name='a1', reward=-1)
    a2 = a.generate_action(name='a2', reward=-1)
    m = MDPModel('Test')
    m.add_states([s0, s1, s3])
    m.add_actions([a0, a1])
    with pytest.raises(StateNotPresent):
        m.add_transition(s2, a0, {s0.id: 0.5, s1.id: 0.5})
    with pytest.raises(ActionNotPresent):
        m.add_transition(s1, a2, {s0.id: 0.5, s1.id: 0.5})
    with pytest.raises(ProbabilityError):
        m.add_transition(s1, a0, {s0: 0.5, s1: 0.45})
    m.add_transition(s1, a0, {s0: 0.5, s1: 0.5})
def _create_mdp(self, path, prob, size):
    def _fill_reward(x):
        if x == 0:
            return 0.1
        elif x == size - 1:
            return 1
        else:
            return 0

    state_generator = StateGenerator('name', 'reward')
    action_generator = ActionGenerator('name')
    states = [
        state_generator.generate_state(name="s" + str(x), reward=_fill_reward(x))
        for x in range(size)
    ]
    actions = [
        action_generator.generate_action(name=name)
        for name in ['LEFT', 'RIGHT']
    ]

    # Start-state distribution: binomial over the interior states; the two
    # terminal states get probability 0.
    init_states = dict(
        zip(states,
            [0] + [binom.pmf(i, size - 3, prob) for i in range(size - 2)] + [0]))

    mdp = MDPModel('MdpEnvLinVariable') \
        .add_states(states) \
        .add_actions(actions) \
        .add_init_states(init_states) \
        .add_final_states([states[x] for x in [0, size - 1]], 100)

    for i in range(size - 2):
        mdp \
            .add_transition(states[i + 1], actions[0], {states[i]: 1}) \
            .add_transition(states[i + 1], actions[1], {states[i + 2]: 1})

    mdp.finalize()
    # Visualize the MDP:
    # mdp.visualize(file="{0}/{1}".format(path, self.__class__.__name__))
    return mdp
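
# Standalone sanity check (illustrative values, not part of the env): the
# binomial pmf over 0..size-3 sums to 1, so init_states above is a valid
# start distribution once the two terminal states are pinned to 0.
from scipy.stats import binom

size, prob = 10, 0.4
weights = [binom.pmf(i, size - 3, prob) for i in range(size - 2)]
assert abs(sum(weights) - 1.0) < 1e-9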
def _create_mdp(self, path, prob, size):
    self.path = path

    def _fill_reward(x):
        if x == 0:
            return 0.1
        elif x == size - 1:
            return 1
        else:
            return 0

    state_generator = StateGenerator('name', 'reward')
    action_generator = ActionGenerator('name')
    states = [
        state_generator.generate_state(name="s" + str(x), reward=_fill_reward(x))
        for x in range(size)
    ]
    actions = [
        action_generator.generate_action(name=name)
        for name in ['LEFT', 'RIGHT']
    ]

    # Static variant: a single start state, drawn once from the binomial
    # distribution over the interior states.
    mdp = MDPModel('MdpEnvLinStatic') \
        .add_states(states) \
        .add_actions(actions) \
        .add_init_states({states[1 + np.random.binomial(size - 3, prob)]: 1}) \
        .add_final_states([states[x] for x in [0, size - 1]], 100)

    for i in range(size - 2):
        mdp \
            .add_transition(states[i + 1], actions[0], {states[i]: 1}) \
            .add_transition(states[i + 1], actions[1], {states[i + 2]: 1})

    mdp.finalize()
    # Visualize the MDP:
    # mdp.visualize(file="{0}/{1}".format(path, self.__class__.__name__))
    return mdp
def test_transitions():
    s = StateGenerator('name', 'reward')
    s0 = s.generate_state(name='s0', reward=1)
    s1 = s.generate_state(name='s1', reward=-1)
    s2 = s.generate_state(name='s2', reward=0)
    s3 = s.generate_state(name='s3', reward=0)
    s4 = s.generate_state(name='s4', reward=0)
    a = ActionGenerator('name', 'reward')
    a0 = a.generate_action(name='a0', reward=1)
    a1 = a.generate_action(name='a1', reward=-1)
    a2 = a.generate_action(name='a2', reward=-1)
    m = MDPModel('Test')
    m.add_states([s0, s1, s2, s3, s4]) \
        .add_actions([a0, a1, a2]) \
        .add_init_states({s0: 1}) \
        .add_transition(s0, a0, {s1: 1}) \
        .add_transition(s1, a0, {s2: 1}) \
        .add_transition(s2, a1, {s3: 1}) \
        .add_transition(s3, a2, {s4: 1}) \
        .finalize()
    assert m.initialize() == s0
    assert m.transition(a0) == s1
    with pytest.raises(InvalidAction):
        m.transition(a1)
    assert m.transition(a0) == s2
    assert m.transition(a1) == s3
    assert m.transition(a2) == s4
    assert m.is_terminated()
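
# A minimal rollout sketch built only from the MDPModel calls exercised in
# test_transitions (initialize / transition / is_terminated). The `plan`
# argument is a hypothetical list of actions, not part of the library API.
def rollout(mdp, plan):
    # Collect the visited states, starting from the sampled init state.
    trace = [mdp.initialize()]
    for action in plan:
        trace.append(mdp.transition(action))
        if mdp.is_terminated():
            break
    return trace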
def test_argument_mismatch():
    g = StateGenerator('name', 'reward', 'probability')
    with pytest.raises(StateParameterLengthMismatch):
        g.generate_state(name="Test", reward=1)
def test_argument_has_name():
    g = StateGenerator('reward', 'probability', 'order')
    with pytest.raises(StateNameNotProvided):
        g.generate_state(reward=1, probability=0.5, order=0)
def test_argument_undefined():
    g = StateGenerator('name', 'reward', 'probability')
    with pytest.raises(StateParameterUndefined):
        g.generate_state(name="Test", reward=1, prob=0.5)
def _create_mdp(self, path, prob, size):
    def _fill_reward(x):
        if x == (0, 0):
            return 0.1
        elif x == (size - 1, size - 1):
            return 1
        else:
            return 0

    state_generator = StateGenerator('name', 'reward')
    action_generator = ActionGenerator('name')
    states = [[
        state_generator.generate_state(name="s" + str(x) + "-" + str(y),
                                       reward=_fill_reward((x, y)))
        for x in range(size)
    ] for y in range(size)]
    actions = [
        action_generator.generate_action(name=name)
        for name in ['LEFT', 'RIGHT', 'DOWN', 'UP']
    ]

    # Start distribution over the flattened grid: a product of two binomial
    # pmfs, one per axis.
    init_states = dict(
        zip([item for sublist in states for item in sublist], [
            binom.pmf(i, size - 1, prob) * binom.pmf(j, size - 1, prob)
            for i in range(size) for j in range(size)
        ]))

    mdp = MDPModel('MdpEnvPlanarStatic') \
        .add_states([item for sublist in states for item in sublist]) \
        .add_actions(actions) \
        .add_init_states(init_states) \
        .add_final_states([states[x][x] for x in [0, size - 1]], 1000)

    # Wire LEFT/RIGHT within each row; the border columns self-loop.
    for j in range(size):
        mdp \
            .add_transition(states[j][0], actions[0], {states[j][0]: 1}) \
            .add_transition(states[j][size - 1], actions[1], {states[j][size - 1]: 1})
        for i in range(-1, size - 1):
            if i == -1:
                mdp.add_transition(states[j][i + 1], actions[1], {states[j][i + 2]: 1})
            elif i == size - 2:
                mdp.add_transition(states[j][i + 1], actions[0], {states[j][i]: 1})
            else:
                mdp \
                    .add_transition(states[j][i + 1], actions[0], {states[j][i]: 1}) \
                    .add_transition(states[j][i + 1], actions[1], {states[j][i + 2]: 1})

    # Wire DOWN/UP within each column; the border rows self-loop.
    for j in range(size):
        mdp \
            .add_transition(states[0][j], actions[2], {states[0][j]: 1}) \
            .add_transition(states[size - 1][j], actions[3], {states[size - 1][j]: 1})
        for i in range(-1, size - 1):
            if i == -1:
                mdp.add_transition(states[i + 1][j], actions[3], {states[i + 2][j]: 1})
            elif i == size - 2:
                mdp.add_transition(states[i + 1][j], actions[2], {states[i][j]: 1})
            else:
                mdp \
                    .add_transition(states[i + 1][j], actions[2], {states[i][j]: 1}) \
                    .add_transition(states[i + 1][j], actions[3], {states[i + 2][j]: 1})

    mdp.finalize()
    # Visualize the MDP:
    # mdp.visualize(file="{0}/{1}".format(path, self.__class__.__name__))
    return mdp
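
# Standalone sanity check (illustrative values, not part of the env): the
# product of two binomial pmfs sums to 1 over the size x size grid, so the
# planar init_states form a valid joint start distribution.
from scipy.stats import binom

size, prob = 5, 0.5
total = sum(
    binom.pmf(i, size - 1, prob) * binom.pmf(j, size - 1, prob)
    for i in range(size) for j in range(size))
assert abs(total - 1.0) < 1e-9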