def setUp(self):
    """Build a model test case for the room environment.

    Registers one (state, timestep, forward, backward) transition check in
    ``self.model_tests`` under a uniform left/right policy, then delegates
    to ``setUpDeterministic`` for the remaining fixtures.
    """
    self.env = RoomEnv(TestRoomSpec())
    self.model_tests = []
    _, _, left, right, _ = get_directions()
    # Policy that moves left or right with equal probability in every state.
    lr_policy = get_two_action_uniform_policy(self.env.nS, self.env.nA, left, right)

    # Agent in the middle with the vase entry False, plus the four
    # neighboring configurations (left/right position x vase True/False).
    middle = RoomState((2, 1), {(2, 1): False})
    left_vase = RoomState((1, 1), {(2, 1): True})
    right_vase = RoomState((3, 1), {(2, 1): True})
    left_novase = RoomState((1, 1), {(2, 1): False})
    right_novase = RoomState((3, 1), {(2, 1): False})

    def as_distribution(weighted_states):
        # Expand (state, probability) pairs into a dense vector over all states.
        probs = np.zeros(self.env.nS)
        for state, p in weighted_states:
            probs[self.env.get_num_from_state(state)] = p
        return probs

    forward = as_distribution([(left_novase, 0.5), (right_novase, 0.5)])
    backward = as_distribution(
        [(s, 0.25) for s in (left_vase, right_vase, left_novase, right_novase)]
    )

    self.model_tests.append({
        "policy": lr_policy,
        "transitions": [(middle, 1, forward, backward)],
        "initial_state_distribution": np.ones(self.env.nS) / self.env.nS,
    })
    self.setUpDeterministic()
def setUp(self):
    """Set up a train environment plus three hand-written trajectories.

    Each trajectory is a list of (action, (successor state, probability))
    pairs; every transition here is deterministic (probability 1.0).
    """
    self.env = TrainEnv(TestTrainSpec())
    u, d, l, r, s = get_directions()

    def step(action, agent_pos, train_pos, train_intact):
        # One trajectory entry; the vase entry at (1, 2) is True throughout.
        successor = TrainState(agent_pos, {(1, 2): True}, train_pos, train_intact)
        return (action, (successor, 1.0))

    self.trajectories = [
        [
            step(u, (0, 3), (3, 1), True),
            step(u, (0, 2), (3, 2), True),
            step(u, (0, 1), (2, 2), True),
            step(r, (1, 1), (2, 1), True),
            step(u, (1, 0), (3, 1), True),
            step(r, (2, 0), (3, 2), True),
            step(s, (2, 0), (2, 2), True),
            step(s, (2, 0), (2, 1), True),
        ],
        [
            step(u, (0, 3), (3, 1), True),
            step(r, (1, 3), (3, 2), True),
            step(r, (2, 3), (2, 2), True),
        ],
        [
            step(r, (1, 4), (3, 1), True),
            step(r, (2, 4), (3, 2), True),
            step(r, (3, 4), (2, 2), True),
            step(u, (3, 3), (2, 1), True),
            step(u, (3, 2), (3, 1), True),
            # From here on the train's flag flips to False.
            step(s, (3, 2), (3, 2), False),
            step(s, (3, 2), (3, 2), False),
            step(u, (3, 1), (3, 2), False),
            step(l, (2, 1), (3, 2), False),
        ],
    ]
def setUp(self):
    """Set up an apples environment with one hand-written trajectory.

    The trajectory interleaves movement and interact actions; transition
    probabilities shrink as apple regrowth makes outcomes stochastic.
    """
    self.env = ApplesEnv(TestApplesSpec())
    u, d, l, r, s = get_directions()
    interact = 5  # interact action

    def state(agent_pos, tree1_apple, tree2_apple, bucket_count, carrying):
        # Assemble an ApplesState for the two trees at (0, 0) / (2, 0)
        # and the single bucket at (1, 2).
        return ApplesState(
            agent_pos,
            {(0, 0): tree1_apple, (2, 0): tree2_apple},
            {(1, 2): bucket_count},
            carrying,
        )

    # Recurring transition probabilities (kept as exact fractions).
    p34 = 3.0 / 4
    p916 = 9.0 / 16
    p316 = 3.0 / 16
    p14 = 1.0 / 4

    self.trajectories = [
        [
            (u, (state((u, 0, 1), True, True, 0, False), 1.0)),
            (interact, (state((u, 0, 1), False, True, 0, True), 1.0)),
            (r, (state((r, 1, 1), False, True, 0, True), p34)),
            (d, (state((d, 1, 1), False, True, 0, True), p34)),
            (interact, (state((d, 1, 1), False, True, 1, False), p34)),
            (u, (state((u, 1, 0), False, True, 1, False), p34)),
            (r, (state((r, 1, 0), False, True, 1, False), p34)),
            (interact, (state((r, 1, 0), False, False, 1, True), p34)),
            (d, (state((d, 1, 1), False, False, 1, True), p916)),
            (interact, (state((d, 1, 1), True, False, 2, False), p316)),
            (s, (state((d, 1, 1), True, True, 2, False), p14)),
        ]
    ]
def setUp(self):
    """Set up a batteries environment with one hand-written trajectory.

    Each entry is (action, (successor state, probability)); all transitions
    here are deterministic. The life counter ticks down each step and resets
    to 9 after hitting 0.
    """
    self.env = BatteriesEnv(TestBatteriesSpec())
    u, d, l, r, s = get_directions()

    def step(action, agent_pos, train_pos, life, battery1, battery2, carrying):
        # Batteries live at fixed positions (0, 0) and (4, 4).
        batteries = {(0, 0): battery1, (4, 4): battery2}
        successor = BatteriesState(agent_pos, train_pos, life, batteries, carrying)
        return (action, (successor, 1.0))

    self.trajectories = [[
        step(u, (0, 3), (3, 1), 7, True, True, False),
        step(u, (0, 2), (3, 2), 6, True, True, False),
        step(u, (0, 1), (2, 2), 5, True, True, False),
        # Picking up the first battery: its cell flips to False,
        # carrying becomes True.
        step(u, (0, 0), (2, 1), 4, False, True, True),
        step(r, (1, 0), (3, 1), 3, False, True, True),
        step(r, (2, 0), (3, 2), 2, False, True, True),
        step(s, (2, 0), (2, 2), 1, False, True, True),
        step(s, (2, 0), (2, 1), 0, False, True, True),
        step(d, (2, 1), (3, 1), 9, False, True, False),
        step(u, (2, 0), (3, 2), 8, False, True, False),
    ]]
def setUp(self):
    """Set up a room environment with three hand-written trajectories.

    Each trajectory is a list of (action, (successor state, probability))
    pairs; every transition here is deterministic (probability 1.0).
    """
    self.env = RoomEnv(TestRoomSpec())
    u, d, l, r, s = get_directions()

    def step(action, agent_pos, vase_flag):
        # vase_flag is the value stored for the vase cell at (2, 1).
        return (action, (RoomState(agent_pos, {(2, 1): vase_flag}), 1.0))

    self.trajectories = [
        [
            step(l, (1, 2), True),
            step(u, (1, 1), True),
            step(u, (1, 0), True),
            step(r, (2, 0), True),
        ],
        [
            step(u, (2, 1), False),
            step(u, (2, 0), False),
        ],
        [
            step(r, (3, 2), True),
            step(u, (3, 1), True),
            # The vase flag flips to False once the agent reaches (2, 1).
            step(l, (2, 1), False),
            step(d, (2, 2), False),
        ],
    ]
def setUp(self):
    """Build a model test case for the apples environment.

    Registers one (state, timestep, forward, backward) transition check in
    ``self.model_tests`` under a deterministic "stay" policy.
    """
    self.env = ApplesEnv(TestApplesSpec())
    self.model_tests = []
    _, _, _, _, stay = get_directions()
    # Deterministic policy: all probability mass on the stay action.
    stay_policy = np.zeros((self.env.nS, self.env.nA))
    stay_policy[:, stay] = 1

    def tree_state(first_apple, second_apple):
        # Only the two tree flags vary; agent, bucket, and carrying are fixed.
        return ApplesState(
            agent_pos=(0, 1, 1),
            tree_states={(0, 0): first_apple, (2, 0): second_apple},
            bucket_states={(1, 2): 0},
            carrying_apple=False,
        )

    both_apples = tree_state(True, True)
    forward = np.zeros(self.env.nS)
    forward[self.env.get_num_from_state(both_apples)] = 1

    # Backward weights per (first tree, second tree) configuration; the
    # literals are kept exact rather than computed to avoid float drift.
    backward = np.zeros(self.env.nS)
    backward_weights = [
        (False, False, 0.04),
        (False, True, 0.16),
        (True, False, 0.16),
        (True, True, 0.64),
    ]
    for first_apple, second_apple, prob in backward_weights:
        index = self.env.get_num_from_state(tree_state(first_apple, second_apple))
        backward[index] = prob

    self.model_tests.append({
        "policy": stay_policy,
        "transitions": [(both_apples, 1, forward, backward)],
        "initial_state_distribution": np.ones(self.env.nS) / self.env.nS,
    })