def test_sample_only_preload(self):
    """Check sampling when push is disabled.

    Draws 150 samples from a seeded ``ActionSpace`` built with
    ``disable_push=True`` and verifies that every sample is either the
    noop action or an action whose push part is the noop push and whose
    preload part is not the noop preload. Action types 5-8 must never
    be sampled.
    """
    space = ActionSpace(self.push_groups, disable_push=True)
    space.seed(10000)

    num_iters = 150
    # One entry per classified sample: 0 for a noop, 1 for a preload action.
    sampled_kinds = []
    for _ in range(num_iters):
        sample = space.sample()
        kind = sample[0]
        push_part = tuple(sample[1:4])
        preload_part = tuple(sample[4:])

        if kind == 0:
            assert sample == NOOP_ACTION_ID, "should be a noop"
            sampled_kinds.append(0)
        elif 1 <= kind <= 4:
            assert push_part == NOOP_PUSH_ACTION_ID, "push should be a noop"
            assert preload_part != NOOP_PRELOAD_ACTION_ID, "preload should not be noop"
            # NOTE(review): this validates the (noop) push part against the
            # push space; possibly a preload_space check was intended - confirm.
            assert space.push_space.contains(push_part)
            sampled_kinds.append(1)
        elif 5 <= kind <= 8:
            assert False, "action_type == 2 should not be possible"

    # Expect roughly a 1:4 split between noop and preload samples
    # (20% / 80% of the draws), within +/-20% and +/-10% respectively.
    kind_counts = Counter(sampled_kinds)
    expected_noop = num_iters * 0.2
    expected_preload = num_iters * 0.8
    assert (expected_noop * 0.8) <= kind_counts[0] <= (expected_noop * 1.2)
    assert (expected_preload * 0.9) <= kind_counts[1] <= (expected_preload * 1.1)
    # Every draw must have been classified into exactly one category.
    assert sum(kind_counts.values()) == num_iters == len(sampled_kinds)
def test_sample_returns_infinitely_when_actions_not_used(self):
    """Check that sampling keeps yielding actions when none are consumed.

    Draws 300 samples from a seeded ``ActionSpace`` without ever calling
    ``use_action`` and verifies that each sample is a well-formed noop,
    push-only, or preload-only action, and that the three categories
    occur in roughly a 1:4:1 ratio.
    """
    action_space = ActionSpace(self.push_groups)
    action_space.seed(20000)

    all_actions = []
    num_iters = 300
    for _ in range(num_iters):
        action_id = action_space.sample()
        action_type = action_id[0]
        push_action = tuple(action_id[1:4])
        preload_action = tuple(action_id[4:])

        if action_type == 0:
            assert action_id == NOOP_ACTION_ID, "should be a noop"
            all_actions.append((0,))
        elif 1 <= action_type <= 4:
            # Push action: only the push part may be populated.
            assert push_action != NOOP_PUSH_ACTION_ID, "push should not be a noop"
            assert preload_action == NOOP_PRELOAD_ACTION_ID, "preload should be noop"
            assert action_space.push_space.contains(push_action)
            all_actions.append((1,))
        elif 5 <= action_type <= 6:
            # Preload action: only the preload part may be populated.
            # (Messages now describe the value each assertion actually checks.)
            assert push_action == NOOP_PUSH_ACTION_ID, "push should be a noop"
            assert preload_action != NOOP_PRELOAD_ACTION_ID, "preload should not be noop"
            assert action_space.preload_space.contains(preload_action)
            all_actions.append((2,))

    # Should return approximately 1-4-1 proportion of each type of action,
    # each within +/-25% of its expected count.
    action_types = Counter(a for (a, *_) in all_actions)
    assert (num_iters * (1.0 / 6.0) * 0.75) <= action_types[0] <= (num_iters * (1.0 / 6.0) * 1.25)
    assert (num_iters * (4.0 / 6.0) * 0.75) <= action_types[1] <= (num_iters * (4.0 / 6.0) * 1.25)
    assert (num_iters * (1.0 / 6.0) * 0.75) <= action_types[2] <= (num_iters * (1.0 / 6.0) * 1.25)
    # Every draw must have been classified into exactly one category.
    assert sum(action_types.values()) == num_iters == len(all_actions)
def test_init_disabled_preload(self):
    """Check the shape of an ``ActionSpace`` built with preload disabled.

    The space must report push enabled and preload disabled, expose 5
    action types, and still consist of 6 sub-spaces: the action-type
    dimension followed by the three push dimensions and the two preload
    dimensions.
    """
    space = ActionSpace(self.push_groups, disable_preload=True)

    assert not space.disable_push
    assert space.disable_preload
    assert space.num_action_types == 5
    assert len(space.spaces) == 6
    assert space.spaces[0].n == 5

    # Dimensions 1-3 mirror the push space, dimensions 4-5 the preload space.
    mirrored = (
        space.push_space.spaces[0],
        space.push_space.spaces[1],
        space.push_space.spaces[2],
        space.preload_space.spaces[0],
        space.preload_space.spaces[1],
    )
    for position, sub_space in enumerate(mirrored, start=1):
        assert space.spaces[position].n == sub_space.n
def test_sample_returns_all_actions_uniquely_when_used(self):
    """Check that consumed actions are never sampled again.

    Repeatedly samples from a seeded ``ActionSpace``, marking every
    non-noop action as used. Each non-noop sample must be new, and
    within 150 draws the space must report itself empty while
    returning a noop.
    """
    space = ActionSpace(self.push_groups)
    space.seed(10000)
    max_draws = 150
    non_noop_count = 0
    assert not space.empty()

    seen = set()
    exhausted = False
    for _ in range(max_draws):
        sample = space.sample()
        kind = sample[0]
        if kind == 0:
            if space.empty():
                # A noop from an empty space means every action was consumed.
                exhausted = True
                break
            # A noop while actions remain is allowed; keep sampling.
            continue
        assert sample not in seen
        non_noop_count += 1
        seen.add(sample)
        space.use_action(space.decode_action(sample))

    if not exhausted:
        # The draw budget ran out before the space was exhausted.
        print(len(seen))
        assert False, "sample returned too many items"

    assert len(seen) == non_noop_count
    assert space.empty()
def test_init_raises_when_both_push_and_preload_disabled(self):
    """Check that disabling both push and preload raises ``AssertionError``."""
    both_disabled = {"disable_preload": True, "disable_push": True}
    with pytest.raises(AssertionError):
        ActionSpace(self.push_groups, **both_disabled)