def simple_test(env_fn, learn_fn, min_reward_fraction, n_trials=N_TRIALS):
    """Smoke-test a learned model against a reward threshold.

    Trains a model via ``learn_fn`` on a single-env ``DummyVecEnv`` built from
    ``env_fn``, then rolls it out for ``n_trials`` steps and asserts that the
    accumulated reward exceeds ``min_reward_fraction * n_trials``.

    All RNGs visible here are seeded for reproducibility; the TF graph/session
    is created fresh so tests do not leak state into each other.
    """
    np.random.seed(0)
    np_random.seed(0)
    env = DummyVecEnv([env_fn])

    session = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
    with tf.Graph().as_default(), session.as_default():
        tf.set_random_seed(0)
        model = learn_fn(env)

        total_reward = 0
        episode_over = True  # force a reset on the very first iteration
        for _ in range(n_trials):
            if episode_over:
                obs = env.reset()
                state = model.initial_state
            # Recurrent policies expose a non-None initial_state and need
            # state/mask threading; feed-forward policies are called plainly.
            if state is not None:
                action, value, state, _ = model.step(obs, S=state, M=[False])
            else:
                action, value, _, _ = model.step(obs)
            obs, reward, episode_over, _ = env.step(action)
            total_reward += float(reward)

        print("Reward in {} trials is {}".format(n_trials, total_reward))
        assert total_reward > min_reward_fraction * n_trials, \
            'sum of rewards {} is less than {} of the total number of trials {}'.format(total_reward, min_reward_fraction, n_trials)
def test_identity(learn_func):
    """Check that an algorithm can learn the identity mapping.

    Trains a policy (via ``learn_func``) on ``IdentityEnv(10)``, where the
    optimal behavior is simply to echo the observation back as the action,
    then asserts the policy earns reward on at least 90% of 1000 steps.
    """
    np.random.seed(0)
    np_random.seed(0)
    random.seed(0)

    env = DummyVecEnv([lambda: IdentityEnv(10)])
    with tf.Graph().as_default(), tf.Session().as_default():
        tf.set_random_seed(0)
        model = learn_func(env)

        n_steps = 1000
        reward_total = 0
        obs = env.reset()
        for _ in range(n_steps):
            # model.step(...)[0] is the action batch; DummyVecEnv auto-resets.
            action = model.step(obs)[0]
            obs, reward, _done, _ = env.step(action)
            reward_total += reward

        assert reward_total > 0.9 * n_steps