class _TestAgentInterface(unittest.TestCase):
    """Shared interface checks for an agent, run against an ``ABC`` environment.

    Subclasses must override ``create_agent``. ``setUp`` reads the
    ``discrete``, ``partially_observable`` and ``episodic`` attributes from
    the test instance, so they have to be provided by the subclass (or by
    whatever parameterization mechanism the subclass uses).
    """

    def setUp(self):
        """Build the environment shared by all tests in this case."""
        self.env = ABC(
            discrete=self.discrete,
            partially_observable=self.partially_observable,
            episodic=self.episodic,
        )

    def _make_temp_dir(self):
        """Create a temporary directory that is deleted when the test ends.

        ``tempfile.mkdtemp`` leaves deletion to the caller, so the removal is
        registered with ``addCleanup``; it then runs whether the test passes
        or fails.
        """
        # Local import: keeps this class independent of the module's
        # top-level import list.
        import shutil

        dirname = tempfile.mkdtemp()
        # Best-effort removal; a failed cleanup must not mask the test result.
        self.addCleanup(shutil.rmtree, dirname, ignore_errors=True)
        return dirname

    def create_agent(self, env):
        """Return the agent under test for ``env``; subclasses must override."""
        raise NotImplementedError()

    def test_save_load(self):
        """Save one agent to a directory and load it into a fresh agent."""
        a = self.create_agent(self.env)
        dirname = self._make_temp_dir()
        a.save(dirname)
        self.assertTrue(os.path.exists(dirname))
        b = self.create_agent(self.env)
        b.load(dirname)

    def test_run_episode(self):
        """Step the agent through the environment for at most 10 steps."""
        agent = self.create_agent(self.env)
        done = False
        obs = self.env.reset()
        t = 0
        while t < 10 and not done:
            a = agent.act(obs)
            # Reward and info are not needed; only check that acting works.
            obs, _, done, _ = self.env.step(a)
            t += 1

    @testing.attr.slow
    def test_train(self):
        """Run ``train_agent`` for 2000 steps with episodes capped at 10 steps."""
        agent = self.create_agent(self.env)
        train_agent(
            agent=agent,
            env=self.env,
            steps=2000,
            outdir=self._make_temp_dir(),
            max_episode_len=10,
        )
def make_env(process_idx, test):
    """Return a size-2 ``ABC`` environment.

    ``process_idx`` is accepted but not used. ``discrete``, ``episodic`` and
    ``self`` are free names resolved from the surrounding scope.

    Test environments are always episodic and deterministic; partial
    observability follows ``self.use_lstm``.
    """
    is_episodic = episodic or test
    partially_observable = self.use_lstm
    return ABC(
        size=2,
        discrete=discrete,
        episodic=is_episodic,
        partially_observable=partially_observable,
        deterministic=test,
    )
def make_env_and_successful_return(self, test):
    """Return an ``(env, successful_return)`` pair, where the return is 1.

    The environment is episodic when ``self.episodic`` is set or when
    ``test`` is truthy, and deterministic exactly when ``test`` is truthy.
    """
    successful_return = 1
    is_episodic = self.episodic or test
    return (
        ABC(
            discrete=self.discrete,
            episodic=is_episodic,
            deterministic=test,
        ),
        successful_return,
    )
def make_env_and_successful_return(self, test):
    """Return an ``(env, successful_return)`` pair, where the return is 1.

    The environment is episodic when ``self.episodic`` is set or when
    ``test`` is truthy, deterministic exactly when ``test`` is truthy, and
    partially observable according to ``self.recurrent``.
    """
    successful_return = 1
    is_episodic = self.episodic or test
    return (
        ABC(
            discrete=self.discrete,
            episodic=is_episodic,
            deterministic=test,
            partially_observable=self.recurrent,
        ),
        successful_return,
    )
def make_env_and_successful_return(self, test):
    """Return an ``(env, successful_return)`` pair, where the return is 1.

    The environment takes its ``discrete`` flag from ``self.discrete`` and is
    deterministic exactly when ``test`` is truthy.
    """
    env = ABC(discrete=self.discrete, deterministic=test)
    successful_return = 1
    return env, successful_return
def make_env_and_successful_return(self, test):
    """Return an ``(env, successful_return)`` pair, where the return is 1.

    The environment is always non-discrete and partially observable; it is
    deterministic exactly when ``test`` is truthy.
    """
    env = ABC(
        discrete=False,
        partially_observable=True,
        deterministic=test,
    )
    successful_return = 1
    return env, successful_return
def make_env(process_idx, test):
    """Return an ``ABC`` environment.

    ``process_idx`` is accepted but not used. ``self`` is a free name
    resolved from the surrounding scope.

    Test environments are always episodic and deterministic; partial
    observability follows ``self.use_lstm``.
    """
    is_episodic = self.episodic or test
    partially_observable = self.use_lstm
    return ABC(
        episodic=is_episodic,
        partially_observable=partially_observable,
        deterministic=test,
    )
def setUp(self):
    """Create ``self.env`` from the test instance's configuration flags."""
    discrete = self.discrete
    partially_observable = self.partially_observable
    episodic = self.episodic
    self.env = ABC(
        discrete=discrete,
        partially_observable=partially_observable,
        episodic=episodic,
    )
def make_env():
    """Return an ``ABC`` environment configured from the enclosing scope.

    ``self`` and ``test`` are free names resolved from the surrounding
    scope; the environment is deterministic exactly when ``test`` is truthy.
    """
    env = ABC(
        discrete=self.discrete,
        deterministic=test,
    )
    return env
def make_env():
    """Return an ``ABC`` environment configured from the enclosing scope.

    ``self`` and ``test`` are free names resolved from the surrounding
    scope; the environment is deterministic exactly when ``test`` is truthy,
    and its ``discrete``/``episodic`` flags come from ``self``.
    """
    env = ABC(discrete=self.discrete, deterministic=test, episodic=self.episodic)
    return env