def test_base(self):
    """Train a fresh agent on every world and check time and step limits.

    For each entry of ``self.worlds`` an agent is built from the matching
    ``world<name>.csv`` file under ``self.base_dir`` and trained for 500
    episodes. The two values returned by ``learn`` must be below
    ``self.max_time`` and below 1.5 times ``self.optimal[i]`` respectively.
    """
    for idx, world in enumerate(self.worlds):
        csv_name = "world" + world + ".csv"
        agent = GridWorldAgent(
            os.path.join(self.base_dir, csv_name), **self.settings
        )
        elapsed, n_steps = agent.learn(episodes=500)

        # Check the time limit first, exactly as before, so a slow run is
        # reported before the step count is looked at.
        time_msg = "world {}: {} sec".format(world, elapsed)
        self.assertTrue(elapsed < self.max_time, time_msg)

        steps_msg = "world {}: {} steps".format(world, n_steps)
        self.assertTrue(n_steps < 1.5 * self.optimal[idx], steps_msg)
def test_base(self):
    """Check the baseline learner on each world in ``self.worlds``.

    An agent is created per world from ``world<name>.csv`` in
    ``self.base_dir`` using ``self.settings`` and trained for 500 episodes.
    The first value returned by ``learn`` must be under ``self.max_time``;
    the second must be under 1.5 times the matching ``self.optimal`` entry.
    """
    for position, world in enumerate(self.worlds):
        world_file = 'world' + world + '.csv'
        world_path = os.path.join(self.base_dir, world_file)
        learner = GridWorldAgent(world_path, **self.settings)
        seconds, step_count = learner.learn(episodes=500)

        too_slow_msg = 'world {}: {} sec'.format(world, seconds)
        self.assertTrue(seconds < self.max_time, too_slow_msg)

        too_long_msg = 'world {}: {} steps'.format(world, step_count)
        self.assertTrue(step_count < 1.5 * self.optimal[position], too_long_msg)
def test_dyna(self):
    """Train with ``dyna=100`` on the first two worlds and check limits.

    For each of the first two entries of ``self.worlds`` an agent is built
    from ``world<name>.csv`` under ``self.base_dir`` and trained for 50
    episodes. The first value returned by ``learn`` must be below
    ``self.max_time`` and the second below 1.5 times ``self.optimal[i]``.
    """
    # Build a per-test copy instead of writing into self.settings, so the
    # 'dyna' override cannot leak into other tests.
    # NOTE(review): whether self.settings is shared between tests is not
    # visible from here; copying is safe in either case.
    settings = dict(self.settings, dyna=100)

    for i, world in enumerate(self.worlds[:2]):
        fname = os.path.join(self.base_dir, 'world' + world + '.csv')
        agent = GridWorldAgent(fname, **settings)
        # Named 'elapsed' rather than 'time' so a module-level ``time``
        # import cannot be shadowed.
        elapsed, steps = agent.learn(episodes=50)
        self.assertLess(elapsed, self.max_time,
                        'world {}: {} sec'.format(world, elapsed))
        self.assertLess(steps, 1.5 * self.optimal[i],
                        'world {}: {} steps'.format(world, steps))
def test_dyna(self):
    """Train with the dyna overrides on the first two worlds and check limits.

    The agent settings are ``self.settings`` with ``dyna=200``, ``rar=0.5``
    and ``radr=0.99`` applied on top. For each of the first two entries of
    ``self.worlds`` an agent is built from ``world<name>.csv`` under
    ``self.base_dir`` and trained for 50 episodes. The first value returned
    by ``learn`` must be below ``self.max_dyna_time`` and the second below
    1.5 times ``self.optimal[i]``.
    """
    # Build a per-test copy instead of writing into self.settings, so these
    # overrides cannot leak into other tests.
    # NOTE(review): whether self.settings is shared between tests is not
    # visible from here; copying is safe in either case.
    settings = dict(self.settings, dyna=200, rar=0.5, radr=0.99)

    for i, world in enumerate(self.worlds[:2]):
        fname = os.path.join(self.base_dir, "world" + world + ".csv")
        agent = GridWorldAgent(fname, **settings)
        # Named 'elapsed' rather than 'time' so a module-level ``time``
        # import cannot be shadowed.
        elapsed, steps = agent.learn(episodes=50)
        self.assertLess(elapsed, self.max_dyna_time,
                        "world {}: {} sec".format(world, elapsed))
        self.assertLess(steps, 1.5 * self.optimal[i],
                        "world {}: {} steps".format(world, steps))
def test_dyna(self):
    """Check the dyna-configured learner on the first two worlds.

    Runs with ``self.settings`` overridden by ``dyna=200``, ``rar=0.5`` and
    ``radr=0.99``. Each of the first two worlds in ``self.worlds`` is loaded
    from ``world<name>.csv`` in ``self.base_dir`` and trained for 50
    episodes; the values returned by ``learn`` must be below
    ``self.max_dyna_time`` and below 1.5 times ``self.optimal[i]``
    respectively.
    """
    # Apply the overrides to a local copy: writing into self.settings would
    # change the configuration seen by any other test sharing that dict.
    # NOTE(review): whether self.settings is shared between tests is not
    # visible from here; copying is safe in either case.
    settings = dict(self.settings, dyna=200, rar=0.5, radr=0.99)

    for i, world in enumerate(self.worlds[:2]):
        fname = os.path.join(self.base_dir, 'world' + world + '.csv')
        agent = GridWorldAgent(fname, **settings)
        # Named 'elapsed' rather than 'time' so a module-level ``time``
        # import cannot be shadowed.
        elapsed, steps = agent.learn(episodes=50)
        self.assertLess(elapsed, self.max_dyna_time,
                        'world {}: {} sec'.format(world, elapsed))
        self.assertLess(steps, 1.5 * self.optimal[i],
                        'world {}: {} steps'.format(world, steps))