def test_no_crash_variable_duration(self):
    """Smoke-test training with a mock environment of varying duration.

    Configures ``tools.MockEnvironment`` with ``min_duration=5`` and
    ``max_duration=25`` together with the recurrent Gaussian policy, then
    drains the training generator. The test succeeds if nothing raises and
    every yielded score can be converted with ``float``.
    """
    config = self._define_config()
    make_env = functools.partial(
        tools.MockEnvironment,
        observ_shape=(2, 3),
        action_shape=(3,),
        min_duration=5,
        max_duration=25,
    )
    with config.unlocked:
        config.env = make_env
        config.max_length = 25
        config.steps = 200
        config.network = networks.RecurrentGaussianPolicy
    # Consume every score; the conversion is the only check performed.
    for episode_score in train.train(config, env_processes=False):
        float(episode_score)
def test_no_crash_ant(self):
    """Smoke-test training on the ``'Ant-v1'`` environment.

    Runs once with the feed-forward and once with the recurrent Gaussian
    policy, each time with ``env_processes=True``. The test succeeds if
    nothing raises and every yielded score can be converted with ``float``.
    """
    policies = (
        networks.ForwardGaussianPolicy,
        networks.RecurrentGaussianPolicy,
    )
    for policy in policies:
        # A fresh config per policy so the runs do not share settings.
        config = self._define_config()
        with config.unlocked:
            config.env = 'Ant-v1'
            config.max_length = 200
            config.steps = 1000
            config.network = policy
        for episode_score in train.train(config, env_processes=True):
            float(episode_score)
def test_no_crash_variable_duration(self):
    """Check that training survives a mock environment of varying duration.

    The environment is a ``tools.MockEnvironment`` built with
    ``min_duration=5`` and ``max_duration=25``; the policy is
    ``networks.RecurrentGaussianPolicy``. Every score yielded by
    ``train.train`` is passed through ``float``; there are no other
    assertions.
    """
    env_options = {
        'observ_shape': (2, 3),
        'action_shape': (3,),
        'min_duration': 5,
        'max_duration': 25,
    }
    config = self._define_config()
    with config.unlocked:
        config.env = functools.partial(tools.MockEnvironment, **env_options)
        config.max_length = 25
        config.steps = 200
        config.network = networks.RecurrentGaussianPolicy
    scores = train.train(config, env_processes=False)
    for value in scores:
        float(value)
def test_no_crash_observation_shape(self):
    """Smoke-test training across several observation shapes.

    Every combination of the two Gaussian policies (feed-forward and
    recurrent) with the observation shapes ``(1,)``, ``(2, 3)`` and
    ``(2, 3, 4)`` is trained on a ``tools.MockEnvironment``. The test
    succeeds if nothing raises and every yielded score can be converted
    with ``float``.
    """
    policies = (
        networks.ForwardGaussianPolicy,
        networks.RecurrentGaussianPolicy,
    )
    shapes = ((1,), (2, 3), (2, 3, 4))
    # Policies vary slowest, shapes fastest.
    for policy in policies:
        for shape in shapes:
            config = self._define_config()
            with config.unlocked:
                # The observation shape is passed positionally.
                config.env = functools.partial(
                    tools.MockEnvironment,
                    shape,
                    action_shape=(3,),
                    min_duration=15,
                    max_duration=15,
                )
                config.max_length = 20
                config.steps = 100
                config.network = policy
            for episode_score in train.train(config, env_processes=False):
                float(episode_score)
def test_no_crash_observation_shape(self):
    """Check that training runs for each policy and observation shape.

    Iterates over the product of two policies
    (``ForwardGaussianPolicy``, ``RecurrentGaussianPolicy``) and three
    observation shapes (``(1,)``, ``(2, 3)``, ``(2, 3, 4)``). Each case
    trains on a ``tools.MockEnvironment`` with ``min_duration`` and
    ``max_duration`` both set to 15, and passes every yielded score
    through ``float``; there are no other assertions.
    """
    policy_choices = (
        networks.ForwardGaussianPolicy,
        networks.RecurrentGaussianPolicy,
    )
    shape_choices = ((1,), (2, 3), (2, 3, 4))
    cases = itertools.product(policy_choices, shape_choices)
    for policy, shape in cases:
        config = self._define_config()
        # The observation shape stays the first positional argument.
        env_factory = functools.partial(
            tools.MockEnvironment,
            shape,
            action_shape=(3,),
            min_duration=15,
            max_duration=15,
        )
        with config.unlocked:
            config.env = env_factory
            config.max_length = 20
            config.steps = 100
            config.network = policy
        scores = train.train(config, env_processes=False)
        for value in scores:
            float(value)