Example #1
def make_environment(suite: str, task: str) -> dm_env.Environment:
    """Makes the requested continuous control environment.

  Args:
    suite: One of 'gym' or 'control'.
    task: Task to load. If `suite` is 'control', the task must be formatted as
      f'{domain_name}:{task_name}'

  Returns:
    An environment satisfying the dm_env interface expected by Acme agents.
  """

    if suite not in _VALID_TASK_SUITES:
        raise ValueError(
            f'Unsupported suite: {suite}. Expected one of {_VALID_TASK_SUITES}'
        )

    if suite == 'gym':
        env = gym.make(task)
        # Make sure the environment obeys the dm_env.Environment interface.
        env = wrappers.GymWrapper(env)

    elif suite == 'control':
        # Load dm_suite lazily to avoid requiring a MuJoCo license when it is not used.
        from dm_control import suite as dm_suite  # pylint: disable=g-import-not-at-top
        domain_name, task_name = task.split(':')
        env = dm_suite.load(domain_name, task_name)
        env = wrappers.ConcatObservationWrapper(env)

    # Wrap the environment so the expected continuous action spec is [-1, 1].
    # Note: this is a no-op on 'control' tasks.
    env = wrappers.CanonicalSpecWrapper(env, clip=True)
    env = wrappers.SinglePrecisionWrapper(env)
    return env
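A brief usage sketch (not part of the original snippet): assuming the `make_environment` factory above and an installed `acme`, the returned environment is driven through the plain dm_env interface. The suite/task names below are illustrative only.

from acme import specs

environment = make_environment(suite='gym', task='MountainCarContinuous-v0')
environment_spec = specs.make_environment_spec(environment)

# Step the environment directly via the dm_env.Environment interface.
timestep = environment.reset()
while not timestep.last():
    # generate_value() returns a spec-conforming placeholder action.
    action = environment_spec.actions.generate_value()
    timestep = environment.step(action)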
Example #2
def load_environment(env_name):
  """Outputs a wrapped gym environment."""
  environment = gym.make(env_name)
  environment = TimeLimit(environment, max_episode_steps=1000)
  environment = wrappers.gym_wrapper.GymWrapper(environment)
  environment = wrappers.SinglePrecisionWrapper(environment)
  return environment
Example #3
def environment(game):
    """Atari environment."""
    env = atari_lib.create_atari_environment(game_name=game,
                                             sticky_actions=True)
    env = AtariDopamineWrapper(env)
    env = wrappers.FrameStackingWrapper(env, num_frames=4)
    return wrappers.SinglePrecisionWrapper(env)
Example #4
def create_environment(task_class,
                       task_name,
                       single_precision=False,
                       **kwargs):
    env = _create_environment(task_class, task_name, **kwargs)
    if single_precision:
        env = wrappers.SinglePrecisionWrapper(env)

    return env
Example #5
def make_bsuite_environment(bsuite_id: str = 'deep_sea/0',
                            results_dir: str = '/tmp/bsuite',
                            overwrite: bool = False) -> dm_env.Environment:
    raw_environment = bsuite.load_and_record_to_csv(
        bsuite_id=bsuite_id,
        results_dir=results_dir,
        overwrite=overwrite,
    )
    return wrappers.SinglePrecisionWrapper(raw_environment)
Example #6
def run_dqn(experiment_name):
    current_dir = pathlib.Path().absolute()
    directories = Save_paths(data_dir=f'{current_dir}/data', experiment_name=experiment_name)

    game = Winter_is_coming(setup=PARAMS['setup'])
    environment = wrappers.SinglePrecisionWrapper(game)
    spec = specs.make_environment_spec(environment)

    # Build the network.
    def _make_network(spec) -> snt.Module:
        network = snt.Sequential([
            snt.Flatten(),
            snt.nets.MLP([50, 50, spec.actions.num_values]),
        ])
        tf2_utils.create_variables(network, [spec.observations])
        return network

    network = _make_network(spec)

    # Set up the logger
    if neptune_enabled:
        agent_logger = NeptuneLogger(label='DQN agent', time_delta=0.1)
        loop_logger = NeptuneLogger(label='Environment loop', time_delta=0.1)
        PARAMS['network'] = f'{network}'
        neptune.init('cvasquez/sandbox')
        neptune.create_experiment(name=experiment_name, params=PARAMS)
    else:
        agent_logger = loggers.TerminalLogger('DQN agent', time_delta=1.)
        loop_logger = loggers.TerminalLogger('Environment loop', time_delta=1.)

    # Build the agent
    agent = DQN(
        environment_spec=spec,
        network=network,
        params=PARAMS,
        checkpoint=True,
        paths=directories,
        logger=agent_logger
    )
    # Try running the environment loop. We have no assertions here because all
    # we care about is that the agent runs without raising any errors.
    loop = acme.EnvironmentLoop(environment, agent, logger=loop_logger)
    loop.run(num_episodes=PARAMS['num_episodes'])

    last_checkpoint_path = agent.save()

    # Upload last checkpoint
    if neptune_upload_checkpoint and last_checkpoint_path:
        files = os.listdir(last_checkpoint_path)
        for f in files:
            neptune.log_artifact(os.path.join(last_checkpoint_path, f))

    if neptune_enabled:
        neptune.stop()

    do_example_run(game, agent)
Example #7
  def _wrap_training(self, env: gym.Env):
    env = FilterObservation(env, filter_keys=['lidar'])
    env = Flatten(env, flatten_obs=True, flatten_actions=True)
    env = NormalizeObservations(env)
    env = FixedResetMode(env, mode='random')
    env = TimeLimit(env, max_episode_steps=self._env_config.training_time_limit)
    env = ActionRepeat(env, n=self._env_config.action_repeat)
    env = GymWrapper(environment=env)
    env = wrappers.SinglePrecisionWrapper(env)
    return env
Example #8
def make_environment(evaluation: bool = False,
                     domain_name: str = 'cartpole',
                     task_name: str = 'balance') -> dm_env.Environment:
    """Implements a control suite environment factory."""
    # Nothing special to be done for evaluation environment.
    del evaluation

    environment = suite.load(domain_name, task_name)
    environment = wrappers.SinglePrecisionWrapper(environment)

    return environment
Example #9
def environment(combined_challenge,
                domain,
                task,
                log_output=None,
                environment_kwargs=None):
    """RWRL environment."""
    env = rwrl_envs.load(domain_name=domain,
                         task_name=task,
                         log_output=log_output,
                         environment_kwargs=environment_kwargs,
                         combined_challenge=combined_challenge)
    return wrappers.SinglePrecisionWrapper(env)
Example #10
  def _wrap_test(self, env: gym.Env):
    env = FilterObservation(env, filter_keys=['lidar'])
    env = Flatten(env, flatten_obs=False, flatten_actions=True)
    env = NormalizeObservations(env)
    env = InfoToObservation(env)
    env = FixedResetMode(env, mode='grid')
    env = TimeLimit(env, max_episode_steps=self._env_config.eval_time_limit)
    gym_env = ActionRepeat(env, n=self._env_config.action_repeat)
    env = GymWrapper(environment=gym_env)
    env = wrappers.SinglePrecisionWrapper(env)
    env.gym_env = gym_env
    return env
Example #11
    def environment(self):
        """Build and return the environment."""
        if self._environment is not None:
            return self._environment

        self._environment = suite.load(domain_name=self._domain_name,
                                       task_name=self._task_name)

        self._environment = wrappers.SinglePrecisionWrapper(self._environment)
        self._environment = NormilizeActionSpecWrapper(self._environment)

        return self._environment
Example #12
def make_gym_environment(
        task_name: str = 'MountainCarContinuous-v0') -> dm_env.Environment:
    """Creates an OpenAI Gym environment."""

    # Load the gym environment.
    environment = gym.make(task_name)

    # Make sure the environment obeys the dm_env.Environment interface.
    environment = wrappers.GymWrapper(environment)
    environment = wrappers.SinglePrecisionWrapper(environment)

    return environment
Example #13
def make_environment(
    task: str = 'MountainCarContinuous-v0') -> dm_env.Environment:
  """Creates an OpenAI Gym environment."""

  # Load the gym environment.
  environment = gym.make(task)

  # Make sure the environment obeys the dm_env.Environment interface.
  environment = wrappers.GymWrapper(environment)
  # Clip the action returned by the agent to the environment spec.
  environment = wrappers.CanonicalSpecWrapper(environment, clip=True)
  environment = wrappers.SinglePrecisionWrapper(environment)

  return environment
Example #14
def make_environment(domain_name: str = 'cartpole',
                     task_name: str = 'balance') -> dm_env.Environment:
    """Creates a RWRL suite environment."""
    environment = rwrl.load(domain_name=domain_name,
                            task_name=task_name,
                            safety_spec=dict(enable=True),
                            delay_spec=dict(enable=True, actions=20),
                            log_output=os.path.join(FLAGS.save_path,
                                                    'log.npz'),
                            environment_kwargs=dict(log_safety_vars=True,
                                                    log_every=2,
                                                    flat_observation=True))
    environment = wrappers.SinglePrecisionWrapper(environment)
    return environment
Example #15
def make_environment(evaluation: bool = False,
                     task: str = 'HalfCheetah-v3') -> dm_env.Environment:
    """Creates an OpenAI Gym environment."""
    del evaluation

    # Load the gym environment.
    environment = gym.make(task)
    # Make sure the environment obeys the dm_env.Environment interface.
    environment = wrappers.GymWrapper(environment)
    # Clip the action returned by the agent to the environment spec.
    environment = wrappers.CanonicalSpecWrapper(environment, clip=True)
    environment = wrappers.SinglePrecisionWrapper(environment)

    return environment
Example #16
def environment(
        combined_challenge: str,
        domain: str,
        task: str,
        log_output: Optional[str] = None,
        environment_kwargs: Optional[Dict[str,
                                          Any]] = None) -> dm_env.Environment:
    """RWRL environment."""
    env = rwrl_envs.load(domain_name=domain,
                         task_name=task,
                         log_output=log_output,
                         environment_kwargs=environment_kwargs,
                         combined_challenge=combined_challenge)
    return wrappers.SinglePrecisionWrapper(env)
Example #17
def make_env_and_model() -> Tuple[dm_env.Environment, models.Model]:
  """Create environment and corresponding model (learned or simulator)."""
  environment = bsuite.load('catch', kwargs={})
  if FLAGS.simulator:
    model = simulator.Simulator(environment)  # pytype: disable=attribute-error
  else:
    model = mlp.MLPModel(
        specs.make_environment_spec(environment),
        replay_capacity=1000,
        batch_size=16,
        hidden_sizes=(50,),
    )
  environment = wrappers.SinglePrecisionWrapper(environment)

  return environment, model
Example #18
def make_environment(evaluation: bool = False,
                     domain_name: str = 'cartpole',
                     task_name: str = 'balance',
                     concatenate_observations: bool = False
                     ) -> dm_env.Environment:
  """Implements a control suite environment factory."""
  # Nothing special to be done for evaluation environment.
  del evaluation

  environment = suite.load(domain_name, task_name)
  environment = wrappers.SinglePrecisionWrapper(environment)
  timestep = environment.reset()
  obs_names = list(timestep.observation.keys())
  if concatenate_observations:
    environment = wrappers.ConcatObservationWrapper(environment, obs_names)
  return environment
Example #19
def main(_):
    # Create an environment and grab the spec.
    environment = bsuite.load_from_id('catch/0')
    environment = wrappers.SinglePrecisionWrapper(environment)
    environment_spec = specs.make_environment_spec(environment)

    network = snt.Sequential([
        snt.Flatten(),
        snt.nets.MLP([50, 50, environment_spec.actions.num_values])
    ])

    # Construct the agent.
    agent = dqn.DQN(environment_spec=environment_spec, network=network)

    # Run the environment loop.
    loop = acme.EnvironmentLoop(environment, agent)
    loop.run(num_episodes=environment.bsuite_num_episodes)  # pytype: disable=attribute-error
Example #20
def main(_):
  # Create an environment and grab the spec.
  raw_environment = bsuite.load_from_id(FLAGS.bsuite_id)
  environment = wrappers.SinglePrecisionWrapper(raw_environment)
  environment_spec = specs.make_environment_spec(environment)

  # Construct the agent.
  agent = dqfd.DQfD(
      environment_spec=environment_spec,
      network=make_network(environment_spec.actions),
      demonstration_dataset=bsuite_demonstrations.make_dataset(raw_environment),
      demonstration_ratio=FLAGS.demonstration_ratio,
      samples_per_insert=FLAGS.samples_per_insert,
      learning_rate=FLAGS.learning_rate)

  # Run the environment loop.
  loop = acme.EnvironmentLoop(environment, agent)
  loop.run(num_episodes=environment.bsuite_num_episodes)  # pytype: disable=attribute-error
Example #21
def make_env_and_model(
        bsuite_id: str, results_dir: str,
        overwrite: bool) -> Tuple[dm_env.Environment, models.Model]:
    """Create environment and corresponding model (learned or simulator)."""
    raw_env = bsuite.load_from_id(bsuite_id)
    if FLAGS.simulator:
        model = simulator.Simulator(raw_env)  # pytype: disable=attribute-error
    else:
        model = mlp.MLPModel(
            specs.make_environment_spec(raw_env),
            replay_capacity=1000,
            batch_size=16,
            hidden_sizes=(50, ),
        )
    environment = csv_logging.wrap_environment(raw_env, bsuite_id, results_dir,
                                               overwrite)
    environment = wrappers.SinglePrecisionWrapper(environment)

    return environment, model
Example #22
  def test_discrete(self):
    env = wrappers.SinglePrecisionWrapper(
        fakes.DiscreteEnvironment(
            action_dtype=np.int64, obs_dtype=np.int64, reward_dtype=np.float64))

    self.assertTrue(np.issubdtype(env.observation_spec().dtype, np.int32))
    self.assertTrue(np.issubdtype(env.action_spec().dtype, np.int32))
    self.assertTrue(np.issubdtype(env.reward_spec().dtype, np.float32))
    self.assertTrue(np.issubdtype(env.discount_spec().dtype, np.float32))

    timestep = env.reset()
    self.assertEqual(timestep.reward, None)
    self.assertEqual(timestep.discount, None)
    self.assertTrue(np.issubdtype(timestep.observation.dtype, np.int32))

    timestep = env.step(0)
    self.assertTrue(np.issubdtype(timestep.reward.dtype, np.float32))
    self.assertTrue(np.issubdtype(timestep.discount.dtype, np.float32))
    self.assertTrue(np.issubdtype(timestep.observation.dtype, np.int32))
Example #23
def make_environment(task, evaluation = False):
  """Creates an OpenAI Gym environment."""

  # Load the gym environment.
  environment = gym.make(task)

  environment = env_wrappers.AdroitSparseRewardWrapper(environment)

  # Make sure the environment obeys the dm_env.Environment interface.
  environment = wrappers.GymWrapper(environment)
  # Clip the action returned by the agent to the environment spec.
  environment = wrappers.CanonicalSpecWrapper(environment, clip=True)
  environment = wrappers.SinglePrecisionWrapper(environment)

  if evaluation:
    environment = env_wrappers.SuccessRewardWrapper(environment,
                                                    success_threshold=1.)

  return environment
Example #24
    def test_loop_run(self):
        raw_env = rl_environment.Environment('tic_tac_toe')
        env = open_spiel_wrapper.OpenSpielWrapper(raw_env)
        env = wrappers.SinglePrecisionWrapper(env)
        environment_spec = acme.make_environment_spec(env)

        actors = []
        for _ in range(env.num_players):
            actors.append(RandomActor(environment_spec))

        loop = open_spiel_environment_loop.OpenSpielEnvironmentLoop(
            env, actors)
        result = loop.run_episode()
        self.assertIn('episode_length', result)
        self.assertIn('episode_return', result)
        self.assertIn('steps_per_second', result)

        loop.run(num_episodes=10)
        loop.run(num_steps=100)
Example #25
def make_single_agent_env(scenario: str, render=False):
    scenario = SingleAgentScenario.from_spec(scenario, rendering=render)
    env = VectorizedSingleAgentRaceEnv(scenarios=[scenario])
    env = wrap_env(env=env, wrapper_configs='single_agent_wrappers.yml')
    env = wrappers.GymWrapper(environment=env)
    env = wrappers.SinglePrecisionWrapper(env)
    return env

# def make_multi_agent_env(scenario: str, render=False, test=False):
#     scenario = MultiAgentScenario.from_spec(scenario, rendering=render)
#     env = VectorizedMultiAgentRaceEnv(scenarios=[scenario])
#     if test:
#         env = wrap_env(env=env, wrapper_configs='multi_agent_test_wrappers.yml')
#     else:
#         env = wrap_env(env=env, wrapper_configs='multi_agent_wrappers.yml')
#
#     env = MultiAgentGymWrapper(environment=env)
#     env = wrappers.SinglePrecisionWrapper(env)
#     return env
Example #26
  def test_continuous(self):
    env = wrappers.SinglePrecisionWrapper(
        fakes.ContinuousEnvironment(
            action_dim=0, dtype=np.float64, reward_dtype=np.float64))

    self.assertTrue(np.issubdtype(env.observation_spec().dtype, np.float32))
    self.assertTrue(np.issubdtype(env.action_spec().dtype, np.float32))
    self.assertTrue(np.issubdtype(env.reward_spec().dtype, np.float32))
    self.assertTrue(np.issubdtype(env.discount_spec().dtype, np.float32))

    timestep = env.reset()
    self.assertIsNone(timestep.reward)
    self.assertIsNone(timestep.discount)
    self.assertTrue(np.issubdtype(timestep.observation.dtype, np.float32))

    timestep = env.step(0.0)
    self.assertTrue(np.issubdtype(timestep.reward.dtype, np.float32))
    self.assertTrue(np.issubdtype(timestep.discount.dtype, np.float32))
    self.assertTrue(np.issubdtype(timestep.observation.dtype, np.float32))
Example #27
    def environment(self):
        """Build and return the environment."""

        if self._task_name == 'humanoid_corridor':
            self._environment = _build_humanoid_corridor_env()
        elif self._task_name == 'humanoid_gaps':
            self._environment = _build_humanoid_corridor_gaps()
        elif self._task_name == 'humanoid_walls':
            self._environment = _build_humanoid_walls_env()

        self._environment = NormilizeActionSpecWrapper(self._environment)
        self._environment = MujocoActionNormalizer(
            environment=self._environment, rescale='clip')
        self._environment = wrappers.SinglePrecisionWrapper(self._environment)

        all_observations = list(self._proprio_keys) + list(self._pixel_keys)
        self._environment = FilterObservationsWrapper(self._environment,
                                                      all_observations)

        return self._environment
Example #28
def main(_):
    # Create an environment and grab the spec.
    raw_environment = bsuite.load_and_record_to_csv(
        bsuite_id=FLAGS.bsuite_id,
        results_dir=FLAGS.results_dir,
        overwrite=FLAGS.overwrite,
    )
    environment = wrappers.SinglePrecisionWrapper(raw_environment)
    environment_spec = specs.make_environment_spec(environment)

    network = snt.Sequential([
        snt.Flatten(),
        snt.nets.MLP([50, 50, environment_spec.actions.num_values])
    ])

    # Construct the agent.
    agent = dqn.DQN(environment_spec=environment_spec, network=network)

    # Run the environment loop.
    loop = acme.EnvironmentLoop(environment, agent)
    loop.run(num_episodes=environment.bsuite_num_episodes)  # pytype: disable=attribute-error
Example #29
    def environment(self):
        """Return environment."""
        if self._task_name == 'rodent_escape':
            self._environment = _build_rodent_escape_env()
        elif self._task_name == 'rodent_gaps':
            self._environment = _build_rodent_corridor_gaps()
        elif self._task_name == 'rodent_two_touch':
            self._environment = _build_rodent_two_touch_env()
        elif self._task_name == 'rodent_mazes':
            self._environment = _build_rodent_maze_env()

        self._environment = NormilizeActionSpecWrapper(self._environment)
        self._environment = MujocoActionNormalizer(
            environment=self._environment, rescale='clip')
        self._environment = wrappers.SinglePrecisionWrapper(self._environment)

        all_observations = list(self._proprio_keys) + list(self._pixel_keys)
        self._environment = FilterObservationsWrapper(self._environment,
                                                      all_observations)

        return self._environment
Example #30
def main(_):
    # Create an environment and grab the spec.
    env_configs = {'players': FLAGS.num_players} if FLAGS.num_players else {}
    raw_environment = rl_environment.Environment(FLAGS.game, **env_configs)

    environment = open_spiel_wrapper.OpenSpielWrapper(raw_environment)
    environment = wrappers.SinglePrecisionWrapper(
        environment)  # type: open_spiel_wrapper.OpenSpielWrapper
    environment_spec = acme.make_environment_spec(environment)

    # Build the networks.
    networks = []
    policy_networks = []
    for _ in range(environment.num_players):
        network = legal_actions.MaskedSequential([
            snt.Flatten(),
            snt.nets.MLP([50, 50, environment_spec.actions.num_values])
        ])
        policy_network = snt.Sequential([
            network,
            legal_actions.EpsilonGreedy(epsilon=0.1, threshold=-1e8)
        ])
        networks.append(network)
        policy_networks.append(policy_network)

    # Construct the agents.
    agents = []

    for network, policy_network in zip(networks, policy_networks):
        agents.append(
            dqn.DQN(environment_spec=environment_spec,
                    network=network,
                    policy_network=policy_network))

    # Run the environment loop.
    loop = open_spiel_environment_loop.OpenSpielEnvironmentLoop(
        environment, agents)
    loop.run(num_episodes=100000)