Example #1
A manual reset script for the D'Kitty hardware: it disengages the motors so the robot can be repositioned by hand, then re-engages them and calls env.reset().

import argparse
import logging
import time

import gym
import robel

# NOTE: parse_env_args comes from ROBEL's script utilities; its import is not
# shown in this excerpt.


def main():
    # Get command line arguments.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '-n',
        '--num_repeats',
        type=int,
        default=1,
        help='The number of resets to perform.')
    env_id, params, args = parse_env_args(parser)

    # Show INFO-level logs.
    logging.basicConfig(level=logging.INFO)

    # Create the environment and get the robot component.
    robel.set_env_params(env_id, params)
    env = gym.make(env_id).unwrapped
    assert env.robot.is_hardware

    for i in range(args.num_repeats):
        print('Starting reset #{}'.format(i))

        # Disengage all of the motors and let the D'Kitty fall.
        env.robot.set_motors_engaged(None, engaged=False)

        print('Place the robot in a starting position.')
        input('Press Enter to start the reset...')

        # Start with all motors engaged.
        env.robot.set_motors_engaged(None, engaged=True)
        env.reset()

        print('Reset complete! Turning off the robot in a few seconds.')
        time.sleep(2)
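
For reference, the robel.set_env_params / gym.make pattern at the top of main() can also be written out without the argument-parsing helper. This is a minimal sketch, not part of the original script; the environment ID and serial device path are placeholder values, and the 'device_path' key mirrors the one used in Example #2 below:

import gym
import robel

# Register the device path before gym.make() constructs the environment.
robel.set_env_params('DKittyStandFixed-v0', {'device_path': '/dev/ttyUSB0'})

env = gym.make('DKittyStandFixed-v0').unwrapped
assert env.robot.is_hardware  # Expected to hold once a device path is set.
env.reset()
env.close()
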
Example #2
A method that prepares an environment load request: the serial device path is folded into the environment parameters, registered via robel.set_env_params, and the request is stored under a lock for a run loop to act on.

    def _load_env(self,
                  env_name: str,
                  device: Optional[str] = None,
                  env_params: Optional[Dict[str, Any]] = None):
        """Loads the given environment."""
        env_params = env_params or {}
        if device is not None:
            env_params['device_path'] = device
        if env_params:
            robel.set_env_params(env_name, env_params)

        with self._env_lock:
            self._load_env_name = env_name
            self._load_env_params = env_params
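
As a hypothetical call site (the runner object, environment ID, and device path are placeholders, not from the original code), a hardware load request would look like this; the environment itself is only constructed later by the run loop in Example #3:

# runner is an instance of the class that owns _load_env (a skeleton of that
# class is sketched after Example #3).
runner._load_env('DClawTurnFixed-v0', device='/dev/ttyUSB0')
# Equivalent effect on the ROBEL registry:
#   robel.set_env_params('DClawTurnFixed-v0', {'device_path': '/dev/ttyUSB0'})
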
Example #3
An environment run loop that consumes load requests like the one recorded in Example #2: it swaps in the newly requested environment, handles reset and random-action flags, steps the environment or polls observations, renders, and throttles each iteration to a minimum frame time until the stop event is set.

    def _run_env(self):
        """Runs a loop for the current environment."""
        step = 0
        while not self._stop_event.is_set():

            with self._env_lock:
                # Unload the current env and load the new one if given.
                if self._load_env_name is not None:
                    if self._env is not None:
                        self._env.close()
                    robel.set_env_params(self._load_env_name,
                                         self._load_env_params)
                    self._env = gym.make(self._load_env_name).unwrapped
                    self._env.reset()

                    self._load_env_name = None
                    self._load_env_params = None
                if self._env is None:
                    # No environment has been loaded yet; keep waiting.
                    continue

            frame_start_time = time.time()

            with self._env_lock:
                if self._do_reset:
                    self._do_reset = False
                    self._env.reset()
                if self._do_random_actions:
                    self._action = self._env.action_space.sample()
                if self._action is not None:
                    self._env.step(self._action)
                else:
                    # No action to apply; just read the latest observations.
                    self._env.get_obs_dict()

                self._env.render()
                # self._display_image_obs(obs)

            sleep_duration = MIN_FRAME_TIME - (time.time() - frame_start_time)
            if sleep_duration > 0:
                time.sleep(sleep_duration)
            step += 1

        with self._env_lock:
            if self._env is not None:
                self._env.close()
            self._env = None
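
The methods in Examples #2 and #3 share the same attribute names and evidently belong to one owning class. The skeleton below sketches that owner; the class name, constructor, start/stop methods, and the MIN_FRAME_TIME value are assumptions, while the attribute names are taken from the two excerpts:

import threading

# Assumed minimum frame period used by _run_env; the real value is not shown.
MIN_FRAME_TIME = 1.0 / 30.0


class EnvRunner:
    """Hypothetical owner of the _load_env and _run_env methods above."""

    def __init__(self):
        self._env = None
        self._env_lock = threading.Lock()
        self._stop_event = threading.Event()
        self._load_env_name = None
        self._load_env_params = None
        self._do_reset = False
        self._do_random_actions = False
        self._action = None
        self._thread = threading.Thread(target=self._run_env, daemon=True)

    # _load_env (Example #2) and _run_env (Example #3) are defined here.

    def start(self):
        """Runs the environment loop on a background thread."""
        self._thread.start()

    def stop(self):
        """Signals the loop to exit and waits for it to finish."""
        self._stop_event.set()
        self._thread.join()
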
Example #4
A reusable rollout entry point: it builds the argument parser, registers environment parameters with robel.set_env_params, runs episodes with do_rollouts, and optionally pickles the trajectories and logs per-episode results to CSV.

import argparse
import os
import pickle

import gym
import robel

# NOTE: DEFAULT_ENV_NAME, DEFAULT_EPISODE_COUNT, do_rollouts, EpisodeLogger,
# and parse_env_args are assumed to be defined by the surrounding ROBEL
# script module and its utilities (not shown in this excerpt).


def rollout_script(arg_def_fn=None,
                   env_factory=None,
                   policy_factory=None,
                   add_policy_arg: bool = True):
    """Performs a rollout script.

    Args:
        arg_def_fn: A function that takes an ArgumentParser. Use this to add
            arguments to the script.
        env_factory: A function that takes program arguments and returns an
            environment. If not given, `gym.make` is used.
        policy_factory: A function that takes program arguments and returns a
            policy function (a callable that takes observations and returns
            actions).
        add_policy_arg: If True, adds an argument to take a policy path.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-o',
                        '--output',
                        help='The directory to save rollout data to.')
    if add_policy_arg:
        parser.add_argument('-p',
                            '--policy',
                            help='The path to the policy file to load.')
    parser.add_argument('-n',
                        '--num_episodes',
                        type=int,
                        default=DEFAULT_EPISODE_COUNT,
                        help='The number of episodes to run.')
    parser.add_argument('--seed',
                        type=int,
                        default=None,
                        help='The seed for the environment.')
    parser.add_argument(
        '-r',
        '--render',
        nargs='?',
        const='human',
        default=None,
        help=('The render mode. If the flag is given without a value, '
              'renders to a window; an explicit render mode string may '
              'also be given.'))
    # Add additional argparse arguments.
    if arg_def_fn:
        arg_def_fn(parser)
    env_id, params, args = parse_env_args(parser,
                                          default_env_name=DEFAULT_ENV_NAME)

    robel.set_env_params(env_id, params)
    if env_factory:
        env = env_factory(args)
    else:
        env = gym.make(env_id)

    action_fn = None
    if policy_factory:
        action_fn = policy_factory(args)

    if args.seed is not None:
        env.seed(args.seed)

    paths = []
    try:
        episode_num = 0
        for traj in do_rollouts(
                env,
                num_episodes=args.num_episodes,
                action_fn=action_fn,
                render_mode=args.render,
        ):
            print('Episode {}'.format(episode_num))
            print('> Total reward: {}'.format(traj.total_reward))
            if traj.durations:
                print('> Execution times:')
                for key in sorted(traj.durations):
                    print('{}{}: {:.2f}ms'.format(' ' * 4, key,
                                                  traj.durations[key] * 1000))
            episode_num += 1

            if args.output:
                paths.append(
                    dict(
                        actions=traj.actions,
                        observations=traj.observations,
                        rewards=traj.rewards,
                        total_reward=traj.total_reward,
                        infos=traj.infos,
                    ))
    finally:
        env.close()

        if paths and args.output:
            os.makedirs(args.output, exist_ok=True)
            # Serialize the paths.
            save_path = os.path.join(args.output, 'paths.pkl')
            with open(save_path, 'wb') as f:
                pickle.dump(paths, f)

            # Log the paths to a CSV file.
            csv_path = os.path.join(args.output,
                                    '{}-results.csv'.format(env_id))
            with EpisodeLogger(csv_path) as logger:
                for path in paths:
                    logger.log_path(path)
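
As a usage sketch, rollout_script can be wired to a custom policy_factory that honors the --policy flag added above. The pickle format and the model's predict method are assumptions for illustration, not part of the original script:

import pickle

import numpy as np


def load_policy(args):
    """Builds an action function from the file passed via --policy."""
    with open(args.policy, 'rb') as f:
        model = pickle.load(f)  # Assumed to expose a predict(obs) method.

    def action_fn(obs):
        return np.asarray(model.predict(obs))

    return action_fn


if __name__ == '__main__':
    rollout_script(policy_factory=load_policy)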