Exemplo n.º 1
0
def compute_path_info(infos):
    """
    Summarize per-episode info dicts into an OrderedDict of statistics.

    Every element of ``infos`` is converted with
    ``ppp.list_of_dicts__to__dict_of_lists``. The keys of the first converted
    episode drive the loop, and every episode is indexed with each of those
    keys. For each key three groups of statistics are produced through
    ``create_stats_ordered_dict``:

    * ``env_infos/final/``   - the last value of each episode,
    * ``env_infos/initial/`` - the first value of each episode,
    * ``env_infos/``         - the values of all episodes concatenated.

    Args:
        infos: Iterable of episodes, each one a sequence of info dicts.

    Returns:
        OrderedDict: The collected statistics. Empty when ``infos`` holds no
        episodes.
    """
    statistics = OrderedDict()
    all_env_infos = [
        ppp.list_of_dicts__to__dict_of_lists(ep_info) for ep_info in infos
    ]
    if not all_env_infos:
        # Nothing was collected; indexing all_env_infos[0] below would raise.
        return statistics

    for k in all_env_infos[0].keys():
        final_ks = np.array([info[k][-1] for info in all_env_infos])
        first_ks = np.array([info[k][0] for info in all_env_infos])
        all_ks = np.concatenate([info[k] for info in all_env_infos])
        statistics.update(
            create_stats_ordered_dict(
                k,
                final_ks,
                stat_prefix="{}/final/".format("env_infos"),
            )
        )
        statistics.update(
            create_stats_ordered_dict(
                k,
                first_ks,
                stat_prefix="{}/initial/".format("env_infos"),
            )
        )
        statistics.update(
            create_stats_ordered_dict(
                k,
                all_ks,
                stat_prefix="{}/".format("env_infos"),
            )
        )
    return statistics
Exemplo n.º 2
0
    def process_samples(self, paths):
        r"""Turn a list of collected paths into padded, batched tensors.

        Notes: P is the maximum path length (self.max_path_length) and N is
        the number of paths. The shapes below are the expected ones given
        that every sequence is padded to P and then stacked; they are not
        checked here.

        When ``self.exploration_bonus`` is positive the paths are first
        passed through ``self._add_exploration_bonus``.

        Args:
            paths (list[dict]): A list of collected paths. Each path is read
                through the keys ``'observations'``, ``'actions'``,
                ``'rewards'`` and ``'env_infos'`` (whose entries must provide
                ``'sup_labels'``).

        Returns:
            tuple: A 7-tuple, in this order:

            * torch.Tensor: The observations of the environment
              with shape :math:`(N, P, O*)`.
            * torch.Tensor: The actions fed to the environment
              with shape :math:`(N, P, A*)`.
            * torch.Tensor: The acquired rewards with shape :math:`(N, P)`.
            * torch.Tensor: The output of ``tu.discount_cumsum`` applied to
              each path's flattened rewards with ``self.discount``, padded
              like the rewards.
            * torch.Tensor: Integer tensor with the number of valid steps
              (``len(path['actions'])``) of each path.
            * torch.Tensor: Value function estimation at each step, computed
              without gradient tracking and with the last axis squeezed.
            * torch.Tensor: The padded ``'sup_labels'`` taken from each
              path's env infos.

        """
        if self.exploration_bonus > 0.:
            paths = self._add_exploration_bonus(paths)

        def stack_padded(sequences, **pad_kwargs):
            # Pad every sequence to the maximum path length, then batch them
            # on the configured device.
            padded = [
                pad_to_last(seq,
                            total_length=self.max_path_length,
                            **pad_kwargs) for seq in sequences
            ]
            return torch.stack(padded).to(ptu.device)

        path_lengths = [len(path['actions']) for path in paths]
        valids = torch.Tensor(path_lengths).int().to(ptu.device)

        obs = stack_padded((path['observations'] for path in paths), axis=0)
        actions = stack_padded((path['actions'] for path in paths), axis=0)

        # Rewards are flattened first; pad_to_last is left on its default axis.
        rewards = stack_padded(path['rewards'].reshape(-1) for path in paths)
        returns = stack_padded(
            tu.discount_cumsum(path['rewards'].reshape(-1),
                               self.discount).copy() for path in paths)

        # labels: presumably batch x label_num x label_dim
        env_infos = [
            ppp.list_of_dicts__to__dict_of_lists(path['env_infos'])
            for path in paths
        ]
        labels = stack_padded(
            (env_info['sup_labels'] for env_info in env_infos), axis=0)

        with torch.no_grad():
            value_estimates = self._value_function(obs)
            baselines = value_estimates.squeeze(-1)

        return obs, actions, rewards, returns, valids, baselines, labels
Exemplo n.º 3
0
def get_generic_path_information(paths, stat_prefix=''):
    """
    Build an OrderedDict of summary statistics for a batch of paths.

    The result contains, in insertion order: statistics over the stacked
    rewards, the per-path returns and the merged actions (all created with
    ``create_stats_ordered_dict`` under ``stat_prefix``), the number of
    paths (key ``'Num Paths'``, not prefixed), the average returns, the
    values returned by ``get_average_time_and_num_solve`` and finally, for
    each of ``'env_infos'`` / ``'agent_infos'`` present in the first path,
    final / initial / overall statistics for every info key.

    Args:
        paths: Non-empty list of path dicts providing ``"rewards"`` and
            ``"actions"`` and optionally ``'env_infos'`` / ``'agent_infos'``.
        stat_prefix: String prepended to the statistic names.

    Returns:
        OrderedDict mapping statistic names to values.
    """
    stats = OrderedDict()

    per_path_returns = [sum(p["rewards"]) for p in paths]
    stacked_rewards = np.vstack([p["rewards"] for p in paths])
    stats.update(
        create_stats_ordered_dict('Rewards',
                                  stacked_rewards,
                                  stat_prefix=stat_prefix))
    stats.update(
        create_stats_ordered_dict('Returns',
                                  per_path_returns,
                                  stat_prefix=stat_prefix))

    per_path_actions = [p["actions"] for p in paths]
    # 1-D per-path actions are joined end to end; anything else is stacked
    # row-wise.
    if len(per_path_actions[0].shape) == 1:
        merged_actions = np.hstack(per_path_actions)
    else:
        merged_actions = np.vstack(per_path_actions)
    stats.update(
        create_stats_ordered_dict('Actions',
                                  merged_actions,
                                  stat_prefix=stat_prefix))

    stats['Num Paths'] = len(paths)
    stats[stat_prefix + 'Average Returns'] = get_average_returns(paths)

    solve_summary = get_average_time_and_num_solve(paths)
    avg_time, num_solves, avg_solve_time = solve_summary
    stats[stat_prefix + 'Average Time'] = avg_time
    stats[stat_prefix + 'Average Solve Time'] = avg_solve_time
    stats[stat_prefix + 'Number of Solves'] = num_solves

    for info_key in ('env_infos', 'agent_infos'):
        if info_key not in paths[0]:
            continue
        per_path_infos = [
            ppp.list_of_dicts__to__dict_of_lists(p[info_key]) for p in paths
        ]
        for name in per_path_infos[0].keys():
            last_values = np.array([info[name][-1] for info in per_path_infos])
            first_values = np.array([info[name][0] for info in per_path_infos])
            every_value = np.concatenate(
                [info[name] for info in per_path_infos])
            groups = (
                ('{}/final/', last_values),
                ('{}/initial/', first_values),
                ('{}/', every_value),
            )
            for template, values in groups:
                stats.update(
                    create_stats_ordered_dict(
                        stat_prefix + name,
                        values,
                        stat_prefix=template.format(info_key),
                    ))

    return stats
Exemplo n.º 4
0
def get_generic_path_information(paths, stat_prefix=""):
    """
    Build an OrderedDict of summary statistics for a batch of paths.

    Besides the reward, return and action statistics, the number of paths
    and the average returns, this variant records
    ``"Proportion exploration"``: the first element of the summed
    ``"explored"`` entries of all paths divided by the total number of
    ``"explored"`` entries. Only ``"agent_infos"`` (when present in the
    first path) contributes per-key final / initial / overall statistics.

    Args:
        paths: Non-empty list of path dicts providing ``"rewards"``,
            ``"actions"`` and ``"explored"``.
        stat_prefix: String prepended to the statistic names (``"Num Paths"``
            and ``"Proportion exploration"`` are not prefixed).

    Returns:
        OrderedDict mapping statistic names to values.
    """
    stats = OrderedDict()

    per_path_returns = [sum(p["rewards"]) for p in paths]
    stacked_rewards = np.vstack([p["rewards"] for p in paths])
    stats.update(
        create_stats_ordered_dict("Rewards",
                                  stacked_rewards,
                                  stat_prefix=stat_prefix))
    stats.update(
        create_stats_ordered_dict("Returns",
                                  per_path_returns,
                                  stat_prefix=stat_prefix))

    per_path_actions = [p["actions"] for p in paths]
    # 1-D per-path actions are joined end to end; anything else is stacked
    # row-wise.
    if len(per_path_actions[0].shape) == 1:
        merged_actions = np.hstack(per_path_actions)
    else:
        merged_actions = np.vstack(per_path_actions)
    stats.update(
        create_stats_ordered_dict("Actions",
                                  merged_actions,
                                  stat_prefix=stat_prefix))

    stats["Num Paths"] = len(paths)

    # The summed "explored" entries are indexed with [0], so each entry is
    # expected to be array-like rather than a plain scalar.
    explored_steps = sum([sum(p["explored"]) for p in paths])[0]
    total_steps = sum([len(p["explored"]) for p in paths])
    stats["Proportion exploration"] = explored_steps / total_steps

    stats[stat_prefix + "Average Returns"] = get_average_returns(paths)

    info_key = "agent_infos"
    if info_key in paths[0]:
        per_path_infos = [
            ppp.list_of_dicts__to__dict_of_lists(p[info_key]) for p in paths
        ]
        for name in per_path_infos[0].keys():
            last_values = np.array([info[name][-1] for info in per_path_infos])
            first_values = np.array([info[name][0] for info in per_path_infos])
            every_value = np.concatenate(
                [info[name] for info in per_path_infos])
            groups = (
                ("{}/final/", last_values),
                ("{}/initial/", first_values),
                ("{}/", every_value),
            )
            for template, values in groups:
                stats.update(
                    create_stats_ordered_dict(
                        stat_prefix + name,
                        values,
                        stat_prefix=template.format(info_key),
                    ))

    return stats
Exemplo n.º 5
0
def get_traffic_path_information(paths, stat_prefix=''):
    """
    Build an OrderedDict of summary statistics for a batch of traffic paths.

    In addition to the reward, return and action statistics, the number of
    paths and the average returns, every path is classified by the
    ``'event'`` field of its last env info: ``'collision'``, ``'block'``,
    ``'outroad'`` and ``'goal'`` are counted separately, and any other value
    is counted as a timeout. Only ``'agent_infos'`` (when present in the
    first path) contributes per-key final / initial / overall statistics.

    Args:
        paths: Non-empty list of path dicts providing ``"rewards"``,
            ``"actions"`` and ``"env_infos"`` (whose last entry has an
            ``'event'`` field).
        stat_prefix: String prepended to the statistic names (``'Num Paths'``
            and the ``'Num <event>'`` counters are not prefixed).

    Returns:
        OrderedDict mapping statistic names to values.
    """
    statistics = OrderedDict()
    returns = [sum(path["rewards"]) for path in paths]

    rewards = np.vstack([path["rewards"] for path in paths])
    statistics.update(
        create_stats_ordered_dict('Rewards', rewards, stat_prefix=stat_prefix))
    statistics.update(
        create_stats_ordered_dict('Returns', returns, stat_prefix=stat_prefix))
    actions = [path["actions"] for path in paths]
    # 1-D per-path actions are joined end to end; anything else is stacked
    # row-wise.
    if len(actions[0].shape) == 1:
        actions = np.hstack(actions)
    else:
        actions = np.vstack(actions)
    statistics.update(
        create_stats_ordered_dict('Actions', actions, stat_prefix=stat_prefix))
    statistics['Num Paths'] = len(paths)
    statistics[stat_prefix + 'Average Returns'] = get_average_returns(paths)

    # Tally how each path ended, based on the event of its final step.
    num_collision, num_block, num_outroad, num_success, num_timeout = 0, 0, 0, 0, 0
    for path in paths:
        event = path["env_infos"][-1]['event']
        if event == 'collision':
            num_collision += 1
        elif event == 'block':
            num_block += 1
        elif event == 'outroad':
            num_outroad += 1
        elif event == 'goal':
            num_success += 1
        else:
            # Any other event value is treated as a timeout.
            num_timeout += 1
    statistics['Num Collision'] = num_collision
    statistics['Num Block'] = num_block
    statistics['Num Outroad'] = num_outroad
    statistics['Num Success'] = num_success
    statistics['Num Timeout'] = num_timeout

    for info_key in ['agent_infos']:
        if info_key in paths[0]:
            all_env_infos = [
                ppp.list_of_dicts__to__dict_of_lists(p[info_key])
                for p in paths
            ]
            for k in all_env_infos[0].keys():
                final_ks = np.array([info[k][-1] for info in all_env_infos])
                first_ks = np.array([info[k][0] for info in all_env_infos])
                all_ks = np.concatenate([info[k] for info in all_env_infos])
                statistics.update(
                    create_stats_ordered_dict(
                        stat_prefix + k,
                        final_ks,
                        stat_prefix='{}/final/'.format(info_key),
                    ))
                statistics.update(
                    create_stats_ordered_dict(
                        stat_prefix + k,
                        first_ks,
                        stat_prefix='{}/initial/'.format(info_key),
                    ))
                statistics.update(
                    create_stats_ordered_dict(
                        stat_prefix + k,
                        all_ks,
                        stat_prefix='{}/'.format(info_key),
                    ))

    return statistics
Exemplo n.º 6
0
def get_ma_path_information(paths, stat_prefix=''):
    """
    Build an OrderedDict of summary statistics for multi-agent paths.

    The number of agents is read from axis 1 of the first path's
    ``"rewards"``. For each agent index, statistics over that agent's slice
    (``[:, agent, :]``) of the rewards, returns and actions are added,
    followed by the value of ``get_ma_average_returns(paths, agent)``. After
    the per-agent part come ``'Num Paths'``, ``'Success Rate'`` (the fraction
    of paths whose last ``'terminals'`` entry is all-true) and the per-key
    final / initial / overall statistics of ``'env_infos'`` and
    ``'agent_infos'`` when those are present in the first path.

    Args:
        paths: Non-empty list of path dicts providing ``"rewards"``,
            ``"actions"`` and ``'terminals'``.
        stat_prefix: String prepended to the statistic names (``'Num Paths'``
            and ``'Success Rate'`` are not prefixed).

    Returns:
        OrderedDict mapping statistic names to values.
    """
    n_agents = paths[0]["rewards"].shape[1]
    stats = OrderedDict()

    for agent in range(n_agents):
        agent_returns = [sum(p["rewards"][:, agent, :]) for p in paths]
        agent_rewards = np.vstack([p["rewards"][:, agent, :] for p in paths])
        stats.update(
            create_stats_ordered_dict('Rewards {}'.format(agent),
                                      agent_rewards,
                                      stat_prefix=stat_prefix))
        stats.update(
            create_stats_ordered_dict('Returns {}'.format(agent),
                                      agent_returns,
                                      stat_prefix=stat_prefix))

        agent_actions = np.vstack([p["actions"][:, agent, :] for p in paths])
        stats.update(
            create_stats_ordered_dict('Actions {}'.format(agent),
                                      agent_actions,
                                      stat_prefix=stat_prefix))

        average_returns = get_ma_average_returns(paths, agent)
        stats[stat_prefix + 'Average Returns {}'.format(agent)] = average_returns

    stats['Num Paths'] = len(paths)

    # A path counts as a success when its final terminal entry is all-true.
    solved = sum(1 for p in paths if p['terminals'][-1].all())
    stats['Success Rate'] = float(solved) / float(len(paths))

    for info_key in ('env_infos', 'agent_infos'):
        if info_key not in paths[0]:
            continue
        per_path_infos = [
            ppp.list_of_dicts__to__dict_of_lists(p[info_key]) for p in paths
        ]
        for name in per_path_infos[0].keys():
            last_values = np.array([info[name][-1] for info in per_path_infos])
            first_values = np.array([info[name][0] for info in per_path_infos])
            every_value = np.concatenate(
                [info[name] for info in per_path_infos])
            groups = (
                ('{}/final/', last_values),
                ('{}/initial/', first_values),
                ('{}/', every_value),
            )
            for template, values in groups:
                stats.update(
                    create_stats_ordered_dict(
                        stat_prefix + name,
                        values,
                        stat_prefix=template.format(info_key),
                    ))

    return stats
Exemplo n.º 7
0
def get_custom_generic_path_information(paths,
                                        path_length,
                                        reward_scale,
                                        stat_prefix=''):
    """
    Build an OrderedDict of summary statistics for a batch of paths.

    Compared with the plain generic variant this one additionally reports
    ``'ExplReturns'``: the per-path sum of the rewards of the first
    @path_length timesteps.

    Args:
        paths: Non-empty list of path dicts providing ``"rewards"`` and
            ``"actions"`` and optionally ``'env_infos'`` / ``'agent_infos'``.
        path_length: Number of leading timesteps summed for ``'ExplReturns'``.
        reward_scale: Accepted for interface compatibility; the current
            implementation does not use it (no reward normalization is
            applied).
        stat_prefix: String prepended to the statistic names (``'Num Paths'``
            is not prefixed).

    Returns:
        OrderedDict mapping statistic names to values.
    """
    make_stats = eval_util.create_stats_ordered_dict
    stats = OrderedDict()

    full_returns = [sum(p["rewards"]) for p in paths]
    # Returns accumulated only up to the requested timestep.
    truncated_returns = [sum(p["rewards"][:path_length]) for p in paths]
    stacked_rewards = np.vstack([p["rewards"] for p in paths])

    stats.update(make_stats('Rewards', stacked_rewards,
                            stat_prefix=stat_prefix))
    stats.update(make_stats('Returns', full_returns, stat_prefix=stat_prefix))
    stats.update(
        make_stats('ExplReturns', truncated_returns, stat_prefix=stat_prefix))

    per_path_actions = [p["actions"] for p in paths]
    # 1-D per-path actions are joined end to end; anything else is stacked
    # row-wise.
    if len(per_path_actions[0].shape) == 1:
        merged_actions = np.hstack(per_path_actions)
    else:
        merged_actions = np.vstack(per_path_actions)
    stats.update(make_stats('Actions', merged_actions,
                            stat_prefix=stat_prefix))

    stats['Num Paths'] = len(paths)
    stats[stat_prefix + 'Average Returns'] = eval_util.get_average_returns(
        paths)

    for info_key in ('env_infos', 'agent_infos'):
        if info_key not in paths[0]:
            continue
        per_path_infos = [
            ppp.list_of_dicts__to__dict_of_lists(p[info_key]) for p in paths
        ]
        for name in per_path_infos[0].keys():
            last_values = np.array([info[name][-1] for info in per_path_infos])
            first_values = np.array([info[name][0] for info in per_path_infos])
            every_value = np.concatenate(
                [info[name] for info in per_path_infos])
            groups = (
                ('{}/final/', last_values),
                ('{}/initial/', first_values),
                ('{}/', every_value),
            )
            for template, values in groups:
                stats.update(
                    make_stats(
                        stat_prefix + name,
                        values,
                        stat_prefix=template.format(info_key),
                    ))

    return stats
Exemplo n.º 8
0
def get_generic_path_information(paths, stat_prefix=''):
    """
    Build an OrderedDict of summary statistics for a batch of paths.

    After the reward, return and action statistics, the number of paths and
    the average returns, every key of ``'env_infos'`` / ``'agent_infos'``
    (when present in the first path) contributes two groups of statistics:
    the per-path sums under ``'<info_key>/sum/'`` and the per-path means
    under ``'<info_key>/average/'``.

    Args:
        paths: Non-empty list of path dicts providing ``"rewards"`` and
            ``"actions"`` and optionally ``'env_infos'`` / ``'agent_infos'``.
        stat_prefix: String prepended to the statistic names (``'Num Paths'``
            is not prefixed).

    Returns:
        OrderedDict mapping statistic names to values.
    """
    stats = OrderedDict()

    per_path_returns = [sum(p["rewards"]) for p in paths]
    stacked_rewards = np.vstack([p["rewards"] for p in paths])
    stats.update(
        create_stats_ordered_dict('Rewards',
                                  stacked_rewards,
                                  stat_prefix=stat_prefix))
    stats.update(
        create_stats_ordered_dict('Returns',
                                  per_path_returns,
                                  stat_prefix=stat_prefix))

    per_path_actions = [p["actions"] for p in paths]
    # 1-D per-path actions are joined end to end; anything else is stacked
    # row-wise.
    if len(per_path_actions[0].shape) == 1:
        merged_actions = np.hstack(per_path_actions)
    else:
        merged_actions = np.vstack(per_path_actions)
    stats.update(
        create_stats_ordered_dict('Actions',
                                  merged_actions,
                                  stat_prefix=stat_prefix))

    stats['Num Paths'] = len(paths)
    stats[stat_prefix + 'Average Returns'] = get_average_returns(paths)

    # rlkit path info, aggregated per path instead of final/initial/all
    for info_key in ('env_infos', 'agent_infos'):
        if info_key not in paths[0]:
            continue
        per_path_infos = [
            ppp.list_of_dicts__to__dict_of_lists(p[info_key]) for p in paths
        ]
        for name in per_path_infos[0].keys():
            path_sums = [np.sum(info[name]) for info in per_path_infos]
            path_means = [np.mean(info[name]) for info in per_path_infos]
            groups = (
                ('{}/sum/', path_sums),
                ('{}/average/', path_means),
            )
            for template, values in groups:
                stats.update(
                    create_stats_ordered_dict(
                        stat_prefix + name,
                        values,
                        stat_prefix=template.format(info_key),
                    ))

    return stats