Exemplo n.º 1
0
def eval_policy(dataset, save_file_name):
    """Run the policy on every batch of ``dataset`` and save the results.

    The dataset is walked in consecutive index ranges of at most
    ``dataset.batch_size`` elements. For each batch the observations named in
    ``env_spec.observation_names`` are passed, together with goals, to
    ``policy.get_action``. Observations, goals and returned actions are
    converted with ``to_numpy``, concatenated along axis 0 over all batches,
    merged into one dictionary and written with ``savemat``.

    Relies on the module-level names ``args``, ``model``, ``env_spec``,
    ``policy`` and ``logger``.

    Args:
        dataset: object exposing ``batch_size``, ``__len__`` and
            ``get_batch(indices=..., torch_device=..., get_horizon_goals=...)``.
        save_file_name: destination passed straight to ``savemat``.
    """
    # NOTE(review): unlike eval_model, this function never calls model.eval();
    # confirm whether the caller is expected to set the mode beforehand.
    b_size = dataset.batch_size
    d_size = len(dataset)

    obs_all = []
    goals_all = []
    output_actions = []
    iters = math.ceil(d_size / b_size)
    for b in range(iters):
        logger.debug("[%d/%d]: Eval policy" % (b, iters))
        # the last batch is clipped to the dataset size and may be shorter
        idxs = np.arange(start=b * b_size, stop=min((b + 1) * b_size, d_size))
        if args.random_goals:
            # the second return value (outputs) is not needed here
            inputs, _ = dataset.get_batch(indices=idxs,
                                          torch_device=model.device,
                                          get_horizon_goals=False)
            # Sample exactly one goal per element of this batch. Using the
            # nominal batch size here would yield too many goals for a short
            # final batch, misaligning goals with observations/actions.
            # The unsqueeze is to account for broadcasting to H+1 goals.
            goals = env_spec.get_uniform(
                env_spec.goal_names, len(idxs),
                torch_device=model.device).unsqueeze(1)
        else:
            inputs, _, goals = dataset.get_batch(
                indices=idxs,
                torch_device=model.device,
                get_horizon_goals=True)

        # get obs batch: keep only the entries the env spec lists as observations
        obs = AttrDict()
        for name in env_spec.observation_names:
            obs[name] = inputs[name]

        act = policy.get_action(model, obs, goals, batch=True)

        goals_all.append(goals.leaf_apply(lambda v: to_numpy(v)))
        obs_all.append(obs.leaf_apply(lambda v: to_numpy(v)))
        output_actions.append(act.leaf_apply(lambda v: to_numpy(v)))

    # one big dictionary: concatenate the per-batch arrays along axis 0
    combined_obs = AttrDict.leaf_combine_and_apply(
        obs_all, lambda vs: np.concatenate(vs, axis=0))
    combined_goals = AttrDict.leaf_combine_and_apply(
        goals_all, lambda vs: np.concatenate(vs, axis=0))
    combined_output_actions = AttrDict.leaf_combine_and_apply(
        output_actions, lambda vs: np.concatenate(vs, axis=0))

    combined_obs.combine(combined_goals)
    combined_obs.combine(combined_output_actions)

    logger.debug("Saving Action Sequences")
    savemat(save_file_name, combined_obs)
def eval_model(dataset, save_file_name):
    """Roll the model out on every batch of ``dataset`` and save the results.

    For each consecutive index range of at most ``dataset.batch_size``
    elements, the batch's observations and its ``act_seq`` input are handed
    to ``rollout`` (with ``policy._advance_obs_fn``). The per-step results
    are joined along dim 1, ``policy._cost_fn`` is evaluated on them, and the
    goals, rolled-out observations, action sequences and costs are converted
    with ``to_numpy``. After the loop everything is concatenated along axis
    0, merged into one dictionary and written with ``savemat``.

    Relies on the module-level names ``model``, ``env_spec``, ``policy`` and
    ``logger``.

    Args:
        dataset: object exposing ``batch_size``, ``__len__`` and ``get_batch``.
        save_file_name: destination passed straight to ``savemat``.
    """

    def _join_steps(step_list):
        # first unsqueezes each entry at dim 1 and then concats along dim 1
        return AttrDict.leaf_combine_and_apply(
            step_list,
            func=lambda vs: torch.cat(vs, dim=1),
            map_func=lambda arr: arr.unsqueeze(1))

    def _join_batches(chunks):
        # glue the per-batch numpy arrays together along axis 0
        return AttrDict.leaf_combine_and_apply(
            chunks, lambda vs: np.concatenate(vs, axis=0))

    batch_len = dataset.batch_size
    total = len(dataset)
    n_batches = math.ceil(total / batch_len)

    true_chunks, pred_chunks, act_chunks, cost_chunks = [], [], [], []

    for step in range(n_batches):
        logger.debug("[%d/%d]: Eval model" % (step, n_batches))

        first = step * batch_len
        last = min((step + 1) * batch_len, total)
        batch_idxs = np.arange(start=first, stop=last)

        inputs, outputs, goals = dataset.get_batch(
            indices=batch_idxs,
            torch_device=model.device,
            get_horizon_goals=True,
            get_action_seq=True,
        )

        # observation batch: only the names listed by the env spec
        obs = AttrDict()
        for key in env_spec.observation_names:
            obs[key] = inputs[key]

        # the rollout consumes the action sequence under the 'act' key
        act_seq = AttrDict()
        act_seq['act'] = inputs['act_seq']

        model.eval()
        step_obs, step_mouts = rollout(
            env_spec, model, obs, act_seq, policy._advance_obs_fn)

        traj_obs = _join_steps(step_obs)
        traj_mouts = _join_steps(step_mouts)

        batch_cost = policy._cost_fn(traj_obs, goals, act_seq, traj_mouts)
        cost_dict = AttrDict({'costs': batch_cost})

        true_chunks.append(goals.leaf_apply(to_numpy))
        pred_chunks.append(traj_obs.leaf_apply(to_numpy))
        act_chunks.append(act_seq.leaf_apply(to_numpy))
        cost_chunks.append(cost_dict.leaf_apply(to_numpy))

    # one big dictionary: the true trajectories form the base, and the
    # predictions, actions and costs are merged in, in that order
    # (the original author notes the action keys do not overlap)
    merged = [
        _join_batches(chunks)
        for chunks in (true_chunks, pred_chunks, act_chunks, cost_chunks)
    ]
    final_dict = merged[0]
    for extra in merged[1:]:
        final_dict.combine(extra)

    logger.debug("Saving Model Trajectories")
    logger.debug("Keys: " + str(final_dict.keys()))
    savemat(save_file_name, final_dict)