Пример #1
0
def inspect_path(path, orig_visitation):
    """Tabulate one path's state-action frequencies, then break into pdb.

    Debugging helper: builds ``freq``, a ``(dS, dA)`` table counting how
    often each (observation index, action index) pair occurs in ``path``,
    divides it by the path length, and stops in the debugger so the locals
    can be inspected. Nothing is returned.

    Args:
        path: Mapping with ``'observations'`` and ``'actions'`` entries.
            Each entry must have a two-element ``.shape`` and be indexable
            by time step. Presumably one-hot encoded arrays -- confirm
            against ``one_hot_to_flat``, which is defined elsewhere.
        orig_visitation: Not used by the body; it is only available as a
            local at the breakpoint.
    """
    T, dS = path['observations'].shape
    # T is rebound here: if the two arrays disagree on their first
    # dimension, the length of 'actions' is the one used below.
    T, dA = path['actions'].shape
    freq = np.zeros((dS, dA))
    for t in range(T):
        # one_hot_to_flat is assumed to map a single one-hot row to an
        # integer index usable for array indexing -- TODO confirm.
        obs = one_hot_to_flat(path['observations'][t])
        act = one_hot_to_flat(path['actions'][t])
        freq[obs, act] += 1
    # Turn raw counts into per-step frequencies.
    # NOTE(review): an empty path (T == 0) makes this a division by zero.
    freq = freq / float(T)
    # Deliberate breakpoint: the function exists to inspect `freq` (and the
    # other locals) interactively.
    import pdb
    pdb.set_trace()
Пример #2
0
def compute_vistation_demos(env, demos):
    """Return the normalised state-action visitation table over ``demos``.

    A ``(flat_dim of observation space, flat_dim of action space)`` table is
    filled by adding one for every time step of every demonstration, then
    divided by the sum of all its entries.

    Args:
        env: Object exposing ``observation_space.flat_dim`` and
            ``action_space.flat_dim``, which give the table's dimensions.
        demos: Iterable of mappings with ``'observations'`` and
            ``'actions'`` entries. Observations are converted with
            ``one_hot_to_flat`` (defined elsewhere; presumably yields one
            integer state index per step -- confirm). Each ``actions[t]``
            is used directly as the column index.

    Returns:
        The count table divided by its total.
    """
    n_states = env.observation_space.flat_dim
    n_actions = env.action_space.flat_dim
    visit_counts = np.zeros((n_states, n_actions))

    for trajectory in demos:
        observations = trajectory['observations']
        actions = trajectory['actions']
        flat_states = one_hot_to_flat(observations)
        # The number of flattened states drives the loop; actions are
        # looked up by the same step index.
        for step, state in enumerate(flat_states):
            visit_counts[state, actions[step]] += 1

    total = float(np.sum(visit_counts))
    return visit_counts / total