def inspect_path(path, orig_visitation):
    """Open a pdb session on the state-action frequencies of a single path.

    A ``(dS, dA)`` table ``freq`` is filled by decoding every row of
    ``path['observations']`` and ``path['actions']`` with
    ``one_hot_to_flat`` and counting each (observation, action) index pair;
    the counts are then divided by the number of action rows. Nothing is
    returned: the function ends in ``pdb.set_trace()`` so ``freq`` can be
    examined by hand.

    Args:
        path: Mapping with 2-D ``'observations'`` and ``'actions'`` arrays
            (both are unpacked as ``(rows, width)``).
        orig_visitation: Not used by the code itself; it is only in scope
            for whoever is driving the debugger session.
    """
    observations = path['observations']
    _, dS = observations.shape
    actions = path['actions']
    # The step count comes from the actions array; the observation row
    # count is not checked against it.
    T, dA = actions.shape

    freq = np.zeros((dS, dA))
    for step in range(T):
        state_idx = one_hot_to_flat(observations[step])
        action_idx = one_hot_to_flat(actions[step])
        freq[state_idx, action_idx] += 1
    freq /= float(T)

    # Intentional breakpoint: this helper exists for interactive inspection.
    import pdb
    pdb.set_trace()
def compute_vistation_demos(env, demos):
    """Return the empirical state-action visitation distribution of ``demos``.

    Every (state, action) step across all demonstrations is counted in a
    ``(dim_obs, dim_act)`` table, which is then divided by the total number
    of counted steps so that the entries sum to 1.

    Args:
        env: Object exposing ``observation_space.flat_dim`` and
            ``action_space.flat_dim``; these fix the table's shape.
        demos: Iterable of mappings with ``'observations'`` and
            ``'actions'`` entries. Observations are decoded in one call to
            ``one_hot_to_flat``; actions are used directly as column
            indices.

    Returns:
        ``numpy.ndarray`` of shape ``(dim_obs, dim_act)`` holding the
        normalised visitation frequencies.

    Raises:
        ValueError: If ``demos`` contains no steps at all. Previously this
            case divided by zero and silently produced an all-NaN matrix.
    """
    dim_obs = env.observation_space.flat_dim
    dim_act = env.action_space.flat_dim
    counts = np.zeros((dim_obs, dim_act))

    for demo in demos:
        # NOTE(review): actions are indexed raw here, while inspect_path
        # decodes them with one_hot_to_flat - confirm that demo actions are
        # stored as integer indices rather than one-hot rows.
        act = demo['actions']
        state_ids = one_hot_to_flat(demo['observations'])
        for t, state_id in enumerate(state_ids):
            counts[state_id, act[t]] += 1

    total = float(np.sum(counts))
    if total == 0:
        # Fail loudly instead of returning 0/0 = NaN everywhere.
        raise ValueError(
            'cannot compute visitation frequencies: demos contain no steps'
        )
    return counts / total