def act_vectorized(self, obs, goal, horizon=None, greedy=False, noise=0, marginal_policy=None):
    obs = torch.tensor(obs, dtype=torch.float32)
    goal = torch.tensor(goal, dtype=torch.float32)

    if horizon is not None:
        horizon = torch.tensor(horizon, dtype=torch.float32)

    logits = self.forward(obs, goal, horizon=horizon)
    # Factorized action space: one categorical distribution per action
    # dimension, each over `granularity` bins.
    logits = logits.view(-1, self.n_dims, self.granularity)
    # Scaling logits toward 0 flattens each distribution toward uniform.
    noisy_logits = logits * (1 - noise)
    probs = torch.softmax(noisy_logits, 2)

    if greedy:
        samples = torch.argmax(probs, dim=-1)
    else:
        samples = torch.distributions.categorical.Categorical(probs=probs).sample()

    # Collapse the per-dimension bin indices into a single flat action index.
    samples = self.flattened(samples)

    if greedy:
        # Epsilon-greedy: with probability `noise`, replace the greedy action
        # with a uniformly random one.
        samples = ptu.to_numpy(samples)
        random_samples = np.random.choice(self.action_space.n, size=len(samples))
        return np.where(
            np.random.rand(len(samples)) < noise,
            random_samples,
            samples,
        )
    return ptu.to_numpy(samples)
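# --- Illustrative sketch (not part of the class above): the `noise` scaling
# used for stochastic sampling is temperature sampling. Multiplying logits by
# (1 - noise) samples at temperature 1 / (1 - noise): noise=0 leaves the
# distribution unchanged, and noise -> 1 flattens it toward uniform.
import torch

logits = torch.tensor([2.0, 0.0, -1.0])
for noise in (0.0, 0.5, 0.9):
    print(noise, torch.softmax(logits * (1 - noise), dim=0))
# noise=0.0 -> sharply peaked; noise=0.9 -> nearly uniform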
def act_vectorized(self, obs, goal, horizon=None, greedy=False, noise=0, marginal_policy=None):
    obs = torch.tensor(obs, dtype=torch.float32)
    goal = torch.tensor(goal, dtype=torch.float32)

    if horizon is not None:
        horizon = torch.tensor(horizon, dtype=torch.float32)

    logits = self.forward(obs, goal, horizon=horizon)

    if marginal_policy is not None:
        # Subtract the goal-independent marginal logits so that, after the
        # softmax, actions are weighted by p(a | s, g) / p(a | s).
        dummy_goal = torch.zeros_like(goal)
        marginal_logits = marginal_policy.forward(obs, dummy_goal, horizon)
        logits -= marginal_logits

    # Scaling logits toward 0 flattens the distribution toward uniform.
    noisy_logits = logits * (1 - noise)
    probs = torch.softmax(noisy_logits, 1)

    if greedy:
        samples = torch.argmax(probs, dim=-1)
    else:
        samples = torch.distributions.categorical.Categorical(probs=probs).sample()
    return ptu.to_numpy(samples)
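# --- Illustrative sketch (not part of the class above): subtracting logits
# corresponds to dividing probabilities, since softmax(a - b) is proportional
# to softmax(a) / softmax(b). Under the (contextual) assumption that
# marginal_policy models p(a | s), the correction reweights the conditional
# by p(a | s, g) / p(a | s). The tensors below are hypothetical.
import torch

cond = torch.tensor([2.0, 1.0, 0.0])   # hypothetical goal-conditioned logits
marg = torch.tensor([1.5, 0.5, 0.5])   # hypothetical marginal logits
ratio = torch.softmax(cond, 0) / torch.softmax(marg, 0)
assert torch.allclose(torch.softmax(cond - marg, 0), ratio / ratio.sum())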
def loss_fn(self, observations, goals, actions, horizons, weights):
    obs_dtype = torch.float32
    # Discrete actions index a categorical distribution, so they must be int64.
    action_dtype = torch.int64 if self.is_discrete_action else torch.float32

    observations_torch = torch.tensor(observations, dtype=obs_dtype)
    goals_torch = torch.tensor(goals, dtype=obs_dtype)
    actions_torch = torch.tensor(actions, dtype=action_dtype)
    horizons_torch = torch.tensor(horizons, dtype=obs_dtype)
    weights_torch = torch.tensor(weights, dtype=torch.float32)

    # Per-sample negative log-likelihood of the taken actions, scaled by the
    # importance weights and averaged over the batch.
    nll = self.policy.nll(observations_torch, goals_torch, actions_torch,
                          horizon=horizons_torch)
    return torch.mean(nll * weights_torch)
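# --- Illustrative sketch (not part of the class above): the weighted-NLL
# objective computed by loss_fn, shown with a plain Categorical standing in
# for self.policy.nll. All tensors below are hypothetical.
import torch

logits = torch.randn(4, 6)                 # batch of 4, 6 discrete actions
actions = torch.tensor([0, 2, 5, 1])       # actions taken
weights = torch.tensor([1.0, 0.5, 2.0, 1.0])
nll = -torch.distributions.Categorical(logits=logits).log_prob(actions)
loss = torch.mean(nll * weights)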
def __init__(self, env, dim_out=1, state_embedding=None, goal_embedding=None,
             layers=[512, 512], max_horizon=None, freeze_embeddings=False,
             add_extra_conditioning=False, dropout=0):
    super(StateGoalNetwork, self).__init__()
    self.max_horizon = max_horizon

    if state_embedding is None:
        state_embedding = Flatten()
    if goal_embedding is None:
        goal_embedding = Flatten()

    self.state_embedding = state_embedding
    self.goal_embedding = goal_embedding
    self.freeze_embeddings = freeze_embeddings

    # Infer the embedding output widths by pushing a dummy zero batch through
    # each embedding. torch.zeros already returns a tensor, so no extra
    # torch.tensor(...) wrapper is needed.
    state_dim_in = self.state_embedding(
        torch.zeros(env.observation_space.shape)[None]).size(1)
    goal_dim_in = self.goal_embedding(
        torch.zeros(env.goal_space.shape)[None]).size(1)
    dim_in = state_dim_in + goal_dim_in

    if max_horizon is not None:
        # Horizon-conditioned network variant.
        self.net = CBCNetwork(dim_in, max_horizon, dim_out, layers=layers,
                              add_conditioning=add_extra_conditioning,
                              dropout=dropout)
    else:
        self.net = FCNetwork(dim_in, dim_out, layers=layers)
def __init__(self, input_shapes, dim_out, input_embeddings=None,
             layers=[512, 512], freeze_embeddings=False):
    super(MultiInputNetwork, self).__init__()
    if input_embeddings is None:
        input_embeddings = [Flatten() for _ in range(len(input_shapes))]

    self.input_embeddings = input_embeddings
    self.freeze_embeddings = freeze_embeddings

    # Infer each input's flattened width from a dummy zero batch. torch.zeros
    # is used directly so the probe tensor is float32, matching default
    # module weights.
    dim_ins = [
        embedding(torch.zeros((1, ) + tuple(input_shape))).size(1)
        for embedding, input_shape in zip(input_embeddings, input_shapes)
    ]
    full_dim_in = sum(dim_ins)
    self.net = FCNetwork(full_dim_in, dim_out, layers=layers)
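# --- Illustrative sketch (not part of the class above): both constructors
# infer input widths by probing an embedding with a dummy batch of zeros and
# reading the flattened feature size. Shown with torch.nn.Flatten, which is
# assumed to behave like the repo's Flatten for this purpose.
import torch

embedding = torch.nn.Flatten()
dim_in = embedding(torch.zeros((1, 3, 4))).size(1)
assert dim_in == 12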
def flattened(self, tensor):
    # tensor is expected to be n x self.n_dims, holding one bin index per
    # action dimension. Encode each row as a single flat action index using a
    # base-`granularity` positional encoding (dimension 0 least significant).
    multipliers = self.granularity ** torch.arange(self.n_dims)
    flattened = (tensor * multipliers).sum(1)
    return flattened.int()
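# --- Worked example (not part of the class above): with granularity=3 and
# n_dims=2, the per-dimension indices [2, 1] encode to
# 2 * 3**0 + 1 * 3**1 = 5.
import torch

granularity, n_dims = 3, 2
samples = torch.tensor([[2, 1]])
multipliers = granularity ** torch.arange(n_dims)   # tensor([1, 3])
assert int((samples * multipliers).sum(1)) == 5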