def _get_latent_locs(self, model, hidden_state, file_name=None):
    # Sample B=num_loc_samples random actions and use the model's interaction
    # attention to compute an attention-weighted mean pick location per latent slot.
    rand_actions = ptu.from_numpy(
        np.stack([self.env.sample_action()
                  for _ in range(self.num_loc_samples)]))  # (B,A)

    state_action_attention, interaction_attention, all_delta_vals, all_lambdas_deltas = \
        model.get_all_activation_values(hidden_state, rand_actions)

    interaction_attention = interaction_attention.sum(-1)  # (B,K,K-1) -> (B,K)
    # Normalize over the batch of sampled actions so each slot's weights sum to 1.
    normalized_weights = interaction_attention / interaction_attention.sum(0)  # (B,K)
    mean_point = (normalized_weights.unsqueeze(2) *
                  rand_actions[:, :2].unsqueeze(1))  # ((B,K)->(B,K,1) * (B,2)->(B,1,2)) -> (B,K,2)
    mean_point = mean_point.sum(0)  # (B,K,2) -> (K,2)

    if file_name is not None:
        plot_action_vals(self.env,
                         ptu.get_numpy(interaction_attention),
                         ptu.get_numpy(rand_actions),
                         "{}/{}_pick_locs".format(self.logging_dir, file_name),
                         is_normalized=True)
    return mean_point
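# Weighted-mean intuition for the computation above, as a standalone sketch in
# plain torch (the names `attn` and `acts` are illustrative, not from this file;
# shapes match the comments in _get_latent_locs):
#
#   attn = interaction_attention                    # (B,K), already summed over K-1
#   w = attn / attn.sum(0)                          # (B,K): per-slot weights over B sampled actions
#   locs = (w.unsqueeze(2) * acts[:, :2].unsqueeze(1)).sum(0)  # (K,2) expected pick location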
def _random_shooting(self, actions, model, image_suffix):
    # Like internal_inference, except initial_hidden_state might only contain one
    # state while obs/actions contain (B,*).
    # Inputs: obs (B,T1,3,D,D) or None, actions (B,T2,A) or None,
    #   initial_hidden_state or None, schedule (T3)
    # Note: assumes initial_hidden_state has entries of size (B=1,*)
    goal_info = self.goal_info
    schedule = np.array([1] * actions.shape[1])
    actions = ptu.from_numpy(actions)

    if self.action_type is None:
        predicted_info = model.batch_internal_inference(
            obs=None,
            actions=actions,
            initial_hidden_state=self.initial_hidden_state,
            schedule=schedule,
            figure_path=None)
        all_env_actions = actions
    else:
        predicted_info, all_env_actions = self._latent_batch_internal_inference(
            model, actions, self.initial_hidden_state)

    # Inputs to get_action_rankings(): goal_latents (n_goal_latents=K,R),
    #   goal_latents_recon (n_goal_latents=K,3,64,64), goal_image (1,3,64,64),
    #   pred_latents (n_actions,K,R), pred_latents_recon (n_actions,K,3,64,64),
    #   pred_images (n_actions,3,64,64)
    sorted_costs, best_actions_indices, goal_latent_indices = \
        self.score_actions_class.get_action_rankings(
            goal_info["state"]["post"]["samples"][0],
            goal_info["sub_images"][0],
            goal_info["goal_image"],
            predicted_info["state"]["post"]["samples"],
            predicted_info["sub_images"],
            predicted_info["final_recon"],
            image_suffix=image_suffix)

    num_plot_actions = 20
    self.plot_action_errors(
        self.env,
        all_env_actions[best_actions_indices][:num_plot_actions, 0],
        predicted_info["final_recon"][best_actions_indices][:num_plot_actions],
        image_suffix + "_action_errors")
    best_single_env_action = all_env_actions[best_actions_indices[0]]
    return (best_actions_indices, goal_latent_indices,
            predicted_info["final_recon"], ptu.get_numpy(best_single_env_action))
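# Planning-loop sketch for the method above (hypothetical usage; assumes this
# object is an MPC-style planner whose initial_hidden_state has been set from
# the current observation, and that `n_actions` candidate actions are sampled
# from the env as elsewhere in this file):
#
#   actions = np.stack([self.env.sample_action() for _ in range(n_actions)])  # (B,A)
#   actions = actions[:, None]                                                # (B,T=1,A)
#   _, _, _, best_action = self._random_shooting(actions, model, "mpc_iter_0")
#   self.env.step(best_action)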
def process_env_actions(env_actions):
    # Promote a single action (A,) to a batch (T=1,A) before converting to torch.
    if len(env_actions.shape) == 1:  # (A), numpy
        env_actions = np.expand_dims(env_actions, 0)  # (T=1,A), numpy
    return ptu.from_numpy(env_actions)
def process_env_obs(env_obs):
    # Promote a single observation (D,D,3) to a batch (T=1,D,D,3), move the
    # channel axis to PyTorch's (T,3,D,D) layout, and scale pixels to [0,1].
    if len(env_obs.shape) == 3:  # (D,D,3), numpy
        env_obs = np.expand_dims(env_obs, 0)  # (T=1,D,D,3), numpy
    return ptu.from_numpy(np.moveaxis(env_obs, 3, 1)) / 255
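# Shape check for the two helpers above (a sketch; assumes ptu.from_numpy wraps
# torch.from_numpy as it is used throughout this file):
#
#   obs = np.zeros((64, 64, 3))         # single env observation, HWC uint8-style
#   process_env_obs(obs).shape          # torch.Size([1, 3, 64, 64]), values in [0,1]
#   act = np.zeros(4)                   # single env action
#   process_env_actions(act).shape      # torch.Size([1, 4])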
def __init__(
        self,
        input_width,
        input_height,
        input_channels,
        output_size,
        kernel_sizes,
        n_channels,
        strides,
        paddings,
        hidden_sizes=None,
        added_fc_input_size=0,
        batch_norm_conv=False,
        batch_norm_fc=False,
        init_w=1e-4,
        hidden_init=nn.init.xavier_uniform_,
        hidden_activation=nn.ReLU(),
        output_activation=identity,
):
    if hidden_sizes is None:
        hidden_sizes = []
    assert len(kernel_sizes) == len(n_channels) == len(strides) == len(paddings)
    super().__init__()

    self.hidden_sizes = hidden_sizes
    self.input_width = input_width
    self.input_height = input_height
    self.input_channels = input_channels
    self.output_size = output_size
    self.output_activation = output_activation
    self.hidden_activation = hidden_activation
    self.batch_norm_conv = batch_norm_conv
    self.batch_norm_fc = batch_norm_fc
    self.added_fc_input_size = added_fc_input_size
    self.conv_input_length = self.input_width * self.input_height * self.input_channels

    self.conv_layers = nn.ModuleList()
    self.conv_norm_layers = nn.ModuleList()
    self.fc_layers = nn.ModuleList()
    self.fc_norm_layers = nn.ModuleList()

    for out_channels, kernel_size, stride, padding in \
            zip(n_channels, kernel_sizes, strides, paddings):
        conv = nn.Conv2d(input_channels,
                         out_channels,
                         kernel_size,
                         stride=stride,
                         padding=padding)
        hidden_init(conv.weight)
        conv.bias.data.fill_(0)
        self.conv_layers.append(conv)
        input_channels = out_channels

    # Fixed grid of x/y coordinates in [-1,1], one channel each.
    xcoords = np.expand_dims(np.linspace(-1, 1, self.input_width),
                             0).repeat(self.input_height, 0)
    ycoords = np.repeat(np.linspace(-1, 1, self.input_height),
                        self.input_width).reshape(
                            (self.input_height, self.input_width))
    self.coords = from_numpy(
        np.expand_dims(np.stack([xcoords, ycoords], 0), 0))  # (1,2,D,D)
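# The `coords` buffer implements CoordConv-style position channels: a fixed
# (1,2,D,D) grid of x/y coordinates in [-1,1]. A forward pass can concatenate
# it to the image so the convolutions see absolute position (a sketch; `img`
# and the concatenation point are assumptions, not shown in this __init__):
#
#   coords = self.coords.expand(img.shape[0], -1, -1, -1)  # (B,2,D,D)
#   conv_input = torch.cat([img, coords], dim=1)           # (B,C+2,D,D)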
def __init__(
        self,
        input_width,
        input_height,
        input_channels,
        output_size,
        kernel_sizes,
        n_channels,
        strides,
        paddings,
        hidden_sizes,
        lstm_size,
        lstm_input_size,
        added_fc_input_size=0,
        batch_norm_conv=False,
        batch_norm_fc=False,
        init_w=1e-4,
        hidden_init=nn.init.xavier_uniform_,
        hidden_activation=nn.ReLU(),
        lambda_output_activation=identity,
        k=None,
):
    if hidden_sizes is None:
        hidden_sizes = []
    assert len(kernel_sizes) == len(n_channels) == len(strides) == len(paddings)
    super().__init__()

    self.hidden_sizes = hidden_sizes
    self.input_width = input_width
    self.input_height = input_height
    self.input_channels = input_channels
    self.lstm_size = lstm_size
    self.output_size = output_size
    self.lambda_output_activation = lambda_output_activation
    self.hidden_activation = hidden_activation
    self.batch_norm_conv = batch_norm_conv
    self.batch_norm_fc = batch_norm_fc
    self.added_fc_input_size = added_fc_input_size
    self.conv_input_length = self.input_width * self.input_height * self.input_channels
    self.K = k

    self.conv_layers = nn.ModuleList()
    self.conv_norm_layers = nn.ModuleList()
    self.fc_layers = nn.ModuleList()
    self.fc_norm_layers = nn.ModuleList()
    self.avg_pooling = torch.nn.AvgPool2d(kernel_size=input_width)
    self.lstm = nn.LSTM(lstm_input_size, lstm_size, num_layers=1, batch_first=True)

    for out_channels, kernel_size, stride, padding in \
            zip(n_channels, kernel_sizes, strides, paddings):
        conv = nn.Conv2d(input_channels,
                         out_channels,
                         kernel_size,
                         stride=stride,
                         padding=padding)
        hidden_init(conv.weight)
        conv.bias.data.fill_(0)
        self.conv_layers.append(conv)
        input_channels = out_channels

    # Find the output dim of conv_layers by a forward pass on a dummy input
    # (the batch-norm layers are currently disabled). The model starts on CPU;
    # the caller should move it to GPU afterwards if needed.
    test_mat = torch.zeros(1, self.input_channels, self.input_width, self.input_height)
    for conv_layer in self.conv_layers:
        test_mat = conv_layer(test_mat)
        # self.conv_norm_layers.append(nn.BatchNorm2d(test_mat.shape[1]))
    test_mat = self.avg_pooling(test_mat)  # average-pooling layer

    fc_input_size = int(np.prod(test_mat.shape))
    # added_fc_input_size is used only for injecting input directly into the fc layers
    fc_input_size += added_fc_input_size

    for idx, hidden_size in enumerate(hidden_sizes):
        fc_layer = nn.Linear(fc_input_size, hidden_size)
        # norm_layer = nn.BatchNorm1d(hidden_size)
        fc_layer.weight.data.uniform_(-init_w, init_w)
        fc_layer.bias.data.uniform_(-init_w, init_w)
        self.fc_layers.append(fc_layer)
        # self.fc_norm_layers.append(norm_layer)
        fc_input_size = hidden_size

    self.last_fc = nn.Linear(lstm_size, output_size)
    # self.last_fc.weight.data.uniform_(-init_w, init_w)
    # self.last_fc.bias.data.uniform_(-init_w, init_w)
    self.last_fc2 = nn.Linear(lstm_size, output_size)

    # Fixed grid of x/y coordinates in [-1,1], one channel each (CoordConv-style).
    xcoords = np.expand_dims(np.linspace(-1, 1, self.input_width),
                             0).repeat(self.input_height, 0)
    ycoords = np.repeat(np.linspace(-1, 1, self.input_height),
                        self.input_width).reshape(
                            (self.input_height, self.input_width))
    self.coords = from_numpy(
        np.expand_dims(np.stack([xcoords, ycoords], 0), 0))  # (1,2,D,D)
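# Instantiation sketch for the module above (class name and parameter values
# are illustrative only; the convs below preserve the 64x64 spatial size so
# that AvgPool2d(kernel_size=64) collapses each feature map to 1x1):
#
#   enc = ConvLSTMEncoder(
#       input_width=64, input_height=64, input_channels=3, output_size=128,
#       kernel_sizes=[5, 5], n_channels=[32, 64], strides=[1, 1], paddings=[2, 2],
#       hidden_sizes=[], lstm_size=256, lstm_input_size=64, k=4)
#
# With those settings the dummy forward pass yields a pooled (1,64,1,1) tensor,
# so fc_input_size = 64 before added_fc_input_size is appended.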