def forward_sum_symbolic(self, landmark_r_theta_dict_list):
    x_list = []
    for landmark_r_theta_dict in landmark_r_theta_dict_list:
        x = cuda_var(torch.zeros(1, self.image_emb_size))
        for landmark, (r, theta) in landmark_r_theta_dict.items():
            if theta == -1:  # not visible
                continue
            # get landmark embedding
            landmark_id = self.landmark_names.index(landmark)
            landmark_var = cuda_var(
                torch.from_numpy(np.array([landmark_id])))
            landmark_embedding = self.landmark_embedding(landmark_var)
            # get r embedding
            r_var = cuda_var(torch.from_numpy(np.array([r])))
            r_embedding = self.r_embedding(r_var)
            # get theta embedding
            theta_var = cuda_var(torch.from_numpy(np.array([theta])))
            theta_embedding = self.theta_embedding(theta_var)
            embedding = torch.cat(
                [landmark_embedding, r_embedding, theta_embedding], dim=1)
            # embedding = F.relu(self.dense(embedding))
            x = x + embedding
        x_list.append(x)
    return torch.cat(x_list)
def calc_loss_entropy(self, batch_replay_items):
    agent_observation_state_ls = []
    immediate_rewards = []
    action_batch = []
    for replay_item in batch_replay_items:
        agent_observation_state_ls.append(
            replay_item.get_agent_observed_state())
        action_batch.append(replay_item.get_action())
        immediate_rewards.append(replay_item.get_reward())

    action_batch = cuda_var(torch.from_numpy(np.array(action_batch)))
    immediate_rewards = cuda_var(
        torch.from_numpy(np.array(immediate_rewards)).float())
    num_states = int(action_batch.size()[0])

    # get_probs_batch returns log-probabilities: gather picks the chosen
    # log-probs and exp(.) below recovers the probabilities for the entropy term.
    model_prob_batch = self.model.get_probs_batch(
        agent_observation_state_ls)
    chosen_log_probs = model_prob_batch.gather(1, action_batch.view(-1, 1))
    reward_log_probs = immediate_rewards * chosen_log_probs.view(-1)

    entropy = -torch.mean(
        torch.sum(model_prob_batch * torch.exp(model_prob_batch), 1))
    objective = torch.sum(reward_log_probs) / num_states
    loss = -(objective + self.entropy_coef * entropy)
    self.entropy = entropy

    return loss
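# A minimal standalone sketch (not part of the original code) of the
# entropy-regularized REINFORCE objective computed by calc_loss_entropy above,
# on toy tensors. It assumes torch and F (torch.nn.functional) are imported as
# elsewhere in this file; the 0.1 coefficient stands in for self.entropy_coef.
def _sketch_entropy_regularized_reinforce():
    log_probs = F.log_softmax(torch.randn(3, 4), dim=1)  # 3 states, 4 actions
    actions = torch.tensor([0, 2, 1]).view(-1, 1)
    rewards = torch.tensor([1.0, 0.0, 0.5])

    chosen_log_probs = log_probs.gather(1, actions).view(-1)
    objective = torch.sum(rewards * chosen_log_probs) / 3
    entropy = -torch.mean(torch.sum(torch.exp(log_probs) * log_probs, dim=1))
    return -(objective + 0.1 * entropy)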
def calc_loss(self, batch_replay_items):
    log_probabilities = []
    rewards = []
    action_batch = []
    for replay_item in batch_replay_items:
        log_probabilities.append(replay_item.get_log_prob())
        action_batch.append(replay_item.get_action())
        rewards.append(replay_item.get_reward())

    action_batch = cuda_var(torch.from_numpy(np.array(action_batch)))
    rewards = cuda_var(torch.from_numpy(np.array(rewards))).float()
    num_states = int(action_batch.size()[0])

    model_prob_batch = torch.cat(log_probabilities, dim=0)
    chosen_log_probs = model_prob_batch.gather(1, action_batch.view(-1, 1))
    reward_log_probs = rewards * chosen_log_probs.view(-1)

    entropy = -torch.mean(
        torch.sum(model_prob_batch * torch.exp(model_prob_batch), 1))
    objective = torch.sum(reward_log_probs) / num_states
    loss = -(objective + self.entropy_coef * entropy)
    self.entropy = entropy

    return loss
def get_probs(self, agent_observed_state, model_state, mode=None, volatile=False):
    assert isinstance(agent_observed_state, AgentObservedState)

    # Image list is already padded with zero-images if fewer than 5 images are available
    images = agent_observed_state.get_image()[-5:]
    image_batch = cuda_var(
        torch.from_numpy(np.array(images)).float(), volatile)
    # Flatten them. TODO: maybe don't hardcode this later on; batch size is 1
    image_batch = image_batch.view(1, 15, self.config["image_height"],
                                   self.config["image_width"])

    # List of instructions. False is there because the module expects a second argument.
    # TODO: figure out what this is
    instructions_batch = ([agent_observed_state.get_instruction()], False)

    # Previous action; if it is non-existent then encode it as the stop action
    prev_actions_raw = [agent_observed_state.get_previous_action()]
    prev_actions = [
        self.none_action if a is None else a for a in prev_actions_raw
    ]
    prev_actions_batch = cuda_var(torch.from_numpy(np.array(prev_actions)))

    # Get probabilities
    probs_batch, new_model_state = self.final_module(
        image_batch, instructions_batch, prev_actions_batch, model_state)

    # The last two return values are not needed by this model
    return probs_batch, new_model_state, None, None
def get_probs(self, agent_observed_state, model_state, mode=None, volatile=False):
    assert isinstance(agent_observed_state, AgentObservedState)
    agent_observed_state_list = [agent_observed_state]

    # Extract the last 4 images or add dummy paddings
    image_seqs = [[aos.get_image()] for aos in agent_observed_state_list]
    image_batch = cuda_var(
        torch.from_numpy(np.array(image_seqs)).float(), volatile)

    instructions = [
        aos.get_instruction() for aos in agent_observed_state_list
    ]
    instructions_batch = cuda_var(
        torch.from_numpy(np.array(instructions)).long())

    time = agent_observed_state.time_step
    time = cuda_var(torch.from_numpy(np.array([time])).long())

    probs_batch, new_model_state, image_emb_seq, state_feature = self.final_module(
        image_batch, instructions_batch, time, mode, model_state)
    return probs_batch, new_model_state, image_emb_seq, state_feature
def calc_loss(self, batch):
    curr_obs = cuda_var(
        torch.cat([
            torch.from_numpy(np.array(point[0])).view(1, -1)
            for point in batch
        ], dim=0)).float()
    actions = cuda_var(
        torch.cat([
            torch.from_numpy(np.array(point[1])).view(1, -1)
            for point in batch
        ], dim=0)).float()
    next_obs = cuda_var(
        torch.cat([
            torch.from_numpy(np.array(point[2])).view(1, -1)
            for point in batch
        ], dim=0)).float()
    gold_labels = cuda_var(
        torch.cat([
            torch.from_numpy(np.array(point[3])).view(1, -1)
            for point in batch
        ], dim=0)).long()

    log_probs = self.forward(curr_obs, actions, next_obs)
    classification_loss = -torch.mean(
        log_probs.gather(1, gold_labels.view(-1, 1)))

    return classification_loss
def calc_loss_old(self, batch_replay_items):
    angle_batch = []
    distance_batch = []
    batch_next_state_feature = []
    for replay_item in batch_replay_items:
        angle, distance = replay_item.get_goal()
        angle_batch.append(angle)
        distance_batch.append(distance)
        batch_next_state_feature.append(replay_item.get_state_feature())

    angle_batch = cuda_var(torch.from_numpy(np.array(angle_batch)))
    distance_batch = cuda_var(torch.from_numpy(np.array(distance_batch)))
    batch_next_state_feature = torch.cat(batch_next_state_feature)

    # Compute the negative log probability loss
    goal_angle_log_probability, goal_distance_log_probability = self.model.predict_goal_result(
        batch_next_state_feature)
    chosen_angle_log_probs = goal_angle_log_probability.gather(
        1, angle_batch.view(-1, 1))
    chosen_distance_log_probs = goal_distance_log_probability.gather(
        1, distance_batch.view(-1, 1))
    goal_probability_loss = -torch.sum(chosen_angle_log_probs) - torch.sum(
        chosen_distance_log_probs)
    num_states = float(len(batch_replay_items))
    goal_probability_loss = goal_probability_loss / num_states

    return goal_probability_loss
def get_probs(self, agent_observed_state, model_state, mode=None, volatile=False):
    assert isinstance(agent_observed_state, AgentObservedState)
    agent_observed_state_list = [agent_observed_state]

    image_seq_lens = [1]
    image_seq_lens_batch = cuda_tensor(
        torch.from_numpy(np.array(image_seq_lens)))
    # max_len = max(image_seq_lens)
    # image_seqs = [aos.get_image()[:max_len]
    #               for aos in agent_observed_state_list]
    image_seqs = [[aos.get_last_image()]
                  for aos in agent_observed_state_list]
    image_batch = cuda_var(
        torch.from_numpy(np.array(image_seqs)).float(), volatile)

    instructions = [aos.get_instruction()
                    for aos in agent_observed_state_list]
    read_pointers = [aos.get_read_pointers()
                     for aos in agent_observed_state_list]
    instructions_batch = (instructions, read_pointers)

    prev_actions_raw = [aos.get_previous_action()
                        for aos in agent_observed_state_list]
    prev_actions = [self.none_action if a is None else a
                    for a in prev_actions_raw]
    prev_actions_batch = cuda_var(
        torch.from_numpy(np.array(prev_actions)), volatile)

    probs_batch, new_model_state, image_emb_seq, state_feature = self.final_module(
        image_batch, image_seq_lens_batch, instructions_batch,
        prev_actions_batch, mode, model_state)
    return probs_batch, new_model_state, image_emb_seq, state_feature
def calc_loss(self, batch_replay_items): """ Given a set of replay items this function calculates the loss variable """ agent_observation_state_ls = [] immediate_rewards = [] action_batch = [] log_probabilities = [] for replay_item in batch_replay_items: agent_observation_state_ls.append( replay_item.get_agent_observed_state()) action_batch.append(replay_item.get_action()) immediate_rewards.append(replay_item.get_reward()) log_probabilities.append(replay_item.get_log_prob()) log_probabilities = torch.cat(log_probabilities) action_batch = cuda_var(torch.from_numpy(np.array(action_batch))) immediate_rewards = cuda_var( torch.from_numpy(np.array(immediate_rewards)).float()) model_log_prob_batch = log_probabilities chosen_log_probs = model_log_prob_batch.gather( 1, action_batch.view(-1, 1)) reward_log_probs = immediate_rewards * chosen_log_probs.view(-1) model_prob_batch = torch.exp(model_log_prob_batch) self.entropy = -torch.sum( torch.sum(model_log_prob_batch * model_prob_batch, 1)) objective = torch.sum(reward_log_probs) loss = -objective - self.entropy_coef * self.entropy return loss
def calc_loss(self, batch_replay_items):
    agent_observation_state_ls = []
    immediate_rewards = []
    action_batch = []
    for replay_item in batch_replay_items:
        agent_observation_state_ls.append(
            replay_item.get_agent_observed_state())
        action_batch.append(replay_item.get_action())
        immediate_rewards.append(replay_item.get_reward())

    action_batch = cuda_var(torch.from_numpy(np.array(action_batch)))
    immediate_rewards = cuda_var(
        torch.from_numpy(np.array(immediate_rewards)).float())
    num_states = int(action_batch.size()[0])

    model_prob_batch = self.model.get_probs_batch(
        agent_observation_state_ls)
    chosen_log_probs = model_prob_batch.gather(1, action_batch.view(-1, 1))
    reward_log_probs = immediate_rewards * chosen_log_probs.view(-1)

    gold_distribution = cuda_var(
        torch.FloatTensor([0.6719, 0.1457, 0.1435, 0.0387]))
    cross_entropy = -torch.mean(
        torch.sum(gold_distribution * model_prob_batch, 1))
    objective = torch.sum(reward_log_probs) / num_states
    loss = -(objective - self.entropy_coef * cross_entropy)
    self.cross_entropy = cross_entropy

    return loss
def get_probs(self, agent_observed_state, model_state, mode=None, volatile=False):
    assert isinstance(agent_observed_state, AgentObservedState)
    agent_observed_state_list = [agent_observed_state]

    image_seqs = [[aos.get_last_image()]
                  for aos in agent_observed_state_list]
    image_batch = cuda_var(
        torch.from_numpy(np.array(image_seqs)).float(), volatile)

    instructions = [
        aos.get_instruction() for aos in agent_observed_state_list
    ]
    instructions_batch = cuda_var(
        torch.from_numpy(np.array(instructions)).long())

    time = agent_observed_state.time_step
    time = cuda_var(torch.from_numpy(np.array([time])).long())

    previous_action = agent_observed_state.previous_action
    if previous_action is None:
        previous_action = 4  # num_actions + 1
    previous_action = cuda_var(
        torch.from_numpy(np.array([previous_action])).long())

    probs_batch, new_model_state, image_emb_seq, state_feature = self.final_module(
        image_batch, instructions_batch, time, previous_action, mode,
        model_state)
    return probs_batch, new_model_state, image_emb_seq, state_feature
def get_probs_symbolic_text(self, agent_observed_state, symbolic_text,
                            model_state, mode=None, volatile=False):
    """ Same as get_probs, except it forces the model to use the given symbolic text """
    assert isinstance(agent_observed_state, AgentObservedState)
    agent_observed_state_list = [agent_observed_state]

    image_seq_lens = [1]
    image_seq_lens_batch = cuda_tensor(
        torch.from_numpy(np.array(image_seq_lens)))
    image_seqs = [[aos.get_last_image()]
                  for aos in agent_observed_state_list]
    image_batch = cuda_var(
        torch.from_numpy(np.array(image_seqs)).float(), volatile)

    instructions_batch = [symbolic_text]

    prev_actions_raw = [aos.get_previous_action()
                        for aos in agent_observed_state_list]
    prev_actions = [self.none_action if a is None else a
                    for a in prev_actions_raw]
    prev_actions_batch = cuda_var(
        torch.from_numpy(np.array(prev_actions)), volatile)

    probs_batch, new_model_state, image_emb_seq, state_feature = self.final_module(
        image_batch, image_seq_lens_batch, instructions_batch,
        prev_actions_batch, mode, model_state)
    return probs_batch, new_model_state, image_emb_seq, state_feature
def get_probs_batch(self, agent_observed_state_list, mode=None): for aos in agent_observed_state_list: assert isinstance(aos, AgentObservedState) # print "batch size:", len(agent_observed_state_list) # sort list by instruction length agent_observed_state_list = sorted( agent_observed_state_list, key=lambda aos_: len(aos_.get_instruction()), reverse=True ) images = [aos.get_image() for aos in agent_observed_state_list] image_batch = cuda_var(torch.from_numpy(np.array(images)).float()) instructions = [aos.get_instruction() for aos in agent_observed_state_list] read_pointers = [aos.get_read_pointers() for aos in agent_observed_state_list] instructions_batch = (instructions, read_pointers) prev_actions_raw = [aos.get_previous_action() for aos in agent_observed_state_list] prev_actions = [self.none_action if a is None else a for a in prev_actions_raw] prev_actions_batch = cuda_var(torch.from_numpy(np.array(prev_actions))) probs_batch = self.final_module(image_batch, instructions_batch, prev_actions_batch, mode) return probs_batch
def get_attention_prob(self, agent_observed_state, model_state, mode=None, volatile=False):
    assert isinstance(agent_observed_state, AgentObservedState)
    agent_observed_state_list = [agent_observed_state]

    image_seqs = [[aos.get_last_image()]
                  for aos in agent_observed_state_list]
    image_batch = cuda_var(
        torch.from_numpy(np.array(image_seqs)).float(), volatile)

    instructions = [
        aos.get_instruction() for aos in agent_observed_state_list
    ]
    instructions_batch = cuda_var(
        torch.from_numpy(np.array(instructions)).long())

    time = agent_observed_state.time_step
    time = cuda_var(torch.from_numpy(np.array([time])).long())

    instruction_string = instruction_to_string(
        agent_observed_state.instruction, self.config)

    state_feature = self.final_module.get_attention_prob(
        image_batch, instructions_batch, instruction_string,
        agent_observed_state.goal)
    return state_feature
def get_probs(self, agent_observed_state, model_state, mode=None):
    assert isinstance(agent_observed_state, AgentObservedState)
    agent_observed_state_list = [agent_observed_state]

    image_seq_lens = [1]
    image_seq_lens_batch = cuda_tensor(
        torch.from_numpy(np.array(image_seq_lens)))
    image_seqs = [[aos.get_last_image()]
                  for aos in agent_observed_state_list]
    image_batch = cuda_var(torch.from_numpy(np.array(image_seqs)).float())

    goal_image_seqs = [[aos.get_goal_image()]
                       for aos in agent_observed_state_list]
    goal_image_batch = cuda_var(
        torch.from_numpy(np.array(goal_image_seqs)).float())

    prev_actions_raw = [aos.get_previous_action()
                        for aos in agent_observed_state_list]
    prev_actions = [self.none_action if a is None else a
                    for a in prev_actions_raw]
    prev_actions_batch = cuda_var(torch.from_numpy(np.array(prev_actions)))

    probs_batch, new_model_state, image_emb_seq = self.final_module(
        image_batch, image_seq_lens_batch, goal_image_batch,
        prev_actions_batch, mode, model_state)
    return probs_batch, new_model_state, image_emb_seq
def calc_loss(self, batch_replay_items):
    agent_observation_state_ls = []
    landmark = []
    theta_1 = []
    theta_2 = []
    r = []
    for replay_item in batch_replay_items:
        agent_observation_state_ls.append(
            replay_item.get_agent_observed_state())
        landmark_, theta_1_, theta_2_, r_ = replay_item.get_symbolic_text()
        landmark.append(landmark_)
        theta_1.append(theta_1_)
        theta_2.append(theta_2_)
        r.append(r_)

    num_states = len(agent_observation_state_ls)

    landmark_batch = cuda_var(torch.from_numpy(np.array(landmark)))
    theta_1_batch = cuda_var(torch.from_numpy(np.array(theta_1)))
    theta_2_batch = cuda_var(torch.from_numpy(np.array(theta_2)))
    r_batch = cuda_var(torch.from_numpy(np.array(r)))

    model_prob_landmark, model_prob_theta_1, model_prob_theta_2, model_prob_r \
        = self.model.get_symbolic_text_batch(agent_observation_state_ls)

    # compute expected theta
    model_prob_theta_1_ = torch.exp(model_prob_theta_1)
    model_prob_theta_2_ = torch.exp(model_prob_theta_2)
    expected_theta_1 = torch.matmul(model_prob_theta_1_, self.theta_values)  # batch
    expected_theta_2 = torch.matmul(model_prob_theta_2_, self.theta_values)  # batch

    gold_theta_1 = self.theta_values.gather(0, theta_1_batch.view(-1, 1))
    gold_theta_2 = self.theta_values.gather(0, theta_2_batch.view(-1, 1))

    theta_1_diff_1 = torch.remainder(gold_theta_1 - expected_theta_1, 360)
    theta_1_diff_2 = torch.remainder(expected_theta_1 - gold_theta_1, 360)
    theta_1_diff = torch.min(theta_1_diff_1, theta_1_diff_2)
    theta_1_loss = torch.mean(theta_1_diff ** 2)

    theta_2_diff_1 = torch.remainder(gold_theta_2 - expected_theta_2, 360)
    theta_2_diff_2 = torch.remainder(expected_theta_2 - gold_theta_2, 360)
    theta_2_diff = torch.min(theta_2_diff_1, theta_2_diff_2)
    theta_2_loss = torch.mean(theta_2_diff ** 2)

    chosen_log_probs_landmark = model_prob_landmark.gather(
        1, landmark_batch.view(-1, 1))
    # chosen_log_probs_theta_1 = model_prob_theta_1.gather(1, theta_1_batch.view(-1, 1))
    # chosen_log_probs_theta_2 = model_prob_theta_2.gather(1, theta_2_batch.view(-1, 1))
    chosen_log_probs_r = model_prob_r.gather(1, r_batch.view(-1, 1))

    cross_entropy_loss_objective = torch.sum(chosen_log_probs_landmark) / num_states \
        + torch.sum(chosen_log_probs_r) / num_states

    loss = -cross_entropy_loss_objective + 0.0002 * theta_1_loss + 0.0002 * theta_2_loss
    return loss
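# Hedged sketch (not from the original code): the torch.remainder trick used in
# calc_loss above computes the circular distance between two angles in degrees,
# i.e. min((a - b) mod 360, (b - a) mod 360), so 350 and 10 are 20 degrees apart.
def _sketch_circular_angle_distance():
    gold = torch.tensor([350.0, 90.0])
    expected = torch.tensor([10.0, 270.0])
    diff_1 = torch.remainder(gold - expected, 360)
    diff_2 = torch.remainder(expected - gold, 360)
    return torch.min(diff_1, diff_2)  # tensor([20., 180.])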
def get_loss_and_prob(volatile_features, goal, final_height, final_width):
    attention_probs = volatile_features["attention_probs"]
    attention_logits = volatile_features["attention_logits"]
    attention_log_prob = F.log_softmax(attention_logits, dim=0)

    row, col, row_real, col_real = goal
    gold_prob = GoalPrediction.generate_gold_prob(goal, final_height, final_width)

    if row is None:
        cross_entropy_loss = -torch.sum(
            gold_prob * attention_log_prob)  # cross entropy loss
        meta = {"cross_entropy": cross_entropy_loss, "dist_loss": None}
        return cross_entropy_loss, attention_log_prob[final_height * final_width], meta

    row_, col_ = row + 0.5, col + 0.5
    position_height = cuda_var(
        torch.from_numpy(np.array(list(range(
            0, final_height))))).float().view(-1, 1) + 0.5
    position_width = cuda_var(
        torch.from_numpy(np.array(list(range(
            0, final_width))))).float().view(-1, 1) + 0.5

    attention_prob = attention_probs[:-1].view(final_height, final_width)
    expected_row = torch.sum(position_height * attention_prob)
    expected_col = torch.sum(position_width.view(1, -1) * attention_prob)
    dist_loss = torch.sqrt((expected_row - row_) * (expected_row - row_) +
                           (expected_col - col_) * (expected_col - col_))

    cross_entropy_loss = -torch.sum(
        gold_prob * attention_log_prob)  # cross entropy loss

    if row is None or col is None:
        ix = final_height * final_width
    else:
        ix = row * final_width + col

    if GoalPrediction.loss_type == GoalPrediction.LOGLOSS:
        loss = -attention_log_prob[ix]
    elif GoalPrediction.loss_type == GoalPrediction.LOGLOSS_DIST:
        loss = -attention_log_prob[ix] + dist_loss
    elif GoalPrediction.loss_type == GoalPrediction.CROSS_ENTROPY:
        loss = cross_entropy_loss
    elif GoalPrediction.loss_type == GoalPrediction.DIST_LOSS:
        loss = dist_loss
    elif GoalPrediction.loss_type == GoalPrediction.CROSS_ENTROPY_AND_DIST_LOSS:
        loss = cross_entropy_loss + dist_loss
    else:
        raise AssertionError("Unhandled loss type ", GoalPrediction.loss_type)

    prob = attention_log_prob[ix]
    meta = {"cross_entropy": cross_entropy_loss, "dist_loss": dist_loss}
    return loss, prob, meta
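# Hedged sketch (toy values, not the original code): the distance term in
# get_loss_and_prob above is the Euclidean gap between the gold cell centre and
# the attention-weighted expected (row, col) position over the grid.
def _sketch_expected_position_distance():
    attention_prob = torch.tensor([[0.1, 0.2],
                                   [0.3, 0.4]])      # 2 x 2 grid, sums to 1
    rows = torch.tensor([0.5, 1.5]).view(-1, 1)      # cell-centre row coordinates
    cols = torch.tensor([0.5, 1.5]).view(1, -1)      # cell-centre col coordinates
    expected_row = torch.sum(rows * attention_prob)  # 0.5*0.3 + 1.5*0.7 = 1.2
    expected_col = torch.sum(cols * attention_prob)  # 0.5*0.4 + 1.5*0.6 = 1.1
    gold_row, gold_col = 1.5, 1.5                    # centre of goal cell (1, 1)
    return torch.sqrt((expected_row - gold_row) ** 2 +
                      (expected_col - gold_col) ** 2)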
def forward(self, instructions_batch):
    token_lists, text_pointers = instructions_batch
    batch_size = len(token_lists)
    text_lengths = np.array([len(tokens) for tokens in token_lists])

    dims = (self.num_layers, batch_size, self.hidden_dim)
    hidden_f = (Variable(cuda_tensor(torch.zeros(*dims)), requires_grad=False),
                Variable(cuda_tensor(torch.zeros(*dims)), requires_grad=False))
    hidden_b = (Variable(cuda_tensor(torch.zeros(*dims)), requires_grad=False),
                Variable(cuda_tensor(torch.zeros(*dims)), requires_grad=False))

    # pad text tokens with 0's
    tokens_batch_f = [[] for _ in range(batch_size)]
    tokens_batch_b = [[] for _ in range(batch_size)]
    for i in range(batch_size):
        num_zeros = text_lengths[0] - text_lengths[i]
        tokens_batch_f[i] = token_lists[i] + [0] * num_zeros
        tokens_batch_b[i] = token_lists[i][::-1] + [0] * num_zeros
    tokens_batch_f = cuda_var(torch.from_numpy(np.array(tokens_batch_f)))
    tokens_batch_b = cuda_var(torch.from_numpy(np.array(tokens_batch_b)))

    # swap so batch dimension is second, sequence dimension is first
    tokens_batch_f = tokens_batch_f.transpose(0, 1)
    tokens_batch_b = tokens_batch_b.transpose(0, 1)

    emb_sentence_f = self.embedding(tokens_batch_f)
    emb_sentence_b = self.embedding(tokens_batch_b)
    packed_input_f = pack_padded_sequence(emb_sentence_f, text_lengths)
    packed_input_b = pack_padded_sequence(emb_sentence_b, text_lengths)
    lstm_out_packed_f, _ = self.lstm_f(packed_input_f, hidden_f)
    lstm_out_packed_b, _ = self.lstm_b(packed_input_b, hidden_b)

    # recover per-token outputs from the packed sequences
    lstm_out_f, _ = pad_packed_sequence(lstm_out_packed_f)
    lstm_out_b, _ = pad_packed_sequence(lstm_out_packed_b)
    lstm_out_f = lstm_out_f.transpose(0, 1)
    lstm_out_b = lstm_out_b.transpose(0, 1)

    # build span embeddings from the forward/backward boundary states
    embeddings_list = []
    for i, (start_i, end_i) in enumerate(text_pointers):
        embeddings = []
        if start_i > 0:
            embeddings.append(lstm_out_f[i][start_i - 1])
        else:
            embeddings.append(cuda_var(torch.zeros(self.hidden_dim)))
        embeddings.append(lstm_out_f[i][end_i - 1])
        embeddings.append(lstm_out_b[i][start_i])
        if end_i < text_lengths[i]:
            embeddings.append(lstm_out_b[i][end_i])
        else:
            embeddings.append(cuda_var(torch.zeros(self.hidden_dim)))
        embeddings_list.append(torch.cat(embeddings).view(1, -1))
    embeddings_batch = torch.cat(embeddings_list)

    return embeddings_batch
def save_correlation_figure_(num_homing_policies, model, test_batches, exp_name):
    correlation_stats = {}
    for batch in test_batches:
        prev_observations = cuda_var(
            torch.cat([
                torch.from_numpy(np.array(point.get_curr_obs())).view(1, -1)
                for point in batch
            ], dim=0)).float()
        actions = cuda_var(
            torch.cat([
                torch.from_numpy(np.array(point.get_action())).view(1, -1)
                for point in batch
            ], dim=0)).long()
        observations = cuda_var(
            torch.cat([
                torch.from_numpy(np.array(point.get_next_obs())).view(1, -1)
                for point in batch
            ], dim=0)).float()

        # Compute loss
        _, info_dict = model.gen_prob(prev_observations, actions, observations)  # batch x 2
        assigned_states = info_dict["assigned_states"]

        for i, point in enumerate(batch):
            assigned_state = int(assigned_states[i])
            if point.get_next_state() in correlation_stats:
                correlation_stats[
                    point.get_next_state()][assigned_state] += 1.0
            else:
                vec = np.zeros(num_homing_policies, dtype=np.float32)
                vec[assigned_state] = 1.0
                correlation_stats[point.get_next_state()] = vec

    num_states = 0
    image = []
    for key in sorted(correlation_stats):
        vec = correlation_stats[key]
        vec = vec / max(1.0, vec.sum())
        image.append(vec)
        num_states += 1
    image = np.vstack(image)
    image = scipy.misc.imresize(image,
                                (num_states * 100, num_homing_policies * 100))

    filelist = os.listdir('./%s' % exp_name)
    num_images = len(filelist)
    scipy.misc.imsave("./%s/image_%d.png" % (exp_name, num_images + 1), image)
def get_log_prob(self, replay_item):
    image_batch, instruction = replay_item

    image_seqs = [image_batch]
    image_batch = cuda_var(torch.from_numpy(np.array(image_seqs)).float())

    instructions = [instruction]
    instructions_batch = cuda_var(
        torch.from_numpy(np.array(instructions)).long())

    return self.final_module(image_batch, instructions_batch)
def get_probs(self, agent_observed_state, model_state, mode=None, volatile=False):
    assert isinstance(agent_observed_state, AgentObservedState)

    # Supposedly this is already padded with zero-images; double-check that code
    images = agent_observed_state.get_image()[-5:]
    # image_seqs = [[aos.get_last_image()]
    #               for aos in agent_observed_state_list]
    image_batch = cuda_var(
        torch.from_numpy(np.array(images)).float(), volatile)
    # Flatten them. TODO: maybe don't hardcode this later on; batch size is 1
    image_batch = image_batch.view(1, 15, 128, 128)

    # list of list
    instructions_batch = ([agent_observed_state.get_instruction()], False)
    # instructions_batch = (cuda_var(torch.from_numpy(np.array(instructions)).long()), False)
    # print("instructions", instructions)
    # print("instructions_batch", instructions_batch)

    prev_actions_raw = agent_observed_state.get_previous_action()
    prev_actions_raw = self.none_action if prev_actions_raw is None else prev_actions_raw

    if prev_actions_raw == 81:
        previous_direction_id = [4]
    else:
        previous_direction_id = [prev_actions_raw % 4]
    # this input is over a space of 81 things
    previous_block_id = [int(prev_actions_raw / 4)]
    prev_block_id_batch = cuda_var(
        torch.from_numpy(np.array(previous_block_id)))
    prev_direction_id_batch = cuda_var(
        torch.from_numpy(np.array(previous_direction_id)))

    # prev_actions = [self.none_action if a is None else a
    #                 for a in prev_actions_raw]
    # prev_actions_batch = cuda_var(torch.from_numpy(np.array(prev_actions)))

    probs_batch, new_model_state = self.final_module(
        image_batch, instructions_batch, prev_block_id_batch,
        prev_direction_id_batch, model_state)

    # The last two return values are not needed by this model
    return probs_batch, new_model_state, None, None
def calc_loss(self, batch_replay_items):
    if len(batch_replay_items) <= 1:
        return None

    action_batch = []
    batch_image_feature = []
    batch_next_image_feature = []
    for replay_item in batch_replay_items:
        next_image_emb = replay_item.get_next_image_emb()
        if next_image_emb is None:  # can be None for the last item in a rollout
            continue
        action_batch.append(replay_item.get_action())
        batch_image_feature.append(replay_item.get_image_emb())
        batch_next_image_feature.append(next_image_emb)

    action_batch = cuda_var(torch.from_numpy(np.array(action_batch)))
    batch_image_feature = torch.cat(batch_image_feature)
    batch_next_image_feature = torch.cat(batch_next_image_feature)

    # Predict the feature of the next image
    batch_predicted_next_image_feature = self.model.predict_action_result(
        batch_image_feature, action_batch)

    # Compute the mean squared loss
    diff = (batch_predicted_next_image_feature - batch_next_image_feature)
    temporal_autoencoding_loss = torch.mean(diff ** 2)

    return temporal_autoencoding_loss
def calc_loss(self, batch_replay_items):
    images = []
    visible_objects = []
    for replay_item in batch_replay_items:
        image, visible_objects_ = replay_item
        visible_objects.append(visible_objects_)
        images.append([image])

    theta_logits = self.model.get_probs(images)  # batch x 67 x 12
    num_states = int(theta_logits.size()[0])

    one_hot_vector = torch.zeros(theta_logits.size())
    for i in range(0, num_states):
        visible_objects_example = visible_objects[i]
        for landmark in range(0, self.num_landmark):
            # See if the landmark is present and visible in the agent's field of view
            if landmark in visible_objects_example and \
                    visible_objects_example[landmark][1] != -1:
                r, theta = visible_objects_example[landmark]
                one_hot_vector[i, landmark, theta] = 1.0

    loss = F.binary_cross_entropy_with_logits(
        theta_logits, cuda_var(one_hot_vector).float())
    return loss
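# Hedged sketch (tiny 2-landmark, 3-bin example, not the original code): the
# target tensor built in calc_loss above marks, per landmark, the theta bin in
# which that landmark is visible, and is scored against the logits with
# binary cross entropy.
def _sketch_landmark_theta_target():
    num_landmarks, num_theta_bins = 2, 3
    visible_objects_example = {0: (1, 2)}  # landmark 0 visible at r=1, theta bin 2
    target = torch.zeros(1, num_landmarks, num_theta_bins)
    for landmark in range(num_landmarks):
        if landmark in visible_objects_example and visible_objects_example[landmark][1] != -1:
            r, theta = visible_objects_example[landmark]
            target[0, landmark, theta] = 1.0
    logits = torch.randn(1, num_landmarks, num_theta_bins)
    return F.binary_cross_entropy_with_logits(logits, target)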
def _gather_fqi_samples(replay_dataset, step, horizon, reward_func, learned_policy):
    dataset = []
    for replay_item in replay_dataset[step]:
        assert type(replay_item) == TransitionDatapoint and \
            replay_item.get_timestep() == step and \
            replay_item.is_valid() == 1

        current_obs = replay_item.get_curr_obs()
        next_obs = replay_item.get_next_obs()

        if reward_func is None:
            total_reward = replay_item.get_reward()
        else:
            total_reward = reward_func(current_obs, step)

        if step < horizon:
            obs_var = cuda_var(torch.from_numpy(next_obs)).float().view(1, -1)
            q_val = learned_policy[step + 1].gen_q_val(obs_var).view(-1)  # num_actions
            total_reward += float(q_val.max(0)[0].data.cpu())  # Predict reward and take max

        datapoint = (current_obs,
                     replay_item.get_action_prob(),
                     replay_item.get_action(),
                     total_reward,
                     replay_item.get_curr_state(),
                     replay_item.get_next_state(),
                     replay_item.get_policy_index())
        dataset.append(datapoint)

    return dataset
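# Hedged sketch (hypothetical numbers, not the original code): the bootstrapped
# regression target built in _gather_fqi_samples above is, for non-terminal
# steps, reward + max_a Q_{t+1}(next_obs, a).
def _sketch_fqi_target():
    reward = 1.0
    next_q_values = torch.tensor([0.2, 0.7, -0.1])  # stand-in for gen_q_val output
    return reward + float(next_q_values.max(0)[0])  # 1.7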
def get_probs_batch(self, agent_observed_state_list, mode=None): for aos in agent_observed_state_list: assert isinstance(aos, AgentObservedState) # print "batch size:", len(agent_observed_state_list) # sort list by instruction length agent_observed_state_list = sorted( agent_observed_state_list, key=lambda aos_: len(aos_.get_instruction()), reverse=True ) symbolic_image_list = [] for aos in agent_observed_state_list: x_pos, z_pos, y_angle = aos.get_position_orientation() landmark_pos_dict = aos.get_landmark_pos_dict() symbolic_image = get_visible_landmark_r_theta( x_pos, z_pos, y_angle, landmark_pos_dict) symbolic_image_list.append(symbolic_image) image_batch = symbolic_image_list instructions_batch = [aos.get_symbolic_instruction() for aos in agent_observed_state_list] prev_actions_raw = [aos.get_previous_action() for aos in agent_observed_state_list] prev_actions = [self.none_action if a is None else a for a in prev_actions_raw] prev_actions_batch = cuda_var(torch.from_numpy(np.array(prev_actions))) probs_batch = self.final_module(image_batch, instructions_batch, prev_actions_batch, mode) return probs_batch
def get_probs_and_visible_objects(self, agent_observed_state_list):
    for aos in agent_observed_state_list:
        assert isinstance(aos, AgentObservedState)
    # print "batch size:", len(agent_observed_state_list)

    # sort list by instruction length
    agent_observed_state_list = sorted(
        agent_observed_state_list,
        key=lambda aos_: len(aos_.get_instruction()),
        reverse=True)

    images = [[aos.get_last_image()] for aos in agent_observed_state_list]
    image_batch = cuda_var(torch.from_numpy(np.array(images)).float())

    landmarks_visible = []
    for aos in agent_observed_state_list:
        x_pos, z_pos, y_angle = aos.get_position_orientation()
        landmark_pos_dict = aos.get_landmark_pos_dict()
        visible_landmarks = get_visible_landmark_r_theta(
            x_pos, z_pos, y_angle, landmark_pos_dict, self.landmark_names)
        landmarks_visible.append(visible_landmarks)

    # shape is BATCH_SIZE x 63 x 2
    probs_batch = self.final_module(image_batch)

    # landmarks_visible is list of length BATCH_SIZE, each item is a set containing landmark indices
    return probs_batch, landmarks_visible
def log_homing_policy_reward(self, env, homing_policies, step, logger):
    num_samples = self.constants["eval_homing_policy_sample_size"]

    all_total_reward = 0.0
    for ix, policy in enumerate(homing_policies[step]):
        total_reward = 0.0
        for _ in range(0, num_samples):
            # Rollin for steps
            obs, meta = env.reset()
            for step_ in range(1, step + 1):
                obs_var = cuda_var(torch.from_numpy(obs)).float().view(1, -1)
                action = policy[step_].sample_action(obs_var)
                obs, reward, done, meta = env.step(action)
                total_reward = total_reward + reward

        total_reward = total_reward / float(max(1, num_samples))
        all_total_reward = all_total_reward + total_reward
        logger.log("After horizon %r. Policy Number %r receives mean reward %r" %
                   (step, ix + 1, total_reward))

    all_total_reward = all_total_reward / float(max(1, len(homing_policies[step])))
    logger.log("After horizon %r. Random Policy receives reward %r" %
               (step, all_total_reward))
def generate_gold_prob(goal, final_height, final_width, sigma2=0.5):
    row, col, row_real, col_real = goal
    gold_prob = cuda_var(torch.zeros(final_height * final_width + 1)).float()

    if row is None or col is None:
        gold_prob[final_height * final_width] = 1.0  # last value indicates not present
        return gold_prob

    row_ = float(round(row_real)) + 0.5
    col_ = float(round(col_real)) + 0.5

    for i in range(0, final_height):
        for j in range(0, final_width):
            ix = i * final_width + j
            center = (i + 0.5, j + 0.5)
            # dist2 = (center[0] - row_real) * (center[0] - row_real) + \
            #         (center[1] - col_real) * (center[1] - col_real)
            dist2 = (center[0] - row_) * (center[0] - row_) + \
                    (center[1] - col_) * (center[1] - col_)
            gold_prob[ix] = -dist2 / (2.0 * sigma2)

    gold_prob = torch.exp(gold_prob).float()
    gold_prob[final_height * final_width] = 0.0
    gold_prob = gold_prob / (gold_prob.sum() + 0.00001)

    return gold_prob
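# Hedged sketch (toy 2 x 2 grid, not the original code): generate_gold_prob above
# places a Gaussian bump (variance sigma2) around the rounded goal cell and
# reserves the final extra slot for the "goal not visible" case.
def _sketch_gold_prob_grid():
    H, W, sigma2 = 2, 2, 0.5
    row_real, col_real = 0.2, 1.1
    row_, col_ = float(round(row_real)) + 0.5, float(round(col_real)) + 0.5
    gold_prob = torch.zeros(H * W + 1)
    for i in range(H):
        for j in range(W):
            dist2 = (i + 0.5 - row_) ** 2 + (j + 0.5 - col_) ** 2
            gold_prob[i * W + j] = -dist2 / (2.0 * sigma2)
    gold_prob = torch.exp(gold_prob)
    gold_prob[H * W] = 0.0                       # visible goal: last slot stays zero
    return gold_prob / (gold_prob.sum() + 1e-5)  # peaks at cell (0, 1)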
def _gather_last_observation(env, actions, step, homing_policies, selection_weights):
    start_obs, meta = env.reset()

    if step > 1:
        if selection_weights is None:
            # Select a homing policy for the previous time step uniformly at random
            policy = random.choice(homing_policies[step - 1])
        else:
            # Select a homing policy for the previous time step using the given weights
            # policy = random.choices(homing_policies[step - 1], weights=selection_weights, k=1)[0]
            ix = gp.sample_action_from_prob(selection_weights)
            policy = homing_policies[step - 1][ix]

        obs = start_obs
        for step_ in range(1, step):
            obs_var = cuda_var(torch.from_numpy(obs)).float().view(1, -1)
            action = policy[step_].sample_action(obs_var)
            obs, reward, done, meta = env.step(action)

    action = random.choice(actions)
    new_obs, reward, done, meta = env.step(action)

    return new_obs, meta
def forward(self, instructions_batch):
    token_lists, _ = instructions_batch
    batch_size = len(token_lists)

    dims = (self.num_layers, batch_size, self.hidden_dim)
    hidden = (Variable(cuda_tensor(torch.zeros(*dims)), requires_grad=False),
              Variable(cuda_tensor(torch.zeros(*dims)), requires_grad=False))

    # pad text tokens with 0's
    text_lengths = np.array([len(tokens) for tokens in token_lists])
    tokens_batch = [[] for _ in range(batch_size)]
    for i in range(batch_size):
        num_zeros = text_lengths[0] - text_lengths[i]
        tokens_batch[i] = token_lists[i] + [0] * num_zeros
    tokens_batch = cuda_var(torch.from_numpy(np.array(tokens_batch)))

    # swap so batch dimension is second, sequence dimension is first
    tokens_batch = tokens_batch.transpose(0, 1)
    emb_sentence = self.embedding(tokens_batch)
    packed_input = pack_padded_sequence(emb_sentence, text_lengths)
    lstm_out_packed, _ = self.lstm(packed_input, hidden)

    # return average output embedding
    lstm_out, seq_lengths = pad_packed_sequence(lstm_out_packed)
    lstm_out = lstm_out.transpose(0, 1)

    sum_emb_list = []
    for i, seq_out in enumerate(lstm_out):
        seq_len = seq_lengths[i]
        sum_emb = torch.sum(seq_out[:seq_len], 0) / seq_len
        sum_emb_list.append(sum_emb.view(1, -1))
    return torch.cat(sum_emb_list)
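# Hedged sketch (toy token ids, hypothetical `embedding` and `lstm` module
# arguments, not the original code): the forward pass above assumes the batch
# is already sorted by descending instruction length (see get_probs_batch),
# pads every instruction to the length of the first one, and relies on
# pack_padded_sequence so the LSTM ignores the zero padding.
def _sketch_pad_and_pack(embedding, lstm):
    token_lists = [[5, 3, 8, 2], [7, 1]]  # already sorted, longest first
    text_lengths = np.array([len(tokens) for tokens in token_lists])
    padded = [tokens + [0] * (text_lengths[0] - len(tokens)) for tokens in token_lists]
    tokens_batch = torch.from_numpy(np.array(padded)).transpose(0, 1)  # seq x batch
    packed = pack_padded_sequence(embedding(tokens_batch), text_lengths)
    lstm_out_packed, _ = lstm(packed)
    lstm_out, seq_lengths = pad_packed_sequence(lstm_out_packed)
    return lstm_out.transpose(0, 1), seq_lengths  # batch x seq x hidden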