def dump_data(self, train_dataset, test_dataset):
    landmark_distribution = [0] * 63
    theta_1_distribution = [0] * NO_BUCKETS
    theta_2_distribution = [0] * NO_BUCKETS
    r_distribution = [0] * 15

    for data_point in train_dataset:
        symbolic_form = nav_drone_symbolic_instructions.get_nav_drone_symbolic_instruction_segment(data_point)
        landmark, theta_1, theta_2, r = symbolic_form
        landmark_distribution[landmark] += 1
        theta_1_distribution[theta_1] += 1
        theta_2_distribution[theta_2] += 1
        r_distribution[r] += 1

        landmark_string = nav_drone_symbolic_instructions.LANDMARK_NAMES[landmark]
        instruction = data_point.get_instruction()
        logging.info("Instruction %r, Symbolic form %r %r %r %r",
                     debug.instruction_to_string(instruction, self.config),
                     landmark_string, theta_1, theta_2, r)

    logging.info("Landmark Distribution %r", landmark_distribution)
    logging.info("Theta 1 Distribution %r", theta_1_distribution)
    logging.info("Theta 2 Distribution %r", theta_2_distribution)
    logging.info("R Distribution %r", r_distribution)
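# A minimal, self-contained sketch of the histogram pattern dump_data relies on:
# each symbolic component is already an integer bucket index, so a fixed-size
# list serves as the counter. The bucket sizes mirror the hard-coded ones above
# (63 landmarks, NO_BUCKETS angles, 15 radii) but are otherwise illustrative.
def _histogram(values, num_buckets):
    counts = [0] * num_buckets
    for v in values:
        counts[v] += 1
    return counts

# e.g. _histogram([3, 3, 7], 15) -> a 15-long list with counts at indices 3 and 7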
def get_attention_prob(self, agent_observed_state, model_state, mode=None, volatile=False):
    assert isinstance(agent_observed_state, AgentObservedState)
    agent_observed_state_list = [agent_observed_state]

    image_seqs = [[aos.get_last_image()] for aos in agent_observed_state_list]
    image_batch = cuda_var(torch.from_numpy(np.array(image_seqs)).float(), volatile)

    instructions = [aos.get_instruction() for aos in agent_observed_state_list]
    instructions_batch = cuda_var(torch.from_numpy(np.array(instructions)).long())

    time = agent_observed_state.time_step
    time = cuda_var(torch.from_numpy(np.array([time])).long())

    instruction_string = instruction_to_string(agent_observed_state.instruction, self.config)

    state_feature = self.final_module.get_attention_prob(
        image_batch, instructions_batch, instruction_string, agent_observed_state.goal)
    return state_feature
def reset(self, data_point, action_space, config):
    assert isinstance(data_point, NavDroneDataPoint)
    assert isinstance(action_space, ActionSpace)
    self.move_list = []

    with self.shared_data_lock:
        self.shared_data["scene_name"] = data_point.get_scene_name()
        end_x, end_z = data_point.get_destination_list()[-1]
        self.shared_data["end_x"], self.shared_data["end_z"] = end_x, end_z
        self.shared_data["dest_list"] = data_point.get_destination_list()
        gold_moves = []
        for seg in data_point.get_sub_trajectory_list():
            moves = [action_space.get_action_name(a) for a in seg]
            gold_moves.extend(moves)
            gold_moves.append(STOP)
        self.shared_data["trajectory"] = [SERVER_MOVE_RESPONSES.index(m) for m in gold_moves]

    instruction_segments = data_point.get_instruction_oracle_segmented()
    instruction_string = ""
    for i, instruction_seg in enumerate(instruction_segments):
        # Alternate colors so adjacent segments are visually distinct.
        if i % 2 == 0:
            color = "yellow"
        else:
            color = "magenta"
        instruction_string += "<color=%s>" % color
        instruction_string += instruction_to_string(instruction_seg, config)
        instruction_string += "</color> "

    self.scene_config_queue.put(data_point.get_scene_config())
    self.path_queue.put(data_point.get_scene_path())
    self.instructions_queue.put(instruction_string.strip())
    self.start_pos_queue.put(data_point.get_start_pos())
    self.next_dest_queue.put(data_point.get_destination_list())
    self.move_queue_full = True
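# Standalone sketch of the alternating-color markup built in reset above. The
# <color=...> tags are presumably rich-text markup consumed by the simulator
# UI; the segment strings here are illustrative only.
def _color_segments(segment_strings):
    parts = []
    for i, seg in enumerate(segment_strings):
        color = "yellow" if i % 2 == 0 else "magenta"
        parts.append("<color=%s>%s</color>" % (color, seg))
    return " ".join(parts)

# _color_segments(["go to the pond", "then turn left"])
# -> '<color=yellow>go to the pond</color> <color=magenta>then turn left</color>'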
def show_instruction(self, data_point, show_discourse=True):
    if show_discourse:
        paragraph_instruction = data_point.get_paragraph_instruction()
        start_index, end_index = data_point.get_instruction_indices()
        previous_instruction_string = instruction_to_string(
            paragraph_instruction[:start_index], self.config)
        instruction_string = instruction_to_string(
            paragraph_instruction[start_index:end_index], self.config)
        future_instruction_string = instruction_to_string(
            paragraph_instruction[end_index:], self.config)
        # Mark the current instruction inside its surrounding discourse.
        return previous_instruction_string + " \n /** " + instruction_string + \
            " **/\n " + future_instruction_string
    else:
        instruction_string = instruction_to_string(data_point.get_instruction(), self.config)
        return instruction_string
def tag_dataset(dataset, config):
    noun_counts = dict()
    for data_point in dataset:
        instruction = instruction_to_string(data_point.get_instruction(), config)
        token_seq = nltk.tokenize.word_tokenize(instruction)
        tagged_tokens = nltk.pos_tag(token_seq)
        for token, tag in tagged_tokens:
            if tag == "NN" or tag == "NNP":
                noun = token.lower()
                if noun in noun_counts:
                    noun_counts[noun] += 1
                else:
                    noun_counts[noun] = 1
    sorted_nouns = sorted(noun_counts.items(), key=lambda x: -x[1])
    print("Noun set is " + str(sorted_nouns))
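# Runnable sketch of the same noun-counting pattern on plain strings, useful
# for quick experimentation outside the dataset pipeline. Assumes nltk plus
# its 'punkt' and 'averaged_perceptron_tagger' data are installed; the sample
# sentences are illustrative.
def _count_nouns(sentences):
    import nltk
    counts = {}
    for sentence in sentences:
        for token, tag in nltk.pos_tag(nltk.tokenize.word_tokenize(sentence)):
            if tag in ("NN", "NNP"):
                counts[token.lower()] = counts.get(token.lower(), 0) + 1
    return sorted(counts.items(), key=lambda x: -x[1])

# _count_nouns(["fly past the red barn", "circle the barn twice"]) -> [('barn', 2), ...]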
def get_unet_output(self, agent_observed_state, model_state, mode=None, volatile=False):
    assert isinstance(agent_observed_state, AgentObservedState)
    agent_observed_state_list = [agent_observed_state]

    image_seqs = [[aos.get_last_image()] for aos in agent_observed_state_list]
    image_batch = cuda_var(torch.from_numpy(np.array(image_seqs)).float(), volatile)

    instructions = [aos.get_instruction() for aos in agent_observed_state_list]
    instructions_batch = cuda_var(torch.from_numpy(np.array(instructions)).long())

    time = agent_observed_state.time_step
    time = cuda_var(torch.from_numpy(np.array([time])).long())

    instruction_string = instruction_to_string(agent_observed_state.instruction, self.config)

    # Embed the text
    _, text_emb_raw = self.text_module(instructions_batch)

    # Embed the image
    image_emb_seq = self.image_module(image_batch)
    image_embedding = image_emb_seq[:, 0, :, :, :]  # 1 x num_channels x height x width

    unet_output = self.final_module(image_embedding, text_emb_raw)
    return unet_output
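# Shape sketch for the slicing above, assuming the conventions this module
# appears to follow: image_module returns per-frame feature maps with a batch
# and a time axis, and [:, 0, :, :, :] keeps the single frame that was batched.
# Dimension names are illustrative, not taken from the code:
#
#   image_batch:     (1, 1, C_in, H_in, W_in)   one state, one frame
#   image_emb_seq:   (1, 1, C, H, W)            per-frame feature maps
#   image_embedding: (1, C, H, W)               frame 0, fed to the U-Net head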
def _test(self, data_point_ix, data_point, test_image, tensorboard=None, debug=False):
    image, metadata = self.server.reset_receive_feedback(data_point)
    pose = int(metadata["y_angle"] / 15.0)
    position_orientation = (metadata["x_pos"], metadata["z_pos"], metadata["y_angle"])
    state = AgentObservedState(
        instruction=data_point.instruction,
        config=self.config,
        constants=self.constants,
        start_image=image,
        previous_action=None,
        pose=pose,
        position_orientation=position_orientation,
        data_point=data_point,
        prev_instruction=data_point.get_prev_instruction(),
        next_instruction=data_point.get_next_instruction())

    # Test image
    high_quality_test_image_example = self.get_exploration_image()
    print("Image shape is ", high_quality_test_image_example.shape)
    test_image_example = test_image[data_point_ix][0]

    # Predict the goal
    predicted_goal, predictor_error, predicted_pixel, attention_prob = self.get_3d_location(
        test_image_example, data_point, panaroma=True)
    current_bot_location = metadata["x_pos"], metadata["z_pos"]
    current_bot_pose = metadata["y_angle"]
    state.goal = PredictorPlannerAgent.get_goal_location(
        current_bot_location, current_bot_pose, predicted_goal, 32, 32)
    print("Predicted Error ", predictor_error)

    num_actions = 0
    actions = []

    # Dictionary to contain key results
    info = dict()
    info["instruction_string"] = instruction_to_string(data_point.instruction, self.config)
    info["datapoint_id"] = data_point.get_scene_name()
    info["stop_dist_error"] = metadata["stop_dist_error"]
    info["closest_dist_error"] = metadata["closest_dist_error"]
    info["edit_dist_error"] = metadata["edit_dist_error"]
    info["num_actions_taken"] = num_actions
    info["predicted_goal"] = predicted_goal
    info["predicted_error"] = predictor_error
    info["gold_goal"] = data_point.get_destination_list()[-1]
    info["final_location"] = (metadata["x_pos"], metadata["z_pos"])
    info["predicted_screen_pixels"] = predicted_pixel

    self.save_attention_prob(high_quality_test_image_example, attention_prob,
                             info["instruction_string"], info["datapoint_id"])
    # self.save_example(image, info["instruction_string"], info["datapoint_id"], scale=5)

    self.server.halt_and_receive_feedback()
    return metadata, actions, predictor_error, info
def do_train(self, agent, train_dataset, tune_dataset, experiment_name):
    """ Perform training """

    assert isinstance(agent, ReadPointerAgent), \
        "This learning algorithm works only with ReadPointerAgent"

    dataset_size = len(train_dataset)

    for epoch in range(1, self.max_epoch + 1):
        logging.info("Starting epoch %d", epoch)
        action_counts = dict()
        action_counts[ReadPointerAgent.READ_MODE] = [0] * 2
        action_counts[ReadPointerAgent.ACT_MODE] = [0] * self.action_space.num_actions()

        # Test on tuning data
        agent.test(tune_dataset, tensorboard=self.tensorboard)

        batch_replay_items = []
        total_reward = 0
        episodes_in_batch = 0

        for data_point_ix, data_point in enumerate(train_dataset):
            if (data_point_ix + 1) % 100 == 0:
                logging.info("Done %d out of %d", data_point_ix, dataset_size)
                logging.info("Training data action counts %r", action_counts)

            num_actions = 0
            max_num_actions = len(data_point.get_trajectory())
            max_num_actions += self.constants["max_extra_horizon"]

            image, metadata = agent.server.reset_receive_feedback(data_point)

            state = AgentObservedState(instruction=data_point.instruction,
                                       config=self.config,
                                       constants=self.constants,
                                       start_image=image,
                                       previous_action=None)

            mode = ReadPointerAgent.READ_MODE
            last_action_was_halt = False

            instruction = instruction_to_string(data_point.get_instruction(), self.config)
            print("TRAIN INSTRUCTION: %r" % instruction)
            print("")

            while True:
                # Sample action using the policy:
                # generate probabilities over actions
                probabilities = list(torch.exp(self.model.get_probs(state, mode).data))

                # Use test policy to get the action
                action = gp.sample_action_from_prob(probabilities)
                action_counts[mode][action] += 1

                if mode == ReadPointerAgent.READ_MODE:
                    # Read mode boundary conditions
                    forced_action = False
                    if not state.are_tokens_left_to_be_read():
                        # Force halt
                        action = 1
                        forced_action = True
                    elif num_actions >= max_num_actions or last_action_was_halt:
                        # Force read
                        action = 0
                        forced_action = True

                    if not forced_action:
                        # Store reward in the replay memory list
                        reward = self._calc_reward_read_mode(state, action)
                        replay_item = ReplayMemoryItem(state, action, reward, mode=mode)
                        batch_replay_items.append(replay_item)

                    if action == 0:
                        last_action_was_halt = False
                        state = state.update_on_read()
                    elif action == 1:
                        last_action_was_halt = True
                        mode = ReadPointerAgent.ACT_MODE
                    else:
                        raise AssertionError(
                            "Read mode only supports two actions: read(0) and halt(1). "
                            + "Found " + str(action))

                elif mode == ReadPointerAgent.ACT_MODE:
                    # Act mode boundary conditions
                    if num_actions >= max_num_actions:
                        forced_stop = True
                        break
                    elif action == agent.action_space.get_stop_action_index():
                        if state.are_tokens_left_to_be_read():
                            reward = self._calc_reward_act_halt(state)

                            # Add to replay memory
                            replay_item = ReplayMemoryItem(
                                state, agent.action_space.get_stop_action_index(), reward, mode)
                            batch_replay_items.append(replay_item)

                            mode = ReadPointerAgent.READ_MODE
                            last_action_was_halt = True
                            state = state.update_on_act_halt()
                        else:
                            forced_stop = False
                            break
                    else:
                        image, reward, metadata = agent.server.send_action_receive_feedback(action)

                        # Store it in the replay memory list
                        replay_item = ReplayMemoryItem(state, action, reward, mode=mode)
                        batch_replay_items.append(replay_item)

                        # Update the agent state
                        state = state.update(image, action)

                        num_actions += 1
                        total_reward += reward
                        last_action_was_halt = False

                else:
                    raise AssertionError("Mode should be either read or act. Unhandled mode: " + str(mode))

            assert mode == ReadPointerAgent.ACT_MODE, "Agent should end on Act Mode"

            # Send final STOP action and get feedback
            image, reward, metadata = agent.server.halt_and_receive_feedback()
            total_reward += reward

            # Store it in the replay memory list
            if not forced_stop:
                replay_item = ReplayMemoryItem(
                    state, agent.action_space.get_stop_action_index(), reward, mode)
                batch_replay_items.append(replay_item)

            # Update the scores based on meta_data
            # self.meta_data_util.log_results(metadata)

            # Perform update
            episodes_in_batch += 1
            if episodes_in_batch == 1:
                loss_val = self.do_update(batch_replay_items)
                batch_replay_items = []
                entropy_val = float(self.entropy.data[0])
                self.tensorboard.log(entropy_val, loss_val, total_reward)
                total_reward = 0
                episodes_in_batch = 0

            self.tensorboard.log_train_error(metadata["error"])

        # Save the model
        self.model.save_model(
            experiment_name + "/read_pointer_contextual_bandit_resnet_epoch_" + str(epoch))

        logging.info("Training data action counts %r", action_counts)
def test_goal_prediction(self, test_dataset, tensorboard=None, logger=None, pushover_logger=None):
    self.server.clear_metadata()
    action_counts = [0] * self.action_space.num_actions()
    task_completion_accuracy = 0

    sum_loss, count, sum_prob, goal_prob_count = 0, 0, 0, 0

    metadata = {"feedback": ""}
    for data_point_ix, data_point in enumerate(test_dataset):
        print("Datapoint index ", data_point_ix)
        image, metadata = self.server.reset_receive_feedback(data_point)
        pose = int(metadata["y_angle"] / 15.0)
        position_orientation = (metadata["x_pos"], metadata["z_pos"], metadata["y_angle"])
        state = AgentObservedState(
            instruction=data_point.instruction,
            config=self.config,
            constants=self.constants,
            start_image=image,
            previous_action=None,
            pose=pose,
            position_orientation=position_orientation,
            data_point=data_point,
            prev_instruction=data_point.get_prev_instruction(),
            next_instruction=data_point.get_next_instruction())

        ##################################
        state.goal = GoalPrediction.get_goal_location(metadata, data_point, 8, 8)
        print("Instruction is ", instruction_to_string(data_point.instruction, self.config))
        ##################################

        # state.start_read_pointer, state.end_read_pointer = data_point.get_instruction_indices()
        num_actions = 0
        max_num_actions = self.constants["horizon"]
        model_state = None

        trajectory = data_point.get_trajectory()[0:1]
        trajectory_len = len(trajectory)

        while True:
            if num_actions == trajectory_len:
                action = self.action_space.get_stop_action_index()
            else:
                action = trajectory[num_actions]

            # Generate probabilities over actions
            if isinstance(self.model, AbstractModel):
                raise NotImplementedError()
            elif isinstance(self.model, AbstractIncrementalModel):
                log_probabilities, model_state, _, volatile = self.model.get_probs(
                    state, model_state, volatile=True)
                probabilities = list(torch.exp(log_probabilities.data))[0]

                # Compute goal prediction accuracy
                goal_loss, prob, _ = self.goal_prediction_accuracy(state.goal, volatile)
                sum_loss += goal_loss
                count += 1
                if prob is not None:
                    sum_prob += prob
                    goal_prob_count += 1
            else:
                raise NotImplementedError()
                # log_probabilities, model_state = self.model.get_probs(state, model_state)
                # probabilities = list(torch.exp(log_probabilities.data))

            action_counts[action] += 1

            if action == self.action_space.get_stop_action_index() or num_actions >= max_num_actions:
                # Send the action and get feedback
                image, reward, metadata = self.server.halt_and_receive_feedback()
                if tensorboard is not None:
                    tensorboard.log_all_test_errors(
                        metadata["edit_dist_error"],
                        metadata["closest_dist_error"],
                        metadata["stop_dist_error"])

                if metadata["stop_dist_error"] < 5.0:
                    task_completion_accuracy += 1

                # Update the scores based on meta_data
                self.meta_data_util.log_results(metadata)
                break
            else:
                # Send the action and get feedback
                image, reward, metadata = self.server.send_action_receive_feedback(action)

                # Update the agent state
                pose = int(metadata["y_angle"] / 15.0)
                position_orientation = (metadata["x_pos"], metadata["z_pos"], metadata["y_angle"])
                state = state.update(
                    image, action, pose=pose,
                    position_orientation=position_orientation,
                    data_point=data_point)

                ##################################
                state.goal = GoalPrediction.get_goal_location(metadata, data_point, 8, 8)
                ##################################
                num_actions += 1

    print("Finished testing. Now logging.")
    task_completion_accuracy = (task_completion_accuracy * 100.0) / float(max(len(test_dataset), 1))
    self.log("Overall test results:", logger)
    self.log("Testing: Task completion accuracy is: %r" % task_completion_accuracy, logger)
    self.log("Testing: Final Metadata: %r" % metadata, logger)
    self.log("Testing: Action Distribution: %r" % action_counts, logger)
    self.log("Goal Count %r, Mean Goal Loss %r"
             % (count, sum_loss / float(max(count, 1))), logger)
    self.log("Goal Prob Count %r, Mean Goal Prob %r"
             % (goal_prob_count, sum_prob / float(max(goal_prob_count, 1))), logger)
    self.meta_data_util.log_results(metadata, logger)
    self.log("Testing data action counts %r" % action_counts, logger)
    if pushover_logger is not None:
        pushover_feedback = str(metadata["feedback"]) + \
            " --- " + "task_completion_accuracy=%r" % task_completion_accuracy
        pushover_logger.log(pushover_feedback)
def test_auto_segmented(self, test_dataset, tensorboard=None, segmenting_type="auto"):
    assert segmenting_type in ("auto", "oracle")
    self.server.clear_metadata()
    action_counts = [0] * self.action_space.num_actions()

    metadata = ""
    for data_point in test_dataset:
        if segmenting_type == "auto":
            segmented_instruction = data_point.get_instruction_auto_segmented()
        else:
            segmented_instruction = data_point.get_instruction_oracle_segmented()
        num_segments = len(segmented_instruction)
        gold_num_actions = len(data_point.get_trajectory())
        horizon = gold_num_actions // num_segments
        horizon += self.constants["max_extra_horizon_auto_segmented"]

        image, metadata = self.server.reset_receive_feedback(data_point)

        instruction = instruction_to_string(data_point.get_instruction(), self.config)
        print("TEST INSTRUCTION: %r" % instruction)
        print("")

        for instruction_i, instruction in enumerate(segmented_instruction):
            state = AgentObservedState(
                instruction=instruction,
                config=self.config,
                constants=self.constants,
                start_image=image,
                previous_action=None,
                prev_instruction=data_point.get_prev_instruction(),
                next_instruction=data_point.get_next_instruction())

            num_actions = 0
            # self._save_agent_state(state, num_actions)

            while True:
                # Generate probabilities over actions
                probabilities = list(torch.exp(self.model.get_probs(state).data))
                # print("test probs:", probabilities)

                # Use test policy to get the action
                action = self.test_policy(probabilities)
                action_counts[action] += 1

                # logging.info("Taking action-num=%d horizon=%d action=%s from %s",
                #              num_actions, max_num_actions, str(action), str(probabilities))

                if action == self.action_space.get_stop_action_index() or num_actions >= horizon:
                    break
                else:
                    # Send the action and get feedback
                    image, reward, metadata = self.server.send_action_receive_feedback(action)

                    # Update the agent state
                    state = state.update(image, action)
                    num_actions += 1

        _, _, metadata = self.server.halt_and_receive_feedback()
        if tensorboard is not None:
            tensorboard.log_test_error(metadata["error"])

        # Update the scores based on meta_data
        self.meta_data_util.log_results(metadata)

    logging.info("Testing data action counts %r", action_counts)
def _test(self, data_point_ix, data_point, test_image, tensorboard=None, debug=False):
    image, metadata = self.server.reset_receive_feedback(data_point)
    pose = int(metadata["y_angle"] / 15.0)
    position_orientation = (metadata["x_pos"], metadata["z_pos"], metadata["y_angle"])
    state = AgentObservedState(
        instruction=data_point.instruction,
        config=self.config,
        constants=self.constants,
        start_image=image,
        previous_action=None,
        pose=pose,
        position_orientation=position_orientation,
        data_point=data_point,
        prev_instruction=data_point.get_prev_instruction(),
        next_instruction=data_point.get_next_instruction())

    # Test image
    if test_image is None:
        test_image_example = self.get_exploration_image()
    else:
        test_image_example = test_image[data_point_ix][0]

    # Predict the goal
    predicted_goal, predictor_error, predicted_pixel, attention_prob = self.get_3d_location(
        test_image_example, data_point, panaroma=True)
    current_bot_location = metadata["x_pos"], metadata["z_pos"]
    current_bot_pose = metadata["y_angle"]
    state.goal = PredictorPlannerAgent.get_goal_location(
        current_bot_location, current_bot_pose, predicted_goal, 32, 32)
    print("Predicted Error ", predictor_error)

    num_actions = 0
    max_num_actions = self.constants["horizon"]
    model_state = None
    actions = []
    info = dict()

    while True:
        # Generate probabilities over actions
        if isinstance(self.model, AbstractModel):
            probabilities = list(torch.exp(self.model.get_probs(state).data))
        elif isinstance(self.model, AbstractIncrementalModel):
            log_probabilities, model_state, _, _ = self.model.get_probs(
                state, model_state, volatile=True)
            probabilities = list(torch.exp(log_probabilities.data))[0]
        else:
            raise AssertionError("Unhandled Model type.")

        # Use test policy to get the action
        action = self.test_policy(probabilities)
        actions.append(action)

        if action == self.action_space.get_stop_action_index() or num_actions >= max_num_actions:
            # Send the action and get feedback
            image, reward, metadata = self.server.halt_and_receive_feedback()
            if tensorboard is not None:
                tensorboard.log_all_test_errors(
                    metadata["edit_dist_error"],
                    metadata["closest_dist_error"],
                    metadata["stop_dist_error"])

            # Update the scores based on meta_data
            self.meta_data_util.log_results(metadata)

            if debug:
                # Dictionary to contain key results
                info["instruction_string"] = instruction_to_string(
                    data_point.instruction, self.config)
                info["datapoint_id"] = data_point.get_scene_name()
                info["stop_dist_error"] = metadata["stop_dist_error"]
                info["closest_dist_error"] = metadata["closest_dist_error"]
                info["edit_dist_error"] = metadata["edit_dist_error"]
                info["num_actions_taken"] = num_actions
                info["predicted_goal"] = predicted_goal
                info["predicted_error"] = predictor_error
                info["gold_goal"] = data_point.get_destination_list()[-1]
                info["final_location"] = (metadata["x_pos"], metadata["z_pos"])
                info["predicted_screen_pixels"] = predicted_pixel

                self.save_attention_prob(test_image_example, attention_prob,
                                         info["instruction_string"], info["datapoint_id"])
            break
        else:
            # Send the action and get feedback
            image, reward, metadata = self.server.send_action_receive_feedback(action)

            # Update the agent state
            pose = int(metadata["y_angle"] / 15.0)
            position_orientation = (metadata["x_pos"], metadata["z_pos"], metadata["y_angle"])
            state = state.update(image, action, pose=pose,
                                 position_orientation=position_orientation,
                                 data_point=data_point)

            # Set the goal based on the current position and angle
            current_bot_location = metadata["x_pos"], metadata["z_pos"]
            current_bot_pose = metadata["y_angle"]
            state.goal = PredictorPlannerAgent.get_goal_location(
                current_bot_location, current_bot_pose, predicted_goal, 32, 32)

            num_actions += 1

    # logging.info("Error, Start-Distance, Turn-Angle, %r %r %r",
    #              metadata["stop_dist_error"], distance, angle)
    return metadata, actions, predictor_error, info
def test(self, test_dataset, tensorboard=None, logger=None, pushover_logger=None):
    self.server.clear_metadata()
    action_counts = [0] * self.action_space.num_actions()
    task_completion_accuracy = 0

    print("Reached Test")
    test_dataset_size = len(test_dataset)

    metadata = {"feedback": ""}
    data_point = random.sample(test_dataset, 1)[0]
    while True:
        print("Please enter an instruction. For sample see:")
        # data_point = random.sample(test_dataset, 1)[0]
        image, metadata = self.server.reset_receive_feedback(data_point)
        print("Sample instruction: ",
              instruction_to_string(data_point.get_instruction(), self.config))
        input_instruction = input("Enter an instruction or enter q to quit ")
        if input_instruction == "q" or input_instruction == "quit":
            break
        input_instruction_ids = self.convert_to_id(input_instruction)

        pose = int(metadata["y_angle"] / 15.0)
        position_orientation = (metadata["x_pos"], metadata["z_pos"], metadata["y_angle"])
        state = AgentObservedState(
            instruction=input_instruction_ids,
            config=self.config,
            constants=self.constants,
            start_image=image,
            previous_action=None,
            pose=pose,
            position_orientation=position_orientation,
            data_point=data_point,
            prev_instruction=data_point.get_prev_instruction(),
            next_instruction=data_point.get_next_instruction())
        # state.start_read_pointer, state.end_read_pointer = data_point.get_instruction_indices()
        num_actions = 0
        max_num_actions = self.constants["horizon"]
        model_state = None
        # print("Model state is new")

        while True:
            time.sleep(0.3)

            # Generate probabilities over actions
            if isinstance(self.model, AbstractModel):
                probabilities = list(torch.exp(self.model.get_probs(state).data))
            elif isinstance(self.model, AbstractIncrementalModel):
                log_probabilities, model_state, _, _ = self.model.get_probs(
                    state, model_state, volatile=True)
                probabilities = list(torch.exp(log_probabilities.data))[0]
            else:
                # print("Num action is " + str(num_actions) + " and max is " + str(max_num_actions))
                log_probabilities, model_state = self.model.get_probs(state, model_state)
                probabilities = list(torch.exp(log_probabilities.data))
                # raise AssertionError("Unhandled Model type.")

            # Use test policy to get the action
            action = self.test_policy(probabilities)
            # DONT FORGET TO REMOVE
            # action = np.random.randint(0, 2)
            action_counts[action] += 1

            if action == self.action_space.get_stop_action_index() or num_actions >= max_num_actions:
                # Send the action and get feedback
                image, reward, metadata = self.server.halt_and_receive_feedback()
                if tensorboard is not None:
                    tensorboard.log_all_test_errors(
                        metadata["edit_dist_error"],
                        metadata["closest_dist_error"],
                        metadata["stop_dist_error"])

                if metadata["stop_dist_error"] < 5.0:
                    task_completion_accuracy += 1

                # Update the scores based on meta_data
                self.meta_data_util.log_results(metadata)
                break
            else:
                # Send the action and get feedback
                image, reward, metadata = self.server.send_action_receive_feedback(action)

                # Update the agent state
                pose = int(metadata["y_angle"] / 15.0)
                position_orientation = (metadata["x_pos"], metadata["z_pos"], metadata["y_angle"])
                state = state.update(
                    image, action, pose=pose,
                    position_orientation=position_orientation,
                    data_point=data_point)
                num_actions += 1

    print("Finished testing. Now logging.")
    task_completion_accuracy = (task_completion_accuracy * 100.0) / float(max(len(test_dataset), 1))
    self.log("Overall test results:", logger)
    self.log("Testing: Task completion accuracy is: %r" % task_completion_accuracy, logger)
    self.log("Testing: Final Metadata: %r" % metadata, logger)
    self.log("Testing: Action Distribution: %r" % action_counts, logger)
    self.meta_data_util.log_results(metadata, logger)
    self.log("Testing data action counts %r" % action_counts, logger)
    if pushover_logger is not None:
        pushover_feedback = str(metadata["feedback"]) + \
            " --- " + "task_completion_accuracy=%r" % task_completion_accuracy
        pushover_logger.log(pushover_feedback)
def test_classifier(self, agent, test_dataset):
    fp, fn, tp, tn = 0, 0, 0, 0
    fn_examples = []
    fp_examples = []
    perfect_segmented_examples = []

    for data_point_ix, data_point in enumerate(test_dataset):
        state = AgentObservedState(
            instruction=data_point.instruction,
            config=self.config,
            constants=self.constants,
            start_image=None,  # image
            previous_action=None)
        segments = data_point.get_instruction_oracle_segmented()
        segment_lens = [len(s) for s in segments]
        num_mistakes = 0
        for i, seg_len in enumerate(segment_lens):
            segment_instruction = debug.instruction_to_string(segments[i], self.config)
            num_read = 0
            while num_read < seg_len:
                state = state.update_on_read()
                num_read += 1
                candidate_instruction = debug.instruction_to_string(
                    segments[i][:num_read], self.config)
                model_log_probs = list(
                    self.model.get_segmentation_probs([state]).view(-1).data)
                pred_action = gp.get_argmax_action(model_log_probs)
                if num_read < seg_len and pred_action == 0:
                    tn += 1
                elif num_read < seg_len and pred_action == 1:
                    fp += 1
                    num_mistakes += 1
                    fp_examples.append((candidate_instruction, segment_instruction))
                elif num_read == seg_len and pred_action == 0:
                    fn += 1
                    num_mistakes += 1
                    fn_examples.append((candidate_instruction, segment_instruction))
                elif num_read == seg_len and pred_action == 1:
                    tp += 1
            state = state.update_on_act_halt()

        if num_mistakes == 0:
            instruction_strings = []
            for seg in segments:
                instruction_strings.append(debug.instruction_to_string(seg, self.config))
            perfect_segmented_examples.append(" ----- ".join(instruction_strings))

    # Calculate precision
    if fp + tp > 0:
        precision = (tp * 1.0) / (fp + tp)
    else:
        precision = 1.0

    # Calculate recall
    if fn + tp > 0:
        recall = (tp * 1.0) / (fn + tp)
    else:
        recall = 1.0

    if precision + recall > 0:
        f1 = (2.0 * precision * recall) / (precision + recall)
    else:
        f1 = 0.0

    # Print FP examples
    random.shuffle(fp_examples)
    logging.info("FP EXAMPLES:")
    for ex in fp_examples[:20]:
        logging.info(ex)

    # Print FN examples
    random.shuffle(fn_examples)
    logging.info("FN EXAMPLES:")
    for ex in fn_examples[:20]:
        logging.info(ex)

    # Print perfect segmented examples
    random.shuffle(perfect_segmented_examples)
    logging.info("PERFECT SEGMENTED EXAMPLES:")
    for ex in perfect_segmented_examples[:20]:
        logging.info(ex)

    logging.info("testing results: precision=%.2f; recall=%.2f; f1=%.2f"
                 % (precision, recall, f1))
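# The tail of test_classifier computes standard precision/recall/F1 from the
# confusion counts; the same logic as a small reusable helper for reference
# (pure Python; the function name is illustrative):
def _precision_recall_f1(tp, fp, fn):
    precision = tp / float(fp + tp) if fp + tp > 0 else 1.0
    recall = tp / float(fn + tp) if fn + tp > 0 else 1.0
    f1 = 2.0 * precision * recall / (precision + recall) if precision + recall > 0 else 0.0
    return precision, recall, f1

# _precision_recall_f1(tp=8, fp=2, fn=2) -> (0.8, 0.8, 0.8)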
def interactive_shell(self, train_dataset, train_images):
    traj_len = len(train_dataset)
    keep = False
    image_id = 1
    while True:
        # Sample a random data point
        if not keep:
            ix = random.randint(0, traj_len - 1)
        data_point = train_dataset[ix]
        image = train_images[ix][0]

        # Show the image in pyplot
        plt.imshow(image.swapaxes(0, 1).swapaxes(1, 2))
        plt.ion()
        plt.show()

        # Get the instruction
        print("Enter the instruction below (q or quit to quit)\n")
        print("Sample instruction is ",
              instruction_to_string(data_point.instruction, self.config))
        while True:
            instruction = input()
            if instruction == "q" or instruction == "quit":
                break
            elif len(instruction) == 0:
                print("Enter a non-empty instruction (q or quit to quit)")
            else:
                break
        if instruction == "q" or instruction == "quit":
            break

        instruction_id = self.convert_to_id(instruction)
        state = AgentObservedState(instruction=instruction_id,
                                   config=self.config,
                                   constants=self.constants,
                                   start_image=image,
                                   previous_action=None,
                                   pose=None,
                                   position_orientation=None,
                                   data_point=data_point)

        # Show the attention mask
        _, _, _, volatile = self.model.get_attention_prob(state, model_state=None)

        attention_prob = volatile["attention_probs"][:-1].view(
            self.final_height, self.final_width)
        attention_prob = attention_prob.cpu().data.numpy()
        resized_kernel = scipy.misc.imresize(
            attention_prob, (self.config["image_height"], self.config["image_width"]))
        plt.clf()
        plt.title(instruction)
        plt.imshow(image.swapaxes(0, 1).swapaxes(1, 2))
        plt.imshow(resized_kernel, cmap="jet", alpha=0.5)

        print("Enter s to save, k to keep working on this environment, sk to do both. "
              "Other key to simply continue")
        key_ = input()
        if key_ == "s":
            plt.savefig("interactive_image_" + str(image_id) + ".png")
            image_id += 1

        if key_ == "k":
            keep = True
        else:
            keep = False

        if key_ == "sk":
            plt.savefig("image_" + str(image_id) + ".png")
            image_id += 1
            keep = True

        plt.clf()
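# scipy.misc.imresize (used above) was deprecated and later removed from scipy;
# a dependency-free nearest-neighbour upsample of the attention map can be
# sketched with np.kron. Assumes the image dimensions are integer multiples of
# the attention grid; names are illustrative:
def _upsample_attention(attention_prob, image_height, image_width):
    import numpy as np
    grid_h, grid_w = attention_prob.shape
    # Repeat each grid cell into a block so the map covers the full image.
    block = np.ones((image_height // grid_h, image_width // grid_w))
    return np.kron(attention_prob, block)  # shape: (image_height, image_width)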
def test(self, tune_dataset, tensorboard):
    total_validation_loss = 0
    total_validation_prob = 0
    total_validation_exact_accuracy = 0
    total_goal_distance = 0
    num_items = 0

    # Next metric measures when the goal is visible and prediction is within 10% radius
    total_epsilon_accuracy = 0
    num_visible_items = 0

    # Next metric measures distance in the real world and only when the goal is visible
    total_real_world_distance = 0

    for data_point_ix, data_point in enumerate(tune_dataset):
        model_state = None
        state = AgentObservedState(instruction=data_point.instruction,
                                   config=self.config,
                                   constants=self.constants,
                                   start_image=data_point.start_image,
                                   previous_action=None,
                                   pose=None,
                                   position_orientation=None,
                                   data_point=data_point)

        num_items_ = 0
        sum_loss = 0
        sum_prob = 0
        sum_acc = 0
        sum_dist = 0
        sum_real_world_distance = 0

        row, col = data_point.goal_pixel
        goal = row, col, row, col
        state.goal = goal
        volatile = self.model.get_attention_prob(state, model_state)

        if not self.ignore_none or row is not None:
            gold_ix = row * self.final_width + col
            loss, prob, meta = GoalPrediction.get_loss_and_prob(
                volatile, goal, self.final_height, self.final_width)
            num_items_ += 1
            sum_loss = sum_loss + float(loss.data.cpu().numpy()[0])
            sum_prob = sum_prob + float(prob.data.cpu().numpy()[0])

            inferred_ix, row_col = self.get_inferred_value(volatile)

            if gold_ix == inferred_ix:
                sum_acc = sum_acc + 1.0
            if row is not None and col is not None:
                # L1 distance between the gold and inferred pixel (row-major grid)
                sum_dist = sum_dist + abs(row - inferred_ix // self.final_width) \
                           + abs(col - inferred_ix % self.final_width)
                num_visible_items += 1
                if self.is_close_enough(inferred_ix, row, col):
                    total_epsilon_accuracy += 1
                real_world_distance = self.compute_distance_in_real_world(inferred_ix, data_point)
                sum_real_world_distance += real_world_distance

                # Save the map
                instruction_string = instruction_to_string(data_point.instruction, self.config)
                # goal_x, goal_y = data_point.goal_location
                # goal_x, goal_y = round(goal_x, 2), round(goal_y, 2)
                # predicted_goal_x, predicted_goal_y = predicted_goal
                # predicted_goal_x, predicted_goal_y = round(predicted_goal_x, 2), round(predicted_goal_y, 2)
                # instruction_string = instruction_string + \
                #     "\n (Error: " + str(round(sum_real_world_distance, 2)) + ")" + \
                #     "\n %r %r %r %r \n" % (goal_x, goal_y, predicted_goal_x, predicted_goal_y)
                # self.show_image(data_point.get_destination_list()[-1], predicted_goal,
                #                 data_point.get_start_pos(), instruction_string)

                # Save the generated image
                self.global_id += 1
                if self.global_id % 25 == 0:
                    goal_prob = GoalPrediction.generate_gold_prob(goal, 32, 32)
                    predicted_goal = (inferred_ix // 32, inferred_ix % 32,
                                      inferred_ix // 32, inferred_ix % 32)
                    predicted_goal_prob = GoalPrediction.generate_gold_prob(predicted_goal, 32, 32)
                    self.save_attention_prob(
                        data_point.start_image,
                        volatile["attention_probs"][:-1].view(32, 32),
                        data_point.instruction_string,
                        goal_prob[:-1].view(32, 32))
                    self.save_attention_prob(
                        data_point.start_image,
                        predicted_goal_prob[:-1].view(32, 32),
                        data_point.instruction_string,
                        goal_prob[:-1].view(32, 32))

        total_validation_loss += sum_loss
        total_validation_prob += sum_prob
        total_goal_distance += sum_dist
        total_validation_exact_accuracy += sum_acc
        total_real_world_distance += sum_real_world_distance
        num_items += num_items_

    mean_total_goal_distance = total_goal_distance / float(max(num_items, 1))
    mean_total_validation_loss = total_validation_loss / float(max(num_items, 1))
    mean_total_validation_prob = total_validation_prob / float(max(num_items, 1))
    mean_total_validation_accuracy = (total_validation_exact_accuracy * 100.0) / float(max(num_items, 1))
    mean_total_epsilon_accuracy = (total_epsilon_accuracy * 100.0) / float(max(num_visible_items, 1))
    mean_real_world_distance = total_real_world_distance / float(max(num_visible_items, 1))

    logging.info(
        "Mean Test result: L1 Distance is %r, Loss %r, Prob %r, Acc is %r, Epsilon Accuracy is %r"
        % (mean_total_goal_distance, mean_total_validation_loss, mean_total_validation_prob,
           mean_total_validation_accuracy, mean_total_epsilon_accuracy))
    logging.info(
        "Num visible items %r, Num Exact Match items is %r, Num epsilon match %r, Num Items is %r "
        % (num_visible_items, total_validation_exact_accuracy, total_epsilon_accuracy, num_items))
    logging.info("Num visible items %r, Mean Real World Distance %r "
                 % (num_visible_items, mean_real_world_distance))

    return mean_real_world_distance
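# The flat attention index and the (row, col) pixel used for the L1 distance
# above are related by a row-major layout over the final_height x final_width
# grid; the conversion as a pair of one-line helpers for reference:
def _ix_to_row_col(ix, width):
    return ix // width, ix % width

def _row_col_to_ix(row, col, width):
    return row * width + col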
def do_train(self, agent, train_dataset, tune_dataset, experiment_name):
    """ Perform training """

    for epoch in range(1, self.max_epoch + 1):

        # Test on tuning data
        agent.test(tune_dataset, tensorboard=self.tensorboard)

        for data_point in train_dataset:
            batch_replay_items = []
            num_actions = 0
            total_reward = 0

            max_num_actions = len(data_point.get_trajectory())
            max_num_actions += self.constants["max_extra_horizon"]

            image, metadata = agent.server.reset_receive_feedback(data_point)

            state = AgentObservedState(instruction=data_point.instruction,
                                       config=self.config,
                                       constants=self.constants,
                                       start_image=image,
                                       previous_action=None)

            forced_stop = True

            instruction = instruction_to_string(data_point.get_instruction(), self.config)
            print("TRAIN INSTRUCTION: %r" % instruction)
            print("")

            while num_actions < max_num_actions:
                # Sample action using the policy:
                # generate probabilities over actions
                probabilities = list(torch.exp(self.model.get_probs(state).data))

                # Use test policy to get the action
                action = gp.sample_action_from_prob(probabilities)

                if action == agent.action_space.get_stop_action_index():
                    forced_stop = False
                    break

                # Send the action and get feedback
                image, reward, metadata = agent.server.send_action_receive_feedback(action)
                total_reward += reward

                # Store it in the replay memory list
                replay_item = ReplayMemoryItem(state, action, reward)
                batch_replay_items.append(replay_item)

                # Update the agent state
                state = state.update(image, action)
                num_actions += 1

            # Send final STOP action and get feedback
            image, reward, metadata = agent.server.halt_and_receive_feedback()
            total_reward += reward

            # Store it in the replay memory list
            if not forced_stop:
                replay_item = ReplayMemoryItem(
                    state, agent.action_space.get_stop_action_index(), reward)
                batch_replay_items.append(replay_item)

            # Update the scores based on meta_data
            # self.meta_data_util.log_results(metadata)

            # Compute Q-values using sampled rollout
            ReinforceLearning._set_q_val(batch_replay_items)

            # Perform update
            loss_val = self.do_update(batch_replay_items)
            entropy_val = float(self.entropy.data[0])
            self.tensorboard.log(entropy_val, loss_val, total_reward)
            self.tensorboard.log_train_error(metadata["error"])

        # Save the model
        self.model.save_model(experiment_name + "/reinforce_epoch_" + str(epoch))