def dump_data(self, train_dataset, test_dataset):
    """ Log the symbolic form of every training example and the label histograms.

    Each data point of train_dataset is converted into its symbolic
    instruction segment (landmark, theta_1, theta_2, r). The instruction text
    and its symbolic form are logged per example; once the dataset has been
    traversed, the count histogram of each component is logged.

    :param train_dataset: iterable of data points to summarise
    :param test_dataset: not read by this method
    """
    # Histogram sizes: 63 landmark slots, NO_BUCKETS slots per angle, 15 for r.
    landmark_counts = [0] * 63
    theta_1_counts = [0] * NO_BUCKETS
    theta_2_counts = [0] * NO_BUCKETS
    r_counts = [0] * 15

    for example in train_dataset:
        landmark, theta_1, theta_2, r = \
            nav_drone_symbolic_instructions.get_nav_drone_symbolic_instruction_segment(example)

        landmark_counts[landmark] += 1
        theta_1_counts[theta_1] += 1
        theta_2_counts[theta_2] += 1
        r_counts[r] += 1

        landmark_name = nav_drone_symbolic_instructions.LANDMARK_NAMES[landmark]
        instruction_text = debug.instruction_to_string(
            example.get_instruction(), self.config)
        logging.info("Instruction %r, Symbolic form %r %r %r %r",
                     instruction_text, landmark_name, theta_1, theta_2, r)

    # One summary line per histogram, in the same order as they were declared.
    histograms = (
        ("Landmark Distribution %r", landmark_counts),
        ("Theta 1 Distribution %r", theta_1_counts),
        ("Theta 2 Distribution %r", theta_2_counts),
        ("R Distribution %r", r_counts),
    )
    for message, counts in histograms:
        logging.info(message, counts)
def predict_angle_from_resnet(self, test_dataset, test_images):
    """ Measure how often the resnet detector predicts the gold landmark's angle.

    For each data point the gold symbolic form is read, and the detector's
    angle distribution is computed from the image at the same index in
    test_images. A data point is scored only when gold_landmark < 63 and
    gold_theta_1 < 4.0; the percentage is nevertheless taken over the whole
    dataset, as in the original implementation. The result is logged, not
    returned.

    :param test_dataset: sequence of data points (must support len())
    :param test_images: sequence indexed in parallel with test_dataset
    """
    angle_accuracy = 0

    for data_point_ix, data_point in enumerate(test_dataset):
        gold_landmark, gold_theta_1, _, _ = \
            nav_drone_symbolic_instructions.get_nav_drone_symbolic_instruction_segment(data_point)

        # Only the angle output is consumed here; the landmark and distance
        # outputs of the detector are ignored.
        _, _, log_prob_theta = self.resnet_detection_model.get_probs(
            [[test_images[data_point_ix]]])
        prob_theta = list(torch.exp(log_prob_theta.data)[0])

        # Find the angle of the gold landmark and compare
        if gold_landmark < 63 and gold_theta_1 < 4.0:
            # Single-argument print(...) behaves the same on Python 2 and 3.
            print("GOLD LANDMARK is " + str(gold_landmark))
            argmax_theta_val = gp.get_argmax_action(prob_theta[gold_landmark])
            print("ARGMAX THETA VAL " + str(argmax_theta_val) + " and " + str(gold_theta_1))
            if argmax_theta_val == gold_theta_1:
                angle_accuracy += 1

    # max(1, ...) keeps an empty dataset from raising ZeroDivisionError,
    # matching the guard used by the other evaluation routines.
    angle_accuracy = (angle_accuracy * 100.0) / float(max(1, len(test_dataset)))
    logging.info("Angle accuracy of gold landmark is %r", angle_accuracy)
def test1(self, test_dataset, test_real_dataset): num_data_points = 0 num_used_landmarks = 0 theta_accuracy = 0 neighbouring_accuracy = 0 symbolic_landmark_accuracy = 0 for data_point_ix, data_point in enumerate(test_dataset): image, visible_objects = data_point # Compute probabilities over list of visible objects log_prob_theta = self.model.get_probs([[image]]) prob_theta = list(torch.exp(log_prob_theta.data)[0]) data_point_real = test_real_dataset[data_point_ix] gold_landmark, gold_theta_1, gold_theta_2, gold_r = \ nav_drone_symbolic_instructions.get_nav_drone_symbolic_instruction_segment(data_point_real) gold_theta_1_corrected = (gold_theta_1 + 6) % 12 if gold_theta_1_corrected == gp.get_argmax_action( prob_theta[gold_landmark]): symbolic_landmark_accuracy += 1 for i in range(0, self.num_landmark): if i in visible_objects: # predicted the distance and angle argmax_theta_val = gp.get_argmax_action(prob_theta[i]) gold_angle = visible_objects[i][1] if argmax_theta_val == gold_angle: theta_accuracy += 1 angle_diff = min((argmax_theta_val - gold_angle) % 12, (gold_angle - argmax_theta_val) % 12) if angle_diff <= 1: neighbouring_accuracy += 1 num_used_landmarks += 1 num_data_points += 1 theta_accuracy /= float(max(num_used_landmarks, 1)) neighbouring_accuracy /= float(max(num_used_landmarks, 1)) symbolic_landmark_accuracy /= float(max(len(test_real_dataset), 1)) logging.info( "Num datapoints %r, num visible landmarks %r and mean theta accuracy %r and neigbhouring accuracy %r", num_data_points, num_used_landmarks, theta_accuracy, neighbouring_accuracy) logging.info("Accuracy for landmark mentioned in the text is %r", symbolic_landmark_accuracy) return theta_accuracy
def do_train(self, agent, train_dataset, test_dataset, train_images, test_images, experiment_name):
    """ Perform training

    Runs self.max_epoch epochs. Each epoch first evaluates on the test split
    through self.test_classifier. It then wraps every training example in a
    SymbolicTextReplayMemoryItem, appends it to the global replay memory and
    performs a model update on a sample drawn from that memory. Finally it
    saves the model under experiment_name with the epoch number as suffix.

    :param agent: forwarded to self.test_classifier
    :param train_dataset: sequence of training data points
    :param test_dataset: forwarded to self.test_classifier
    :param train_images: sequence indexed in parallel with train_dataset
    :param test_images: forwarded to self.test_classifier
    :param experiment_name: directory prefix used for the saved models
    """
    num_examples = len(train_dataset)
    pending = 0
    update_period = 1  # number of examples collected between two updates

    for epoch in range(1, self.max_epoch + 1):
        logging.info("Starting epoch %d", epoch)
        self.test_classifier(agent, test_dataset, test_images)

        for example_ix, example in enumerate(train_dataset):
            if (example_ix + 1) % 100 == 0:
                logging.info("Done %d out of %d", example_ix, num_examples)

            observed_state = AgentObservedState(instruction=example.instruction,
                                                config=self.config,
                                                constants=self.constants,
                                                start_image=train_images[example_ix],
                                                previous_action=None,
                                                data_point=example)

            # Pair the state with its gold symbolic form and store it globally
            symbolic_form = \
                nav_drone_symbolic_instructions.get_nav_drone_symbolic_instruction_segment(example)
            memory_item = SymbolicTextReplayMemoryItem(observed_state, symbolic_form)
            self.global_replay_memory.append(memory_item)

            pending += 1
            if pending % update_period != 0:
                continue

            sampled_items = self.sample_from_global_memory()
            self.global_replay_memory.clear()
            pending = 0

            # Perform update
            loss_val = self.do_update(sampled_items)
            self.tensorboard.log_loglikelihood_position(loss_val)

        # Save the model
        checkpoint_path = experiment_name + "/ml_learning_symbolic_text_prediction_epoch_" + str(epoch)
        self.model.save_model(checkpoint_path)
def try_to_progress(self):
    """ Advance the client by one non-blocking step and return its status.

    When the client is waiting for an example or for an action, nothing is
    polled and the current status is returned. Otherwise the client must be
    waiting to receive: the server is polled without blocking, and if feedback
    has arrived the agent state, replay items and status are updated.

    :return: the client status after this step
    """

    def pose_and_position(meta):
        # Pose is the y-angle divided by 15.0, truncated by int().
        pose_bucket = int(meta["y_angle"] / 15.0)
        return pose_bucket, (meta["x_pos"], meta["z_pos"], meta["y_angle"])

    # If in state (1) or (2) then return immediately
    if self.status in (Client.WAITING_FOR_EXAMPLE, Client.WAITING_FOR_ACTION):
        return self.status

    assert self.status == Client.WAITING_TO_RECEIVE

    # State (3): check whether the pending message is available. A missing
    # agent state means the pending message answers a reset request.
    if self.state is None:
        feedback = self.server.receive_reset_feedback_nonblocking()
    else:
        feedback = self.server.receive_feedback_nonblocking()

    if feedback is None:
        # Nothing has arrived yet; remain in the receiving state.
        return self.status

    if self.state is None:
        # Feedback is in response to reset
        image, metadata = feedback
        pose, position_orientation = pose_and_position(metadata)
        self.state = AgentObservedState(
            instruction=self.current_data_point.instruction,
            config=self.config,
            constants=self.constants,
            start_image=image,
            previous_action=None,
            pose=pose,
            position_orientation=position_orientation,
            data_point=self.current_data_point)

        # Waiting for action
        self.status = Client.WAITING_FOR_ACTION
    else:
        # Feedback is in response to an action
        image, reward, metadata = feedback
        self.total_reward += reward

        # Create a replay item unless it is forced
        if not self.forced_stop:
            symbolic_text = \
                nav_drone_symbolic_instructions.get_nav_drone_symbolic_instruction_segment(
                    self.current_data_point)
            replay_item = ReplayMemoryItem(self.state,
                                           self.last_expert_action,
                                           reward,
                                           log_prob=self.last_log_prob,
                                           image_emb_seq=self.image_emb_seq,
                                           factor_entropy=self.factor_entropy,
                                           text_emb=self.model_state[0],
                                           symbolic_text=symbolic_text)
            self.local_batch_replay_items.append(replay_item)

        # Update the agent state
        pose, position_orientation = pose_and_position(metadata)
        self.state = self.state.update(image,
                                       self.last_action,
                                       pose=pose,
                                       position_orientation=position_orientation,
                                       data_point=self.current_data_point)

        if self.last_action == self.agent.action_space.get_stop_action_index():
            # Episode finished: flush local replay items and log the errors
            self.__flush_to_global_batch()
            if self.tensorboard is not None:
                self.tensorboard.log_all_train_errors(metadata["edit_dist_error"],
                                                      metadata["closest_dist_error"],
                                                      metadata["stop_dist_error"])
            self.status = Client.WAITING_FOR_EXAMPLE
        elif self.num_action >= self.max_num_actions:
            # Send forced stop action and wait to receive
            self._take_forced_stop()
            self.status = Client.WAITING_TO_RECEIVE
        else:
            # Wait to take another action
            self.status = Client.WAITING_FOR_ACTION

    self.metadata = metadata
    return self.status
def test_classifier(self, agent, test_dataset):
    """ Evaluate symbolic-text prediction from the instruction alone.

    For every data point the model predicts (landmark, theta_1, theta_2, r)
    from a state built without a start image, and the argmax predictions are
    compared with the gold symbolic form. Percent accuracies per component and
    for the full tuple are logged. The theta "regression accuracy" values
    are the mean circular bucket distance multiplied by BUCKET_WIDTH. When
    theta_2 is wrong and falls in a different coarse direction than the gold
    value, the sentence and the mismatch are printed.

    :param agent: not read by this method
    :param test_dataset: sequence of data points (must support len())
    """

    def direction(angle_binned):
        """ Map a binned angle to a coarse direction label ("FAIL" if negative). """
        if angle_binned >= 42 or 0 <= angle_binned < 6:
            return "BEHIND OF"
        if 6 <= angle_binned < 18:
            return "RIGHT OF"
        if 18 <= angle_binned < 30:
            return "FRONT OF"
        if 30 <= angle_binned < 42:
            return "LEFT OF"
        return "FAIL"

    def circular_gap(gold, predicted):
        """ Shortest distance between two buckets on a NO_BUCKETS-sized circle. """
        gap = (gold - predicted) % NO_BUCKETS
        return min(gap, NO_BUCKETS - gap)

    accuracy = 0
    landmark_accuracy = 0
    theta_1_accuracy = 0
    theta_2_accuracy = 0
    theta_1_regression_accuracy = 0
    theta_2_regression_accuracy = 0
    r_accuracy = 0

    # NOTE: the confusion matrices are accumulated but not reported anywhere
    # in this method.
    cmatrix_landmark = np.zeros((67, 67))
    cmatrix_theta1 = np.zeros((NO_BUCKETS, NO_BUCKETS))
    cmatrix_theta2 = np.zeros((NO_BUCKETS, NO_BUCKETS))
    cmatrix_range = np.zeros((15, 15))

    for data_point in test_dataset:
        state = AgentObservedState(instruction=data_point.instruction,
                                   config=self.config,
                                   constants=self.constants,
                                   start_image=None,
                                   previous_action=None)
        prob_landmark, prob_theta_1, prob_theta_2, prob_r = \
            self.model.get_symbolic_text_batch([state])

        # The model outputs are exponentiated before the argmax is taken.
        prob_landmark_float = list(torch.exp(prob_landmark.data)[0])
        prob_theta_1_float = list(torch.exp(prob_theta_1.data)[0])
        prob_theta_2_float = list(torch.exp(prob_theta_2.data)[0])
        prob_r_float = list(torch.exp(prob_r.data)[0])

        landmark = gp.get_argmax_action(prob_landmark_float)
        theta_1 = gp.get_argmax_action(prob_theta_1_float)
        theta_2 = gp.get_argmax_action(prob_theta_2_float)
        r = gp.get_argmax_action(prob_r_float)

        gold_landmark, gold_theta_1, gold_theta_2, gold_r = \
            nav_drone_symbolic_instructions.get_nav_drone_symbolic_instruction_segment(data_point)

        plaintext_sentence = self.get_sentence(data_point.instruction)

        if gold_landmark == landmark:
            landmark_accuracy += 1

        if gold_theta_1 == theta_1:
            theta_1_accuracy += 1

        if gold_theta_2 == theta_2:
            theta_2_accuracy += 1
        elif direction(theta_2) != direction(gold_theta_2):
            # Single-argument print(...) behaves the same on Python 2 and 3.
            print("SENTENCE IS: " + plaintext_sentence)
            print("INCORRECT THETA: " + direction(theta_2) + "(" + str(theta_2) + ")"
                  + " instead of " + direction(gold_theta_2) + "(" + str(gold_theta_2) + ")")

        theta_1_regression_accuracy += circular_gap(gold_theta_1, theta_1)
        theta_2_regression_accuracy += circular_gap(gold_theta_2, theta_2)

        if gold_r == r:
            r_accuracy += 1

        if gold_landmark == landmark and gold_theta_1 == theta_1 \
                and gold_theta_2 == theta_2 and gold_r == r:
            accuracy += 1

        # update confusion matrix
        cmatrix_landmark[gold_landmark][landmark] += 1
        cmatrix_theta1[gold_theta_1][theta_1] += 1
        cmatrix_theta2[gold_theta_2][theta_2] += 1
        cmatrix_range[gold_r][r] += 1

    dataset_size = len(test_dataset)
    # Clamp to 1 so an empty dataset yields zeros instead of raising.
    denominator = float(max(1, dataset_size))

    landmark_accuracy = (landmark_accuracy * 100) / denominator
    theta_1_accuracy = (theta_1_accuracy * 100) / denominator
    theta_2_accuracy = (theta_2_accuracy * 100) / denominator
    theta_1_regression_accuracy = (BUCKET_WIDTH * theta_1_regression_accuracy) / denominator
    theta_2_regression_accuracy = (BUCKET_WIDTH * theta_2_regression_accuracy) / denominator
    r_accuracy = (r_accuracy * 100) / denominator
    accuracy = (accuracy * 100) / denominator

    logging.info(
        "Test accuracy on dataset of size %r is landmark %r, theta1 %r %r angle, theta2 %r %r angle, "
        "r %r, total acc %r",
        dataset_size, landmark_accuracy, theta_1_accuracy, theta_1_regression_accuracy,
        theta_2_accuracy, theta_2_regression_accuracy, r_accuracy, accuracy)
def test_classifier(self, agent, test_dataset, test_images): accuracy = 0 landmark_accuracy = 0 landmark_bucket_accuracy = 0 theta_1_accuracy = 0 theta_2_accuracy = 0 theta_1_regression_accuracy = 0 theta_2_regression_accuracy = 0 r_accuracy = 0 cmatrix_landmark = np.zeros((67, 67)) cmatrix_theta1 = np.zeros((self.num_buckets, self.num_buckets)) cmatrix_theta2 = np.zeros((self.num_buckets, self.num_buckets)) cmatrix_range = np.zeros((15, 15)) for data_point_ix, data_point in enumerate(test_dataset): state = AgentObservedState(instruction=data_point.instruction, config=self.config, constants=self.constants, data_point=data_point, start_image=test_images[data_point_ix], previous_action=None) prob_landmark, prob_theta_1, prob_theta_2, prob_r = self.model.get_symbolic_text_batch([state]) prob_landmark_float = list(torch.exp(prob_landmark.data)[0]) prob_theta_1_float = list(torch.exp(prob_theta_1.data)[0]) prob_theta_2_float = list(torch.exp(prob_theta_2.data)[0]) prob_r_float = list(torch.exp(prob_r.data)[0]) ################################################### # Heuristic code for finding argmax over landmark but only from visible set landmark_pos_dict = state.get_landmark_pos_dict() visible_objects = self.get_existing_landmarks(landmark_pos_dict) max_score = 0 max_scoring_visible_object = -1 for i in visible_objects: if prob_landmark_float[i] > max_score: max_score = prob_landmark_float[i] max_scoring_visible_object = i landmark = max_scoring_visible_object assert landmark != -1 ################################################### # landmark = gp.get_argmax_action(prob_landmark_float) theta_1 = gp.get_argmax_action(prob_theta_1_float) theta_2 = gp.get_argmax_action(prob_theta_2_float) r = gp.get_argmax_action(prob_r_float) gold_landmark, gold_theta_1, gold_theta_2, gold_r = \ nav_drone_symbolic_instructions.get_nav_drone_symbolic_instruction_segment(data_point) plaintext_sentence = self.get_sentence(data_point.instruction) x_pos, z_pos, y_angle = 
data_point.get_start_pos() landmark_r_theta_dict = self.get_all_landmark_r_theta(x_pos, z_pos, y_angle, landmark_pos_dict) if landmark_r_theta_dict[LANDMARK_NAMES[landmark]][1] == landmark_r_theta_dict[LANDMARK_NAMES[gold_landmark]][1]: landmark_bucket_accuracy += 1 if gold_landmark == landmark: landmark_accuracy += 1 else: logging.info("Sentence is %r, predicts landmark %r instead of %r", plaintext_sentence, LANDMARK_NAMES[landmark], LANDMARK_NAMES[gold_landmark]) if gold_theta_1 == theta_1: theta_1_accuracy += 1 if gold_theta_2 == theta_2: theta_2_accuracy += 1 theta_1_regression_accuracy += min((gold_theta_1 - theta_1) % self.num_buckets, self.num_buckets - (gold_theta_1 - theta_1) % self.num_buckets) theta_2_regression_accuracy += min((gold_theta_2 - theta_2) % self.num_buckets, self.num_buckets - (gold_theta_2 - theta_2) % self.num_buckets) if gold_r == r: r_accuracy += 1 if gold_landmark == landmark and gold_theta_1 == theta_1 and gold_theta_2 == theta_2 and gold_r == r: accuracy += 1 # update confusion matrix cmatrix_landmark[gold_landmark][landmark] += 1 cmatrix_theta1[gold_theta_1][theta_1] += 1 cmatrix_theta2[gold_theta_2][theta_2] += 1 cmatrix_range[gold_r][r] += 1 dataset_size = len(test_dataset) landmark_accuracy = (landmark_accuracy * 100) / float(max(1, dataset_size)) landmark_bucket_accuracy = (landmark_bucket_accuracy * 100) / float(max(1, dataset_size)) theta_1_accuracy = (theta_1_accuracy * 100) / float(max(1, dataset_size)) theta_2_accuracy = (theta_2_accuracy * 100) / float(max(1, dataset_size)) theta_1_regression_accuracy = (self.discretize * theta_1_regression_accuracy) / float(max(1, dataset_size)) theta_2_regression_accuracy = (self.discretize * theta_2_regression_accuracy) / float(max(1, dataset_size)) r_accuracy = (r_accuracy * 100) / float(max(1, dataset_size)) accuracy = (accuracy * 100) / float(max(1, dataset_size)) logging.info( "Test accuracy on dataset of size %r is %r percentage", accuracy) logging.info("Landmark accuracy is %r, 
landmark bucket accuracy is %r", landmark_accuracy, landmark_bucket_accuracy) logging.info("Theta 1 accuracy is %r and regression accuracy is %r degree", theta_1_accuracy, theta_1_regression_accuracy) logging.info("Theta 2 accuracy is %r and regression accuracy is %r degree", theta_2_accuracy, theta_2_regression_accuracy) logging.info("Distance accuracy is %r", r_accuracy)
def do_train(self, train_dataset, train_images, tune_dataset, tune_images, experiment_name):
    """ Perform training

    Runs self.max_epoch epochs. Each epoch first evaluates on the tuning split
    through self.calc_log_prob. It then replays every training example's
    stored trajectory, creating one ReplayMemoryItem per action plus a final
    one for the stop action. After each episode it performs a model update and
    logs the loss terms to tensorboard. Finally it saves the model under
    experiment_name with the epoch number as suffix.

    :param train_dataset: sequence of training data points
    :param train_images: per-example image sequences, indexed in parallel with
        train_dataset; entry 0 is used as the start image and entry k + 1 as
        the image following the k-th action
    :param tune_dataset: forwarded to self.calc_log_prob
    :param tune_images: forwarded to self.calc_log_prob
    :param experiment_name: directory prefix used for the saved models
    """
    num_examples = len(train_dataset)

    for epoch in range(1, self.max_epoch + 1):
        logging.info("Starting epoch %d", epoch)

        # Test on tuning data
        self.calc_log_prob(tune_dataset, tune_images, tensorboard=self.tensorboard)

        replay_batch = []
        episodes_pending = 0

        for example_ix, example in enumerate(train_dataset):
            if (example_ix + 1) % 100 == 0:
                logging.info("Done %d out of %d", example_ix, num_examples)

            example_images = train_images[example_ix]
            first_image = example_images[0]
            symbolic_form = \
                nav_drone_symbolic_instructions.get_nav_drone_symbolic_instruction_segment(example)

            model_state = None
            state = AgentObservedState(instruction=example.instruction,
                                       config=self.config,
                                       constants=self.constants,
                                       start_image=first_image,
                                       previous_action=None,
                                       pose=None,
                                       position_orientation=None,
                                       data_point=example)

            # Follow the stored trajectory: score the current state, record
            # the step, then advance the state with the next stored image.
            for step_ix, action in enumerate(example.get_trajectory()):
                log_probabilities, model_state, image_emb_seq = \
                    self.model.get_probs(state, model_state)
                next_image = example_images[step_ix + 1]

                step_item = ReplayMemoryItem(state,
                                             action,
                                             0,
                                             log_prob=log_probabilities,
                                             symbolic_text=symbolic_form,
                                             image_emb_seq=image_emb_seq,
                                             text_emb=model_state[0])
                replay_batch.append(step_item)

                state = state.update(next_image,
                                     action,
                                     pose=None,
                                     position_orientation=None,
                                     data_point=example)

            # The episode is closed with a replay item for the stop action.
            log_probabilities, model_state, image_emb_seq = \
                self.model.get_probs(state, model_state)
            stop_item = ReplayMemoryItem(state,
                                         self.action_space.get_stop_action_index(),
                                         0,
                                         log_prob=log_probabilities,
                                         symbolic_text=symbolic_form,
                                         image_emb_seq=image_emb_seq,
                                         text_emb=model_state[0])
            replay_batch.append(stop_item)

            # Perform update once a full batch (one episode) has been collected
            episodes_pending += 1
            if episodes_pending != 1:
                continue
            episodes_pending = 0

            loss_val = self.do_update(replay_batch)
            del replay_batch[:]  # in place list clear

            self.tensorboard.log_scalar("loss", loss_val)
            cross_entropy_value = float(self.cross_entropy.data[0])
            self.tensorboard.log_scalar("cross_entropy", cross_entropy_value)
            entropy_value = float(self.entropy.data[0])
            self.tensorboard.log_scalar("entropy", entropy_value)

            # Auxiliary losses are logged only when they are set.
            if self.action_prediction_loss is not None:
                action_loss_value = float(self.action_prediction_loss.data[0])
                self.tensorboard.log_action_prediction_loss(action_loss_value)
            if self.temporal_autoencoder_loss is not None:
                autoencoder_loss_value = float(self.temporal_autoencoder_loss.data[0])
                self.tensorboard.log_temporal_autoencoder_loss(autoencoder_loss_value)
            if self.object_detection_loss is not None:
                detection_loss_value = float(self.object_detection_loss.data[0])
                self.tensorboard.log_object_detection_loss(detection_loss_value)
            if self.symbolic_language_prediction_loss is not None:
                symbolic_loss_value = float(self.symbolic_language_prediction_loss.data[0])
                self.tensorboard.log_scalar("sym_language_prediction_loss", symbolic_loss_value)
            if self.mean_factor_entropy is not None:
                factor_entropy_value = float(self.mean_factor_entropy.data[0])
                self.tensorboard.log_factor_entropy_loss(factor_entropy_value)

        # Save the model
        checkpoint_path = experiment_name + "/contextual_bandit_resnet_epoch_" + str(epoch)
        self.model.save_model(checkpoint_path)