def dump_data(self, train_dataset, test_dataset):

        landmark_distribution = [0] * 63
        theta_1_distribution = [0] * NO_BUCKETS
        theta_2_distribution = [0] * NO_BUCKETS
        r_distribution = [0] * 15

        for data_point in train_dataset:
            symbolic_form = nav_drone_symbolic_instructions.get_nav_drone_symbolic_instruction_segment(
                data_point)
            landmark, theta_1, theta_2, r = symbolic_form
            landmark_distribution[landmark] += 1
            theta_1_distribution[theta_1] += 1
            theta_2_distribution[theta_2] += 1
            r_distribution[r] += 1
            landmark_string = nav_drone_symbolic_instructions.LANDMARK_NAMES[
                landmark]
            instruction = data_point.get_instruction()
            logging.info("Instruction %r, Symbolic form %r %r %r %r",
                         debug.instruction_to_string(instruction, self.config),
                         landmark_string, theta_1, theta_2, r)

        logging.info("Landmark Distribution %r", landmark_distribution)
        logging.info("Theta 1 Distribution %r", theta_1_distribution)
        logging.info("Theta 2 Distribution %r", theta_2_distribution)
        logging.info("R Distribution %r", r_distribution)
    def predict_angle_from_resnet(self, test_dataset, test_images):

        angle_accuracy = 0
        for data_point_ix, data_point in enumerate(test_dataset):

            gold_landmark, gold_theta_1, gold_theta_2, gold_r = \
                nav_drone_symbolic_instructions.get_nav_drone_symbolic_instruction_segment(data_point)

            # Compute probabilities over list of visible objects
            log_prob_landmark, log_prob_distance, log_prob_theta = self.resnet_detection_model.get_probs(
                [[test_images[data_point_ix]]])

            prob_landmark = list(torch.exp(log_prob_landmark.data)[0])
            prob_distance = list(torch.exp(log_prob_distance.data)[0])
            prob_theta = list(torch.exp(log_prob_theta.data)[0])

            # Find the angle of the gold landmark and compare
            if gold_landmark < 63 and gold_theta_1 < 4.0:
                print "GOLD LANDMARK is " + str(gold_landmark)
                argmax_theta_val = gp.get_argmax_action(prob_theta[gold_landmark])
                print "ARGMAX THETA VAL " + str(argmax_theta_val) + " and " + str(gold_theta_1)
                if argmax_theta_val == gold_theta_1:
                    angle_accuracy += 1

        angle_accuracy = (angle_accuracy * 100.0) / float(max(1, len(test_dataset)))
        logging.info("Angle accuracy of gold landmark is %r", angle_accuracy)
    def test1(self, test_dataset, test_real_dataset):

        num_data_points = 0
        num_used_landmarks = 0
        theta_accuracy = 0
        neighbouring_accuracy = 0
        symbolic_landmark_accuracy = 0

        for data_point_ix, data_point in enumerate(test_dataset):

            image, visible_objects = data_point

            # Compute probabilities over list of visible objects
            log_prob_theta = self.model.get_probs([[image]])
            prob_theta = list(torch.exp(log_prob_theta.data)[0])

            data_point_real = test_real_dataset[data_point_ix]
            gold_landmark, gold_theta_1, gold_theta_2, gold_r = \
                nav_drone_symbolic_instructions.get_nav_drone_symbolic_instruction_segment(data_point_real)
            gold_theta_1_corrected = (gold_theta_1 + 6) % 12
            if gold_theta_1_corrected == gp.get_argmax_action(
                    prob_theta[gold_landmark]):
                symbolic_landmark_accuracy += 1

            for i in range(0, self.num_landmark):
                if i in visible_objects:
                    # compare the predicted angle against the gold angle for this landmark
                    argmax_theta_val = gp.get_argmax_action(prob_theta[i])
                    gold_angle = visible_objects[i][1]
                    if argmax_theta_val == gold_angle:
                        theta_accuracy += 1
                    angle_diff = min((argmax_theta_val - gold_angle) % 12,
                                     (gold_angle - argmax_theta_val) % 12)
                    if angle_diff <= 1:
                        neighbouring_accuracy += 1
                    num_used_landmarks += 1

            num_data_points += 1

        theta_accuracy /= float(max(num_used_landmarks, 1))
        neighbouring_accuracy /= float(max(num_used_landmarks, 1))
        symbolic_landmark_accuracy /= float(max(len(test_real_dataset), 1))

        logging.info(
            "Num datapoints %r, num visible landmarks %r and mean theta accuracy %r and neigbhouring accuracy %r",
            num_data_points, num_used_landmarks, theta_accuracy,
            neighbouring_accuracy)
        logging.info("Accuracy for landmark mentioned in the text is %r",
                     symbolic_landmark_accuracy)
        return theta_accuracy
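
    # NOTE: a hedged sketch, not part of the original code. The angle_diff
    # computation above is the circular distance between two bucket indices
    # on a 12-bucket ring; factored out as a reusable helper:
    def circular_bucket_distance(a, b, num_buckets=12):
        """Shortest distance between two bucket indices on a circular scale."""
        return min((a - b) % num_buckets, (b - a) % num_buckets)

    # e.g. circular_bucket_distance(11, 0) == 1 and circular_bucket_distance(3, 9) == 6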
    def do_train(self, agent, train_dataset, test_dataset, train_images, test_images, experiment_name):
        """ Perform training """

        dataset_size = len(train_dataset)
        clock = 0
        clock_max = 1

        for epoch in range(1, self.max_epoch + 1):

            logging.info("Starting epoch %d", epoch)
            self.test_classifier(agent, test_dataset, test_images)

            for data_point_ix, data_point in enumerate(train_dataset):

                if (data_point_ix + 1) % 100 == 0:
                    logging.info("Done %d out of %d", data_point_ix, dataset_size)

                batch_replay_items = []

                state = AgentObservedState(instruction=data_point.instruction,
                                           config=self.config,
                                           constants=self.constants,
                                           start_image=train_images[data_point_ix],
                                           previous_action=None,
                                           data_point=data_point)

                # Store it in the replay memory list
                symbolic_form = nav_drone_symbolic_instructions.get_nav_drone_symbolic_instruction_segment(data_point)
                replay_item = SymbolicTextReplayMemoryItem(state, symbolic_form)
                batch_replay_items.append(replay_item)

                # Add the items to the global replay memory
                for replay_item in batch_replay_items:
                    self.global_replay_memory.append(replay_item)

                clock += 1
                if clock % clock_max == 0:
                    batch_replay_items = self.sample_from_global_memory()
                    self.global_replay_memory.clear()
                    clock = 0
                    # Perform update
                    loss_val = self.do_update(batch_replay_items)
                    self.tensorboard.log_loglikelihood_position(loss_val)

            # Save the model
            self.model.save_model(experiment_name + "/ml_learning_symbolic_text_prediction_epoch_" + str(epoch))
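
    # NOTE: self.sample_from_global_memory is not shown in this listing. A
    # minimal sketch, assuming it draws a uniform random batch from
    # self.global_replay_memory (the batch size of 32 is illustrative):
    def sample_from_global_memory(self, batch_size=32):
        """Sample a batch of replay items uniformly at random."""
        import random
        size = min(batch_size, len(self.global_replay_memory))
        return random.sample(self.global_replay_memory, size)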
    def try_to_progress(self):

        # If in state (1) or (2) then return immediately
        if self.status == Client.WAITING_FOR_EXAMPLE or self.status == Client.WAITING_FOR_ACTION:
            return self.status

        assert self.status == Client.WAITING_TO_RECEIVE

        # If in state (3) then see if the message is available. If the message
        # is available then return to waiting for an action or a new example.
        if self.state is None:
            feedback = self.server.receive_reset_feedback_nonblocking()
        else:
            feedback = self.server.receive_feedback_nonblocking()

        if feedback is None:
            return self.status
        else:
            if self.state is None:
                # assert False, "state should not be none"
                # Feedback is in response to reset
                image, metadata = feedback

                pose = int(metadata["y_angle"] / 15.0)
                position_orientation = (metadata["x_pos"], metadata["z_pos"],
                                        metadata["y_angle"])
                self.state = AgentObservedState(
                    instruction=self.current_data_point.instruction,
                    config=self.config,
                    constants=self.constants,
                    start_image=image,
                    previous_action=None,
                    pose=pose,
                    position_orientation=position_orientation,
                    data_point=self.current_data_point)

                # Waiting for action
                self.status = Client.WAITING_FOR_ACTION
            else:
                # Feedback is in response to an action
                image, reward, metadata = feedback
                self.total_reward += reward

                # Create a replay item unless the stop was forced
                if not self.forced_stop:
                    symbolic_text = nav_drone_symbolic_instructions.get_nav_drone_symbolic_instruction_segment(
                        self.current_data_point)
                    replay_item = ReplayMemoryItem(
                        self.state,
                        self.last_expert_action,
                        reward,
                        log_prob=self.last_log_prob,
                        image_emb_seq=self.image_emb_seq,
                        factor_entropy=self.factor_entropy,
                        text_emb=self.model_state[0],
                        symbolic_text=symbolic_text)
                    self.local_batch_replay_items.append(replay_item)

                # Update the agent state
                pose = int(metadata["y_angle"] / 15.0)
                position_orientation = (metadata["x_pos"], metadata["z_pos"],
                                        metadata["y_angle"])
                self.state = self.state.update(
                    image,
                    self.last_action,
                    pose=pose,
                    position_orientation=position_orientation,
                    data_point=self.current_data_point)

                if self.last_action == self.agent.action_space.get_stop_action_index():
                    # Update the scores based on meta_data
                    # self.meta_data_util.log_results(metadata)

                    self.__flush_to_global_batch()

                    if self.tensorboard is not None:
                        self.tensorboard.log_all_train_errors(
                            metadata["edit_dist_error"],
                            metadata["closest_dist_error"],
                            metadata["stop_dist_error"])
                    self.status = Client.WAITING_FOR_EXAMPLE
                else:

                    if self.num_action >= self.max_num_actions:
                        # Send forced stop action and wait to receive
                        self._take_forced_stop()
                        self.status = Client.WAITING_TO_RECEIVE
                    else:
                        # Wait to take another action
                        self.status = Client.WAITING_FOR_ACTION

            self.metadata = metadata
            return self.status
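
    # NOTE: the Client status constants used above are defined elsewhere; a
    # minimal sketch of the three-state machine they encode (the class name
    # ClientStatus is hypothetical) might be:
    class ClientStatus:
        WAITING_FOR_EXAMPLE = 0  # ready to be given a new data point
        WAITING_FOR_ACTION = 1   # environment observed, agent must act next
        WAITING_TO_RECEIVE = 2   # action or reset sent, awaiting feedback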
    def test_classifier(self, agent, test_dataset):

        accuracy = 0
        landmark_accuracy = 0
        theta_1_accuracy = 0
        theta_2_accuracy = 0
        theta_1_regression_accuracy = 0
        theta_2_regression_accuracy = 0
        r_accuracy = 0
        cmatrix_landmark = np.zeros((67, 67))
        cmatrix_theta1 = np.zeros((NO_BUCKETS, NO_BUCKETS))
        cmatrix_theta2 = np.zeros((NO_BUCKETS, NO_BUCKETS))
        cmatrix_range = np.zeros((15, 15))

        for data_point_ix, data_point in enumerate(test_dataset):
            state = AgentObservedState(instruction=data_point.instruction,
                                       config=self.config,
                                       constants=self.constants,
                                       start_image=None,
                                       previous_action=None)

            prob_landmark, prob_theta_1, prob_theta_2, prob_r = self.model.get_symbolic_text_batch(
                [state])
            prob_landmark_float = list(torch.exp(prob_landmark.data)[0])
            prob_theta_1_float = list(torch.exp(prob_theta_1.data)[0])
            prob_theta_2_float = list(torch.exp(prob_theta_2.data)[0])
            prob_r_float = list(torch.exp(prob_r.data)[0])

            landmark = gp.get_argmax_action(prob_landmark_float)
            theta_1 = gp.get_argmax_action(prob_theta_1_float)
            theta_2 = gp.get_argmax_action(prob_theta_2_float)
            r = gp.get_argmax_action(prob_r_float)

            gold_landmark, gold_theta_1, gold_theta_2, gold_r = \
                nav_drone_symbolic_instructions.get_nav_drone_symbolic_instruction_segment(data_point)

            plaintext_sentence = self.get_sentence(data_point.instruction)
            sentence_printed = False

            def direction(angle_binned):
                # Map a binned angle to a coarse direction label.
                if 42 <= angle_binned or 0 <= angle_binned < 6:
                    return "BEHIND OF"
                if 6 <= angle_binned < 18:
                    return "RIGHT OF"
                if 18 <= angle_binned < 30:
                    return "FRONT OF"
                if 30 <= angle_binned < 42:
                    return "LEFT OF"
                return "FAIL"

            if gold_landmark == landmark:
                landmark_accuracy += 1
            # else:
            #     if not sentence_printed:
            #         print("SENTENCE IS: " + plaintext_sentence)
            #         sentence_printed = True
            #     print("INCORRECT LANDMARK: " + LANDMARK_NAMES[landmark] +
            #           " instead of " + LANDMARK_NAMES[gold_landmark])

            if gold_theta_1 == theta_1:
                theta_1_accuracy += 1

            if gold_theta_2 == theta_2:
                theta_2_accuracy += 1
            elif direction(theta_2) != direction(gold_theta_2):
                if not sentence_printed:
                    print "SENTENCE IS: " + plaintext_sentence
                    sentence_printed = True
                print "INCORRECT THETA: " + direction(theta_2) + "(" + str(
                    theta_2) + ")" + " instead of " + direction(
                        gold_theta_2) + "(" + str(gold_theta_2) + ")"

            theta_1_regression_accuracy += min(
                (gold_theta_1 - theta_1) % NO_BUCKETS,
                NO_BUCKETS - (gold_theta_1 - theta_1) % NO_BUCKETS)
            theta_2_regression_accuracy += min(
                (gold_theta_2 - theta_2) % NO_BUCKETS,
                NO_BUCKETS - (gold_theta_2 - theta_2) % NO_BUCKETS)

            if gold_r == r:
                r_accuracy += 1

            if gold_landmark == landmark and gold_theta_1 == theta_1 and gold_theta_2 == theta_2 and gold_r == r:
                accuracy += 1

            # update confusion matrix
            cmatrix_landmark[gold_landmark][landmark] += 1
            cmatrix_theta1[gold_theta_1][theta_1] += 1
            cmatrix_theta2[gold_theta_2][theta_2] += 1
            cmatrix_range[gold_r][r] += 1

        dataset_size = len(test_dataset)
        landmark_accuracy = (landmark_accuracy * 100) / float(
            max(1, dataset_size))
        theta_1_accuracy = (theta_1_accuracy * 100) / float(
            max(1, dataset_size))
        theta_2_accuracy = (theta_2_accuracy * 100) / float(
            max(1, dataset_size))
        theta_1_regression_accuracy = (BUCKET_WIDTH *
                                       theta_1_regression_accuracy) / float(
                                           max(1, dataset_size))
        theta_2_regression_accuracy = (BUCKET_WIDTH *
                                       theta_2_regression_accuracy) / float(
                                           max(1, dataset_size))
        r_accuracy = (r_accuracy * 100) / float(max(1, dataset_size))
        accuracy = (accuracy * 100) / float(max(1, dataset_size))

        logging.info(
            "Test accuracy on dataset of size %r is landmark %r, theta1 %r (mean error %r degrees), "
            "theta2 %r (mean error %r degrees), r %r, total acc %r",
            dataset_size, landmark_accuracy,
            theta_1_accuracy, theta_1_regression_accuracy, theta_2_accuracy,
            theta_2_regression_accuracy, r_accuracy, accuracy)
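
    # NOTE: a hedged usage sketch for the direction() helper above. Its
    # thresholds (6, 18, 30, 42, with wrap-around at 42 and above) imply 48
    # angular buckets, i.e. 7.5 degrees per bucket over a full circle:
    #
    #     direction(0)   ->  "BEHIND OF"   (directly behind)
    #     direction(12)  ->  "RIGHT OF"
    #     direction(24)  ->  "FRONT OF"
    #     direction(36)  ->  "LEFT OF"
    #     direction(45)  ->  "BEHIND OF"   (wraps around past bucket 42)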
    def test_classifier(self, agent, test_dataset, test_images):

        accuracy = 0
        landmark_accuracy = 0
        landmark_bucket_accuracy = 0
        theta_1_accuracy = 0
        theta_2_accuracy = 0
        theta_1_regression_accuracy = 0
        theta_2_regression_accuracy = 0
        r_accuracy = 0
        cmatrix_landmark = np.zeros((67, 67))
        cmatrix_theta1 = np.zeros((self.num_buckets, self.num_buckets))
        cmatrix_theta2 = np.zeros((self.num_buckets, self.num_buckets))
        cmatrix_range = np.zeros((15, 15))

        for data_point_ix, data_point in enumerate(test_dataset):
            state = AgentObservedState(instruction=data_point.instruction,
                                       config=self.config,
                                       constants=self.constants,
                                       data_point=data_point,
                                       start_image=test_images[data_point_ix],
                                       previous_action=None)

            prob_landmark, prob_theta_1, prob_theta_2, prob_r = self.model.get_symbolic_text_batch([state])
            prob_landmark_float = list(torch.exp(prob_landmark.data)[0])
            prob_theta_1_float = list(torch.exp(prob_theta_1.data)[0])
            prob_theta_2_float = list(torch.exp(prob_theta_2.data)[0])
            prob_r_float = list(torch.exp(prob_r.data)[0])

            ###################################################
            # Heuristic code for finding argmax over landmark but only from visible set
            landmark_pos_dict = state.get_landmark_pos_dict()
            visible_objects = self.get_existing_landmarks(landmark_pos_dict)

            max_score = 0
            max_scoring_visible_object = -1
            for i in visible_objects:
                if prob_landmark_float[i] > max_score:
                    max_score = prob_landmark_float[i]
                    max_scoring_visible_object = i
            landmark = max_scoring_visible_object
            assert landmark != -1
            ###################################################

            # landmark = gp.get_argmax_action(prob_landmark_float)
            theta_1 = gp.get_argmax_action(prob_theta_1_float)
            theta_2 = gp.get_argmax_action(prob_theta_2_float)
            r = gp.get_argmax_action(prob_r_float)

            gold_landmark, gold_theta_1, gold_theta_2, gold_r = \
                nav_drone_symbolic_instructions.get_nav_drone_symbolic_instruction_segment(data_point)

            plaintext_sentence = self.get_sentence(data_point.instruction)

            x_pos, z_pos, y_angle = data_point.get_start_pos()
            landmark_r_theta_dict = self.get_all_landmark_r_theta(x_pos, z_pos, y_angle, landmark_pos_dict)
            if landmark_r_theta_dict[LANDMARK_NAMES[landmark]][1] == \
                    landmark_r_theta_dict[LANDMARK_NAMES[gold_landmark]][1]:
                landmark_bucket_accuracy += 1

            if gold_landmark == landmark:
                landmark_accuracy += 1
            else:
                logging.info("Sentence is %r, predicts landmark %r instead of %r",
                             plaintext_sentence,
                             LANDMARK_NAMES[landmark],
                             LANDMARK_NAMES[gold_landmark])

            if gold_theta_1 == theta_1:
                theta_1_accuracy += 1

            if gold_theta_2 == theta_2:
                theta_2_accuracy += 1

            theta_1_regression_accuracy += min((gold_theta_1 - theta_1) % self.num_buckets,
                                               self.num_buckets - (gold_theta_1 - theta_1) % self.num_buckets)
            theta_2_regression_accuracy += min((gold_theta_2 - theta_2) % self.num_buckets,
                                               self.num_buckets - (gold_theta_2 - theta_2) % self.num_buckets)

            if gold_r == r:
                r_accuracy += 1

            if gold_landmark == landmark and gold_theta_1 == theta_1 and gold_theta_2 == theta_2 and gold_r == r:
                accuracy += 1

            # update confusion matrix
            cmatrix_landmark[gold_landmark][landmark] += 1
            cmatrix_theta1[gold_theta_1][theta_1] += 1
            cmatrix_theta2[gold_theta_2][theta_2] += 1
            cmatrix_range[gold_r][r] += 1

        dataset_size = len(test_dataset)
        landmark_accuracy = (landmark_accuracy * 100) / float(max(1, dataset_size))
        landmark_bucket_accuracy = (landmark_bucket_accuracy * 100) / float(max(1, dataset_size))
        theta_1_accuracy = (theta_1_accuracy * 100) / float(max(1, dataset_size))
        theta_2_accuracy = (theta_2_accuracy * 100) / float(max(1, dataset_size))
        theta_1_regression_accuracy = (self.discretize * theta_1_regression_accuracy) / float(max(1, dataset_size))
        theta_2_regression_accuracy = (self.discretize * theta_2_regression_accuracy) / float(max(1, dataset_size))
        r_accuracy = (r_accuracy * 100) / float(max(1, dataset_size))
        accuracy = (accuracy * 100) / float(max(1, dataset_size))

        logging.info(
            "Test accuracy on dataset of size %r is %r percentage",
            dataset_size, accuracy)
        logging.info("Landmark accuracy is %r, landmark bucket accuracy is %r",
                     landmark_accuracy, landmark_bucket_accuracy)
        logging.info("Theta 1 accuracy is %r and regression accuracy is %r degrees",
                     theta_1_accuracy, theta_1_regression_accuracy)
        logging.info("Theta 2 accuracy is %r and regression accuracy is %r degrees",
                     theta_2_accuracy, theta_2_regression_accuracy)
        logging.info("Distance accuracy is %r", r_accuracy)
    def do_train(self, train_dataset, train_images, tune_dataset, tune_images,
                 experiment_name):
        """ Perform training """

        dataset_size = len(train_dataset)

        for epoch in range(1, self.max_epoch + 1):

            logging.info("Starting epoch %d", epoch)

            # Test on tuning data
            self.calc_log_prob(tune_dataset,
                               tune_images,
                               tensorboard=self.tensorboard)

            batch_replay_items = []
            episodes_in_batch = 0

            for data_point_ix, data_point in enumerate(train_dataset):

                if (data_point_ix + 1) % 100 == 0:
                    logging.info("Done %d out of %d", data_point_ix,
                                 dataset_size)

                train_images_example = train_images[data_point_ix]
                image = train_images_example[0]
                symbolic_form = nav_drone_symbolic_instructions.get_nav_drone_symbolic_instruction_segment(
                    data_point)

                model_state = None
                state = AgentObservedState(instruction=data_point.instruction,
                                           config=self.config,
                                           constants=self.constants,
                                           start_image=image,
                                           previous_action=None,
                                           pose=None,
                                           position_orientation=None,
                                           data_point=data_point)

                trajectory = data_point.get_trajectory()
                for action_ix, action in enumerate(trajectory):

                    # Sample action using the policy
                    # Generate probabilities over actions
                    log_probabilities, model_state, image_emb_seq = self.model.get_probs(
                        state, model_state)

                    # Send the action and get feedback
                    image = train_images_example[action_ix + 1]

                    # Store it in the replay memory list
                    replay_item = ReplayMemoryItem(state,
                                                   action,
                                                   0,
                                                   log_prob=log_probabilities,
                                                   symbolic_text=symbolic_form,
                                                   image_emb_seq=image_emb_seq,
                                                   text_emb=model_state[0])
                    batch_replay_items.append(replay_item)

                    # Update the agent state
                    state = state.update(image,
                                         action,
                                         pose=None,
                                         position_orientation=None,
                                         data_point=data_point)

                log_probabilities, model_state, image_emb_seq = self.model.get_probs(
                    state, model_state)

                # Store it in the replay memory list
                replay_item = ReplayMemoryItem(
                    state,
                    self.action_space.get_stop_action_index(),
                    0,
                    log_prob=log_probabilities,
                    symbolic_text=symbolic_form,
                    image_emb_seq=image_emb_seq,
                    text_emb=model_state[0])
                batch_replay_items.append(replay_item)

                # Perform update
                episodes_in_batch += 1
                if episodes_in_batch == 1:
                    episodes_in_batch = 0
                    loss_val = self.do_update(batch_replay_items)
                    del batch_replay_items[:]  # in place list clear
                    self.tensorboard.log_scalar("loss", loss_val)
                    cross_entropy = float(self.cross_entropy.data[0])
                    self.tensorboard.log_scalar("cross_entropy", cross_entropy)
                    entropy = float(self.entropy.data[0])
                    self.tensorboard.log_scalar("entropy", entropy)
                    if self.action_prediction_loss is not None:
                        action_prediction_loss = float(
                            self.action_prediction_loss.data[0])
                        self.tensorboard.log_action_prediction_loss(
                            action_prediction_loss)
                    if self.temporal_autoencoder_loss is not None:
                        temporal_autoencoder_loss = float(
                            self.temporal_autoencoder_loss.data[0])
                        self.tensorboard.log_temporal_autoencoder_loss(
                            temporal_autoencoder_loss)
                    if self.object_detection_loss is not None:
                        object_detection_loss = float(
                            self.object_detection_loss.data[0])
                        self.tensorboard.log_object_detection_loss(
                            object_detection_loss)
                    if self.symbolic_language_prediction_loss is not None:
                        symbolic_language_prediction_loss = float(
                            self.symbolic_language_prediction_loss.data[0])
                        self.tensorboard.log_scalar(
                            "sym_language_prediction_loss",
                            symbolic_language_prediction_loss)
                    if self.mean_factor_entropy is not None:
                        mean_factor_entropy = float(
                            self.mean_factor_entropy.data[0])
                        self.tensorboard.log_factor_entropy_loss(
                            mean_factor_entropy)

            # Save the model
            self.model.save_model(experiment_name +
                                  "/contextual_bandit_resnet_epoch_" +
                                  str(epoch))
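
    # NOTE: self.do_update is not shown in this listing. A minimal sketch of
    # a maximum-likelihood update consistent with how it is called above is
    # given below; the accessors and self.optimizer are assumptions, not the
    # original implementation:
    def do_update(self, batch_replay_items):
        """One gradient step on the mean negative log-likelihood of the taken actions."""
        losses = []
        for item in batch_replay_items:
            action = item.get_action()      # assumed accessor
            log_prob = item.get_log_prob()  # assumed accessor, shape (1, num_actions)
            losses.append(-log_prob[0, action])
        loss = sum(losses) / float(len(losses))
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()
        return float(loss.data[0])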