    def get_3d_location(self, exploration_image, data_point, panaroma=True):
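        """ Predict the goal location in the world from an exploration image.

        Runs the goal-prediction model's attention over the image, takes the
        argmax attention cell as the predicted screen position, and inverts
        the camera projection to estimate the goal's real-world (x, z)
        position. Returns the predicted goal position, its distance to the
        true goal, the screen position, and the attention probabilities. """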

        state = AgentObservedState(
            instruction=data_point.instruction,
            config=self.config,
            constants=self.constants,
            start_image=exploration_image,
            previous_action=None,
            pose=None,
            position_orientation=data_point.get_start_pos(),
            data_point=data_point)

        volatile = self.predictor_model.get_attention_prob(state,
                                                           model_state=None)
        inferred_ix = int(
            torch.max(volatile["attention_logits"],
                      0)[1].data.cpu().numpy()[0])
        # Known issue (pointed out by Max): when inferred_ix is the last
        # index, the calculations below are buggy.

        # The attention map is over a 32 x 192 panorama grid (6 images of
        # width 32 stitched horizontally).
        predicted_row = inferred_ix // 192
        predicted_col = inferred_ix % 192
        screen_pos = (predicted_row, predicted_col)

        if panaroma:
            # Index of the panorama camera image (out of 6) containing the goal
            region_index = predicted_col // 32
            # Column within that image where the goal is
            predicted_col = predicted_col % 32
            pos = data_point.get_start_pos()
            new_pos_angle = GoalPredictionSingle360ImageSupervisedLearningFromDisk.\
                get_new_pos_angle_from_region_index(region_index, pos)
            metadata = {
                "x_pos": pos[0],
                "z_pos": pos[1],
                "y_angle": new_pos_angle
            }
        else:
            pos = data_point.get_start_pos()
            metadata = {"x_pos": pos[0], "z_pos": pos[1], "y_angle": pos[2]}

        row, col = predicted_row + 0.5, predicted_col + 0.5

        start_pos = current_pos_from_metadata(metadata)
        start_pose = current_pose_from_metadata(metadata)

        goal_pos = data_point.get_destination_list()[-1]
        height_drone = 2.5
        x_gen, z_gen = get_inverse_object_position(
            row, col, height_drone, 30, 32, 32,
            (start_pos[0], start_pos[1], start_pose))
        predicted_goal_pos = (x_gen, z_gen)
        x_goal, z_goal = goal_pos

        x_diff = x_gen - x_goal
        z_diff = z_gen - z_goal

        dist = math.sqrt(x_diff * x_diff + z_diff * z_diff)

        return predicted_goal_pos, dist, screen_pos, volatile[
            "attention_probs"]

    @staticmethod
    def parse(folder_name, dataset, model):
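        """ Read the single saved image (image_0.png) for each example in
        folder_name and compute the on-screen goal location for each
        datapoint, sanity-checking it against the inverse camera projection.
        Returns the image dataset and the goal dataset. """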

        start = time.time()

        num_channel, height, width = model.image_module.get_final_dimension()

        # Read images
        image_dataset = []
        num_examples = len(os.listdir(folder_name))
        for i in range(0, num_examples):
            example_folder_name = folder_name + "/example_" + str(i)
            # Read the image and convert from (H, W, C) to (C, H, W)
            img = scipy.misc.imread(example_folder_name +
                                    "/image_0.png").swapaxes(1, 2).swapaxes(0, 1)
            images = [img]
            image_dataset.append(images)

        assert len(image_dataset) == len(dataset)

        # Read the goal state. The data for the single image can be
        # directly computed and does not need to be saved.
        goal_dataset = []
        for i in range(0, num_examples):
            data_point = dataset[i]
            pos = data_point.get_start_pos()
            metadata = {"x_pos": pos[0], "z_pos": pos[1], "y_angle": pos[2]}

            goal_location = [
                GoalPrediction.get_goal_location(metadata, data_point, height,
                                                 width)
            ]
            _, _, row, col = goal_location[0]

            start_pos = current_pos_from_metadata(metadata)
            start_pose = current_pose_from_metadata(metadata)

            if row is not None and col is not None:
                goal_pos = data_point.get_destination_list()[-1]
                x_goal, z_goal = goal_pos
                height_drone = 2.5
                x_gen, z_gen = get_inverse_object_position(
                    row, col, height_drone, 30, height, width,
                    (start_pos[0], start_pos[1], start_pose))
                x_diff = x_gen - x_goal
                z_diff = z_gen - z_goal
                dist = math.sqrt(x_diff * x_diff + z_diff * z_diff)
                assert dist < 0.5, "forward computation of goal should match inverse computation"

            goal_dataset.append(goal_location)

        end = time.time()
        logging.info("Parsed dataset of size %r in time %r seconds",
                     len(image_dataset), end - start)
        return image_dataset, goal_dataset

    def compute_distance_in_real_world(self, inferred_ix, row_col, data_point, panaroma=True):
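        """ Convert a predicted attention index, or an explicit (row, col)
        position, into a real-world (x, z) goal estimate and return it
        together with its distance to the true goal. """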

        if row_col is None:
            predicted_row = inferred_ix // self.final_width
            predicted_col = inferred_ix % self.final_width
        else:
            predicted_row, predicted_col = row_col

        if panaroma:
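            # The panorama is a horizontal concatenation of 6 camera images,
            # each 32 columns wide.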
            region_index = predicted_col // 32
            predicted_col = predicted_col % 32
            pos = data_point.get_start_pos()
            new_pos_angle = GoalPredictionSingle360ImageSupervisedLearningFromDisk.\
                get_new_pos_angle_from_region_index(region_index, pos)
            metadata = {"x_pos": pos[0], "z_pos": pos[1], "y_angle": new_pos_angle}
        else:
            pos = data_point.get_start_pos()
            metadata = {"x_pos": pos[0], "z_pos": pos[1], "y_angle": pos[2]}

        if row_col is None:
            # Use the center of the predicted attention cell
            row, col = predicted_row + 0.5, predicted_col + 0.5
        else:
            row, col = predicted_row, predicted_col

        start_pos = current_pos_from_metadata(metadata)
        start_pose = current_pose_from_metadata(metadata)

        goal_pos = data_point.get_destination_list()[-1]
        height_drone = 2.5
        x_gen, z_gen = get_inverse_object_position(row, col, height_drone, 30, 32, 32,
                                                   (start_pos[0], start_pos[1], start_pose))
        x_goal, z_goal = goal_pos

        x_diff = x_gen - x_goal
        z_diff = z_gen - z_goal

        dist = math.sqrt(x_diff * x_diff + z_diff * z_diff)
        return (x_gen, z_gen), dist

    def compute_distance_in_real_world(self, inferred_ix, data_point):
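        """ Convert a predicted attention index into a real-world (x, z)
        goal estimate and return its distance to the true goal. """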

        predicted_row = inferred_ix // self.final_width
        predicted_col = inferred_ix % self.final_width

        row, col = predicted_row + 0.5, predicted_col + 0.5

        pos = data_point.get_start_pos()
        metadata = {"x_pos": pos[0], "z_pos": pos[1], "y_angle": pos[2]}
        start_pos = current_pos_from_metadata(metadata)
        start_pose = current_pose_from_metadata(metadata)

        goal_pos = data_point.get_destination_list()[-1]
        height_drone = 2.5
        x_gen, z_gen = get_inverse_object_position(
            row, col, height_drone, 30, self.final_height, self.final_width,
            (start_pos[0], start_pos[1], start_pose))
        x_goal, z_goal = goal_pos

        x_diff = x_gen - x_goal
        z_diff = z_gen - z_goal

        dist = math.sqrt(x_diff * x_diff + z_diff * z_diff)
        return dist

    def get_3d_location_for_paragraphs(self,
                                       exploration_image,
                                       instruction,
                                       start_pos,
                                       goal_pos,
                                       panaroma=True):
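        """ Same as get_3d_location, but takes the instruction, start
        position and goal position directly instead of a datapoint. """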

        state = AgentObservedState(instruction=instruction,
                                   config=self.config,
                                   constants=self.constants,
                                   start_image=exploration_image,
                                   previous_action=None,
                                   pose=None,
                                   position_orientation=start_pos,
                                   data_point=None)

        volatile = self.predictor_model.get_attention_prob(state,
                                                           model_state=None)
        inferred_ix = int(
            torch.max(volatile["attention_logits"],
                      0)[1].data.cpu().numpy()[0])

        ########################################
        # inst_string = instruction_to_string(instruction, self.config)
        # self.save_attention_prob(exploration_image, volatile["attention_probs"][:-1].view(32, 192), inst_string)
        ########################################

        # The attention map is over a 32 x 192 panorama grid (6 images of
        # width 32 stitched horizontally).
        predicted_row = inferred_ix // 192
        predicted_col = inferred_ix % 192

        if panaroma:
            # Index of the panorama camera image (out of 6) containing the goal
            region_index = predicted_col // 32
            # Column within that image where the goal is
            predicted_col = predicted_col % 32
            pos = start_pos
            new_pos_angle = GoalPredictionSingle360ImageSupervisedLearningFromDisk.\
                get_new_pos_angle_from_region_index(region_index, pos)
            metadata = {
                "x_pos": pos[0],
                "z_pos": pos[1],
                "y_angle": new_pos_angle
            }
        else:
            pos = start_pos
            metadata = {"x_pos": pos[0], "z_pos": pos[1], "y_angle": pos[2]}

        row, col = predicted_row + 0.5, predicted_col + 0.5

        start_pos = current_pos_from_metadata(metadata)
        start_pose = current_pose_from_metadata(metadata)

        height_drone = 2.5
        x_gen, z_gen = get_inverse_object_position(
            row, col, height_drone, 30, 32, 32,
            (start_pos[0], start_pos[1], start_pose))
        predicted_goal_pos = (x_gen, z_gen)
        x_goal, z_goal = goal_pos

        x_diff = x_gen - x_goal
        z_diff = z_gen - z_goal

        dist = math.sqrt(x_diff * x_diff + z_diff * z_diff)

        return predicted_goal_pos, dist

    @staticmethod
    def parse(folder_name, dataset):
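        """ Read all saved .png images for each example in folder_name and
        compute the on-screen goal location for each datapoint. Returns the
        image dataset and the goal dataset. """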

        start = time.time()

        image_dataset = []
        num_examples = len(os.listdir(folder_name))
        for i in range(0, num_examples):
            example_folder_name = folder_name + "/example_" + str(i)
            image_names = [
                file for file in os.listdir(example_folder_name)
                if file.endswith('.png')
            ]
            num_actions = len(image_names)
            images = []
            for j in range(0, num_actions):
                # Read the image and convert from (H, W, C) to (C, H, W)
                img = scipy.misc.imread(example_folder_name + "/image_" +
                                        str(j) + ".png").swapaxes(1, 2).swapaxes(0, 1)
                images.append(img)
            image_dataset.append(images)

        # goal_dataset = []
        # num_examples = len(os.listdir(folder_name))
        # for i in range(0, num_examples):
        #     example_folder_name = folder_name + "/example_" + str(i)
        #     lines = open(example_folder_name + "/goal.txt").readlines()
        #     goals = []
        #     for line in lines:
        #         words = line.strip().split()
        #         assert len(words) == 4
        #         if words[0] == "None" or words[1] == "None":
        #             row, col, row_real, col_real = None, None, None, None
        #         else:
        #             row, col, row_real, col_real = int(words[0]), int(words[1]), float(words[2]), float(words[3])
        #             assert 0 <= row < 8 and 0 <= col < 8
        #         goals.append((row, col, row_real, col_real))
        #     goal_dataset.append(goals)
        #
        # assert len(image_dataset) == len(goal_dataset)
        assert len(image_dataset) == len(dataset)

        ####################################
        #  Hack for synthetic data
        goal_dataset = []
        for i in range(0, num_examples):
            data_point = dataset[i]
            pos = data_point.get_start_pos()
            metadata = {"x_pos": pos[0], "z_pos": pos[1], "y_angle": pos[2]}

            goal_location = [
                GoalPrediction.get_goal_location(metadata, data_point, 32, 32)
            ]
            _, _, row, col = goal_location[0]

            start_pos = current_pos_from_metadata(metadata)
            start_pose = current_pose_from_metadata(metadata)

            if row is not None and col is not None:
                goal_pos = data_point.get_destination_list()[-1]
                height_drone = 2.5
                # Inverse projection computed for consistency with parse();
                # the result is currently unused here.
                x_gen, z_gen = get_inverse_object_position(
                    row, col, height_drone, 30, 32, 32,
                    (start_pos[0], start_pos[1], start_pose))

            goal_dataset.append(goal_location)
            data_point.trajectory = [0]
            # if len(image_dataset[i]) >= 2:
            #     data_point.trajectory = [0]  # dummy action added
        #####################################

        end = time.time()
        logging.info("Parsed dataset of size %r in time %r seconds",
                     len(image_dataset), end - start)
        return image_dataset, goal_dataset

    @staticmethod
    def parse_oracle_turn(folder_name, dataset, model):
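        """ Like parse, but first rotates each datapoint's start pose toward
        the panorama image containing the goal (an oracle turn), then reads
        that single image and computes the on-screen goal location. """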

        start = time.time()

        num_channel, height, width = model.image_module.get_final_dimension()

        # Read images
        image_dataset = []
        num_examples = len(os.listdir(folder_name))
        for i in range(0, num_examples):
            data_point = dataset[i]

            ################################################
            pos = data_point.get_start_pos()
            metadata = {"x_pos": pos[0], "z_pos": pos[1], "y_angle": pos[2]}

            turn_angle = get_turn_angle_from_metadata_datapoint(metadata, data_point)

            assert 180.0 >= turn_angle >= -180.0

            # Bucket the turn angle into one of six 60-degree sectors; ix is
            # the index of the corresponding panorama image and
            # mean_turn_angle is the sector's center angle.
            if 30.0 >= turn_angle > -30.0:
                ix = 0
                mean_turn_angle = 0
            elif 90.0 >= turn_angle > 30.0:
                ix = 1
                mean_turn_angle = 60
            elif 150.0 >= turn_angle > 90.0:
                ix = 2
                mean_turn_angle = 120
            elif -30.0 >= turn_angle > -90.0:
                ix = 5
                mean_turn_angle = -60
            elif -90.0 >= turn_angle > -150.0:
                ix = 4
                mean_turn_angle = -120
            else:
                ix = 3
                mean_turn_angle = 180

            print("Pose is %r, Turn Angle is %r and Mean Turn Angle is %r " % (pos[2], turn_angle, mean_turn_angle))
            new_pos_angle = pos[2] + mean_turn_angle

            # Normalize the new angle to the range [-180, 180]
            while new_pos_angle < -180:
                new_pos_angle += 360.0
            while new_pos_angle > 180:
                new_pos_angle -= 360.0

            # Modify the pos to turn towards the image
            new_pos = (pos[0], pos[1], new_pos_angle)
            data_point.start_pos = new_pos

            pos = data_point.get_start_pos()
            metadata = {"x_pos": pos[0], "z_pos": pos[1], "y_angle": pos[2]}
            new_turn_angle = get_turn_angle_from_metadata_datapoint(metadata, data_point)
            assert 30.0 >= new_turn_angle >= -30.0, "Found turn angle of " + str(new_turn_angle)
            ################################################

            example_folder_name = folder_name + "/example_" + str(i)
            # Read the image facing the goal and convert from (H, W, C) to (C, H, W)
            img = scipy.misc.imread(example_folder_name + "/image_" + str(ix) + ".png").swapaxes(1, 2).swapaxes(0, 1)
            images = [img]
            image_dataset.append(images)

        assert len(image_dataset) == len(dataset)

        # Read the goal state. The data for the single image can be
        # directly computed and does not need to be saved.
        goal_dataset = []
        for i in range(0, num_examples):
            data_point = dataset[i]

            pos = data_point.get_start_pos()
            metadata = {"x_pos": pos[0], "z_pos": pos[1], "y_angle": pos[2]}

            goal_location = [GoalPrediction.get_goal_location(metadata, data_point, height, width)]
            _, _, row, col = goal_location[0]

            start_pos = current_pos_from_metadata(metadata)
            start_pose = current_pose_from_metadata(metadata)

            if row is not None and col is not None:
                goal_pos = data_point.get_destination_list()[-1]
                x_goal, z_goal = goal_pos
                height_drone = 2.5
                x_gen, z_gen = get_inverse_object_position(row, col, height_drone, 30, height, width,
                                                           (start_pos[0], start_pos[1], start_pose))
                x_diff = x_gen - x_goal
                z_diff = z_gen - z_goal
                dist = math.sqrt(x_diff * x_diff + z_diff * z_diff)
                assert dist < 0.5, "forward computation of goal should match inverse computation"
            else:
                print("Warning: None found! ")

            goal_dataset.append(goal_location)

        end = time.time()
        logging.info("Parsed dataset of size %r in time %r seconds", len(image_dataset), end - start)
        return image_dataset, goal_dataset