Example No. 1
    def get_affine_i(self, map_poses, cam_poses, i):
        # Convert the pose from AirSim coordinates to image pixel coordinates
        # If the pose is None, use the canonical pose (global frame)
        self.prof.tick("call")
        if map_poses is not None and map_poses[i] is not None:
            map_pose_i = map_poses[i].numpy()
            map_pose_img = poses_m_to_px(
                map_pose_i, self.source_map_size_px,
                [self.world_in_map_size_px, self.world_in_map_size_px],
                self.world_size_m)
        else:
            map_pose_img = self.canonical_pose_src

        if cam_poses is not None and cam_poses[i] is not None:
            cam_pose_i = cam_poses[i].numpy()
            cam_pose_img = poses_m_to_px(
                cam_pose_i, self.dest_map_size_px,
                [self.world_in_map_size_px, self.world_in_map_size_px],
                self.world_size_m)
        else:
            cam_pose_img = self.canonical_pose_dst

        self.prof.tick("convert_pose")

        # Get the affine transformation matrix to transform the map to the new camera pose
        affine_i = self.get_old_to_new_pose_mat(map_pose_img, cam_pose_img)

        self.prof.tick("calc_affine")
        return affine_i
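Every example on this page funnels through poses_m_to_px. The conversion it presumably performs is a uniform meters-to-pixels rescaling; the sketch below is an assumption-based stand-in, not the repository's implementation, and the PosePx container and the poses_m_to_px_sketch name are hypothetical:

import numpy as np

class PosePx:
    # Hypothetical pixel-space pose container mirroring the attributes the
    # examples above read back (.position and .orientation).
    def __init__(self, position, orientation):
        self.position = position        # np.array([x, y]) in pixels
        self.orientation = orientation  # yaw in radians, unchanged by rescaling

def poses_m_to_px_sketch(as_pose, img_size_px, world_size_px, world_size_m, batch_dim=False):
    # Uniform meters-to-pixels scale: the world spans world_size_m meters and is
    # rendered at world_size_px pixels. img_size_px would matter when the output
    # map is a crop or resize of the world map; that step is omitted here.
    scale = np.asarray(world_size_px, dtype=np.float32) / float(world_size_m)
    position_px = np.asarray(as_pose.position[:2], dtype=np.float32) * scale
    return PosePx(position_px, as_pose.orientation)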
Example No. 2
    def get_affine_matrices(self, map_poses, cam_poses, batch_size):
        # Convert the pose from AirSim coordinates to image pixel coordinates
        # If the pose is None, use the canonical pose (global frame)
        if map_poses is not None:
            map_poses = map_poses.numpy()  # TODO: Check whether this could be a list instead of a tensor
            # TODO: This is the big bottleneck. Could we precompute it in the dataloader?
            map_poses_img = poses_m_to_px(
                map_poses,
                self.source_map_size_px,
                [self.world_in_map_size_px, self.world_in_map_size_px],
                self.world_size_m,
                batch_dim=True)
        else:
            map_poses_img = self.canonical_pose_src.repeat_np(batch_size)

        if cam_poses is not None:
            cam_poses = cam_poses.numpy()
            cam_poses_img = poses_m_to_px(
                cam_poses,
                self.dest_map_size_px,
                [self.world_in_map_size_px, self.world_in_map_size_px],
                self.world_size_m,
                batch_dim=True)
        else:
            cam_poses_img = self.canonical_pose_dst.repeat_np(batch_size)

        # Get the affine transformation matrix to transform the map to the new camera pose
        affines = self.get_old_to_new_pose_matrices(map_poses_img,
                                                    cam_poses_img)

        return affines
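The batched variant converts all poses in one vectorized call (batch_dim=True) rather than looping per item, which is also why the TODO above considers precomputing it in the dataloader. A minimal sketch of the vectorized scaling, assuming poses arrive as an (N, 3) array of (x, y, yaw); the array layout and the function name are assumptions:

import numpy as np

def poses_m_to_px_batched_sketch(poses_m, world_size_px, world_size_m):
    # poses_m: (N, 3) array of (x, y, yaw). One vectorized multiply converts
    # every position in the batch, which is what makes the batched path cheaper
    # than N calls to the scalar version.
    poses_px = np.array(poses_m, dtype=np.float32)
    poses_px[:, :2] *= world_size_px / world_size_m
    return poses_px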
Example No. 3
    def forward(self, maps, map_pose, cam_pose):
        """
        Affine transform the map from being centered around map_pose in the canonical map frame to
        being centered around cam_pose in the canonical map frame.
        The canonical map frame is the one where the map origin aligns with the environment origin,
        but the environment may or may not take up the entire map.
        :param maps: maps centered around the drone at map_pose
        :param map_pose: the previous drone pose in the canonical map frame
        :param cam_pose: the new drone pose in the canonical map frame
        :return: maps transformed to be centered around cam_pose
        """

        # TODO: Handle the case where cam_pose is None and return a map in the canonical frame
        self.prof.tick("out")
        batch_size = maps.size(0)
        affine_matrices = torch.zeros([batch_size, 3, 3]).to(maps.device)

        self.prof.tick("init")
        for i in range(batch_size):

            # Convert the pose from AirSim coordinates to image pixel coordinates
            # If the pose is None, use the canonical pose (global frame)
            if map_pose is not None and map_pose[i] is not None:
                map_pose_i = map_pose[i].numpy()
                map_pose_img = poses_m_to_px(
                    map_pose_i, self.map_size,
                    [self.world_size_px, self.world_size_px],
                    self.world_size_m)
            else:
                map_pose_img = self.get_canonical_frame_pose()

            if cam_pose is not None and cam_pose[i] is not None:
                cam_pose_i = cam_pose[i].numpy()
                cam_pose_img = poses_m_to_px(
                    cam_pose_i, self.map_size,
                    [self.world_size_px, self.world_size_px],
                    self.world_size_m)
            else:
                cam_pose_img = self.get_canonical_frame_pose()

            self.prof.tick("pose")

            # Get the affine transformation matrix to transform the map to the new camera pose
            affine_i = self.get_old_to_new_pose_mat(map_pose_img, cam_pose_img)
            affine_matrices[i] = affine_i
            self.prof.tick("affine")

        # TODO: Do the same with OpenCV and compare results for testing

        # Apply the affine transformation on the map
        maps_out = self.affine_2d(maps, affine_matrices)

        self.prof.tick("affine_sample")
        self.prof.loop()
        self.prof.print_stats(20)

        return maps_out
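get_old_to_new_pose_mat likely composes three homogeneous 2D transforms: translate the old pose to the origin, rotate by the yaw difference, and translate the origin to the new pose. A hedged sketch of that construction; the function name and sign conventions are assumptions, consistent only with the 3x3 matrices allocated above:

import numpy as np

def old_to_new_pose_mat_sketch(old_pose_px, new_pose_px):
    # Homogeneous 2D transform that re-centers a map from old_pose_px onto
    # new_pose_px: translate old pose to origin, rotate by the yaw difference,
    # translate the origin to the new pose.
    dyaw = new_pose_px.orientation - old_pose_px.orientation
    c, s = np.cos(dyaw), np.sin(dyaw)
    t_old = np.array([[1, 0, -old_pose_px.position[0]],
                      [0, 1, -old_pose_px.position[1]],
                      [0, 0, 1]], dtype=np.float32)
    rot = np.array([[c, -s, 0],
                    [s,  c, 0],
                    [0,  0, 1]], dtype=np.float32)
    t_new = np.array([[1, 0, new_pose_px.position[0]],
                      [0, 1, new_pose_px.position[1]],
                      [0, 0, 1]], dtype=np.float32)
    return t_new @ rot @ t_old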
Example No. 4
    def forward(self, maps_w, sentence_embeddings, map_poses_w, cam_poses_w, show=False):
        #show="li
        self.prof.tick(".")
        batch_size = len(maps_w)

        # Initialize the layers of the same size as the maps, but with only one channel
        new_layer_size = list(maps_w.size())
        new_layer_size[1] = 1
        all_maps_out_w = empty_float_tensor(new_layer_size, self.is_cuda, self.cuda_device)

        start_poses = self.get_start_poses(cam_poses_w, sentence_embeddings)

        poses_img = [poses_m_to_px(as_pose, self.map_size, self.world_size_px, self.world_size_m) for as_pose in start_poses]

        for i in range(batch_size):
            x = min(max(int(poses_img[i].position.data[0]), 0), new_layer_size[2] - 1)
            y = min(max(int(poses_img[i].position.data[1]), 0), new_layer_size[3] - 1)
            all_maps_out_w[i, 0, x, y] = 10.0

        if show != "":
            Presenter().show_image(all_maps_out_w[0], show, torch=True, waitkey=1)

        self.prof.tick("draw")

        # Concatenate the new pose layer with the input maps
        maps_out = torch.cat([Variable(all_maps_out_w), maps_w], dim=1)

        self.prof.loop()
        self.prof.print_stats(10)

        return maps_out, map_poses_w
Example No. 5
    def draw_observability(self, image, pose_m, world_size_m, h_fov):
        image = image.copy()
        img_size_px = image.shape[1]
        pose_px = poses_m_to_px(as_pose=pose_m,
                                img_size_px=img_size_px,
                                world_size_px=img_size_px,
                                world_size_m=world_size_m,
                                batch_dim=False)
        pose_px.position[1] = img_size_px - pose_px.position[1]
        drone_yaw = -pose_px.orientation + np.pi

        if self.coord_grid is None:
            lspace = np.linspace(0, image.shape[0] - 1, image.shape[0])
            coord_grid = np.meshgrid(lspace, lspace)
            coord_grid = [c[:, :, np.newaxis] for c in coord_grid]
            coord_grid = np.concatenate(coord_grid, axis=2)
            self.coord_grid = coord_grid

        heading = self.coord_grid - pose_px.position[np.newaxis, np.newaxis, :]
        yaws = np.arctan2(heading[:, :, 0], heading[:, :, 1])

        diff = yaws - drone_yaw
        toobig = diff > np.pi
        toosmall = diff < -np.pi
        diff[toobig] -= np.pi * 2
        diff[toosmall] += np.pi * 2
        diff = np.fabs(diff)

        visible_mask = diff < np.deg2rad(h_fov) / 2
        invisible_mask = np.logical_not(visible_mask)
        image[invisible_mask] *= 0.8
        return image
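The explicit mask-based wrap of diff into [-pi, pi] above can also be written with the standard arctan2 idiom; a minimal equivalent:

import numpy as np

def wrap_to_pi(angles):
    # Wrap angles into (-pi, pi] without explicit branch masks.
    return np.arctan2(np.sin(angles), np.cos(angles))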
Example No. 6
def draw_drone_poses(drone_poses):
    num_poses = len(drone_poses)
    pic = np.zeros([num_poses, 1, 128, 128])
    # TODO: Fix this call:
    poses_map = poses_m_to_px(drone_poses, 128, batch_dim=True)
    for i, pose in enumerate(poses_map):
        x = int(pose.position[0])
        y = int(pose.position[1])
        if 0 <= x < 128 and 0 <= y < 128:
            pic[i, 0, x, y] = 1.0

    return torch.from_numpy(pic)
Example No. 7
    def draw_drone(self, image, pose_m, world_size_m):
        img_size_px = image.shape[1]
        pose_px = poses_m_to_px(as_pose=pose_m,
                                img_size_px=img_size_px,
                                world_size_px=img_size_px,
                                world_size_m=world_size_m,
                                batch_dim=False)
        pose_px.position[1] = img_size_px - pose_px.position[1]
        drone_img = self._load_drone_img()
        DRONE_SIZE_FRACTIONAL = 0.1
        drone_img_t, drone_img_mask = self._transform_img_to_pose(
            image, drone_img, pose_px, DRONE_SIZE_FRACTIONAL)
        overlaid_image = image[:, :, :3] * (
            1 - drone_img_mask[:, :, :3]
        ) + drone_img_t[:, :, :3] * drone_img_mask[:, :, :3]
        return overlaid_image
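The final overlay is plain alpha compositing: each output pixel linearly mixes background and foreground by the mask. A standalone sketch of the same blend, assuming float images in [0, 1]:

import numpy as np

def alpha_blend(background, foreground, mask):
    # Per-pixel linear interpolation: mask=1 shows the foreground,
    # mask=0 keeps the background.
    return background * (1.0 - mask) + foreground * mask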
Example No. 8
    def top_down_visualization(self, env_id, seg_idx, rollout, domain, params):
        fd = domain == "real"
        obl = domain in ["simulator", "sim"]
        print(domain, obl)
        if params["draw_topdown"]:
            bg_image = load_env_img(
                env_id,
                self.resolution,
                self.resolution,
                real_drone=(domain == "real"),
                origin_bottom_left=obl,
                flipdiag=False,
                alpha=True)
        else:
            bg_image = np.zeros((self.resolution, self.resolution, 3))
        if params["draw_landmarks"]:
            bg_image = self._draw_landmarks(bg_image, env_id)

        # Initialize stuff
        frames = []
        poses_m = []
        poses_px = []
        for sample in rollout:
            sample_image = bg_image.copy()
            frames.append(sample_image)
            state = sample["state"]
            pose_m = state.get_drone_pose()
            pose_px = poses_m_to_px(pose_m,
                                    self.resolution,
                                    self.resolution,
                                    self.world_size_m,
                                    batch_dim=False)
            poses_px.append(pose_px)
            poses_m.append(pose_m)

        instruction = rollout[0]["instruction"]
        print("Instruction: ")
        print(instruction)

        # Draw visitation distributions if requested:
        if params["include_vdist"]:
            print("Drawing visitation distributions")
            if params["ego_vdist"]:
                inner_key = "v_dist_r_inner"
                outer_key = "v_dist_r_outer"
            else:
                inner_key = "v_dist_w_inner"
                outer_key = "v_dist_w_outer"
            for i, sample in enumerate(rollout):
                v_dist_w_inner = np.flipud(sample[inner_key].transpose(
                    (2, 1, 0)))
                # Expand range of each channel separately so that stop entropy doesn't affect how trajectory looks
                v_dist_w_inner[:, :, 0] /= (
                    np.percentile(v_dist_w_inner[:, :, 0], 99.5) + 1e-9)
                v_dist_w_inner[:, :, 1] /= (
                    np.percentile(v_dist_w_inner[:, :, 1], 99.5) + 1e-9)
                v_dist_w_inner = np.clip(v_dist_w_inner, 0.0, 1.0)
                v_dist_w_outer = sample[outer_key]
                if bg_image.max() - bg_image.min() > 1e-9:
                    f = self.presenter.blend_image(frames[i], v_dist_w_inner)
                else:
                    f = self.presenter.overlaid_image(frames[i],
                                                      v_dist_w_inner,
                                                      strength=1.0)
                f = self.presenter.draw_prob_bars(f, v_dist_w_outer)
                frames[i] = f

        if params["include_layer"]:
            layer_name = params["include_layer"]
            print(f"Drawing first 3 channels of layer {layer_name}")
            accumulate = False
            invert = False
            gray = False
            if layer_name == "M_W_accum":
                accumulate = True
                layer_name = "M_W"
            if layer_name == "M_W_accum_inv":
                invert = True
                accumulate = True
                layer_name = "M_W"

            if layer_name.endswith("_Gray"):
                gray = True
                layer_name = layer_name[:-len("_Gray")]

            for i, sample in enumerate(rollout):
                layer = sample[layer_name]
                if len(layer.shape) == 4:
                    layer = layer[0, :, :, :]
                layer = layer.transpose((2, 1, 0))
                layer = np.flipud(layer)
                if layer_name in ["S_W", "F_W"]:
                    layer = layer[:, :, :3]
                else:
                    layer = layer[:, :, :3]
                if layer_name in ["S_W", "R_W", "F_W"]:
                    if gray:
                        layer -= np.percentile(layer, 1)
                        layer /= (np.percentile(layer, 99) + 1e-9)
                    else:
                        layer /= (np.percentile(layer, 97) + 1e-9)
                    layer = np.clip(layer, 0.0, 1.0)

                if layer_name in ["M_W"]:
                    # Having a 0-1 mask does not encode properly with the codec. Add a bit of imperceptible gaussian noise.
                    layer = layer.astype(np.float32)
                    layer = np.tile(layer, (1, 1, 3))

                if accumulate and i > 0:
                    layer = np.maximum(layer, prev_layer)

                prev_layer = layer
                if invert:
                    layer = 1 - layer
                if frames[i].max() > 0.01:
                    frames[i] = self.presenter.blend_image(
                        frames[i], layer[:, :, :3])
                    #frames[i] = self.presenter.overlaid_image(frames[i], layer[:, :, :3])
                else:
                    scale = (int(self.resolution / layer.shape[0]),
                             int(self.resolution / layer.shape[1]))
                    frames[i] = self.presenter.prep_image(layer[:, :, :3],
                                                          scale=scale)

        if params["include_instr"]:
            print("Drawing instruction")
            for i, sample in enumerate(rollout):
                frames[i] = self.presenter.overlay_text(
                    frames[i], sample["instruction"])

        # Draw trajectory history
        if params["draw_trajectory"]:
            print("Drawing trajectory")
            for i, sample in enumerate(rollout):
                history = poses_px[:i + 1]
                position_history = [h.position for h in history]
                frames[i] = self.presenter.draw_trajectory(
                    frames[i], position_history, self.world_size_m)

        # Draw drone
        if params["draw_drone"]:
            print("Drawing drone")
            for i, sample in enumerate(rollout):
                frames[i] = self.presenter.draw_drone(frames[i], poses_m[i],
                                                      self.world_size_m)

        # Draw observability mask:
        if params["draw_fov"]:
            print("Drawing FOV")
            for i, sample in enumerate(rollout):
                frames[i] = self.presenter.draw_observability(
                    frames[i], poses_m[i], self.world_size_m, 84)

        # Optionally show each frame interactively (disabled)
        if False:
            for i, sample in enumerate(rollout):
                self.presenter.show_image(frames[i],
                                          "sample_image",
                                          scale=1,
                                          waitkey=True)

        return frames
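The visitation-distribution channels above are normalized by a high percentile rather than the max, a robust contrast stretch that keeps a few extreme pixels from compressing the rest of the range. A standalone sketch of that normalization:

import numpy as np

def percentile_stretch(channel, upper_pct=99.5, eps=1e-9):
    # Scale by a high percentile instead of the max so a handful of extreme
    # pixels cannot compress the dynamic range of everything else.
    return np.clip(channel / (np.percentile(channel, upper_pct) + eps), 0.0, 1.0)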