Example #1
File: roll_out.py Project: pianpwk/drif
    def __init__(self, instance_id=0):
        self.presenter = Presenter()
        self.instance_id = instance_id
        self.env = None

        self.word2token = None
        self.all_instructions = None
Example #2
 def __init__(self, run_name="", save_images=True):
     super(EvaluateBase, self).__init__()
     self.train_i, self.test_i, self.dev_i, corpus = get_all_instructions()
     self.passing_distance = LANDMARK_REGION_RADIUS
     self.results = ResultsLandmarkSide()
     self.presenter = Presenter()
     self.run_name = run_name
     self.save_images = save_images
Example #3
    def forward(self, image_g, pose, sentence_embed, parent=None, show=""):

        # scale to 0-1 range
        #image_g = image_g - torch.min(image_g)
        #image_g = image_g / (torch.max(image_g) + 1e-9)

        # rotate to robot frame
        # TODO: Temporarily changed to local pose
        self.set_map(image_g, pose)
        image_r, _ = self.get_map(pose)
        """
        # normalize mean-0 std-1
        image_r = image_r - torch.mean(image_r)
        image_r = image_r / (torch.std(image_r) + 1e-9)

        ones = torch.ones_like(image_g)
        self.set_map(ones, None)
        cov_r, _ = self.get_map(pose)
        cov_r = cov_r - torch.min(cov_r)
        cov_r /= (torch.max(cov_r) + 1e-9)
        cov_rl = cov_r > 1e-8

        blackcolor = torch.min(image_g)

        #image_r[cov_rl] = blackcolor
        """

        features_r = self.feature_net(image_r)

        if parent is not None:
            parent.keep_inputs("fpv_features", features_r)

        if self.aux_ground:
            self.lang_filter.precompute_conv_weights(sentence_embed)
            features_g = self.lang_filter(features_r)
            if parent is not None:
                parent.keep_inputs("fpv_features_g", features_g)

            features_all = torch.cat([features_g, features_r], dim=1)
        else:
            features_all = features_r

        coverage = torch.ones_like(features_all)

        if show != "":
            Presenter().show_image(image_r.data[0, 0:3],
                                   show + "_img",
                                   torch=True,
                                   scale=1,
                                   waitkey=20)
            Presenter().show_image(features_r.data[0, 0:3],
                                   show,
                                   torch=True,
                                   scale=12,
                                   waitkey=20)
            #Presenter().show_image(cov_r.data[0, 0:3], show+ "_convg", torch=True, scale=1, waitkey=20)

        return features_all, coverage
Example #4
    def __init__(self, resolution=512):
        self.presenter = Presenter()
        self.clear()
        self.current_rollout = {}
        self.current_rollout_name = None
        self.env_image = None
        self.current_timestep = None
        self.world_size_m = P.get_current_parameters()["Setup"]["world_size_m"]

        self.resolution = resolution
Example #5
 def show(self, perturbed_maps, unperturbed_maps, name):
     Presenter().show_image(unperturbed_maps.data[0],
                            name + "_unperturbed",
                            torch=True,
                            waitkey=1,
                            scale=4)
     Presenter().show_image(perturbed_maps.data[0],
                            name + "_perturbed",
                            torch=True,
                            waitkey=1,
                            scale=4)
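Judging by the calls throughout these examples, Presenter.show_image takes an image (a CxHxW torch tensor when torch=True, otherwise an HxWxC numpy array), a window name, and the keyword flags torch, scale, and waitkey. A minimal sketch of a call, with the signature inferred from the surrounding examples rather than from documentation:

import numpy as np
from visualization import Presenter  # import path as used in Example #8

# Hypothetical image: float values in the 0-1 range, HxWxC.
img = np.zeros((64, 64, 3), dtype=np.float32)
img[16:48, 16:48, 1] = 1.0
# scale enlarges the displayed window; waitkey presumably feeds cv2.waitKey.
Presenter().show_image(img, "demo", torch=False, scale=4, waitkey=1)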
Example #6
File: evaluate_nl.py Project: hyzcn/drif
 def __init__(self, run_name="", save_images=True, entire_trajectory=True, custom_instr=None):
     super(EvaluateBase, self).__init__()
     self.train_i, self.test_i, self.dev_i, corpus = get_all_instructions()
     self.all_i = {**self.train_i, **self.test_i, **self.dev_i}
     self.passing_distance = DEFAULT_PASSING_DISTANCE
     self.results = ResultsLandmarkSide()
     self.presenter = Presenter()
     self.run_name = run_name
     self.save_images = save_images
     self.entire_trajectory = entire_trajectory
     self.custom_instr = custom_instr
Example #7
 def get_viz(self):
     presenter = Presenter()
     out = {"viz_img": []}
     for i, img in enumerate(self.viz_images):
         instruction = self.instructions[i]
         if len(instruction.view([-1])) < 2:
             instruction = [0]
         else:
             instruction = list(instruction.data.cpu().numpy().squeeze())
         instruction_str = debug_untokenize_instruction(instruction)
         viz_img = presenter.overlay_text(img, instruction_str)
         out["viz_img"].append(viz_img)
     return out
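As used here, presenter.overlay_text takes an image and a string and returns the annotated image. A hypothetical call, assuming a numpy image like those used elsewhere on this page:

import numpy as np
from visualization import Presenter  # import path as used in Example #8

img = np.zeros((64, 128, 3), dtype=np.float32)
annotated = Presenter().overlay_text(img, "turn left at the blue fence")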
Example #8
def test_rollout_sampler():
    policy, _ = load_model("pvn_full_bidomain")
    policy_state = policy.get_policy_state()
    from visualization import Presenter

    #roller = SimplePolicyRoller(policy_factory)
    roller = SimpleParallelPolicyRoller("pvn_full_bidomain", num_workers=4)
    rollout_sampler = RolloutSampler(roller)

    # TODO: Load some policy
    print("Sampling once")
    rollouts = rollout_sampler.sample_n_rollouts(12, policy_state)

    print("Sampling twice")
    rollouts += rollout_sampler.sample_n_rollouts(12, policy_state)

    print("Sampling thrice")
    rollouts += rollout_sampler.sample_n_rollouts(12, policy_state)

    for rollout in rollouts:
        print("Visualizing rollout")
        for sample in rollout:
            state = sample["state"]
            image = state.get_rgb_image()
            Presenter().show_image(image, "fpv", waitkey=True, scale=4)
        print("Done!")

    roller.__exit__()
    print("ding")
Example #9
    def forward(self, coverage_masks, initpos_masks):
        batch_size = coverage_masks.shape[0]
        coverage_masks_initpos = (coverage_masks + initpos_masks).clamp(0, 1)

        if False:
            for i in range(batch_size):
                Presenter().show_image(coverage_masks[i, 0],
                                       "cov_mask_before",
                                       scale=4,
                                       waitkey=1)
                Presenter().show_image(coverage_masks_initpos[i, 0],
                                       "cov_mask_after",
                                       scale=4,
                                       waitkey=True)

        return coverage_masks_initpos
Example #10
    def forward(self, maps_w, sentence_embeddings, map_poses_w, cam_poses_w, show=""):
        #show="li"
        self.prof.tick(".")
        batch_size = len(maps_w)

        # Initialize the layers of the same size as the maps, but with only one channel
        new_layer_size = list(maps_w.size())
        new_layer_size[1] = 1
        all_maps_out_w = empty_float_tensor(new_layer_size, self.is_cuda, self.cuda_device)

        start_poses = self.get_start_poses(cam_poses_w, sentence_embeddings)

        poses_img = [poses_m_to_px(as_pose, self.map_size, self.world_size_px, self.world_size_m) for as_pose in start_poses]
        #poses_img = poses_as_to_img(start_poses, self.world_size, batch_dim=True)

        for i in range(batch_size):
            x = min(max(int(poses_img[i].position.data[0]), 0), new_layer_size[2] - 1)
            y = min(max(int(poses_img[i].position.data[1]), 0), new_layer_size[2] - 1)
            all_maps_out_w[i, 0, x, y] = 10.0

        if show != "":
            Presenter().show_image(all_maps_out_w[0], show, torch=True, waitkey=1)

        self.prof.tick("draw")

        # Step 3: Convert all maps to local frame
        maps_out = torch.cat([Variable(all_maps_out_w), maps_w], dim=1)
        #all_maps_w = torch.cat(all_maps_out_w, dim=0)

        self.prof.loop()
        self.prof.print_stats(10)

        return maps_out, map_poses_w
Example #11
    def forward(self,
                images,
                sentence_embeddings,
                map_poses,
                proc_mask=None,
                show=""):

        # If we are supposed to use less channels than the input map has, just grab the first N channels
        if images.size(1) > self.in_channels:
            images_in = images[:, 0:self.in_channels, :, :]
        else:
            images_in = images

        # Apply the language-conditioned convolutional filter
        self.lang_filter.precompute_conv_weights(sentence_embeddings)
        images_out = self.lang_filter(images_in)

        if show != "":
            Presenter().show_image(images_out.data[0, 0:3],
                                   show,
                                   torch=True,
                                   scale=4,
                                   waitkey=1)

        # If requested, concatenate with the prior input, such that the first feature maps are from output
        # That allows chaining these modules and slicing
        if self.cat_out:
            images_out = torch.cat([images_out, images_in], dim=1)

        self.set_maps(images_out, map_poses)
        return images_out, map_poses
Example #12
def show_depth(image):
    grayscale = np.mean(image[:, :, 0:3], axis=2)
    depth = image[:, :, 3]
    comb = np.stack([grayscale, grayscale, depth], axis=2)
    comb -= comb.min()
    comb /= (comb.max() + 1e-9)
    Presenter().show_image(comb, "depth_alignment", torch=False, waitkey=1, scale=4)
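A hypothetical call to show_depth, assuming an HxWx4 RGB-D array with depth in the fourth channel, as the slicing above implies:

import numpy as np

# Hypothetical RGB-D input: channels 0-2 are color, channel 3 is depth.
rgbd = np.random.rand(96, 128, 4).astype(np.float32)
show_depth(rgbd)  # shows the grayscale/depth composite via Presenter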
Example #13
    def forward(self, masks, mask_labels, show="", iteration=0):

        if show != "":
            Presenter().show_image(masks.data[0],
                                   "pred_mask",
                                   torch=True,
                                   waitkey=1,
                                   scale=4)
            Presenter().show_image(mask_labels.data[0],
                                   "mask_labels",
                                   torch=True,
                                   waitkey=1,
                                   scale=4)
            self.logger.log_image("pred_mask",
                                  Presenter().prep_image(masks.data[0], 4),
                                  iteration)
            self.logger.log_image(
                "mask_labels",
                Presenter().prep_image(mask_labels.data[0], 4), iteration)
        if masks.size(1) == 1:
            return False

        # TODO: Handle batches if necessary
        goal_mask = masks[0, 1, :, :]
        goal_mask_flat = goal_mask.view([1, -1])
        max_val, argmax = goal_mask_flat.max(1)
        argmax_loc_x = argmax // goal_mask.size(1)
        argmax_loc_y = torch.remainder(argmax, goal_mask.size(1))
        argmax_loc = torch.cat(
            [argmax_loc_x.unsqueeze(1),
             argmax_loc_y.unsqueeze(1)], 1)

        goal_mask_l = mask_labels[0, 1, :, :]
        goal_mask_flat_l = goal_mask_l.view([1, -1])
        max_val, argmax_l = goal_mask_flat_l.max(1)
        argmax_loc_x_l = argmax_l // goal_mask_l.size(1)
        argmax_loc_y_l = torch.remainder(argmax_l, goal_mask_l.size(1))
        argmax_loc_l = torch.cat(
            [argmax_loc_x_l.unsqueeze(1),
             argmax_loc_y_l.unsqueeze(1)], 1)

        dist = (argmax_loc - argmax_loc_l).float().norm(dim=1)
        success = dist < self.ok_distance

        return success
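The comparison above recovers 2D coordinates from a flat argmax by dividing by, and taking the remainder modulo, the mask width. A minimal standalone sketch of the same trick (an illustration, not repository code):

import torch

def argmax_2d(mask):
    """Return (row, col) of the maximum of a 2D tensor."""
    idx = int(mask.view(-1).argmax())
    return divmod(idx, mask.size(1))

# e.g. row, col = argmax_2d(torch.rand(64, 64))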
Example #14
    def forward(self, select_dist, all_cam_poses, plan_mask=None, show=""):
        #show="li"
        self.prof.tick(".")

        # During rollout, plan_mask will alternate between [True] and [False]
        if plan_mask is None:
            all_dist = select_dist
            return all_dist, all_cam_poses

        full_batch_size = len(all_cam_poses)

        all_dists_out_r = []

        self.prof.tick("maps_to_global")

        # For each timestep, take the latest map that was available, transformed into this timestep
        # Do only a maximum of one transformation for any map to avoid cascading of errors!
        ptr = 0
        for i in range(full_batch_size):
            this_pose = all_cam_poses[i:i + 1]
            if plan_mask[i]:
                this_obs = (select_dist[ptr:ptr + 1], this_pose)
                ptr += 1
                self.last_observation = this_obs
            else:
                assert self.last_observation is not None, "The first observation in a sequence needs to be used!"
                last_map, last_pose = self.last_observation

                # TODO: See if we can speed this up. Perhaps batch for all timesteps inbetween observations
                self.child_transformer.set_map(last_map.inner_distribution,
                                               last_pose)
                x = self.child_transformer.get_map(this_pose)
                this_obs = Partial2DDistribution(x, last_map.outer_prob_mass)

            all_dists_out_r.append(this_obs)

            if show != "":
                Presenter().show_image(this_obs.inner_distribution.data[0,
                                                                        0:3],
                                       show,
                                       torch=True,
                                       scale=8,
                                       waitkey=50)

        self.prof.tick("integrate")

        inner_list = [x.inner_distribution for x in all_dists_out_r]
        outer_list = [x.outer_prob_mass for x in all_dists_out_r]

        all_dists_out_r = Partial2DDistribution(torch.cat(inner_list, dim=0),
                                                torch.cat(outer_list, dim=0))

        self.prof.tick("maps_to_local")
        self.prof.loop()
        self.prof.print_stats(10)

        return all_dists_out_r, all_cam_poses
Example #15
    def forward_one(self, maps_r, other_features, firstseg=None):
        # TODO: Log this somewhere
        if self.map_channels < maps_r.size(1):
            maps_r = maps_r[:, 0:self.map_channels]

        if self.manual:
            # torch.max needs a flattened view and a dim argument to also return the argmax
            max_val, argmax = torch.max(maps_r[:, 1].view(maps_r.size(0), -1), dim=1)
            print(argmax)
        if True:
            maps_s = maps_r[:, :, self.t_crop:self.b_crop,
                            self.l_crop:self.r_crop].contiguous()

            # maps_s = self.downsample(maps_r)
            if self.path_only:
                # Copy over the trajectory channel, discarding the goal
                maps_in = torch.zeros_like(maps_s)
                maps_in[:, 0] = maps_s[:, 0]
            else:
                maps_in = maps_s

            DBG = run_md.IS_ROLLOUT  # flip to True to force the visualization below
            if DBG:
                for i in range(len(maps_s)):
                    Presenter().show_image(maps_in.data[i],
                                           "d",
                                           torch=True,
                                           waitkey=1,
                                           scale=8)

            map_features = maps_in.view([maps_s.size(0), -1])

            # other_features_zero = torch.zeros_like(other_features)
            # mlp_in_features = torch.cat([map_features, other_features_zero], dim=1)

            mlp_in_features = map_features
            if self.use_recurrence:
                if firstseg:
                    self.forget_recurrence()
                hist_features = self.last_h
            else:
                hist_features = torch.zeros(
                    [maps_s.size(0),
                     RECURRENCE_SIZE]).to(next(self.parameters()).device)

            mlp_in_features = torch.cat([mlp_in_features, hist_features],
                                        dim=1)
            mlp_in_features = self.dropout(mlp_in_features)
            actions_pred = self.mlp(mlp_in_features)

            if self.use_recurrence:
                self.last_h, self.last_c = self.recurrence(
                    actions_pred, (self.last_h, self.last_c))

            # this must be in 0-1 range for BCE loss
            actions_pred = actions_pred.clone()
            actions_pred[:, 3] = torch.sigmoid(actions_pred[:, 3])
            return actions_pred
Example #16
    def forward(self, masks, mask_labels):
        masks = torch.cat(masks, dim=0)
        mask_labels = torch.cat(mask_labels, dim=0)

        if masks.size(1) < mask_labels.size(1):
            mask_labels = mask_labels[:, 0:masks.size(1)].contiguous()

        global pa2d_count
        if DBG and pa2d_count % 50 == 0:
            for i in range(masks.size(0)):
                Presenter().show_image(masks.data[i], "aux_path_pred", torch=True, waitkey=1, scale=4)
                Presenter().show_image(mask_labels.data[i], "aux_path_label", torch=True, waitkey=100, scale=4)
        pa2d_count += 1

        loss = self.loss(masks, mask_labels)

        # TODO: Put accuracy reporting here...
        return loss, 1
Example #17
    def forward(self, coverage_masks_w, cam_poses):
        pos_px = pos_m_to_px(cam_poses.position[0:1],
                             img_size_px=self.world_size_px,
                             world_size_px=self.world_size_px,
                             world_size_m=self.world_size_m)
        batch_size = coverage_masks_w.shape[0]
        # TODO: Don't do this at test-time for everything except the first action!
        assert cam_poses.position.shape[0] > 0, "Not implemented test-time behavior"
        pos_mask = torch.zeros_like(coverage_masks_w[0, 0])
        radius = 6  # 6 pixels is a bit less than a meter

        x = pos_px[0][0].item()
        y = pos_px[0][1].item()

        xi = int(x)
        yi = int(y)
        min_x = max(xi - radius, 0)
        min_y = max(yi - radius, 0)
        max_x = min(xi + radius, coverage_masks_w.shape[2])
        max_y = min(yi + radius, coverage_masks_w.shape[2])

        indices = [[i, j] for i in range(min_x, max_x)
                   for j in range(min_y, max_y)
                   if (x - i - 0.5)**2 + (y - j - 0.5)**2 < radius**2]
        for i, j in indices:
            pos_mask[i, j] = 1.0

        coverage_masks_w_init_pos = (
            coverage_masks_w + pos_mask[np.newaxis, np.newaxis, :, :]).clamp(
                0, 1)

        if True:
            for i in range(batch_size):
                Presenter().show_image(coverage_masks_w[i, 0],
                                       "cov_mask_before",
                                       scale=4,
                                       waitkey=1)
                Presenter().show_image(coverage_masks_w_init_pos[i, 0],
                                       "cov_mask_after",
                                       scale=4,
                                       waitkey=True)

        return coverage_masks_w_init_pos
Example #18
    def forward_deprecated(self, images, cam_poses, add_mask=None, show=""):
        #show="li"
        self.prof.tick(".")
        batch_size = len(cam_poses)

        assert add_mask is None or add_mask[0] is not None, "The first observation in a sequence needs to be used!"

        # Step 1: All local maps to global:
        #  TODO: Allow inputing global maps when new projector is ready
        self.child_transformer.set_maps(images, cam_poses)
        observations_g, _ = self.child_transformer.get_maps(None)

        all_maps_out_g = []

        self.prof.tick("maps_to_global")

        # TODO: Draw past trajectory on an extra channel of the semantic map
        # Step 2: Integrate serially in the global frame
        for i in range(batch_size):

            # If we don't have a map yet, initialize the map to this observation
            if self.map_memory.latest_maps is None:
                self.map_memory.set_map(observations_g[i:i+1], None)

            # Allow masking of observations
            if add_mask is None or add_mask[i]:
                # Use the map from this frame
                map_g = observations_g[i:i+1]
                self.map_memory.set_map(map_g, None)
            else:
                # Use the latest available map oriented in global frame
                map_g, _ = self.map_memory.get_map(None)

            if show != "":
                Presenter().show_image(map_g.data[0, 0:3], show, torch=True, scale=8, waitkey=50)

            all_maps_out_g.append(map_g)

        self.prof.tick("integrate")

        # Step 3: Convert all maps to local frame
        all_maps_g = torch.cat(all_maps_out_g, dim=0)

        # Write gifs for debugging
        self.dbg_write_extra(all_maps_g, None)

        self.child_transformer.set_maps(all_maps_g, None)
        maps_r, _ = self.child_transformer.get_maps(cam_poses)
        self.set_maps(maps_r, cam_poses)

        self.prof.tick("maps_to_local")
        self.prof.loop()
        self.prof.print_stats(10)

        return maps_r, cam_poses
Example #19
    def forward(self, visit_dist_r, map_uncoverage, firstseg=None, eval=False):
        action = self.teleoper.get_command()

        inner_goal_dist = visit_dist_r.inner_distribution

        prob_goal_inside = inner_goal_dist[0, 1].sum().detach().item()
        rectangle = np.zeros([100, 20, 3])
        fill_until = int(100 * prob_goal_inside)
        rectangle[fill_until:, :, 0] = 1.0
        Presenter().show_image(rectangle, "P(outside)", scale=4, waitkey=1)

        # Normalize channels for viewing
        inner_goal_dist[0, 0] /= (inner_goal_dist[0, 0].max() + 1e-10)
        inner_goal_dist[0, 1] /= (inner_goal_dist[0, 1].max() + 1e-10)

        Presenter().show_image(inner_goal_dist[0].detach(), "visit_dist", scale=8, waitkey=1)
        Presenter().show_image(map_uncoverage[0].detach(), "unobserved", scale=8, waitkey=1)

        action_t = torch.Tensor(action)
        return action_t
Example #20
    def forward(self,
                current_maps,
                coverages,
                cam_poses,
                add_mask=None,
                show=""):
        batch_size = len(cam_poses)

        assert add_mask is None or add_mask[0] is not None, "The first observation in a sequence needs to be used!"

        # If we don't have masked observations, just return each timestep observations
        if add_mask is None:
            self.set_maps(current_maps, cam_poses)
            return current_maps, cam_poses

        maps_r = []

        # If we have masked observations, then for timesteps where observation is masked (False), get the previous observation
        # rotated to the current frame
        for i in range(batch_size):

            # If we don't have a map yet, rotate this observation and initialize a map
            if self.latest_map is None:
                self.set_map(current_maps[i:i + 1], cam_poses[i:i + 1])
                map_g, _ = self.get_map(None)
                self.set_map(map_g, None)

            # Allow masking of observations
            if add_mask is None or add_mask[i]:
                # Transform the observation into the global (map) frame
                self.child_transformer.set_map(current_maps[i:i + 1],
                                               cam_poses[i:i + 1])
                obs_g, _ = self.child_transformer.get_map(None)

                # Remember this new map
                self.set_map(obs_g, None)

            # Return this map in the camera frame of reference
            map_r, _ = self.get_map(cam_poses[i:i + 1])

            if show != "":
                Presenter().show_image(map_r.data[0, 0:3],
                                       show,
                                       torch=True,
                                       scale=8,
                                       waitkey=1)

            maps_r.append(map_r)

        maps_r = torch.cat(maps_r, dim=0)
        self.set_maps(maps_r, cam_poses)

        return maps_r, cam_poses
Example #21
def browse_pvn_dataset():
    P.initialize_experiment()

    setup = P.get_current_parameters()["Setup"]
    model_sim, _ = load_model(setup["model"],
                              setup["sim_model_file"],
                              domain="sim")
    data_params = P.get_current_parameters()["Training"]

    print("Loading data")
    train_envs, dev_envs, test_envs = get_restricted_env_id_lists()

    #dom="real"
    dom = "sim"

    dataset = model_sim.get_dataset(
        data=None,
        envs=train_envs,
        domain=dom,
        dataset_names=data_params[f"{dom}_dataset_names"],
        dataset_prefix="supervised",
        eval=False,
        halfway_only=False)

    p = Presenter()

    for example in dataset:
        if example is None:
            continue
        md = example["md"][0]
        print(
            f"Showing example: {md['env_id']}:{md['set_idx']}:{md['seg_idx']}")
        print(f"  instruction: {md['instruction']}")
        exec_len = len(example["images"])
        for i in range(exec_len):
            print(f"   timestep: {i}")
            img_i = example["images"][i]
            lm_fpv_i = example["lm_pos_fpv"][i]
            if lm_fpv_i is not None:
                img_i = p.plot_pts_on_torch_image(img_i, lm_fpv_i.long())
            p.show_image(img_i, "fpv_img_i", scale=4, waitkey=True)
Example #22
    def configure_landmarks(self, env_id):
        self.env_config = load_and_convert_env_config(env_id)
        self.state_positioning = True
        self.state_instructions_printed = False

        self.subscriber = rospy.Subscriber(self.img_topic, Image,
                                           self._image_callback)

        self.enter_monitor = EnterMonitor()
        self.monitor_runner = MonitorRunner(self.enter_monitor)

        env_sim_img = load_env_img(env_id,
                                   width=400,
                                   height=400,
                                   real_drone=False,
                                   origin_bottom_left=False)

        new = True
        while True:
            if self.new_image:
                Presenter().show_image(self.image_to_show,
                                       "Landmark Positioning",
                                       scale=2,
                                       waitkey=10)
                Presenter().show_image(env_sim_img,
                                       "Sim Image",
                                       scale=2,
                                       waitkey=10)
                if new:
                    cv2.moveWindow("Landmark Positioning", 20, 20)
                    cv2.moveWindow("Sim Image", 1000, 20)
                    new = False
            if self.enter_monitor.tapped or SKIP_CONFIGURATION:
                break

        sleep(1)
        cv2.destroyWindow('Landmark Positioning')
        cv2.destroyWindow("Sim Image")
        self.subscriber.unregister()
        return self.image_to_show
Example #23
    def forward(self, maps_r, map_structure_r):
        maps_r_cropped = maps_r.inner_distribution[:, :,
                                                   self.crop_l:self.crop_r,
                                                   self.crop_l:self.crop_r]
        batch_size = maps_r.inner_distribution.shape[0]

        # Create a context vector that encodes goal observability
        # Don't backprop into the embedding vectors - don't risk losing the only input we have
        gin = self.goal_in_vec.detach()[np.newaxis, :].repeat([batch_size, 1])
        gout = self.goal_out_vec.detach()[np.newaxis, :].repeat(
            [batch_size, 1])
        vin = self.visit_in_vec.detach()[np.newaxis, :].repeat([batch_size, 1])
        vout = self.visit_out_vec.detach()[np.newaxis, :].repeat(
            [batch_size, 1])

        p_visit_out = maps_r.outer_prob_mass[:, 0:1].detach()
        p_goal_out = maps_r.outer_prob_mass[:, 1:2].detach()

        g_context_vec = gout * p_goal_out + gin * (1 - p_goal_out)
        v_context_vec = vout * p_visit_out + vin * (1 - p_visit_out)
        obs_context_vec = torch.cat([g_context_vec, v_context_vec], dim=1)

        # 64x64 -> 16x16
        uncov_r_pooled = self.avgpool(map_structure_r)

        if False:
            conv_in_np = conv_in[0].data.cpu().numpy().transpose(1, 2, 0)
            # expand to 0-1 range
            conv_in_np[:, :, 0] /= (np.max(conv_in_np[:, :, 0]) + 1e-10)
            conv_in_np[:, :, 1] /= (np.max(conv_in_np[:, :, 1]) + 1e-10)
            conv_in_np[:, :, 2] /= (np.max(conv_in_np[:, :, 2]) + 1e-10)
            Presenter().show_image(conv_in_np, "rl_conv_in", scale=2)
            #Presenter().show_image(uncov_r_pooled[0], "uncov_pooled", scale=4)

        # From 16x16 down to 8x8
        x = self.act(self.conv1(maps_r_cropped))
        x = self.norm1(x)

        # From 16x16 down to 8x8
        c = self.act(self.structconv1(uncov_r_pooled))
        c = self.covnorm1(c)

        comb_map = torch.cat([x, c], dim=1)
        batch_size = x.shape[0]
        lin_in = comb_map.view(batch_size, -1)
        lin_in = torch.cat([lin_in, obs_context_vec], dim=1)

        x = self.act(self.linear1(lin_in))
        x = torch.cat([lin_in, x], dim=1)
        x = self.act(self.linear2(x))

        return x
Example #24
    def forward(self, select_images, all_cam_poses, plan_mask=None, show=""):
        #show="li"
        self.prof.tick(".")

        # During rollout, plan_mask will alternate between [True] and [False]
        if plan_mask is None:
            all_images = select_images
            return all_images, all_cam_poses

        full_batch_size = len(all_cam_poses)

        all_maps_out_r = []

        self.prof.tick("maps_to_global")

        # For each timestep, take the latest map that was available, transformed into this timestep
        # Do only a maximum of one transformation for any map to avoid cascading of errors!
        ptr = 0
        for i in range(full_batch_size):
            this_pose = all_cam_poses[i:i+1]
            if plan_mask[i]:
                this_obs = (select_images[ptr:ptr+1], this_pose)
                ptr += 1
                self.last_observation = this_obs
            else:
                assert self.last_observation is not None, "The first observation in a sequence needs to be used!"
                last_map, last_pose = self.last_observation

                # TODO: See if we can speed this up. Perhaps batch for all timesteps inbetween observations
                self.child_transformer.set_map(last_map, last_pose)
                this_obs = self.child_transformer.get_map(this_pose)

            all_maps_out_r.append(this_obs[0])

            if show != "":
                Presenter().show_image(this_obs.data[0, 0:3], show, torch=True, scale=8, waitkey=50)

        self.prof.tick("integrate")

        # Step 3: Convert all maps to local frame
        all_maps_r = torch.cat(all_maps_out_r, dim=0)

        # Write gifs for debugging
        #self.dbg_write_extra(all_maps_r, None)

        self.set_maps(all_maps_r, all_cam_poses)

        self.prof.tick("maps_to_local")
        self.prof.loop()
        self.prof.print_stats(10)

        return all_maps_r, all_cam_poses
Example #25
    def unbatch(self, batch):
        # Inputs
        states = self.cuda_var(batch["states"][0])
        seq_len = len(states)
        firstseg_mask = batch["firstseg_mask"][0]          # True for every timestep that is a new instruction segment
        plan_mask = batch["plan_mask"][0]                  # True for every timestep that we do visitation prediction
        actions = self.cuda_var(batch["actions"][0])

        actions_select = self.batch_select.one(actions, plan_mask, actions.device)

        # Ground truth visitation distributions (in start and global frames)
        v_dist_w_ground_truth_select = self.cuda_var(batch["traj_ground_truth"][0])
        cam_poses = self.cam_poses_from_states(states)
        cam_poses_select = self.batch_select.one(cam_poses, plan_mask, actions.device)
        v_dist_r_ground_truth_select, poses_r = self.map_transform_w_to_r(v_dist_w_ground_truth_select, None, cam_poses_select)
        self.tensor_store.keep_inputs("v_dist_w_ground_truth_select", v_dist_w_ground_truth_select)
        self.tensor_store.keep_inputs("v_dist_r_ground_truth_select", v_dist_r_ground_truth_select)

        Presenter().show_image(v_dist_w_ground_truth_select.detach().cpu()[0,0], "v_dist_w_ground_truth_select", waitkey=1, scale=4)
        Presenter().show_image(v_dist_r_ground_truth_select.detach().cpu()[0,0], "v_dist_r_ground_truth_select", waitkey=1, scale=4)

        return states, actions_select, v_dist_r_ground_truth_select, cam_poses_select, plan_mask, firstseg_mask
Example #26
    def forward(self, images, poses, sentence_embeds, parent=None, show=""):

        self.prof.tick("out")

        features_fpv_vis_only, features_fpv_gnd_only = self.forward_fpv_features(images, sentence_embeds, parent)

        # If we have grounding features, the overall features are a concatenation of grounded and non-grounded features
        if features_fpv_gnd_only is not None:
            features_fpv_all = torch.cat([features_fpv_gnd_only, features_fpv_vis_only], dim=1)
        else:
            features_fpv_all = features_fpv_vis_only

        # Project first-person view features on to the map in egocentric frame
        grid_maps = self.map_projection(poses)
        self.prof.tick("proj_map")
        features_r = self.grid_sampler(features_fpv_all, grid_maps)

        # Obtain an ego-centric map mask of where we have new information
        ones_size = list(features_fpv_all.size())
        ones_size[1] = 1
        tmp_ones = empty_float_tensor(ones_size, self.is_cuda, self.cuda_device).fill_(1.0)
        new_coverages = self.grid_sampler(tmp_ones, grid_maps)

        # Make sure that new_coverage is a 0/1 mask (grid_sampler applies bilinear interpolation)
        new_coverages = new_coverages - torch.min(new_coverages)
        new_coverages = new_coverages / torch.max(new_coverages)

        self.prof.tick("gsample")

        if show != "":
            Presenter().show_image(images.data[0, 0:3], show + "_img", torch=True, scale=1, waitkey=1)
            Presenter().show_image(features_r.data[0, 0:3], show, torch=True, scale=6, waitkey=1)
            Presenter().show_image(new_coverages.data[0], show + "_covg", torch=True, scale=6, waitkey=1)

        self.prof.loop()
        self.prof.print_stats(10)

        return features_r, new_coverages
Example #27
    def plot_pts(self, image, pts):
        """
        :param image: CxHxW image
        :param pts: Nx2 points - (H,W) coords in the image
        :return:
        """
        image = image.cpu().data.numpy()
        image = image.transpose((1, 2, 0))
        pts = pts.cpu().data.numpy()
        image[:, :, 0] = 0.0
        for pt in pts:
            image[pt[0], pt[1], 0] = 1.0

        Presenter().show_image(image[:, :, 0:3], f"aux_class_2d:{self.name}", torch=False, waitkey=1, scale=8)
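A hypothetical usage, matching the docstring: a CxHxW torch image and an Nx2 tensor of (H, W) coordinates. Here aux stands in for an instance of the class above:

import torch

image = torch.rand(3, 32, 32)            # CxHxW
pts = torch.tensor([[4, 5], [20, 17]])   # Nx2 (row, col) points
aux.plot_pts(image, pts)  # zeroes the red channel, marks the points, shows the image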
Example #28
    def _generate_mask(self):
        m = torch.zeros([self.map_size_px, self.map_size_px])
        c_x = c_y = self.map_size_px // 2
        for x in range(int(c_x - self.radius), int(c_x + self.radius)):
            for y in range(int(c_y - self.radius), int(c_y + self.radius)):
                dx = x - c_x
                dy = y - c_y
                angle = math.atan2(dy, dx)
                dst = math.sqrt(dy**2 + dx**2)
                if -self.hfov / 2 < angle < self.hfov / 2 and dst < self.radius:
                    m[x, y] = 1.0  # mark every cell inside the sector

        if False:
            Presenter().show_image(m, "init_pos_mask", scale=4, waitkey=True)

        return m
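For comparison, a vectorized sketch of the same sector mask, under the assumptions above (radius in pixels, hfov in radians, the sector opening along +x from the map center); an illustration, not the repository's code:

import torch

def sector_mask(map_size_px, radius, hfov):
    # Vectorized equivalent of the nested loop in _generate_mask.
    c = map_size_px // 2
    xs, ys = torch.meshgrid(torch.arange(map_size_px),
                            torch.arange(map_size_px), indexing="ij")
    dx = (xs - c).float()
    dy = (ys - c).float()
    angle = torch.atan2(dy, dx)
    dist = torch.sqrt(dx ** 2 + dy ** 2)
    return ((angle > -hfov / 2) & (angle < hfov / 2) & (dist < radius)).float()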
Example #29
    def __init__(self,
                 run_name="",
                 save_images=True,
                 entire_trajectory=True,
                 custom_instr=None,
                 aug_len=None):
        super(EvaluateBase, self).__init__()
        self.train_i, self.test_i, self.dev_i, corpus = get_all_instructions()
        self.all_i = {**self.train_i, **self.test_i, **self.dev_i}
        self.passing_distance = P.get_current_parameters()["Units"]["passing_distance"]
        self.results = ResultsLandmarkSide()
        self.presenter = Presenter()
        self.run_name = run_name
        self.save_images = save_images
        self.entire_trajectory = entire_trajectory
        self.custom_instr = custom_instr
        self.aug_len = aug_len

        self.visible_map = {}

        self.hfov = P.get_current_parameters()["ModelPVN"]["Stage1"]["cam_h_fov"]
Example #30
def dyn_gt_test():
    presenter = Presenter()
    train_instr, dev_instr, test_instr, corpus = get_all_instructions()
    all_instr = {**train_instr, **dev_instr, **test_instr}

    for i in range(10):
        path = load_path(i)
        segments = all_instr[i][0]["instructions"]
        for seg in segments:
            start_idx = seg["start_idx"]
            end_idx = seg["end_idx"]
            randInt = random.randint(10, 100)

            start_pose = Pose(path[start_idx] - randInt, 0)

            if end_idx - start_idx > 0:
                randInt = random.randint(10, 100)
                new_path = get_dynamic_ground_truth(
                    path[start_idx:end_idx], (path[start_idx] - randInt))
                new_path1 = get_dynamic_ground_truth_smooth(
                    path[start_idx:end_idx], (path[start_idx] - randInt))
                presenter.plot_path(
                    i, [path[start_idx:end_idx], new_path, new_path1])