Python MapBatchFillMissing.reset примеры использования

Язык программирования: Python

Пространство имен/Пакет: learning.modules.map_to_map.map_batch_fill_missing

Класс/Тип: MapBatchFillMissing

Метод/Функция: reset

Примеров на hotexamples.com: 4

Python MapBatchFillMissing.reset - 4 примера найдено. Это лучшие примеры Python кода для learning.modules.map_to_map.map_batch_fill_missing.MapBatchFillMissing.reset, полученные из open source проектов. Вы можете ставить оценку каждому примеру, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать Скрыть

MapBatchFillMissing(4)

reset(4)

cuda(2)

Основные методы

MapBatchFillMissing (4)

reset (4)

cuda (2)

Пример #1

Показать файл

Файл: model_sm_action_gt.py Проект: hyzcn/drif

class ModelTrajectoryToAction(ModuleWithAuxiliaries):
    def __init__(self, run_name=""):

        super(ModelTrajectoryToAction, self).__init__()
        self.model_name = "lsvd_action"
        self.run_name = run_name
        self.writer = LoggingSummaryWriter(log_dir="runs/" + run_name)

        self.params = get_current_parameters()["ModelPVN"]
        self.aux_weights = get_current_parameters()["AuxWeights"]

        self.prof = SimpleProfiler(torch_sync=PROFILE, print=PROFILE)
        self.iter = nn.Parameter(torch.zeros(1), requires_grad=False)

        # Common
        # --------------------------------------------------------------------------------------------------------------
        self.map_transform_w_to_s = MapTransformerBase(
            source_map_size=self.params["global_map_size"],
            dest_map_size=self.params["local_map_size"],
            world_size=self.params["world_size_px"])

        self.map_transform_r_to_w = MapTransformerBase(
            source_map_size=self.params["local_map_size"],
            dest_map_size=self.params["global_map_size"],
            world_size=self.params["world_size_px"])

        # Output an action given the global semantic map
        if self.params["map_to_action"] == "downsample2":
            self.map_to_action = EgoMapToActionTriplet(
                map_channels=self.params["map_to_act_channels"],
                map_size=self.params["local_map_size"],
                other_features_size=self.params["emb_size"])

        elif self.params["map_to_action"] == "cropped":
            self.map_to_action = CroppedMapToActionTriplet(
                map_channels=self.params["map_to_act_channels"],
                map_size=self.params["local_map_size"],
                manual=self.params["manual_rule"],
                path_only=self.params["action_in_path_only"],
                recurrence=self.params["action_recurrence"])

        self.spatialsoftmax = SpatialSoftmax2d()
        self.gt_fill_missing = MapBatchFillMissing(
            self.params["local_map_size"], self.params["world_size_px"])

        # Don't freeze the trajectory to action weights, because it will be pre-trained during path-prediction training
        # and finetuned on all timesteps end-to-end
        enable_weight_saving(self.map_to_action,
                             "map_to_action",
                             alwaysfreeze=False,
                             neverfreeze=True)

        self.action_loss = ActionLoss()

        self.env_id = None
        self.seg_idx = None
        self.prev_instruction = None
        self.seq_step = 0
        self.get_act_start_pose = None
        self.gt_labels = None

    # TODO: Try to hide these in a superclass or something. They take up a lot of space:
    def cuda(self, device=None):
        ModuleWithAuxiliaries.cuda(self, device)
        self.map_to_action.cuda(device)
        self.action_loss.cuda(device)
        self.map_transform_w_to_s.cuda(device)
        self.map_transform_r_to_w.cuda(device)
        self.gt_fill_missing.cuda(device)
        return self

    def get_iter(self):
        return int(self.iter.data[0])

    def inc_iter(self):
        self.iter += 1

    def init_weights(self):
        self.map_to_action.init_weights()

    def reset(self):
        # TODO: This is error prone. Create a class StatefulModule, iterate submodules and reset all stateful modules
        super(ModelTrajectoryToAction, self).reset()
        self.map_transform_w_to_s.reset()
        self.map_transform_r_to_w.reset()
        self.gt_fill_missing.reset()

    def setEnvContext(self, context):
        print("Set env context to: " + str(context))
        self.env_id = context["env_id"]

    def start_segment_rollout(self):
        import rollout.run_metadata as md
        m_size = self.params["local_map_size"]
        w_size = self.params["world_size_px"]
        self.gt_labels = get_top_down_ground_truth_static_global(
            md.ENV_ID, md.START_IDX, md.END_IDX, m_size, m_size, w_size,
            w_size)
        self.seg_idx = md.SEG_IDX
        self.gt_labels = self.maybe_cuda(self.gt_labels)
        if self.params["clear_history"]:
            self.start_sequence()

    def get_action(self, state, instruction):
        """
        Given a DroneState (from PomdpInterface) and instruction, produce a numpy 4D action (x, y, theta, pstop)
        :param state: DroneState object with the raw image from the simulator
        :param instruction: Tokenized instruction given the corpus
        #TODO: Absorb corpus within model
        :return:
        """
        prof = SimpleProfiler(print=True)
        prof.tick(".")
        # TODO: Simplify this
        self.eval()
        images_np_pure = state.image
        state_np = state.state
        state = Variable(none_padded_seq_to_tensor([state_np]))

        #print("Act: " + debug_untokenize_instruction(instruction))

        # Add the batch dimension

        first_step = True
        if instruction == self.prev_instruction:
            first_step = False
        self.prev_instruction = instruction
        if first_step:
            self.get_act_start_pose = self.cam_poses_from_states(state[0:1])

        self.seq_step += 1

        # This is for training the policy to mimic the ground-truth state distribution with oracle actions
        # b_traj_gt_w_select = b_traj_ground_truth[b_plan_mask_t[:, np.newaxis, np.newaxis, np.newaxis].expand_as(b_traj_ground_truth)].view([-1] + gtsize)
        traj_gt_w = Variable(self.gt_labels)
        b_poses = self.cam_poses_from_states(state)
        # TODO: These source and dest should go as arguments to get_maps (in forward pass not params)
        transformer = MapTransformerBase(
            source_map_size=self.params["global_map_size"],
            world_size=self.params["world_size_px"],
            dest_map_size=self.params["local_map_size"])
        self.maybe_cuda(transformer)
        transformer.set_maps(traj_gt_w, None)
        traj_gt_r, _ = transformer.get_maps(b_poses)
        self.clear_inputs("traj_gt_r_select")
        self.clear_inputs("traj_gt_w_select")
        self.keep_inputs("traj_gt_r_select", traj_gt_r)
        self.keep_inputs("traj_gt_w_select", traj_gt_w)

        action = self(traj_gt_r, firstseg=[self.seq_step == 1])

        output_action = action.squeeze().data.cpu().numpy()

        stop_prob = output_action[3]
        output_stop = 1 if stop_prob > self.params["stop_threshold"] else 0
        output_action[3] = output_stop

        return output_action

    def deterministic_action(self, action_mean, action_std, stop_prob):
        batch_size = action_mean.size(0)
        action = Variable(
            empty_float_tensor((batch_size, 4), self.is_cuda,
                               self.cuda_device))
        action[:, 0:3] = action_mean[:, 0:3]
        action[:, 3] = stop_prob
        return action

    def sample_action(self, action_mean, action_std, stop_prob):
        action = torch.normal(action_mean, action_std)
        stop = torch.bernoulli(stop_prob)
        return action, stop

    # This is called before beginning an execution sequence
    def start_sequence(self):
        self.seq_step = 0
        self.reset()
        print("RESETTED!")
        return

    def cam_poses_from_states(self, states):
        cam_pos = states[:, 9:12]
        cam_rot = states[:, 12:16]
        pose = Pose(cam_pos, cam_rot)
        return pose

    def save(self, epoch):
        filename = self.params[
            "map_to_action_file"] + "_" + self.run_name + "_" + str(epoch)
        save_pytorch_model(self.map_to_action, filename)
        print("Saved action model to " + filename)

    def forward(self, traj_gt_r, firstseg=None):
        """
        :param images: BxCxHxW batch of images (observations)
        :param states: BxK batch of drone states
        :param instructions: BxM LongTensor where M is the maximum length of any instruction
        :param instr_lengths: list of len B of integers, indicating length of each instruction
        :param has_obs: list of booleans of length B indicating whether the given element in the sequence has an observation
        :param yield_semantic_maps: If true, will not compute actions (full model), but return the semantic maps that
            were built along the way in response to the images. This is ugly, but allows code reuse
        :return:
        """
        action_pred = self.map_to_action(traj_gt_r,
                                         None,
                                         fistseg_mask=firstseg)
        out_action = self.deterministic_action(action_pred[:, 0:3], None,
                                               action_pred[:, 3])
        self.keep_inputs("action", out_action)
        self.prof.tick("map_to_action")

        return out_action

    def maybe_cuda(self, tensor):
        if self.is_cuda:
            if False:
                if type(tensor) is Variable:
                    tensor.data.pin_memory()
                elif type(tensor) is Pose:
                    pass
                elif type(tensor) is torch.FloatTensor:
                    tensor.pin_memory()
            return tensor.cuda()
        else:
            return tensor

    def cuda_var(self, tensor):
        return cuda_var(tensor, self.is_cuda, self.cuda_device)

    # Forward pass for training (with batch optimizations
    def sup_loss_on_batch(self, batch, eval):
        self.prof.tick("out")

        action_loss_total = Variable(
            empty_float_tensor([1], self.is_cuda, self.cuda_device))

        if batch is None:
            print("Skipping None Batch")
            return action_loss_total

        actions = self.maybe_cuda(batch["actions"])
        states = self.maybe_cuda(batch["states"])

        firstseg_mask = batch["firstseg_mask"]

        # Auxiliary labels
        traj_ground_truth_select = self.maybe_cuda(batch["traj_ground_truth"])
        # stops = self.maybe_cuda(batch["stops"])
        metadata = batch["md"]
        batch_size = actions.size(0)
        count = 0

        # Loop thru batch
        for b in range(batch_size):
            seg_idx = -1

            self.reset()

            self.prof.tick("out")
            b_seq_len = len_until_nones(metadata[b])

            # TODO: Generalize this
            # Slice the data according to the sequence length
            b_metadata = metadata[b][:b_seq_len]
            b_actions = actions[b][:b_seq_len]
            b_traj_ground_truth_select = traj_ground_truth_select[b]
            b_states = states[b][:b_seq_len]

            self.keep_inputs("traj_gt_global_select",
                             b_traj_ground_truth_select)

            #b_firstseg = get_obs_mask_segstart(b_metadata)
            b_firstseg = firstseg_mask[b][:b_seq_len]

            # ----------------------------------------------------------------------------
            # Optional Auxiliary Inputs
            # ----------------------------------------------------------------------------
            gtsize = list(b_traj_ground_truth_select.size())[1:]
            b_poses = self.cam_poses_from_states(b_states)
            # TODO: These source and dest should go as arguments to get_maps (in forward pass not params)
            transformer = MapTransformerBase(
                source_map_size=self.params["global_map_size"],
                world_size=self.params["world_size_px"],
                dest_map_size=self.params["local_map_size"])
            self.maybe_cuda(transformer)
            transformer.set_maps(b_traj_ground_truth_select, None)
            traj_gt_local_select, _ = transformer.get_maps(b_poses)
            self.keep_inputs("traj_gt_r_select", traj_gt_local_select)
            self.keep_inputs("traj_gt_w_select", b_traj_ground_truth_select)

            # ----------------------------------------------------------------------------

            self.prof.tick("inputs")

            actions = self(traj_gt_local_select, firstseg=b_firstseg)
            action_losses, _ = self.action_loss(b_actions,
                                                actions,
                                                batchreduce=False)
            action_losses = self.action_loss.batch_reduce_loss(action_losses)
            action_loss = self.action_loss.reduce_loss(action_losses)
            action_loss_total = action_loss
            count += b_seq_len

            self.prof.tick("loss")

        action_loss_avg = action_loss_total / (count + 1e-9)
        prefix = self.model_name + ("/eval" if eval else "/train")
        self.writer.add_scalar(prefix + "/action_loss",
                               action_loss_avg.data.cpu()[0], self.get_iter())

        self.prof.tick("out")

        prefix = self.model_name + ("/eval" if eval else "/train")
        self.writer.add_dict(prefix, get_current_meters(), self.get_iter())

        self.inc_iter()

        self.prof.tick("summaries")
        self.prof.loop()
        self.prof.print_stats(1)

        return action_loss_avg

    def get_dataset(self, data=None, envs=None, dataset_name=None, eval=False):
        # TODO: Maybe use eval here
        data_sources = []
        data_sources.append(aup.PROVIDER_TRAJECTORY_GROUND_TRUTH_STATIC)
        return SegmentDataset(data=data,
                              env_list=envs,
                              dataset_name=dataset_name,
                              aux_provider_names=data_sources,
                              segment_level=True)

Пример #2

Показать файл

Файл: model_pvn_stage2_actor_critic.py Проект: pianpwk/drif

class PVN_Stage2_ActorCritic(nn.Module):

    def __init__(self, run_name="", model_instance_name="only"):

        super(PVN_Stage2_ActorCritic, self).__init__()
        self.model_name = "pvn_stage2_ac"
        self.run_name = run_name
        self.instance_name = model_instance_name
        self.writer = LoggingSummaryWriter(log_dir=f"{get_logging_dir()}/runs/{run_name}/{self.instance_name}")

        self.params_s1 = get_current_parameters()["ModelPVN"]["Stage1"]
        self.params_s2 = get_current_parameters()["ModelPVN"]["Stage2"]
        self.params = get_current_parameters()["ModelPVN"]["ActorCritic"]

        self.oob = self.params_s1.get("clip_observability")
        self.ignore_struct = self.params.get("ignore_structured_input", False)

        self.prof = SimpleProfiler(torch_sync=PROFILE, print=PROFILE)
        self.iter = nn.Parameter(torch.zeros(1), requires_grad=False)

        self.tensor_store = KeyTensorStore()

        # Common
        # --------------------------------------------------------------------------------------------------------------
        # Wrap the Stage1 model so that it becomes invisible to PyTorch, get_parameters etc, and doesn't get optimized

        self.map_transform_w_to_r = MapTransformer(source_map_size=self.params_s1["global_map_size"],
                                                       dest_map_size=self.params_s1["local_map_size"],
                                                       world_size_px=self.params_s1["world_size_px"],
                                                       world_size_m=self.params_s1["world_size_m"])

        self.action_base = PVN_Stage2_RLBase(map_channels=self.params_s2["map_to_act_channels"],
                                             map_struct_channels=self.params_s2["map_structure_channels"],
                                             crop_size=self.params_s2["crop_size"],
                                             map_size=self.params_s1["local_map_size"],
                                             h1=self.params["h1"], h2=self.params["h2"],
                                             structure_h1=self.params["structure_h1"],
                                             obs_dim=self.params["obs_dim"],
                                             name="action")

        self.value_base = PVN_Stage2_RLBase(map_channels=self.params_s2["map_to_act_channels"],
                                            map_struct_channels=self.params_s2["map_structure_channels"],
                                            crop_size=self.params_s2["crop_size"],
                                            map_size=self.params_s1["local_map_size"],
                                            h1=self.params["h1"], h2=self.params["h2"],
                                            structure_h1=self.params["structure_h1"],
                                            obs_dim=self.params["obs_dim"],
                                            name="value")

        self.action_head = PVN_Stage2_ActionHead(h2=self.params["h2"])
        self.value_head = PVN_Stage2_ValueHead(h2=self.params["h2"])

        self.spatialsoftmax = SpatialSoftmax2d()
        self.batch_select = MapBatchSelect()
        self.gt_fill_missing = MapBatchFillMissing(
            self.params_s1["local_map_size"],
            self.params_s1["world_size_px"],
            self.params_s1["world_size_m"])

        self.env_id = None
        self.seg_idx = None
        self.prev_instruction = None
        self.seq_step = 0
        self.get_act_start_pose = None
        self.gt_labels = None

        # PPO interface:
        self.is_recurrent = False

    def make_picklable(self):
        self.writer = DummySummaryWriter()

    def init_weights(self):
        self.action_base.init_weights()
        self.value_base.init_weights()
        self.action_head.init_weights()
        self.value_head.init_weights()

    def get_iter(self):
        return int(self.iter.data[0])

    def inc_iter(self):
        self.iter += 1

    def reset(self):
        self.tensor_store.reset()
        self.gt_fill_missing.reset()

    def setEnvContext(self, context):
        print("Set env context to: " + str(context))
        self.env_id = context["env_id"]

    def cam_poses_from_states(self, states):
        cam_pos = states[:, 9:12]
        cam_rot = states[:, 12:16]
        pose = Pose(cam_pos, cam_rot)
        return pose

    def forward(self, v_dist_r, map_structure_r, eval=False, summarize=False):

        ACPROF = False
        prof = SimpleProfiler(print=ACPROF, torch_sync=ACPROF)

        if isinstance(v_dist_r, list):
            inners = torch.cat([m.inner_distribution for m in v_dist_r], dim=0)
            outers = torch.cat([m.outer_prob_mass for m in v_dist_r], dim=0)
            v_dist_r = Partial2DDistribution(inners, outers)
            map_structure_r = torch.stack(map_structure_r, dim=0)

        if self.ignore_struct:
            map_structure_r = torch.zeros_like(map_structure_r)

        prof.tick("ac:inputs")
        avec = self.action_base(v_dist_r, map_structure_r)
        prof.tick("ac:networks - action_base")
        vvec = self.value_base(v_dist_r, map_structure_r)
        prof.tick("ac:networks - value_base")
        action_scores = self.action_head(avec)
        prof.tick("ac:networks - action_head")
        value_pred = self.value_head(vvec)
        prof.tick("ac:networks - value head")

        xvel_mean = action_scores[:, 0]
        xvel_std = F.softplus(action_scores[:, 2])
        xvel_dist = FixedNormal(xvel_mean, xvel_std)

        yawrate_mean = action_scores[:, 3]
        yawrate_std = F.softplus(action_scores[:, 5])
        yawrate_dist = FixedNormal(yawrate_mean, yawrate_std)

        # Skew it towards not stopping in the beginning
        stop_logits = action_scores[:, 6]
        stop_dist = FixedBernoulli(logits=stop_logits)

        prof.tick("ac:distributions")
        prof.loop()
        prof.print_stats(1)

        return xvel_dist, yawrate_dist, stop_dist, value_pred

    def sample_action(self, xvel_dist, yawrate_dist, stop_dist):
        # Sample action from the predicted distributions
        xvel_sample = xvel_dist.sample()
        yawrate_sample = yawrate_dist.sample()
        stop = stop_dist.sample()
        return xvel_sample, yawrate_sample, stop

    def mode_action(self, xvel_dist, yawrate_dist, stop_dist):
        xvel_sample = xvel_dist.mode()
        yawrate_sample = yawrate_dist.mode()
        stop = stop_dist.mean
        return xvel_sample, yawrate_sample, stop

    def action_logprob(self, xvel_dist, yawrate_dist, stop_dist, xvel, yawrate, stop):
        xvel_logprob = xvel_dist.log_prob(xvel)
        yawrate_logprob = yawrate_dist.log_prob(yawrate)
        stop_logprob = stop_dist.log_prob(stop)
        return xvel_logprob, yawrate_logprob, stop_logprob

    def evaluate_actions(self, maps_r_batch, map_cov_r_batch, hidden_states_batch, masks_batch, actions_batch, global_step):
        xvel_dist, yawrate_dist, stop_dist, value_pred = self(maps_r_batch, map_cov_r_batch, summarize=True)

        # X-vel and yaw rate. Drop sideways velocity
        x_vel = actions_batch[:, 0]
        yawrate = actions_batch[:, 2]
        stops = actions_batch[:, 3]

        # Get action log-probabilities
        x_vel_log_probs, yawrate_log_probs, stop_log_probs = \
            self.action_logprob(xvel_dist, yawrate_dist, stop_dist, x_vel, yawrate, stops)

        x_vel_entropy = xvel_dist.entropy().mean()
        yawrate_entropy = yawrate_dist.entropy().mean()
        stop_entropy = stop_dist.entropy().mean()

        entropy = x_vel_entropy + yawrate_entropy + stop_entropy
        log_probs = x_vel_log_probs + yawrate_log_probs + stop_log_probs

        i = self.get_iter()
        if i % 17 == 0:
            stop_probs = stop_dist.probs.detach().cpu().mean()
            self.writer.add_scalar(f"{self.model_name}/stopprob", stop_probs.item(), i)

            if ACT_DIST == "normal":
                x_vel_mean = xvel_dist.mean.detach().cpu().mean(dim=0)
                x_vel_std = xvel_dist.stddev.detach().cpu().mean(dim=0)
                yawrate_mean = yawrate_dist.mean.detach().cpu().mean(dim=0)
                yawrate_std = yawrate_dist.stddev.detach().cpu().mean(dim=0)
                self.writer.add_scalar(f"{self.model_name}/x_vel_mean", x_vel_mean.item(), i)
                self.writer.add_scalar(f"{self.model_name}/x_vel_std", x_vel_std.item(), i)
                self.writer.add_scalar(f"{self.model_name}/yawrate_mean", yawrate_mean.item(), i)
                self.writer.add_scalar(f"{self.model_name}/yawrate_std", yawrate_std.item(), i)

            elif ACT_DIST == "beta":
                x_vel_alpha = xvel_dist.concentration0.detach().cpu().mean(dim=0)
                x_vel_beta = xvel_dist.concentration1.detach().cpu().mean(dim=0)
                yawrate_alpha = yawrate_dist.concentration0.detach().cpu().mean(dim=0)
                yawrate_beta = yawrate_dist.concentration1.detach().cpu().mean(dim=0)
                self.writer.add_scalar(f"{self.model_name}/vel_x_alpha", x_vel_alpha.item(), i)
                self.writer.add_scalar(f"{self.model_name}/vel_x_beta", x_vel_beta.item(), i)
                self.writer.add_scalar(f"{self.model_name}/yaw_rate_alpha", yawrate_alpha.item(), i)
                self.writer.add_scalar(f"{self.model_name}/yaw_rate_beta", yawrate_beta.item(), i)

        self.inc_iter()

        return value_pred, log_probs, entropy, None

    def cuda_var(self, tensor):
        return tensor.to(next(self.parameters()).device)

Пример #3

Показать файл

Файл: model_sm_rss_global.py Проект: dxsun/drif

class ModelTrajectoryTopDown(ModuleWithAuxiliaries):

    def __init__(self, run_name="", model_class=MODEL_RSS,
                 aux_class_features=False, aux_grounding_features=False,
                 aux_class_map=False, aux_grounding_map=False, aux_goal_map=False,
                 aux_lang=False, aux_traj=False, rot_noise=False, pos_noise=False):

        super(ModelTrajectoryTopDown, self).__init__()
        self.model_name = "sm_trajectory" + str(model_class)
        self.model_class = model_class
        print("Init model of type: ", str(model_class))
        self.run_name = run_name
        self.writer = LoggingSummaryWriter(log_dir="runs/" + run_name)

        self.params = get_current_parameters()["Model"]
        self.aux_weights = get_current_parameters()["AuxWeights"]

        self.prof = SimpleProfiler(torch_sync=PROFILE, print=PROFILE)
        self.iter = nn.Parameter(torch.zeros(1), requires_grad=False)

        # Auxiliary Objectives
        self.use_aux_class_features = aux_class_features
        self.use_aux_grounding_features = aux_grounding_features
        self.use_aux_class_on_map = aux_class_map
        self.use_aux_grounding_on_map = aux_grounding_map
        self.use_aux_goal_on_map = aux_goal_map
        self.use_aux_lang = aux_lang
        self.use_aux_traj_on_map = aux_traj
        self.use_aux_reg_map = self.aux_weights["regularize_map"]

        self.use_rot_noise = rot_noise
        self.use_pos_noise = pos_noise


        # Path-pred FPV model definition
        # --------------------------------------------------------------------------------------------------------------

        self.img_to_features_w = FPVToGlobalMap(
            source_map_size=self.params["global_map_size"], world_size_px=self.params["world_size_px"], world_size=self.params["world_size_m"],
            res_channels=self.params["resnet_channels"], map_channels=self.params["feature_channels"],
            img_w=self.params["img_w"], img_h=self.params["img_h"], img_dbg=IMG_DBG)

        self.map_accumulator_w = LeakyIntegratorGlobalMap(source_map_size=self.params["global_map_size"], world_in_map_size=self.params["world_size_px"])

        # Pre-process the accumulated map to do language grounding if necessary - in the world reference frame
        if self.use_aux_grounding_on_map and not self.use_aux_grounding_features:
            self.map_processor_a_w = LangFilterMapProcessor(
                source_map_size=self.params["global_map_size"],
                world_size=self.params["world_size_px"],
                embed_size=self.params["emb_size"],
                in_channels=self.params["feature_channels"],
                out_channels=self.params["relevance_channels"],
                spatial=False, cat_out=True)
        else:
            self.map_processor_a_w = IdentityMapProcessor(source_map_size=self.params["global_map_size"], world_size=self.params["world_size_px"])

        if self.use_aux_goal_on_map:
            self.map_processor_b_r = LangFilterMapProcessor(source_map_size=self.params["local_map_size"],
                                                            world_size=self.params["world_size_px"],
                                                            embed_size=self.params["emb_size"],
                                                            in_channels=self.params["relevance_channels"],
                                                            out_channels=self.params["goal_channels"],
                                                            spatial=True, cat_out=True)
        else:
            self.map_processor_b_r = IdentityMapProcessor(source_map_size=self.params["local_map_size"],
                                                          world_size=self.params["world_size_px"])

        pred_channels = self.params["goal_channels"] + self.params["relevance_channels"]

        # Common
        # --------------------------------------------------------------------------------------------------------------

        # Sentence Embedding
        self.sentence_embedding = SentenceEmbeddingSimple(
            self.params["word_emb_size"], self.params["emb_size"], self.params["emb_layers"])

        self.map_transform_w_to_r = MapTransformerBase(source_map_size=self.params["global_map_size"],
                                                       dest_map_size=self.params["local_map_size"],
                                                       world_size=self.params["world_size_px"])
        self.map_transform_r_to_w = MapTransformerBase(source_map_size=self.params["local_map_size"],
                                                       dest_map_size=self.params["global_map_size"],
                                                       world_size=self.params["world_size_px"])

        # Batch select is used to drop and forget semantic maps at those timestaps that we're not planning in
        self.batch_select = MapBatchSelect()
        # Since we only have path predictions for some timesteps (the ones not dropped above), we use this to fill
        # in the missing pieces by reorienting the past trajectory prediction into the frame of the current timestep
        self.map_batch_fill_missing = MapBatchFillMissing(self.params["local_map_size"], self.params["world_size_px"])

        # Passing true to freeze will freeze these weights regardless of whether they've been explicitly reloaded or not
        enable_weight_saving(self.sentence_embedding, "sentence_embedding", alwaysfreeze=False)

        # Output an action given the global semantic map
        if self.params["map_to_action"] == "downsample2":
            self.map_to_action = EgoMapToActionTriplet(
                map_channels=self.params["map_to_act_channels"],
                map_size=self.params["local_map_size"],
                other_features_size=self.params["emb_size"])

        elif self.params["map_to_action"] == "cropped":
            self.map_to_action = CroppedMapToActionTriplet(
                map_channels=self.params["map_to_act_channels"],
                map_size=self.params["local_map_size"],
                other_features_size=self.params["emb_size"]
            )

        # Don't freeze the trajectory to action weights, because it will be pre-trained during path-prediction training
        # and finetuned on all timesteps end-to-end
        enable_weight_saving(self.map_to_action, "map_to_action", alwaysfreeze=False, neverfreeze=True)

        # Auxiliary Objectives
        # --------------------------------------------------------------------------------------------------------------

        # We add all auxiliaries that are necessary. The first argument is the auxiliary name, followed by parameters,
        # followed by variable number of names of inputs. ModuleWithAuxiliaries will automatically collect these inputs
        # that have been saved with keep_auxiliary_input() during execution
        if aux_class_features:
            self.add_auxiliary(ClassAuxiliary2D("aux_class", None,  self.params["feature_channels"], self.params["num_landmarks"], self.params["dropout"],
                                                "fpv_features", "lm_pos_fpv", "lm_indices"))
        if aux_grounding_features:
            self.add_auxiliary(ClassAuxiliary2D("aux_ground", None, self.params["relevance_channels"], 2, self.params["dropout"],
                                                "fpv_features_g", "lm_pos_fpv", "lm_mentioned"))
        if aux_class_map:
            self.add_auxiliary(ClassAuxiliary2D("aux_class_map", self.params["world_size_px"], self.params["feature_channels"], self.params["num_landmarks"], self.params["dropout"],
                                                "map_s_w_select", "lm_pos_map_select", "lm_indices_select"))
        if aux_grounding_map:
            self.add_auxiliary(ClassAuxiliary2D("aux_grounding_map", self.params["world_size_px"], self.params["relevance_channels"], 2, self.params["dropout"],
                                                "map_a_w_select", "lm_pos_map_select", "lm_mentioned_select"))
        if aux_goal_map:
            self.add_auxiliary(GoalAuxiliary2D("aux_goal_map", self.params["goal_channels"], self.params["world_size_px"],
                                               "map_b_w", "goal_pos_map"))
        # RSS model uses templated data for landmark and side prediction
        if self.use_aux_lang and self.params["templates"]:
            self.add_auxiliary(ClassAuxiliary("aux_lang_lm", self.params["emb_size"], self.params["num_landmarks"], 1,
                                                "sentence_embed", "lm_mentioned_tplt"))
            self.add_auxiliary(ClassAuxiliary("aux_lang_side", self.params["emb_size"], self.params["num_sides"], 1,
                                                "sentence_embed", "side_mentioned_tplt"))
        # CoRL model uses alignment-model groundings
        elif self.use_aux_lang:
            # one output for each landmark, 2 classes per output. This is for finetuning, so use the embedding that's gonna be fine tuned
            self.add_auxiliary(ClassAuxiliary("aux_lang_lm_nl", self.params["emb_size"], 2, self.params["num_landmarks"],
                                                "sentence_embed", "lang_lm_mentioned"))
        if self.use_aux_traj_on_map:
            self.add_auxiliary(PathAuxiliary2D("aux_path", "map_b_r_select", "traj_gt_r_select"))

        if self.use_aux_reg_map:
            self.add_auxiliary(FeatureRegularizationAuxiliary2D("aux_regularize_features", None, "l1",
                                                                "map_s_w_select", "lm_pos_map_select"))

        self.goal_good_criterion = GoalPredictionGoodCriterion(ok_distance=3.2)
        self.goal_acc_meter = MovingAverageMeter(10)

        self.print_auxiliary_info()

        self.action_loss = ActionLoss()

        self.env_id = None
        self.prev_instruction = None
        self.seq_step = 0

    # TODO: Try to hide these in a superclass or something. They take up a lot of space:
    def cuda(self, device=None):
        ModuleWithAuxiliaries.cuda(self, device)
        self.sentence_embedding.cuda(device)
        self.map_accumulator_w.cuda(device)
        self.map_processor_a_w.cuda(device)
        self.map_processor_b_r.cuda(device)
        self.img_to_features_w.cuda(device)
        self.map_to_action.cuda(device)
        self.action_loss.cuda(device)
        self.map_batch_fill_missing.cuda(device)
        self.map_transform_w_to_r.cuda(device)
        self.map_transform_r_to_w.cuda(device)
        self.batch_select.cuda(device)
        self.map_batch_fill_missing.cuda(device)
        return self

    def get_iter(self):
        return int(self.iter.data[0])

    def inc_iter(self):
        self.iter += 1

    def init_weights(self):
        self.img_to_features_w.init_weights()
        self.map_accumulator_w.init_weights()
        self.sentence_embedding.init_weights()
        self.map_to_action.init_weights()
        self.map_processor_a_w.init_weights()
        self.map_processor_b_r.init_weights()

    def reset(self):
        # TODO: This is error prone. Create a class StatefulModule, iterate submodules and reset all stateful modules
        super(ModelTrajectoryTopDown, self).reset()
        self.sentence_embedding.reset()
        self.img_to_features_w.reset()
        self.map_accumulator_w.reset()
        self.map_processor_a_w.reset()
        self.map_processor_b_r.reset()
        self.map_transform_w_to_r.reset()
        self.map_transform_r_to_w.reset()
        self.map_batch_fill_missing.reset()
        self.prev_instruction = None

    def setEnvContext(self, context):
        print("Set env context to: " + str(context))
        self.env_id = context["env_id"]

    def save_viz(self, images_in):
        imsave(get_viz_dir() + "fpv_" + str(self.seq_step) + ".png", images_in)
        features_cam = self.get_inputs_batch("fpv_features")[-1, 0, 0:3]
        save_tensor_as_img(features_cam, "F_c", self.env_id)
        feature_map_torch = self.get_inputs_batch("f_w")[-1, 0, 0:3]
        save_tensor_as_img(feature_map_torch, "F_w", self.env_id)
        coverage_map_torch = self.get_inputs_batch("m_w")[-1, 0, 0:3]
        save_tensor_as_img(coverage_map_torch, "M_w", self.env_id)
        semantic_map_torch = self.get_inputs_batch("map_s_w_select")[-1, 0, 0:3]
        save_tensor_as_img(semantic_map_torch, "S_w", self.env_id)
        relmap_torch = self.get_inputs_batch("map_a_w_select")[-1, 0, 0:3]
        save_tensor_as_img(relmap_torch, "R_w", self.env_id)
        relmap_r_torch = self.get_inputs_batch("map_a_r_select")[-1, 0, 0:3]
        save_tensor_as_img(relmap_r_torch, "R_r", self.env_id)
        goalmap_torch = self.get_inputs_batch("map_b_w_select")[-1, 0, 0:3]
        save_tensor_as_img(goalmap_torch, "G_w", self.env_id)
        goalmap_r_torch = self.get_inputs_batch("map_b_r_select")[-1, 0, 0:3]
        save_tensor_as_img(goalmap_r_torch, "G_r", self.env_id)

        action = self.get_inputs_batch("action")[-1].data.cpu().squeeze().numpy()
        action_fname = self.get_viz_dir() + "action_" + str(self.seq_step) + ".png"
        Presenter().save_action(action, action_fname, "")

    def get_action(self, state, instruction):
        """
        Given a DroneState (from PomdpInterface) and instruction, produce a numpy 4D action (x, y, theta, pstop)
        :param state: DroneState object with the raw image from the simulator
        :param instruction: Tokenized instruction given the corpus
        #TODO: Absorb corpus within model
        :return:
        """
        # TODO: Simplify this
        self.eval()
        images_np_pure = state.image
        state_np = state.state

        #print("Act: " + debug_untokenize_instruction(instruction))

        images_np = standardize_image(images_np_pure)
        image_fpv = Variable(none_padded_seq_to_tensor([images_np]))
        state = Variable(none_padded_seq_to_tensor([state_np]))
        # Add the batch dimension

        first_step = True
        if instruction == self.prev_instruction:
            first_step = False
        self.prev_instruction = instruction

        img_in_t = image_fpv
        img_in_t.volatile = True

        instr_len = [len(instruction)] if instruction is not None else None
        instruction = torch.LongTensor(instruction).unsqueeze(0)
        instruction = cuda_var(instruction, self.is_cuda, self.cuda_device)

        state.volatile = True

        if self.is_cuda:
            if img_in_t is not None:
                img_in_t = img_in_t.cuda(self.cuda_device)
            state = state.cuda(self.cuda_device)

        step_enc = None
        plan_now = None

        self.seq_step += 1

        action = self(img_in_t, state, instruction, instr_len, plan=plan_now, pos_enc=step_enc)

        # Save materials for paper and presentation
        if False:
            self.save_viz(images_np_pure)

        output_action = action.squeeze().data.cpu().numpy()
        stop_prob = output_action[3]
        output_stop = 1 if stop_prob > 0.5 else 0
        output_action[3] = output_stop

        return output_action

    def deterministic_action(self, action_mean, action_std, stop_prob):
        batch_size = action_mean.size(0)
        action = Variable(empty_float_tensor((batch_size, 4), self.is_cuda, self.cuda_device))
        action[:, 0:3] = action_mean[:, 0:3]
        action[:, 3] = stop_prob
        return action

    def sample_action(self, action_mean, action_std, stop_prob):
        action = torch.normal(action_mean, action_std)
        stop = torch.bernoulli(stop_prob)
        return action, stop

    # This is called before beginning an execution sequence
    def start_sequence(self):
        self.seq_step = 0
        self.reset()
        print("RESETTED!")
        return

    # TODO: Move this somewhere and standardize
    def cam_poses_from_states(self, states):
        cam_pos = states[:, 9:12]
        cam_rot = states[:, 12:16]

        pos_variance = 0
        rot_variance = 0
        if self.use_pos_noise:
            pos_variance = self.params["noisy_pos_variance"]
        if self.use_rot_noise:
            rot_variance = self.params["noisy_rot_variance"]

        pose = Pose(cam_pos, cam_rot)
        if self.use_pos_noise or self.use_rot_noise:
            pose = get_noisy_poses_torch(pose, pos_variance, rot_variance, cuda=self.is_cuda, cuda_device=self.cuda_device)
        return pose

    def forward(self, images, states, instructions, instr_lengths, has_obs=None, plan=None, save_maps_only=False, pos_enc=None, noisy_poses=None):
        """
        :param images: BxCxHxW batch of images (observations)
        :param states: BxK batch of drone states
        :param instructions: BxM LongTensor where M is the maximum length of any instruction
        :param instr_lengths: list of len B of integers, indicating length of each instruction
        :param has_obs: list of booleans of length B indicating whether the given element in the sequence has an observation
        :param yield_semantic_maps: If true, will not compute actions (full model), but return the semantic maps that
            were built along the way in response to the images. This is ugly, but allows code reuse
        :return:
        """
        cam_poses = self.cam_poses_from_states(states)
        g_poses = None#[None for pose in cam_poses]
        self.prof.tick("out")

        #print("Trn: " + debug_untokenize_instruction(instructions[0].data[:instr_lengths[0]]))

        # Calculate the instruction embedding
        if instructions is not None:
            # TODO: Take batch of instructions and their lengths, return batch of embeddings. Store the last one as internal state
            sent_embeddings = self.sentence_embedding(instructions, instr_lengths)
            self.keep_inputs("sentence_embed", sent_embeddings)
        else:
            sent_embeddings = self.sentence_embedding.get()

        self.prof.tick("embed")

        # Extract and project features onto the egocentric frame for each image
        features_w, coverages_w = self.img_to_features_w(images, cam_poses, sent_embeddings, self, show="")
        self.prof.tick("img_to_map_frame")
        self.keep_inputs("f_w", features_w)
        self.keep_inputs("m_w", coverages_w)

        # Accumulate the egocentric features in a global map
        maps_w = self.map_accumulator_w(features_w, coverages_w, add_mask=has_obs, show="acc" if IMG_DBG else "")
        map_poses_w = g_poses

        # TODO: Maybe keep maps_w if necessary
        #self.keep_inputs("map_sm_local", maps_m)
        self.prof.tick("map_accumulate")

        # Throw away those timesteps that don't correspond to planning timesteps
        maps_w_select, map_poses_w_select, cam_poses_select, noisy_poses_select, _, sent_embeddings_select, pos_enc = \
            self.batch_select(maps_w, map_poses_w, cam_poses, noisy_poses, None, sent_embeddings, pos_enc, plan)

        # Only process the maps on planning timesteps
        if len(maps_w_select) > 0:
            self.keep_inputs("map_s_w_select", maps_w_select)
            self.prof.tick("batch_select")

            # Process the map via the two map_procesors
            # Do grounding of objects in the map chosen to do so
            maps_w_select, map_poses_w_select = self.map_processor_a_w(maps_w_select, sent_embeddings_select, map_poses_w_select, show="")
            self.keep_inputs("map_a_w_select", maps_w_select)

            self.prof.tick("map_proc_gnd")

            self.map_transform_w_to_r.set_maps(maps_w_select, map_poses_w_select)
            maps_m_select, map_poses_m_select = self.map_transform_w_to_r.get_maps(cam_poses_select)

            self.keep_inputs("map_a_r_select", maps_w_select)
            self.prof.tick("transform_w_to_r")

            self.keep_inputs("map_a_r_perturbed_select", maps_m_select)

            self.prof.tick("map_perturb")

            # Include positional encoding for path prediction
            if pos_enc is not None:
                sent_embeddings_pp = torch.cat([sent_embeddings_select, pos_enc.unsqueeze(1)], dim=1)
            else:
                sent_embeddings_pp = sent_embeddings_select

            # Process the map via the two map_procesors (e.g. predict the trajectory that we'll be taking)
            maps_m_select, map_poses_m_select = self.map_processor_b_r(maps_m_select, sent_embeddings_pp, map_poses_m_select)

            self.keep_inputs("map_b_r_select", maps_m_select)

            if True:
                self.map_transform_r_to_w.set_maps(maps_m_select, map_poses_m_select)
                maps_b_w_select, _ = self.map_transform_r_to_w.get_maps(None)
                self.keep_inputs("map_b_w_select", maps_b_w_select)

            self.prof.tick("map_proc_b")

        else:
            maps_m_select = None

        maps_m, map_poses_m = self.map_batch_fill_missing(maps_m_select, cam_poses, plan, show="")
        self.keep_inputs("map_b_r", maps_m)
        self.prof.tick("map_fill_missing")

        # Keep global maps for auxiliary objectives if necessary
        if self.input_required("map_b_w"):
            maps_b, _ = self.map_processor_b_r.get_maps(g_poses)
            self.keep_inputs("map_b_w", maps_b)

        self.prof.tick("keep_global_maps")

        if run_metadata.IS_ROLLOUT:
            pass
            #Presenter().show_image(maps_m.data[0, 0:3], "plan_map_now", torch=True, scale=4, waitkey=1)
            #Presenter().show_image(maps_w.data[0, 0:3], "sm_map_now", torch=True, scale=4, waitkey=1)
        self.prof.tick("viz")

        # Output the final action given the processed map
        action_pred = self.map_to_action(maps_m, sent_embeddings)
        out_action = self.deterministic_action(action_pred[:, 0:3], None, action_pred[:, 3])

        self.keep_inputs("action", out_action)
        self.prof.tick("map_to_action")

        return out_action

    # TODO: The below two methods seem to do the same thing
    def maybe_cuda(self, tensor):
        if self.is_cuda:
            return tensor.cuda()
        else:
            return tensor

    def cuda_var(self, tensor):
        return cuda_var(tensor, self.is_cuda, self.cuda_device)

    # Forward pass for training (with batch optimizations
    def sup_loss_on_batch(self, batch, eval):
        self.prof.tick("out")

        action_loss_total = Variable(empty_float_tensor([1], self.is_cuda, self.cuda_device))

        if batch is None:
            print("Skipping None Batch")
            return action_loss_total

        images = self.maybe_cuda(batch["images"])

        instructions = self.maybe_cuda(batch["instr"])
        instr_lengths = batch["instr_len"]
        states = self.maybe_cuda(batch["states"])
        actions = self.maybe_cuda(batch["actions"])

        # Auxiliary labels
        lm_pos_fpv = batch["lm_pos_fpv"]
        lm_pos_map = batch["lm_pos_map"]
        lm_indices = batch["lm_indices"]
        goal_pos_map = batch["goal_loc"]

        TEMPLATES = True
        if TEMPLATES:
            lm_mentioned_tplt = batch["lm_mentioned_tplt"]
            side_mentioned_tplt = batch["side_mentioned_tplt"]
        else:
            lm_mentioned = batch["lm_mentioned"]
            lang_lm_mentioned = batch["lang_lm_mentioned"]

        # stops = self.maybe_cuda(batch["stops"])
        masks = self.maybe_cuda(batch["masks"])
        # This is the first-timestep metadata
        metadata = batch["md"]

        seq_len = images.size(1)
        batch_size = images.size(0)
        count = 0
        correct_goal_count = 0
        goal_count = 0

        # Loop thru batch
        for b in range(batch_size):
            seg_idx = -1

            self.reset()

            self.prof.tick("out")
            b_seq_len = len_until_nones(metadata[b])

            # TODO: Generalize this
            # Slice the data according to the sequence length
            b_metadata = metadata[b][:b_seq_len]
            b_images = images[b][:b_seq_len]
            b_instructions = instructions[b][:b_seq_len]
            b_instr_len = instr_lengths[b][:b_seq_len]
            b_states = states[b][:b_seq_len]
            b_actions = actions[b][:b_seq_len]
            b_lm_pos_fpv = lm_pos_fpv[b][:b_seq_len]
            b_lm_pos_map = lm_pos_map[b][:b_seq_len]
            b_lm_indices = lm_indices[b][:b_seq_len]
            b_goal_pos = goal_pos_map[b][:b_seq_len]
            if not TEMPLATES:
                b_lang_lm_mentioned = lang_lm_mentioned[b][:b_seq_len]
                b_lm_mentioned = lm_mentioned[b][:b_seq_len]

            b_lm_pos_map = [self.cuda_var(s.long()) if s is not None else None for s in b_lm_pos_map]
            b_lm_pos_fpv = [self.cuda_var((s / RESNET_FACTOR).long()) if s is not None else None for s in b_lm_pos_fpv]
            b_lm_indices = [self.cuda_var(s) if s is not None else None for s in b_lm_indices]
            b_goal_pos = self.cuda_var(b_goal_pos)
            if not TEMPLATES:
                b_lang_lm_mentioned = self.cuda_var(b_lang_lm_mentioned)
                b_lm_mentioned = [self.cuda_var(s) if s is not None else None for s in b_lm_mentioned]

            # TODO: Figure out how to keep these properly. Perhaps as a whole batch is best
            # TODO: Introduce a key-value store (encapsulate instead of inherit)
            self.keep_inputs("lm_pos_fpv", b_lm_pos_fpv)
            self.keep_inputs("lm_pos_map", b_lm_pos_map)
            self.keep_inputs("lm_indices", b_lm_indices)
            self.keep_inputs("goal_pos_map", b_goal_pos)
            if not TEMPLATES:
                self.keep_inputs("lang_lm_mentioned", b_lang_lm_mentioned)
                self.keep_inputs("lm_mentioned", b_lm_mentioned)

            # TODO: Abstract all of these if-elses in a modular way once we know which ones are necessary
            if TEMPLATES:
                b_lm_mentioned_tplt = lm_mentioned_tplt[b][:b_seq_len]
                b_side_mentioned_tplt = side_mentioned_tplt[b][:b_seq_len]
                b_side_mentioned_tplt = self.cuda_var(b_side_mentioned_tplt)
                b_lm_mentioned_tplt = self.cuda_var(b_lm_mentioned_tplt)
                self.keep_inputs("lm_mentioned_tplt", b_lm_mentioned_tplt)
                self.keep_inputs("side_mentioned_tplt", b_side_mentioned_tplt)

                b_lm_mentioned = b_lm_mentioned_tplt


            b_obs_mask = [True for _ in range(b_seq_len)]
            b_plan_mask = [True for _ in range(b_seq_len)]
            b_plan_mask_t_cpu = torch.Tensor(b_plan_mask) == True
            b_plan_mask_t = self.maybe_cuda(b_plan_mask_t_cpu)
            b_pos_enc = None

            # ----------------------------------------------------------------------------
            # Optional Auxiliary Inputs
            # ----------------------------------------------------------------------------
            if self.input_required("lm_pos_map_select"):
                b_lm_pos_map_select = [lm_pos for i,lm_pos in enumerate(b_lm_pos_map) if b_plan_mask[i]]
                self.keep_inputs("lm_pos_map_select", b_lm_pos_map_select)
            if self.input_required("lm_indices_select"):
                b_lm_indices_select = [lm_idx for i,lm_idx in enumerate(b_lm_indices) if b_plan_mask[i]]
                self.keep_inputs("lm_indices_select", b_lm_indices_select)
            if self.input_required("lm_mentioned_select"):
                b_lm_mentioned_select = [lm_m for i,lm_m in enumerate(b_lm_mentioned) if b_plan_mask[i]]
                self.keep_inputs("lm_mentioned_select", b_lm_mentioned_select)

            # ----------------------------------------------------------------------------

            self.prof.tick("inputs")

            actions = self(b_images, b_states, b_instructions, b_instr_len,
                           has_obs=b_obs_mask, plan=b_plan_mask, pos_enc=b_pos_enc)

            action_losses, _ = self.action_loss(b_actions, actions, batchreduce=False)

            self.prof.tick("call")

            action_losses = self.action_loss.batch_reduce_loss(action_losses)
            action_loss = self.action_loss.reduce_loss(action_losses)

            action_loss_total = action_loss
            count += b_seq_len

            self.prof.tick("loss")

        action_loss_avg = action_loss_total / (count + 1e-9)

        self.prof.tick("out")

        # Doing this in the end (outside of se
        aux_losses = self.calculate_aux_loss(reduce_average=True)
        aux_loss = self.combine_aux_losses(aux_losses, self.aux_weights)

        prefix = self.model_name + ("/eval" if eval else "/train")

        self.writer.add_dict(prefix, get_current_meters(), self.get_iter())
        self.writer.add_dict(prefix, aux_losses, self.get_iter())
        self.writer.add_scalar(prefix + "/action_loss", action_loss_avg.data.cpu()[0], self.get_iter())
        # TODO: Log value here
        self.writer.add_scalar(prefix + "/goal_accuracy", self.goal_acc_meter.get(), self.get_iter())

        self.prof.tick("auxiliaries")

        total_loss = action_loss_avg + aux_loss

        self.inc_iter()

        self.prof.tick("summaries")
        self.prof.loop()
        self.prof.print_stats(1)

        return total_loss

    def get_dataset(self, data=None, envs=None, dataset_name=None, eval=False):
        # TODO: Maybe use eval here
        #if self.fpv:
        data_sources = []
        # If we're running auxiliary objectives, we need to include the data sources for the auxiliary labels
        #if self.use_aux_class_features or self.use_aux_class_on_map or self.use_aux_grounding_features or self.use_aux_grounding_on_map:
        #if self.use_aux_goal_on_map:
        data_sources.append(aup.PROVIDER_LM_POS_DATA)
        data_sources.append(aup.PROVIDER_GOAL_POS)
        #data_sources.append(aup.PROVIDER_LANDMARKS_MENTIONED)
        data_sources.append(aup.PROVIDER_LANG_TEMPLATE)

        #if self.use_rot_noise or self.use_pos_noise:
        #    data_sources.append(aup.PROVIDER_POSE_NOISE)

        return SegmentDataset(data=data, env_list=envs, dataset_name=dataset_name, aux_provider_names=data_sources, segment_level=True)

Пример #4

Показать файл

class PVN_Stage1_Bidomain_Original(nn.Module):
    def __init__(self, run_name="", domain="sim"):

        super(PVN_Stage1_Bidomain_Original, self).__init__()
        self.model_name = "pvn_stage1"
        self.run_name = run_name
        self.domain = domain
        self.writer = LoggingSummaryWriter(
            log_dir=f"{get_logging_dir()}/runs/{run_name}/{self.domain}")
        #self.writer = DummySummaryWriter()

        self.root_params = get_current_parameters()["ModelPVN"]
        self.params = self.root_params["Stage1"]
        self.use_aux = self.root_params["UseAux"]
        self.aux_weights = self.root_params["AuxWeights"]

        if self.params.get("weight_override"):
            aux_weights_override_name = "AuxWeightsRealOverride" if self.domain == "real" else "AuxWeightsSimOverride"
            aux_weights_override = self.root_params.get(
                aux_weights_override_name)
            if aux_weights_override:
                print(
                    f"Overriding auxiliary weights for domain: {self.domain}")
                self.aux_weights = dict_merge(self.aux_weights,
                                              aux_weights_override)

        self.prof = SimpleProfiler(torch_sync=PROFILE, print=PROFILE)
        self.iter = nn.Parameter(torch.zeros(1), requires_grad=False)

        self.tensor_store = KeyTensorStore()
        self.losses = AuxiliaryLosses()

        # Auxiliary Objectives
        self.do_perturb_maps = self.params["perturb_maps"]
        print("Perturbing maps: ", self.do_perturb_maps)

        # Path-pred FPV model definition
        # --------------------------------------------------------------------------------------------------------------

        self.num_feature_channels = self.params[
            "feature_channels"]  # + params["relevance_channels"]
        self.num_map_channels = self.params["pathpred_in_channels"]

        self.img_to_features_w = FPVToGlobalMap(
            source_map_size=self.params["global_map_size"],
            world_size_px=self.params["world_size_px"],
            world_size_m=self.params["world_size_m"],
            res_channels=self.params["resnet_channels"],
            map_channels=self.params["feature_channels"],
            img_w=self.params["img_w"],
            img_h=self.params["img_h"],
            cam_h_fov=self.params["cam_h_fov"],
            domain=domain,
            img_dbg=IMG_DBG)

        self.map_accumulator_w = LeakyIntegratorGlobalMap(
            source_map_size=self.params["global_map_size"],
            world_size_px=self.params["world_size_px"],
            world_size_m=self.params["world_size_m"])

        self.add_init_pos_to_coverage = AddDroneInitPosToCoverage(
            world_size_px=self.params["world_size_px"],
            world_size_m=self.params["world_size_m"],
            map_size_px=self.params["local_map_size"])

        # Pre-process the accumulated map to do language grounding if necessary - in the world reference frame
        self.map_processor_grounding = LangFilterMapProcessor(
            embed_size=self.params["emb_size"],
            in_channels=self.params["feature_channels"],
            out_channels=self.params["relevance_channels"],
            spatial=False,
            cat_out=False)

        ratio_prior_channels = self.params["feature_channels"]

        # Process the global accumulated map
        self.path_predictor_lingunet = RatioPathPredictor(
            self.params["lingunet"],
            prior_channels_in=self.params["feature_channels"],
            posterior_channels_in=self.params["pathpred_in_channels"],
            dual_head=self.params["predict_confidence"],
            compute_prior=self.params["compute_prior"],
            use_prior=self.params["use_prior_only"],
            oob=self.params["clip_observability"])

        print("UNet Channels: " + str(self.num_map_channels))
        print("Feature Channels: " + str(self.num_feature_channels))

        # TODO:O Verify that config has the same randomization parameters (yaw, pos, etc)
        self.second_transform = self.do_perturb_maps or self.params[
            "predict_in_start_frame"]

        # Sentence Embedding
        self.sentence_embedding = SentenceEmbeddingSimple(
            self.params["word_emb_size"], self.params["emb_size"],
            self.params["emb_layers"], self.params["emb_dropout"])

        self.map_transform_local_to_local = MapTransformer(
            source_map_size=self.params["local_map_size"],
            dest_map_size=self.params["local_map_size"],
            world_size_px=self.params["world_size_px"],
            world_size_m=self.params["world_size_m"])

        self.map_transform_global_to_local = MapTransformer(
            source_map_size=self.params["global_map_size"],
            dest_map_size=self.params["local_map_size"],
            world_size_px=self.params["world_size_px"],
            world_size_m=self.params["world_size_m"])

        self.map_transform_local_to_global = MapTransformer(
            source_map_size=self.params["local_map_size"],
            dest_map_size=self.params["global_map_size"],
            world_size_px=self.params["world_size_px"],
            world_size_m=self.params["world_size_m"])

        self.map_transform_s_to_p = self.map_transform_local_to_local
        self.map_transform_w_to_s = self.map_transform_global_to_local
        self.map_transform_w_to_r = self.map_transform_global_to_local
        self.map_transform_r_to_s = self.map_transform_local_to_local
        self.map_transform_r_to_w = self.map_transform_local_to_global
        self.map_transform_p_to_w = self.map_transform_local_to_global
        self.map_transform_p_to_r = self.map_transform_local_to_local

        # Batch select is used to drop and forget semantic maps at those timestaps that we're not planning in
        self.batch_select = MapBatchSelect()
        # Since we only have path predictions for some timesteps (the ones not dropped above), we use this to fill
        # in the missing pieces by reorienting the past trajectory prediction into the frame of the current timestep
        self.map_batch_fill_missing = MapBatchFillMissing(
            self.params["local_map_size"], self.params["world_size_px"],
            self.params["world_size_m"])

        self.spatialsoftmax = SpatialSoftmax2d()
        self.visitation_softmax = VisitationSoftmax()

        #TODO:O Use CroppedMapToActionTriplet in Wrapper as Stage2
        # Auxiliary Objectives
        # --------------------------------------------------------------------------------------------------------------

        # We add all auxiliaries that are necessary. The first argument is the auxiliary name, followed by parameters,
        # followed by variable number of names of inputs. ModuleWithAuxiliaries will automatically collect these inputs
        # that have been saved with keep_auxiliary_input() during execution
        if self.use_aux["class_features"]:
            self.losses.add_auxiliary(
                ClassAuxiliary2D("class_features",
                                 self.params["feature_channels"],
                                 self.params["num_landmarks"], 0,
                                 "fpv_features", "lm_pos_fpv", "lm_indices"))
        if self.use_aux["grounding_features"]:
            self.losses.add_auxiliary(
                ClassAuxiliary2D("grounding_features",
                                 self.params["relevance_channels"], 2, 0,
                                 "fpv_features_g", "lm_pos_fpv",
                                 "lm_mentioned"))
        if self.use_aux["class_map"]:
            self.losses.add_auxiliary(
                ClassAuxiliary2D("class_map", self.params["feature_channels"],
                                 self.params["num_landmarks"], 0, "S_W_select",
                                 "lm_pos_map_select", "lm_indices_select"))
        if self.use_aux["grounding_map"]:
            self.losses.add_auxiliary(
                ClassAuxiliary2D("grounding_map",
                                 self.params["relevance_channels"], 2, 0,
                                 "R_W_select", "lm_pos_map_select",
                                 "lm_mentioned_select"))
        # CoRL model uses alignment-model groundings
        if self.use_aux["lang"]:
            # one output for each landmark, 2 classes per output. This is for finetuning, so use the embedding that's gonna be fine tuned
            self.losses.add_auxiliary(
                ClassAuxiliary("lang", self.params["emb_size"], 2,
                               self.params["num_landmarks"], "sentence_embed",
                               "lang_lm_mentioned"))

        if self.use_aux["regularize_map"]:
            self.losses.add_auxiliary(
                FeatureRegularizationAuxiliary2D("regularize_map", "l1",
                                                 "S_W_select"))

        lossfunc = self.params["path_loss_function"]
        if self.params["clip_observability"]:
            self.losses.add_auxiliary(
                PathAuxiliary2D("visitation_dist", lossfunc,
                                self.params["clip_observability"],
                                "log_v_dist_s_select",
                                "v_dist_s_ground_truth_select", "SM_S_select"))
        else:
            self.losses.add_auxiliary(
                PathAuxiliary2D("visitation_dist", lossfunc,
                                self.params["clip_observability"],
                                "log_v_dist_s_select",
                                "v_dist_s_ground_truth_select", "SM_S_select"))

        self.goal_good_criterion = GoalPredictionGoodCriterion(
            ok_distance=self.params["world_size_px"] * 0.1)
        self.goal_acc_meter = MovingAverageMeter(10)
        self.visible_goal_acc_meter = MovingAverageMeter(10)
        self.invisible_goal_acc_meter = MovingAverageMeter(10)
        self.visible_goal_frac_meter = MovingAverageMeter(10)

        self.losses.print_auxiliary_info()

        self.total_goals = 0
        self.correct_goals = 0

        self.env_id = None
        self.env_img = None
        self.seg_idx = None
        self.prev_instruction = None
        self.seq_step = 0

        self.should_save_path_overlays = False

    def make_picklable(self):
        self.writer = DummySummaryWriter()

    def steal_cross_domain_modules(self, other_self):
        self.iter = other_self.iter
        self.losses = other_self.losses
        self.sentence_embedding = other_self.sentence_embedding
        self.map_accumulator_w = other_self.map_accumulator_w
        self.map_processor_grounding = other_self.map_processor_grounding
        self.path_predictor_lingunet = other_self.path_predictor_lingunet
        #self.img_to_features_w = other_self.img_to_features_w

    def both_domain_parameters(self, other_self):
        # This function iterates and yields parameters from this module and the other module, but does not yield
        # shared parameters twice.
        # First yield all of the other module's parameters
        for p in other_self.parameters():
            yield p
        # Then yield all the parameters from the this module that are not shared with the other one
        for p in self.img_to_features_w.parameters():
            yield p
        return

    def get_iter(self):
        return int(self.iter.data[0])

    def inc_iter(self):
        self.iter += 1

    def load_state_dict(self, state_dict, strict=True):
        super(PVN_Stage1_Bidomain_Original,
              self).load_state_dict(state_dict, strict)

    def init_weights(self):
        self.img_to_features_w.init_weights()
        self.map_accumulator_w.init_weights()
        self.sentence_embedding.init_weights()
        self.map_processor_grounding.init_weights()
        self.path_predictor_lingunet.init_weights()

    def reset(self):
        # TODO: This is error prone. Create a class StatefulModule, iterate submodules and reset all stateful modules
        self.tensor_store.reset()
        self.sentence_embedding.reset()
        self.img_to_features_w.reset()
        self.map_accumulator_w.reset()
        self.map_batch_fill_missing.reset()
        self.prev_instruction = None

    def setEnvContext(self, context):
        print("Set env context to: " + str(context))
        self.env_id = context["env_id"]
        self.env_img = env.load_env_img(self.env_id, 256, 256)
        self.env_img = self.env_img[:, :, [2, 1, 0]]

    def set_save_path_overlays(self, save_path_overlays):
        self.should_save_path_overlays = save_path_overlays

    #TODO:O Figure out what to do with save_ground_truth_overlays

    def print_metrics(self):
        print(f"Model {self.model_name}:{self.domain} metrics:")
        print(
            f"   Goal accuracy: {float(self.correct_goals) / self.total_goals}"
        )

    def goal_visible(self, masks, goal_pos):
        goal_mask = masks.detach()[0, 0, :, :]
        goal_pos = goal_pos[0].long().detach()
        visible = bool(
            (goal_mask[goal_pos[0], goal_pos[1]] > 0.5).detach().cpu().item())
        return visible

    # This is called before beginning an execution sequence
    def start_sequence(self):
        self.seq_step = 0
        self.reset()
        return

    def cam_poses_from_states(self, states):
        cam_pos = states[:, 9:12]
        cam_rot = states[:, 12:16]
        pose = Pose(cam_pos, cam_rot)
        return pose

    def forward(self,
                images,
                states,
                instructions,
                instr_lengths,
                plan=None,
                noisy_start_poses=None,
                start_poses=None,
                firstseg=None,
                select_only=True,
                halfway=False,
                grad_noise=False,
                rl=False):
        """
        :param images: BxCxHxW batch of images (observations)
        :param states: BxK batch of drone states
        :param instructions: BxM LongTensor where M is the maximum length of any instruction
        :param instr_lengths: list of len B of integers, indicating length of each instruction
        :param plan: list of B booleans indicating True for timesteps where we do planning and False otherwise
        :param noisy_start_poses: list of noisy start poses (for data-augmentation). These define the path-prediction frame at training time
        :param start_poses: list of drone start poses (these should be equal in practice)
        :param firstseg: list of booleans indicating True if a new segment starts at that timestep
        :param select_only: boolean indicating whether to only compute visitation distributions for planning timesteps (default True)
        :param rl: boolean indicating if we're doing reinforcement learning. If yes, output more than the visitation distribution
        :return:
        """
        cam_poses = self.cam_poses_from_states(states)
        g_poses = None  # None pose is a placeholder for the canonical global pose.
        self.prof.tick("out")

        self.tensor_store.keep_inputs("fpv", images)

        # Calculate the instruction embedding
        if instructions is not None:
            # TODO: Take batch of instructions and their lengths, return batch of embeddings. Store the last one as internal state
            # TODO: There's an assumption here that there's only a single instruction in the batch and it doesn't change
            # UNCOMMENT THE BELOW LINE TO REVERT BACK TO GENERAL CASE OF SEPARATE INSTRUCTION PER STEP
            if self.params["ignore_instruction"]:
                # If we're ignoring instructions, just feed in an instruction that consists of a single zero-token
                sent_embeddings = self.sentence_embedding(
                    torch.zeros_like(instructions[0:1, 0:1]),
                    torch.ones_like(instr_lengths[0:1]))
            else:
                sent_embeddings = self.sentence_embedding(
                    instructions[0:1], instr_lengths[0:1])
            self.tensor_store.keep_inputs("sentence_embed", sent_embeddings)
        else:
            sent_embeddings = self.sentence_embedding.get()

        self.prof.tick("embed")

        # Extract and project features onto the egocentric frame for each image
        F_W, M_W = self.img_to_features_w(images,
                                          cam_poses,
                                          sent_embeddings,
                                          self.tensor_store,
                                          show="",
                                          halfway=halfway)

        # For training the critic, this is as far as we need to poceed with the computation.
        # self.img_to_features_w has stored computed feature maps inside the tensor store, which will then be retrieved by the critic
        if halfway == True:  # Warning: halfway must be True not truthy
            return None, None

        self.tensor_store.keep_inputs("F_w", F_W)
        self.tensor_store.keep_inputs("M_w", M_W)
        self.prof.tick("img_to_map_frame")

        # Accumulate the egocentric features in a global map
        reset_mask = firstseg if self.params["clear_history"] else None

        # Consider the space very near the drone and right under it as observed - draw ones on the observability mask
        # If we treat that space as unobserved, then there's going to be a gap in the visitation distribution, which
        # makes training with RL more difficult, as there is no reward feedback if the drone doesn't cross that gap.
        if self.params.get("cover_init_pos", False):
            StartMasks_R = self.add_init_pos_to_coverage.get_init_pos_masks(
                M_W.shape[0], M_W.device)
            StartMasks_W, _ = self.map_transform_r_to_w(
                StartMasks_R, cam_poses, None)
            M_W = self.add_init_pos_to_coverage(M_W, StartMasks_W)

        S_W, SM_W = self.map_accumulator_w(F_W,
                                           M_W,
                                           reset_mask=reset_mask,
                                           show="acc" if IMG_DBG else "")
        S_W_poses = g_poses
        self.prof.tick("map_accumulate")

        # If we're training Stage 2 with imitation learning from ground truth visitation distributions, we want to
        # compute observability masks with the same code that's used in Stage 1 to avoid mistakes.
        if halfway == "observability":
            map_uncoverage_w = 1 - SM_W
            return map_uncoverage_w

        # Throw away those timesteps that don't correspond to planning timesteps
        S_W_select, SM_W_select, S_W_poses_select, cam_poses_select, noisy_start_poses_select, start_poses_select, sent_embeddings_select = \
            self.batch_select(S_W, SM_W, S_W_poses, cam_poses, noisy_start_poses, start_poses, sent_embeddings, plan)

        #maps_m_prior_select, maps_m_posterior_select = None, None

        # Only process the maps on plannieng timesteps
        if len(S_W_select) == 0:
            return None

        self.tensor_store.keep_inputs("S_W_select", S_W_select)
        self.prof.tick("batch_select")

        # Process the map via the two map_procesors
        # Do grounding of objects in the map chosen to do so
        if self.use_aux["grounding_map"]:
            R_W_select, RS_W_poses_select = self.map_processor_grounding(
                S_W_select, sent_embeddings_select, S_W_poses_select, show="")
            self.tensor_store.keep_inputs("R_W_select", R_W_select)
            self.prof.tick("map_proc_gnd")
            # Concatenate grounding map and semantic map along channel dimension
            RS_W_select = torch.cat([S_W_select, R_W_select], 1)

        else:
            RS_W_select = S_W_select
            RS_W_poses_select = S_W_poses_select

        s_poses_select = start_poses_select if self.params[
            "predict_in_start_frame"] else cam_poses_select
        RS_S_select, RS_S_poses_select = self.map_transform_w_to_s(
            RS_W_select, RS_W_poses_select, s_poses_select)
        SM_S_select, SM_S_poses_select = self.map_transform_w_to_s(
            SM_W_select, S_W_poses_select, s_poses_select)

        assert SM_S_poses_select == RS_S_poses_select, "Masks and maps should have the same pose in start frame"

        self.tensor_store.keep_inputs("RS_S_select", RS_S_select)
        self.tensor_store.keep_inputs("SM_S_select", SM_S_select)
        self.prof.tick("transform_w_to_s")

        # Data augmentation for trajectory prediction
        map_poses_clean_select = None
        # TODO: Figure out if we can just swap out start poses for noisy poses and get rid of separate noisy poses
        if self.do_perturb_maps:
            assert noisy_start_poses_select is not None, "Noisy poses must be provided if we're perturbing maps"
            RS_P_select, RS_P_poses_select = self.map_transform_s_to_p(
                RS_S_select, RS_S_poses_select, noisy_start_poses_select)
        else:
            RS_P_select, RS_P_poses_select = RS_S_select, RS_S_poses_select

        self.tensor_store.keep_inputs("RS_perturbed_select", RS_P_select)
        self.prof.tick("map_perturb")

        sent_embeddings_pp = sent_embeddings_select

        # Run lingunet on the map to predict visitation distribution scores (pre-softmax)
        # ---------
        log_v_dist_p_select, v_dist_p_poses_select = self.path_predictor_lingunet(
            RS_P_select,
            sent_embeddings_pp,
            RS_P_poses_select,
            tensor_store=self.tensor_store)
        # ---------

        self.prof.tick("pathpred")

        # TODO: Shouldn't we be transforming probability distributions instead of scores? Otherwise OOB space will have weird values
        # Transform distributions back to world reference frame and keep them (these are the model outputs)
        both_inner_w, v_dist_w_poses_select = self.map_transform_p_to_w(
            log_v_dist_p_select.inner_distribution, v_dist_p_poses_select,
            None)
        log_v_dist_w_select = Partial2DDistribution(
            both_inner_w, log_v_dist_p_select.outer_prob_mass)
        self.tensor_store.keep_inputs("log_v_dist_w_select",
                                      log_v_dist_w_select)

        # Transform distributions back to start reference frame and keep them (for auxiliary objective)
        both_inner_s, v_dist_s_poses_select = self.map_transform_p_to_r(
            log_v_dist_p_select.inner_distribution, v_dist_p_poses_select,
            start_poses_select)
        log_v_dist_s_select = Partial2DDistribution(
            both_inner_s, log_v_dist_p_select.outer_prob_mass)
        self.tensor_store.keep_inputs("log_v_dist_s_select",
                                      log_v_dist_s_select)

        # prime number will mean that it will alternate between sim and real
        if self.get_iter() % 23 == 0:
            lsfm = SpatialSoftmax2d()
            for i in range(S_W_select.shape[0]):
                Presenter().show_image(S_W_select.detach().cpu()[i, 0:3],
                                       f"{self.domain}_s_w_select",
                                       scale=4,
                                       waitkey=1)
                Presenter().show_image(lsfm(
                    log_v_dist_s_select.inner_distribution).detach().cpu()[i],
                                       f"{self.domain}_v_dist_s_select",
                                       scale=4,
                                       waitkey=1)
                Presenter().show_image(lsfm(
                    log_v_dist_p_select.inner_distribution).detach().cpu()[i],
                                       f"{self.domain}_v_dist_p_select",
                                       scale=4,
                                       waitkey=1)
                Presenter().show_image(RS_P_select.detach().cpu()[i, 0:3],
                                       f"{self.domain}_rs_p_select",
                                       scale=4,
                                       waitkey=1)
                break

        self.prof.tick("transform_back")

        # If we're predicting the trajectory only on some timesteps, then for each timestep k, use the map from
        # timestep k if predicting on timestep k. otherwise use the map from timestep j - the last timestep
        # that had a trajectory prediction, rotated in the frame of timestep k.
        if select_only:
            # If we're just pre-training the trajectory prediction, don't waste time on generating the missing maps
            log_v_dist_w = log_v_dist_w_select
            v_dist_w_poses = v_dist_w_poses_select
        else:
            raise NotImplementedError("select_only must be True")

        return_list = [log_v_dist_w, v_dist_w_poses]
        if rl:
            internals_for_rl = {
                "map_coverage_w": SM_W,
                "map_uncoverage_w": 1 - SM_W
            }
            return_list.append(internals_for_rl)

        return tuple(return_list)

    def maybe_cuda(self, tensor):
        return tensor.to(next(self.parameters()).device)

    def cuda_var(self, tensor):
        return tensor.to(next(self.parameters()).device)

    def unbatch(self, batch, halfway=False):
        # Inputs
        images = self.maybe_cuda(batch["images"][0])
        seq_len = len(images)
        instructions = self.maybe_cuda(batch["instr"][0][:seq_len])
        instr_lengths = batch["instr_len"][0][:seq_len]
        states = self.maybe_cuda(batch["states"][0])

        if not halfway:

            plan_mask = batch["plan_mask"][
                0]  # True for every timestep that we do visitation prediction
            firstseg_mask = batch["firstseg_mask"][
                0]  # True for every timestep that is a new instruction segment

            # Labels (including for auxiliary losses)
            lm_pos_fpv = batch["lm_pos_fpv"][
                0]  # All object 2D coordinates in the first-person image
            lm_pos_map_m = batch["lm_pos_map"][
                0]  # All object 2D coordinates in the semantic map
            lm_indices = batch["lm_indices"][0]  # All object class indices
            goal_pos_map_m = batch["goal_loc"][
                0]  # Goal location in the world in meters_and_metrics
            lm_mentioned = batch["lm_mentioned"][
                0]  # 1/0 labels whether object was mentioned/not mentioned in template instruction
            # TODO: We're taking the FIRST label here. SINGLE SEGMENT ASSUMPTION
            lang_lm_mentioned = batch["lang_lm_mentioned"][0][
                0]  # integer labes as to which object was mentioned
            start_poses = batch["start_poses"][0]
            noisy_start_poses = get_noisy_poses_torch(
                start_poses.numpy(),
                self.params["pos_variance"],
                self.params["rot_variance"],
                cuda=False,
                cuda_device=None)

            # Ground truth visitation distributions (in start and global frames)
            v_dist_w_ground_truth_select = self.maybe_cuda(
                batch["traj_ground_truth"][0])
            start_poses_select = self.batch_select.one(
                start_poses, plan_mask, v_dist_w_ground_truth_select.device)
            v_dist_s_ground_truth_select, poses_s = self.map_transform_w_to_s(
                v_dist_w_ground_truth_select, None, start_poses_select)
            #self.tensor_store.keep_inputs("v_dist_w_ground_truth_select", v_dist_w_ground_truth_select)
            self.tensor_store.keep_inputs("v_dist_s_ground_truth_select",
                                          v_dist_s_ground_truth_select)
            #Presenter().show_image(v_dist_s_ground_truth_select.detach().cpu()[0,0], "v_dist_s_ground_truth_select", waitkey=1, scale=4)
            #Presenter().show_image(v_dist_w_ground_truth_select.detach().cpu()[0,0], "v_dist_w_ground_truth_select", waitkey=1, scale=4)

            lm_pos_map_px = [
                torch.from_numpy(
                    transformations.pos_m_to_px(p.numpy(),
                                                self.params["global_map_size"],
                                                self.params["world_size_m"],
                                                self.params["world_size_px"]))
                if p is not None else None for p in lm_pos_map_m
            ]
            goal_pos_map_px = torch.from_numpy(
                transformations.pos_m_to_px(goal_pos_map_m.numpy(),
                                            self.params["global_map_size"],
                                            self.params["world_size_m"],
                                            self.params["world_size_px"]))

            resnet_factor = self.img_to_features_w.img_to_features.get_downscale_factor(
            )
            lm_pos_fpv = [
                self.cuda_var(
                    (s / resnet_factor).long()) if s is not None else None
                for s in lm_pos_fpv
            ]
            lm_indices = [
                self.cuda_var(s) if s is not None else None for s in lm_indices
            ]
            lm_mentioned = [
                self.cuda_var(s) if s is not None else None
                for s in lm_mentioned
            ]
            lang_lm_mentioned = self.cuda_var(lang_lm_mentioned)
            lm_pos_map_px = [
                self.cuda_var(s.long()) if s is not None else None
                for s in lm_pos_map_px
            ]
            goal_pos_map_px = self.cuda_var(goal_pos_map_px)

            self.tensor_store.keep_inputs("lm_pos_fpv", lm_pos_fpv)
            self.tensor_store.keep_inputs("lm_pos_map", lm_pos_map_px)
            self.tensor_store.keep_inputs("lm_indices", lm_indices)
            self.tensor_store.keep_inputs("lm_mentioned", lm_mentioned)
            self.tensor_store.keep_inputs("lang_lm_mentioned",
                                          lang_lm_mentioned)
            self.tensor_store.keep_inputs("goal_pos_map", goal_pos_map_px)

            lm_pos_map_select = [
                lm_pos for i, lm_pos in enumerate(lm_pos_map_px)
                if plan_mask[i]
            ]
            lm_indices_select = [
                lm_idx for i, lm_idx in enumerate(lm_indices) if plan_mask[i]
            ]
            lm_mentioned_select = [
                lm_m for i, lm_m in enumerate(lm_mentioned) if plan_mask[i]
            ]
            goal_pos_map_select = [
                pos for i, pos in enumerate(goal_pos_map_px) if plan_mask[i]
            ]

            self.tensor_store.keep_inputs("lm_pos_map_select",
                                          lm_pos_map_select)
            self.tensor_store.keep_inputs("lm_indices_select",
                                          lm_indices_select)
            self.tensor_store.keep_inputs("lm_mentioned_select",
                                          lm_mentioned_select)
            self.tensor_store.keep_inputs("goal_pos_map_select",
                                          goal_pos_map_select)

        # We won't need this extra information
        else:
            noisy_poses, start_poses, noisy_start_poses = None, None, None
            plan_mask, firstseg_mask = None, None

        metadata = batch["md"][0][0]
        env_id = metadata["env_id"]
        self.tensor_store.set_flag("env_id", env_id)

        return images, states, instructions, instr_lengths, plan_mask, firstseg_mask, start_poses, noisy_start_poses, metadata

    # Forward pass for training
    def sup_loss_on_batch(self,
                          batch,
                          eval,
                          halfway=False,
                          grad_noise=False,
                          disable_losses=[]):
        self.prof.tick("out")
        self.reset()

        if batch is None:
            print("Skipping None Batch")
            zero = torch.zeros([1]).float().to(next(self.parameters()).device)
            return zero, self.tensor_store

        images, states, instructions, instr_len, plan_mask, firstseg_mask, \
         start_poses, noisy_start_poses, metadata = self.unbatch(batch, halfway=halfway)
        self.prof.tick("unbatch_inputs")

        # ----------------------------------------------------------------------------
        _ = self(images,
                 states,
                 instructions,
                 instr_len,
                 plan=plan_mask,
                 firstseg=firstseg_mask,
                 noisy_start_poses=start_poses if eval else noisy_start_poses,
                 start_poses=start_poses,
                 select_only=True,
                 halfway=halfway,
                 grad_noise=grad_noise)
        # ----------------------------------------------------------------------------

        if self.should_save_path_overlays:
            self.save_path_overlays(metadata)

        # If we run the model halfway, we only need to calculate features needed for the wasserstein loss
        # If we want to include more features in wasserstein critic, have to run the forward pass a bit further
        if halfway and not halfway == "v2":
            return None, self.tensor_store

        # The returned values are not used here - they're kept in the tensor store which is used as an input to a loss
        self.prof.tick("call")

        if not halfway:
            # Calculate goal-prediction accuracy:
            goal_pos = self.tensor_store.get_inputs_batch("goal_pos_map",
                                                          cat_not_stack=True)
            success_goal = self.goal_good_criterion(
                self.tensor_store.get_inputs_batch("log_v_dist_w_select",
                                                   cat_not_stack=True),
                goal_pos)
            acc = 1.0 if success_goal else 0.0
            self.goal_acc_meter.put(acc)
            goal_visible = self.goal_visible(
                self.tensor_store.get_inputs_batch("M_w", cat_not_stack=True),
                goal_pos)
            if goal_visible:
                self.visible_goal_acc_meter.put(acc)
            else:
                self.invisible_goal_acc_meter.put(acc)
            self.visible_goal_frac_meter.put(1.0 if goal_visible else 0.0)

            self.correct_goals += acc
            self.total_goals += 1

            self.prof.tick("goal_acc")

        if halfway == "v2":
            disable_losses = ["visitation_dist", "lang"]

        losses, metrics = self.losses.calculate_aux_loss(
            tensor_store=self.tensor_store,
            reduce_average=True,
            disable_losses=disable_losses)
        loss = self.losses.combine_losses(losses, self.aux_weights)

        self.prof.tick("calc_losses")

        prefix = self.model_name + ("/eval" if eval else "/train")
        iteration = self.get_iter()
        self.writer.add_dict(prefix, get_current_meters(), iteration)
        self.writer.add_dict(prefix, losses, iteration)
        self.writer.add_dict(prefix, metrics, iteration)

        if not halfway:
            self.writer.add_scalar(prefix + "/goal_accuracy",
                                   self.goal_acc_meter.get(), iteration)
            self.writer.add_scalar(prefix + "/visible_goal_accuracy",
                                   self.visible_goal_acc_meter.get(),
                                   iteration)
            self.writer.add_scalar(prefix + "/invisible_goal_accuracy",
                                   self.invisible_goal_acc_meter.get(),
                                   iteration)
            self.writer.add_scalar(prefix + "/visible_goal_fraction",
                                   self.visible_goal_frac_meter.get(),
                                   iteration)

        self.inc_iter()

        self.prof.tick("summaries")
        self.prof.loop()
        self.prof.print_stats(1)

        return loss, self.tensor_store

    def get_dataset(self,
                    data=None,
                    envs=None,
                    domain=None,
                    dataset_names=None,
                    dataset_prefix=None,
                    eval=False,
                    halfway_only=False):
        # TODO: Maybe use eval here
        data_sources = []
        # If we're running auxiliary objectives, we need to include the data sources for the auxiliary labels
        #if self.use_aux_class_features or self.use_aux_class_on_map or self.use_aux_grounding_features or self.use_aux_grounding_on_map:
        #if self.use_aux_goal_on_map:
        if not halfway_only:
            data_sources.append(aup.PROVIDER_LM_POS_DATA)
            data_sources.append(aup.PROVIDER_GOAL_POS)

            # Adding these in this order will compute poses with added noise and compute trajectory ground truth
            # in the reference frame of these noisy poses
            data_sources.append(aup.PROVIDER_START_POSES)

            if self.do_perturb_maps:
                print("PERTURBING MAPS!")
                # TODO: The noisy poses from the provider are not actually used!! Those should replace states instead!
                data_sources.append(aup.PROVIDER_NOISY_POSES)
                # TODO: Think this through. Perhaps we actually want dynamic ground truth given a noisy start position
                if self.params["predict_in_start_frame"]:
                    data_sources.append(
                        aup.PROVIDER_TRAJECTORY_GROUND_TRUTH_STATIC)
                else:
                    data_sources.append(
                        aup.PROVIDER_TRAJECTORY_GROUND_TRUTH_DYNAMIC_NOISY)
            else:
                print("NOT Perturbing Maps!")
                data_sources.append(aup.PROVIDER_NOISY_POSES)
                if self.params["predict_in_start_frame"]:
                    data_sources.append(
                        aup.PROVIDER_TRAJECTORY_GROUND_TRUTH_STATIC)
                else:
                    data_sources.append(
                        aup.PROVIDER_TRAJECTORY_GROUND_TRUTH_DYNAMIC)

            data_sources.append(aup.PROVIDER_LANDMARKS_MENTIONED)

            templates = get_current_parameters()["Environment"]["templates"]
            if templates:
                data_sources.append(aup.PROVIDER_LANG_TEMPLATE)

        return SegmentDataset(data=data,
                              env_list=envs,
                              domain=domain,
                              dataset_names=dataset_names,
                              dataset_prefix=dataset_prefix,
                              aux_provider_names=data_sources,
                              segment_level=True)