Example #1
    def sup_loss_on_batch(self, batch, eval):
        self.prof.tick("out")

        action_loss_total = Variable(
            empty_float_tensor([1], self.is_cuda, self.cuda_device))

        if batch is None:
            print("Skipping None Batch")
            return action_loss_total

        images = self.maybe_cuda(batch["images"])

        instructions = self.maybe_cuda(batch["instr"])
        instr_lengths = batch["instr_len"]
        states = self.maybe_cuda(batch["states"])
        actions = self.maybe_cuda(batch["actions"])

        # Auxiliary labels
        lm_pos_fpv = batch["lm_pos_fpv"]
        lm_pos_map = batch["lm_pos_map"]
        lm_indices = batch["lm_indices"]
        goal_pos_map = batch["goal_loc"]

        # TODO: Get rid of this. We will have lm_mentioned booleans and lm_mentioned_idx integers and that's it.
        TEMPLATES = True
        if TEMPLATES:
            lm_mentioned_tplt = batch["lm_mentioned_tplt"]
            side_mentioned_tplt = batch["side_mentioned_tplt"]
        else:
            lang_lm_mentioned = batch["lang_lm_mentioned"]
        lm_mentioned = batch["lm_mentioned"]

        # stops = self.maybe_cuda(batch["stops"])
        masks = self.maybe_cuda(batch["masks"])
        # This is the first-timestep metadata
        metadata = batch["md"]

        seq_len = images.size(1)
        batch_size = images.size(0)
        count = 0
        correct_goal_count = 0
        goal_count = 0

        # Loop through the batch
        for b in range(batch_size):
            seg_idx = -1

            self.reset()

            self.prof.tick("out")
            b_seq_len = len_until_nones(metadata[b])

            # TODO: Generalize this
            # Slice the data according to the sequence length
            b_metadata = metadata[b][:b_seq_len]
            b_images = images[b][:b_seq_len]
            b_instructions = instructions[b][:b_seq_len]
            b_instr_len = instr_lengths[b][:b_seq_len]
            b_states = states[b][:b_seq_len]
            b_actions = actions[b][:b_seq_len]
            b_lm_pos_fpv = lm_pos_fpv[b][:b_seq_len]
            b_lm_pos_map = lm_pos_map[b][:b_seq_len]
            b_lm_indices = lm_indices[b][:b_seq_len]
            b_goal_pos = goal_pos_map[b][:b_seq_len]
            if not TEMPLATES:
                b_lang_lm_mentioned = lang_lm_mentioned[b][:b_seq_len]
            b_lm_mentioned = lm_mentioned[b][:b_seq_len]

            # Convert landmark and goal positions from meters to pixels
            b_lm_pos_map = [
                torch.from_numpy(
                    transformations.pos_m_to_px(p.numpy(),
                                                self.params["global_map_size"],
                                                self.params["world_size_m"],
                                                self.params["world_size_px"]))
                if p is not None else None for p in b_lm_pos_map
            ]

            b_goal_pos = torch.from_numpy(
                transformations.pos_m_to_px(b_goal_pos.numpy(),
                                            self.params["global_map_size"],
                                            self.params["world_size_m"],
                                            self.params["world_size_px"]))

            b_lm_pos_map = [
                self.cuda_var(s.long()) if s is not None else None
                for s in b_lm_pos_map
            ]
            b_lm_pos_fpv = [
                self.cuda_var(
                    (s / RESNET_FACTOR).long()) if s is not None else None
                for s in b_lm_pos_fpv
            ]
            b_lm_indices = [
                self.cuda_var(s) if s is not None else None
                for s in b_lm_indices
            ]
            b_goal_pos = self.cuda_var(b_goal_pos)
            if not TEMPLATES:
                b_lang_lm_mentioned = self.cuda_var(b_lang_lm_mentioned)
            b_lm_mentioned = [
                self.cuda_var(s) if s is not None else None
                for s in b_lm_mentioned
            ]

            # TODO: Figure out how to keep these properly. Perhaps as a whole batch is best
            # TODO: Introduce a key-value store (encapsulate instead of inherit)
            self.tensor_store.keep_inputs("lm_pos_fpv", b_lm_pos_fpv)
            self.tensor_store.keep_inputs("lm_pos_map", b_lm_pos_map)
            self.tensor_store.keep_inputs("lm_indices", b_lm_indices)
            self.tensor_store.keep_inputs("goal_pos_map", b_goal_pos)
            if not TEMPLATES:
                self.tensor_store.keep_inputs("lang_lm_mentioned",
                                              b_lang_lm_mentioned)
            self.tensor_store.keep_inputs("lm_mentioned", b_lm_mentioned)

            # TODO: Abstract all of these if-elses in a modular way once we know which ones are necessary
            if TEMPLATES:
                b_lm_mentioned_tplt = lm_mentioned_tplt[b][:b_seq_len]
                b_side_mentioned_tplt = side_mentioned_tplt[b][:b_seq_len]
                b_side_mentioned_tplt = self.cuda_var(b_side_mentioned_tplt)
                b_lm_mentioned_tplt = self.cuda_var(b_lm_mentioned_tplt)
                self.tensor_store.keep_inputs("lm_mentioned_tplt",
                                              b_lm_mentioned_tplt)
                self.tensor_store.keep_inputs("side_mentioned_tplt",
                                              b_side_mentioned_tplt)

                #b_lm_mentioned = b_lm_mentioned_tplt

            b_obs_mask = [True for _ in range(b_seq_len)]
            b_plan_mask = [True for _ in range(b_seq_len)]
            b_plan_mask_t_cpu = torch.Tensor(b_plan_mask) == True
            b_plan_mask_t = self.maybe_cuda(b_plan_mask_t_cpu)
            b_pos_enc = None

            # ----------------------------------------------------------------------------
            # Optional Auxiliary Inputs
            # ----------------------------------------------------------------------------
            if self.aux_losses.input_required("lm_pos_map_select"):
                b_lm_pos_map_select = [
                    lm_pos for i, lm_pos in enumerate(b_lm_pos_map)
                    if b_plan_mask[i]
                ]
                self.tensor_store.keep_inputs("lm_pos_map_select",
                                              b_lm_pos_map_select)
            if self.aux_losses.input_required("lm_indices_select"):
                b_lm_indices_select = [
                    lm_idx for i, lm_idx in enumerate(b_lm_indices)
                    if b_plan_mask[i]
                ]
                self.tensor_store.keep_inputs("lm_indices_select",
                                              b_lm_indices_select)
            if self.aux_losses.input_required("lm_mentioned_select"):
                b_lm_mentioned_select = [
                    lm_m for i, lm_m in enumerate(b_lm_mentioned)
                    if b_plan_mask[i]
                ]
                self.tensor_store.keep_inputs("lm_mentioned_select",
                                              b_lm_mentioned_select)

            # ----------------------------------------------------------------------------

            self.prof.tick("inputs")

            actions = self(b_images,
                           b_states,
                           b_instructions,
                           b_instr_len,
                           has_obs=b_obs_mask,
                           plan=b_plan_mask,
                           pos_enc=b_pos_enc)

            action_losses, _ = self.action_loss(b_actions,
                                                actions,
                                                batchreduce=False)

            self.prof.tick("call")

            action_losses = self.action_loss.batch_reduce_loss(action_losses)
            action_loss = self.action_loss.reduce_loss(action_losses)

            action_loss_total = action_loss_total + action_loss  # accumulate over sequences; averaged by count below
            count += b_seq_len

            self.prof.tick("loss")

        action_loss_avg = action_loss_total / (count + 1e-9)

        self.prof.tick("out")

        # Doing this at the end (outside of the segment loop)
        aux_losses = self.aux_losses.calculate_aux_loss(self.tensor_store,
                                                        reduce_average=True)
        aux_loss = self.aux_losses.combine_losses(aux_losses, self.aux_weights)

        prefix = self.model_name + ("/eval" if eval else "/train")

        self.writer.add_dict(prefix, get_current_meters(), self.get_iter())
        self.writer.add_dict(prefix, aux_losses, self.get_iter())
        self.writer.add_scalar(prefix + "/action_loss",
                               action_loss_avg.data.cpu().item(),
                               self.get_iter())
        # TODO: Log value here
        self.writer.add_scalar(prefix + "/goal_accuracy",
                               self.goal_acc_meter.get(), self.get_iter())

        self.prof.tick("auxiliaries")

        total_loss = action_loss_avg + aux_loss

        self.inc_iter()

        self.prof.tick("summaries")
        self.prof.loop()
        self.prof.print_stats(1)

        return total_loss
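
The listings on this page rely on the helpers empty_float_tensor and cuda_var (and the pre-0.4 Variable API). Their implementations are not part of this page; the sketch below is only an assumption of what such helpers typically look like, not the project's actual code.

import torch
from torch.autograd import Variable

def empty_float_tensor(size, is_cuda=False, cuda_device=None):
    # Assumed behavior: allocate a zero-initialized float tensor of the given
    # size, optionally placed on a CUDA device.
    tensor = torch.zeros(*size)
    if is_cuda:
        tensor = tensor.cuda(cuda_device)
    return tensor

def cuda_var(tensor, is_cuda=False, cuda_device=None):
    # Assumed behavior: move the tensor to the GPU if requested and wrap it in
    # a Variable, matching how the examples on this page call it.
    if is_cuda:
        tensor = tensor.cuda(cuda_device)
    return Variable(tensor)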
Example #2
    def sup_loss_on_batch(self, batch, eval=False, viz=False):

        if eval:
            self.eval()
        else:
            self.train()

        images = cuda_var(batch["images"], self.is_cuda, self.cuda_device)
        instructions = cuda_var(batch["instr"], self.is_cuda, self.cuda_device)
        instruction_masks = cuda_var(batch["instr_mask"], self.is_cuda,
                                     self.cuda_device)
        label_masks = cuda_var(batch["traj_labels"], self.is_cuda,
                               self.cuda_device)

        # Each of the above is a list of lists of tensors, where the outer list is over the batch and the inner
        # list is over the segments. Loop through the batch sequentially and accumulate the loss for each segment.
        # Reset model state (embedding etc.) between batch elements, but not between segments.
        # We don't process the whole batch in batch mode, because the varying number of segments per example
        # makes that complicated.

        batch_size = len(images)
        total_class_loss = Variable(empty_float_tensor([1], self.is_cuda,
                                                       self.cuda_device),
                                    requires_grad=True)
        total_ground_loss = Variable(empty_float_tensor([1], self.is_cuda,
                                                        self.cuda_device),
                                     requires_grad=True)
        count = 0

        label_masks = self.label_pool(label_masks)
        mask_pred, features, emb_loss = self(images, instructions,
                                             instruction_masks)

        if BCE:
            mask_pred_flat = mask_pred.view(-1, 1)
            label_masks_flat = label_masks - torch.min(label_masks)
            label_masks_flat = label_masks_flat / (
                torch.max(label_masks_flat) + 1e-9)
            label_masks_flat = label_masks_flat.view(-1, 1).clamp(0, 1)
            main_loss = self.mask_loss(mask_pred_flat, label_masks_flat)

        elif NLL:
            mask_pred_1 = F.softmax(mask_pred, 1, _stacklevel=5)
            mask_pred_2 = 1 - mask_pred_1
            mask_pred_1 = mask_pred_1.unsqueeze(1)
            mask_pred_2 = mask_pred_2.unsqueeze(1)
            mask_pred = torch.cat((mask_pred_1, mask_pred_2), dim=1)
            label_masks = label_masks.clamp(0, 1)
            if self.is_cuda:
                label_masks = label_masks.type(torch.cuda.LongTensor)
            else:
                label_masks = label_masks.type(torch.LongTensor)
            main_loss = self.mask_loss(mask_pred, label_masks)

        elif CE:
            # Crossentropy2D internally applies log-softmax to mask_pred, while the labels are assumed
            # to already be a valid probability distribution, so no softmax is applied to them.
            main_loss = self.mask_loss(mask_pred, label_masks)
            # For nice plotting, apply the softmax to the prediction manually:
            mask_pred = self.spatialsoftmax(mask_pred)
        else:
            main_loss = self.mask_loss(mask_pred, label_masks)

        # sum emb loss if batch size > 1
        if type(emb_loss) == tuple:
            emb_loss = sum(emb_loss)

        # Extract the feature vectors corresponding to every landmark's location in the map
        # Apply a linear layer to classify which of the 64 landmarks it is
        # The landmark positions have to be divided by the same factor as the ResNet scaling factor
        lcount = 0
        for i in range(batch_size):
            if self.class_loss and len(batch["lm_pos"][i]) > 0:
                lcount += 1
                landmark_pos = cuda_var(batch["lm_pos"][i], self.is_cuda,
                                        self.cuda_device)
                landmark_indices = cuda_var(batch["lm_indices"][i],
                                            self.is_cuda, self.cuda_device)
                landmark_coords = (landmark_pos / 8).long()
                lm_features = self.gather2d(features[i:i + 1, 0:32],
                                            landmark_coords)
                lm_pred = self.aux_class_linear(lm_features)
                class_loss = self.aux_loss(lm_pred, landmark_indices)
                total_class_loss = total_class_loss + class_loss

            if self.ground_loss and len(batch["lm_pos"][i]) > 0:
                landmark_pos = cuda_var(batch["lm_pos"][i], self.is_cuda,
                                        self.cuda_device)
                landmark_mentioned = cuda_var(batch["lm_mentioned"][i],
                                              self.is_cuda, self.cuda_device)
                landmark_coords = (landmark_pos / 8).long()
                g_features = self.gather2d(features[i:i + 1, 32:35],
                                           landmark_coords)
                lm_pred = self.aux_ground_linear(g_features)
                ground_loss = self.aux_loss(lm_pred, landmark_mentioned)
                total_ground_loss = total_ground_loss + ground_loss

        total_class_loss = total_class_loss / (lcount + 1e-9)
        total_ground_loss = total_ground_loss / (lcount + 1e-9)
        count += 1

        # Just visualization and debugging code
        if self.get_iter() % 50 == 0:
            presenter = Presenter()
            pred_viz_np = presenter.overlaid_image(images[0].data,
                                                   mask_pred[0].data)
            labl_viz_np = presenter.overlaid_image(images[0].data,
                                                   label_masks[0].data)
            comp = np.concatenate((pred_viz_np, labl_viz_np), axis=1)
            presenter.show_image(comp, "path_pred")

            if hasattr(self.sentence_embedding, "save_att_map"):
                self.sentence_embedding.save_att_map(self.get_iter(), i)

        total_loss = main_loss + 0.1 * total_class_loss + 0.001 * emb_loss + 0.1 * total_ground_loss
        total_loss = total_loss / (count + 1e-9)

        self.write_summaires("eval" if eval else "train", self.get_iter(),
                             total_loss, main_loss, emb_loss, total_class_loss,
                             total_ground_loss)
        self.inc_iter()

        return total_loss
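
For reference, the BCE branch in the example above turns the trajectory-label masks into valid BCE targets by shifting them to a zero minimum, scaling them to a unit maximum and clamping to [0, 1]. A toy, self-contained illustration of that normalization (the values are made up):

import torch

label_masks = torch.tensor([[-0.5, 0.0], [1.5, 3.0]])  # made-up raw mask values
flat = label_masks - torch.min(label_masks)            # shift so the minimum is 0
flat = flat / (torch.max(flat) + 1e-9)                 # scale so the maximum is ~1
flat = flat.view(-1, 1).clamp(0, 1)                    # column of valid BCE targets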
Example #3
    def deterministic_action(self, action_mean, action_std, stop_prob):
        batch_size = action_mean.size(0)
        action = Variable(empty_float_tensor((batch_size, 4), self.is_cuda, self.cuda_device))
        action[:, 0:3] = action_mean[:, 0:3]
        action[:, 3] = stop_prob
        return action
Example #4
    def forward(self, word_ids, lengths=None):
        # TODO: Get rid of this and abstract in another layer

        if isinstance(word_ids, list) and lengths is None:
            word_ids, lengths = sequence_list_to_tensor([word_ids])
            if self.is_cuda:
                word_ids = word_ids.cuda()  # size: [2, 500] = [batch size, max instruction len]
                lengths = lengths.cuda()  # instruction lengths
        word_embeddings = self.embedding(word_ids)  # size: [2, 500, 20], embedding size: 20
        batch_size = word_embeddings.size(0)  # size:2
        sentence_embeddings = Variable(
            empty_float_tensor(
                (batch_size,
                 self.lstm_size * self.factor * self.num_attn_heads),
                self.is_cuda, self.cuda_device))  #size [2,80]

        penal = 0

        for i in range(batch_size):
            length = int(lengths[i])
            if length == 0:
                print("Empty caption")
                continue
            embeddings_i = word_embeddings[i, 0:length].unsqueeze(1)  # size: [instr_len, 1, 20]
            h0 = Variable(
                empty_float_tensor(
                    (self.lstm_layers * self.factor, 1, self.lstm_size),
                    self.is_cuda))  #size: [2, 1, 40]
            c0 = Variable(
                empty_float_tensor(
                    (self.lstm_layers * self.factor, 1, self.lstm_size),
                    self.is_cuda))  #size: [2, 1, 40]
            # output size: [instr_len, 1, 80]; 2 states (forward and backward), each of size [2, 1, 40]
            outputs, states = self.lstm_txt(embeddings_i, (h0, c0))
            H = outputs.squeeze(dim=1)  # size: [instr_len, 80]
            hidden, cell = (states[0].squeeze(dim=1), states[1].squeeze(dim=1))  # size: 2 x [2, 40]

            #self-attention
            s1 = self.W_s1(H)
            s2 = self.W_s2(F.tanh(s1))
            A = F.softmax(s2.t(), dim=1)
            M = torch.mm(A, H)

            AAt = torch.mm(A, A.t())
            for j in range(self.num_attn_heads):
                AAt[j, j] = 0
            p = torch.norm(AAt, 2)
            penal += p * p

            # Concatenate the attention-head outputs into one sentence embedding
            # (alternative: mean-reduce the 1st (sequence) dimension)
            # sentence_embedding = torch.mean(M, 0)  # size [80]
            sentence_embedding = M.view(-1)
            sentence_embeddings[i] = sentence_embedding.squeeze()

        penal /= batch_size

        if self.n_batch % 2000 == 0 and self.idx2word is not None:
            str_id = word_ids[-1][:length].data.cpu().numpy()
            instr = [self.idx2word[str(i)] for i in str_id]
            Att = A.data.cpu().numpy()
            filepath = get_self_attention_path() + "sample_instructions/sample_intr-{}-{}.txt".format(
                self.n_epoch, self.n_batch)
            # with open(filepath, "w") as f:
            #     for w in zip(instr, Att[0], Att[1], Att[2], Att[3], Att[4]):
            #         f.write(str(w)+"\n")

            imgpath = get_self_attention_path() + "instruction_heatmap/intr_heatmap-{}-{}.png".format(
                self.n_epoch, self.n_batch)

            # plt.close()
            plt.figure(figsize=(len(instr) / 6, 1.8))
            plt.pcolor(Att)
            plt.xticks(np.linspace(0.5,
                                   len(instr) - 0.5, len(instr)),
                       instr,
                       rotation=90,
                       fontsize=10)
            plt.gcf().subplots_adjust(bottom=0.5)
            plt.savefig(imgpath)
            # plt.show()
            self.n_batch += 1

        return sentence_embeddings, penal
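
The penal term accumulated in the example above is an attention-redundancy penalty in the spirit of structured self-attentive sentence embeddings: the diagonal of A·Aᵀ is zeroed out and the squared (Frobenius) norm of what remains is penalized, pushing the attention heads to focus on different words. A minimal standalone sketch of that computation, with made-up shapes:

import torch
import torch.nn.functional as F

A = F.softmax(torch.randn(5, 12), dim=1)     # [num_attn_heads, instr_len], made-up sizes
AAt = torch.mm(A, A.t())                     # head-to-head overlap matrix
AAt = AAt - torch.diag(torch.diagonal(AAt))  # zero the diagonal (each head's self-overlap)
penalty = torch.norm(AAt) ** 2               # squared Frobenius norm of the off-diagonal part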
Example #5
    def sup_loss_on_batch(self, batch, eval):
        self.prof.tick("out")

        action_loss_total = Variable(
            empty_float_tensor([1], self.is_cuda, self.cuda_device))

        if batch is None:
            print("Skipping None Batch")
            return action_loss_total

        images = self.maybe_cuda(batch["images"])
        instructions = self.maybe_cuda(batch["instr"])
        instr_lengths = batch["instr_len"]
        actions = self.maybe_cuda(batch["actions"])

        metadata = batch["md"]

        batch_size = images.size(0)
        count = 0

        # Loop through the batch
        for b in range(batch_size):
            self.reset()
            self.prof.tick("out")
            b_seq_len = len_until_nones(metadata[b])

            # TODO: Generalize this
            # Slice the data according to the sequence length
            b_metadata = metadata[b][:b_seq_len]
            b_images = images[b][:b_seq_len]
            b_instructions = instructions[b][:b_seq_len]
            b_instr_len = instr_lengths[b][:b_seq_len]
            b_actions = actions[b][:b_seq_len]

            # ----------------------------------------------------------------------------

            self.prof.tick("inputs")

            actions = self(b_images, b_instructions, b_instr_len)

            action_losses, _ = self.action_loss(b_actions,
                                                actions,
                                                batchreduce=False)

            self.prof.tick("call")
            action_losses = self.action_loss.batch_reduce_loss(action_losses)
            action_loss = self.action_loss.reduce_loss(action_losses)
            action_loss_total = action_loss_total + action_loss  # accumulate over sequences; averaged by count below
            count += b_seq_len

            self.prof.tick("loss")

        action_loss_avg = action_loss_total / (count + 1e-9)

        self.prof.tick("out")

        prefix = self.model_name + ("/eval" if eval else "/train")

        self.writer.add_dict(prefix, get_current_meters(), self.get_iter())
        self.writer.add_scalar(prefix + "/action_loss",
                               action_loss_avg.data.cpu().item(), self.get_iter())

        total_loss = action_loss_avg

        self.inc_iter()

        self.prof.loop()
        self.prof.print_stats(1)

        return total_loss