Example #1
    def forward(self, map, goal_pos):
        batch_size = len(map)

        map = torch.cat(map, dim=0)

        if map.size(1) > self.channels_in:
            map = map[:, 0:self.channels_in, :, :]

        loss_out = None
        for i in range(batch_size):

            goal_pos_i = goal_pos[i]
            map_i = map[i:i+1]

            goal_coords_in_map = as_to_img(goal_pos_i, self.map_world_size).long()
            neg_samples = 1
            neg_coords_size = list(goal_coords_in_map.size())
            neg_coords_size[0] = neg_coords_size[0] * neg_samples
            all_coords_size = list(goal_coords_in_map.size())
            all_coords_size[0] += neg_coords_size[0]

            goal_negative_coords_in_map = empty_float_tensor(neg_coords_size)
            range_min = 0
            range_max = self.map_world_size
            goal_negative_coords_in_map.uniform_(range_min, range_max)

            goal_negative_coords_in_map = cuda_var(goal_negative_coords_in_map.long(), self.is_cuda, self.cuda_device)

            sample_pt_coords = torch.cat([goal_coords_in_map, goal_negative_coords_in_map], dim=0).long()
            sample_pt_labels = cuda_var(empty_float_tensor([all_coords_size[0]]).long(), self.is_cuda, self.cuda_device)
            sample_pt_labels[0] = 1
            sample_pt_labels[1:] = 0

            sample_pt_features = self.gather_2d(map_i, sample_pt_coords)

            if DBG:
                self.plot_pts(map[0], sample_pt_coords)

            pt_predictions = self.goal_linear(sample_pt_features)
            aux_loss_goal = self.loss(pt_predictions, sample_pt_labels)

            _, pred_idx = torch.max(pt_predictions.data, 1)
            correct = torch.sum((pred_idx == sample_pt_labels.data).long())
            total = float(len(sample_pt_labels))
            accuracy = correct / total
            self.accuracy_meter.put(accuracy)
            log_value(self.name + "/accuracy", self.accuracy_meter.get())

            if loss_out is None:
                loss_out = aux_loss_goal
            else:
                loss_out += aux_loss_goal

            # TODO: Consider batch size / count

        return loss_out, batch_size
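The loop above gathers a feature vector at the goal coordinate and at randomly sampled negative coordinates via self.gather_2d, then classifies each vector as goal / not-goal. A minimal sketch of such a 2D gather, assuming coords is an [N, 2] long tensor indexing the last two map dimensions as (x, y) (the repository's actual gather_2d may differ):

def gather_2d_features(feature_map, coords):
    # feature_map: [1, C, H, W] tensor; coords: [N, 2] long tensor of (x, y) indices.
    # Returns an [N, C] matrix with one feature vector per sampled point.
    x = coords[:, 0].clamp(0, feature_map.size(3) - 1)
    y = coords[:, 1].clamp(0, feature_map.size(2) - 1)
    return feature_map[0, :, y, x].t()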
Example #2
    def forward(self, instructions_batch):
        token_lists, _ = instructions_batch
        batch_size = len(token_lists)
        dims = (self.num_layers, batch_size, self.hidden_dim)
        hidden = (Variable(empty_float_tensor(dims, self.is_cuda, self.cuda_device)),
                  Variable(empty_float_tensor(dims, self.is_cuda, self.cuda_device)))

        # pad text tokens with 0's
        text_lengths = np.array([len(tokens) for tokens in token_lists])
        tokens_batch = [[] for _ in range(batch_size)]
        for i in range(batch_size):
            num_zeros = text_lengths[0] - text_lengths[i]
            tokens_batch[i] = token_lists[i] + [0] * num_zeros
        tokens_batch = cuda_var(torch.from_numpy(np.array(tokens_batch)))

        # swap so batch dimension is second, sequence dimension is first
        tokens_batch = tokens_batch.transpose(0, 1)
        emb_sentence = self.embedding(tokens_batch)
        packed_input = pack_padded_sequence(emb_sentence, text_lengths)
        lstm_out_packed, _ = self.lstm(packed_input, hidden)
        # return average output embedding
        lstm_out, seq_lengths = pad_packed_sequence(lstm_out_packed)
        lstm_out = lstm_out.transpose(0, 1)
        sum_emb_list = []
        for i, seq_out in enumerate(lstm_out):
            seq_len = seq_lengths[i]
            sum_emb = torch.sum(seq_out[:seq_len], 0) / seq_len
            sum_emb_list.append(sum_emb.view(1, -1))
        return torch.cat(sum_emb_list)
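Note that the padding loop uses text_lengths[0] as the target length, so it only works when token_lists arrive sorted by decreasing length; pack_padded_sequence (without the later enforce_sorted=False option) relies on the same ordering. A small sketch of sorting the batch explicitly if that guarantee does not hold upstream (the helper name is illustrative, not from the repository):

import numpy as np
import torch
from torch.nn.utils.rnn import pack_padded_sequence

def pack_sorted(emb_sentence, text_lengths):
    # emb_sentence: [max_len, batch, emb_dim]; text_lengths: np.ndarray of ints.
    order = np.argsort(-text_lengths)                     # longest sequence first
    idx = torch.as_tensor(order, device=emb_sentence.device)
    packed = pack_padded_sequence(emb_sentence.index_select(1, idx),
                                  text_lengths[order].tolist())
    return packed, order                                  # order lets the caller unsort later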
Example #3
    def forward(self, images, instructions, instruction_masks):
        emb = self.sentence_embedding(instructions,
                                      torch.sum(instruction_masks, 1))

        # If the embedding returns an internal auxiliary loss, pass it along
        emb_loss = cuda_var(torch.zeros([1]), self.is_cuda, self.cuda_device)
        if type(emb) is tuple:
            emb, emb_loss = emb

        feature_map = self.feature_net(images)
        feature_map = self.dropout2d(feature_map)

        if self.ground_loss:
            self.lang_filter.precompute_conv_weights(emb)
            ground_map = self.lang_filter(feature_map)
            feature_map = torch.cat([feature_map, ground_map], dim=1)

        # TODO: Testing breaking of gradients between ResNet and UNet
        if cut_gradients:
            feature_map_fwd = Variable(feature_map.data)
        else:
            feature_map_fwd = feature_map

        #if self.ground_loss:
        #    feature_map_fwd = feature_map_fwd[:, 0:3, :, :]

        pred_mask = self.unet(feature_map_fwd, emb)

        return pred_mask, feature_map, emb_loss
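The cut_gradients branch wraps feature_map.data in a fresh Variable to stop the UNet's gradients from flowing back into the ResNet features. In current PyTorch the same cut is written with detach(); a one-line sketch of the equivalent, assuming feature_map is a plain tensor:

# Equivalent gradient cut in current PyTorch:
feature_map_fwd = feature_map.detach() if cut_gradients else feature_map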
Example #4
    def get_action(self, state, instruction):
        """
        Given a DroneState (from PomdpInterface) and instruction, produce a numpy 4D action (x, y, theta, pstop)
        :param state: DroneState object with the raw image from the simulator
        :param instruction: Instruction tokenized according to the corpus
        #TODO: Absorb corpus within model
        :return: numpy array [x, y, theta, pstop]
        """
        # TODO: Simplify this
        self.eval()
        images_np_pure = state.image
        state_np = state.state

        #print("Act: " + debug_untokenize_instruction(instruction))

        images_np = standardize_image(images_np_pure)
        image_fpv = Variable(none_padded_seq_to_tensor([images_np]))
        state = Variable(none_padded_seq_to_tensor([state_np]))
        # Add the batch dimension

        first_step = True
        if instruction == self.prev_instruction:
            first_step = False
        self.prev_instruction = instruction

        img_in_t = image_fpv
        img_in_t.volatile = True

        instr_len = [len(instruction)] if instruction is not None else None
        instruction = torch.LongTensor(instruction).unsqueeze(0)
        instruction = cuda_var(instruction, self.is_cuda, self.cuda_device)

        state.volatile = True

        if self.is_cuda:
            if img_in_t is not None:
                img_in_t = img_in_t.cuda(self.cuda_device)
            state = state.cuda(self.cuda_device)

        step_enc = None
        plan_now = None

        self.seq_step += 1

        action = self(img_in_t, state, instruction, instr_len, plan=plan_now, pos_enc=step_enc)

        # Save materials for paper and presentation
        if False:
            self.save_viz(images_np_pure)

        output_action = action.squeeze().data.cpu().numpy()
        stop_prob = output_action[3]
        output_stop = 1 if stop_prob > 0.5 else 0
        output_action[3] = output_stop

        return output_action
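The volatile = True assignments above are the pre-0.4 PyTorch way of disabling gradient tracking at inference time; current PyTorch ignores the attribute, and the equivalent is to run the forward pass under torch.no_grad(). A minimal sketch of the modern form, reusing the names from the example:

import torch

with torch.no_grad():  # replaces the volatile=True flags on img_in_t and state
    action = self(img_in_t, state, instruction, instr_len, plan=plan_now, pos_enc=step_enc)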
Example #5
    def get_action(self, state, instruction):
        """
        Given a DroneState (from PomdpInterface) and instruction, produce a numpy 4D action (x, y, theta, pstop)
        :param state: DroneState object with the raw image from the simulator
        :param instruction: Instruction tokenized according to the corpus
        #TODO: Absorb corpus within model
        :return: numpy array [x, y, theta, pstop]
        """
        # TODO: Simplify this
        self.eval()
        images_np_pure = state.image
        state_np = state.state

        #print("Act: " + debug_untokenize_instruction(instruction))

        images_np = standardize_image(images_np_pure)
        image_fpv = Variable(none_padded_seq_to_tensor([images_np]))
        state = Variable(none_padded_seq_to_tensor([state_np]))
        # Add the batch dimension

        first_step = True
        if instruction == self.prev_instruction:
            first_step = False
        self.prev_instruction = instruction

        img_in_t = image_fpv
        img_in_t.volatile = True

        instr_len = [len(instruction)] if instruction is not None else None
        for tok in instruction:
            if tok >= self.params["vocab_size"] or tok < 0:
                raise Exception("Word embeddings out of bounds")
        instruction = torch.LongTensor(instruction).unsqueeze(0)
        instruction = cuda_var(instruction, self.is_cuda, self.cuda_device)

        state.volatile = True

        if self.is_cuda:
            img_in_t = img_in_t.cuda(self.cuda_device)

        self.seq_step += 1

        action = self(img_in_t, instruction, instr_len)

        output_action = action.squeeze().data.cpu().numpy()
        stop_prob = output_action[3]
        output_stop = 1 if (stop_prob > 0.5
                            or self.seq_step >= self.trajectory_len - 5) else 0
        output_action[3] = output_stop

        #print("action: ", output_action)

        return output_action
Example #6
    def forward(self, cam_pose):
        batch_size = len(cam_pose)
        out_cpu = empty_float_tensor(
            [batch_size, self.map_size, self.map_size, 2])

        # TODO: parallel for loop this
        for i in range(batch_size):
            mapping_i_np = self.projector.get_projection_mapping(
                cam_pose[i].position.cpu().data.numpy(),
                cam_pose[i].orientation.cpu().data.numpy(),
                range1=True)
            mapping_i = torch.from_numpy(mapping_i_np).float()
            out_cpu[i, :, :, :] = mapping_i

        out = cuda_var(out_cpu, self.is_cuda, self.cuda_device)
        return out
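The returned tensor has shape [batch, map_size, map_size, 2], which matches the sampling-grid layout expected by torch.nn.functional.grid_sample (and range1=True suggests values normalized to [-1, 1]). How it is consumed downstream is not shown here; a hypothetical usage sketch:

import torch.nn.functional as F

# Assumed downstream use: resample camera features onto the map via the projection grid.
# features_cam: [B, C, Hc, Wc]; grid is the output of the forward() above.
features_on_map = F.grid_sample(features_cam, grid)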
Example #7
    def get_action(self, state, instruction):
        """
        Given a DroneState (from PomdpInterface) and instruction, produce a numpy 4D action (x, y, theta, pstop)
        :param state: DroneState object with the raw image from the simulator
        :param instruction: Instruction tokenized according to the corpus
        #TODO: Absorb corpus within model
        :return: numpy array [x, y, theta, pstop]
        """
        # TODO: Simplify this
        self.eval()
        images_np_pure = state.image
        state_np = state.state

        #print("Act: " + debug_untokenize_instruction(instruction))

        images_np = standardize_image(images_np_pure)
        image_fpv = Variable(none_padded_seq_to_tensor([images_np]))
        state = Variable(none_padded_seq_to_tensor([state_np]))
        self.prev_instruction = instruction

        img_in_t = image_fpv
        img_in_t.volatile = True

        instr_len = [len(instruction)] if instruction is not None else None
        instruction = torch.LongTensor(instruction).unsqueeze(0)
        instruction = cuda_var(instruction, self.is_cuda, self.cuda_device)

        state.volatile = True

        if self.is_cuda:
            img_in_t = img_in_t.cuda(self.cuda_device)
            state = state.cuda(self.cuda_device)

        self.seq_step += 1

        action = self(img_in_t, state, instruction, instr_len)

        output_action = action.squeeze().data.cpu().numpy()
        print("action: ", output_action)

        stop_prob = output_action[3]
        output_stop = 1 if stop_prob > self.params["stop_threshold"] else 0
        output_action[3] = output_stop

        return output_action
Example #8
 def cuda_var(self, tensor):
     return cuda_var(tensor, self.is_cuda, self.cuda_device)
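The module-level cuda_var(tensor, is_cuda, cuda_device) wrapped here (and used throughout the other examples) is not shown on this page. From the call sites it behaves like "optionally move to the given CUDA device, then wrap as a Variable"; a plausible reconstruction, offered only as a sketch:

from torch.autograd import Variable

def cuda_var(tensor, cuda=False, device=None):
    # Sketch only: move the tensor to the requested GPU, then wrap it for autograd.
    if cuda:
        tensor = tensor.cuda(device)
    return Variable(tensor)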
Example #9
def train_top_down_pred():
    P.initialize_experiment()
    setup = P.get_current_parameters()["Setup"]
    launch_ui()

    env = PomdpInterface()

    print("model_name:", setup["top_down_model"])
    print("model_file:", setup["top_down_model_file"])

    model, model_loaded = load_model(
        model_name_override=setup["top_down_model"],
        model_file_override=setup["top_down_model_file"])

    exec_model, wrapper_model_loaded = load_model(
        model_name_override=setup["wrapper_model"],
        model_file_override=setup["wrapper_model_file"])

    affine2d = Affine2D()
    if model.is_cuda:
        affine2d.cuda()

    eval_envs = get_correct_eval_env_id_list()
    print("eval_envs:", eval_envs)
    train_instructions, dev_instructions, test_instructions, corpus = get_all_instructions(
        max_size=setup["max_envs"])
    all_instr = {
        **train_instructions,
        **dev_instructions,
        **test_instructions
    }
    token2term, word2token = get_word_to_token_map(corpus)

    dataset = model.get_dataset(envs=eval_envs,
                                dataset_name="supervised",
                                eval=True,
                                seg_level=False)
    dataloader = DataLoader(dataset,
                            collate_fn=dataset.collate_fn,
                            batch_size=1,
                            shuffle=False,
                            num_workers=1,
                            pin_memory=True)

    for b, batch in list(enumerate(dataloader)):
        print("batch:", batch)
        images = batch["images"]
        instructions = batch["instr"]
        label_masks = batch["traj_labels"]
        affines = batch["affines_g_to_s"]
        env_ids = batch["env_id"]
        set_idxs = batch["set_idx"]
        seg_idxs = batch["seg_idx"]

        env_id = env_ids[0][0]
        set_idx = set_idxs[0][0]
        print("env_id of this batch:", env_id)
        env.set_environment(
            env_id, instruction_set=all_instr[env_id][set_idx]["instructions"])
        env.reset(0)

        num_segments = len(instructions[0])
        print("num_segments in this batch:", num_segments)
        write_instruction("")
        write_real_instruction("None")
        instruction_str = read_instruction_file()
        print("Initial instruction: ", instruction_str)

        # TODO: Reset model state here if we keep any temporal memory etc
        for s in range(num_segments):
            start_state = env.reset(s)
            keep_going = True
            real_instruction = cuda_var(instructions[0][s], setup["cuda"], 0)
            tmp = list(real_instruction.data.cpu()[0].numpy())
            real_instruction_str = debug_untokenize_instruction(tmp)
            write_real_instruction(real_instruction_str)
            #write_instruction(real_instruction_str)
            #instruction_str = real_instruction_str

            image = cuda_var(images[0][s], setup["cuda"], 0)
            label_mask = cuda_var(label_masks[0][s], setup["cuda"], 0)
            affine_g_to_s = affines[0][s]
            print("Your current environment:")
            with open(
                    "/storage/dxsun/unreal_config_nl/configs/configs/random_config_"
                    + str(env_id) + ".json") as fp:
                config = json.load(fp)
            print(config)
            while keep_going:
                write_real_instruction(real_instruction_str)

                while True:
                    cv2.waitKey(200)
                    instruction = read_instruction_file()
                    if instruction == "CMD: Next":
                        print("Advancing")
                        keep_going = False
                        write_empty_instruction()
                        break
                    elif instruction == "CMD: Reset":
                        print("Resetting")
                        env.reset(s)
                        write_empty_instruction()
                    elif len(instruction.split(" ")) > 1:
                        instruction_str = instruction
                        print("Executing: ", instruction_str)
                        break

                if not keep_going:
                    continue

                #instruction_str = read_instruction_file()
                # TODO: Load instruction from file
                tok_instruction = tokenize_instruction(instruction_str,
                                                       word2token)
                instruction_t = torch.LongTensor(tok_instruction).unsqueeze(0)
                instruction_v = cuda_var(instruction_t, setup["cuda"], 0)
                instruction_mask = torch.ones_like(instruction_v)
                tmp = list(instruction_t[0].numpy())
                instruction_dbg_str = debug_untokenize_instruction(
                    tmp, token2term)

                # import matplotlib.pyplot as plt
                #plt.plot(image.squeeze(0).permute(1,2,0).cpu().numpy())
                #plt.show()

                res = model(image, instruction_v, instruction_mask)
                mask_pred = res[0]
                shp = mask_pred.shape
                mask_pred = F.softmax(mask_pred.view([2, -1]), 1).view(shp)
                #mask_pred = softmax2d(mask_pred)

                # TODO: Rotate the mask_pred to the global frame
                affine_s_to_g = np.linalg.inv(affine_g_to_s)
                S = 8.0
                affine_scale_up = np.asarray([[S, 0, 0], [0, S, 0], [0, 0, 1]])
                affine_scale_down = np.linalg.inv(affine_scale_up)

                affine_pred_to_g = np.dot(
                    affine_scale_down, np.dot(affine_s_to_g, affine_scale_up))
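                # Conjugating the segment->global affine by the factor-S scaling maps the
                # 32x32 prediction grid straight into the 32x32 global grid: scale the
                # coordinates up by S, apply segment->global, then scale back down by S.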
                #affine_pred_to_g_t = torch.from_numpy(affine_pred_to_g).float()

                mask_pred_np = mask_pred.data.cpu().numpy()[0].transpose(
                    1, 2, 0)
                mask_pred_g_np = apply_affine(mask_pred_np, affine_pred_to_g,
                                              32, 32)
                print("Sum of global mask: ", mask_pred_g_np.sum())
                mask_pred_g = torch.from_numpy(
                    mask_pred_g_np.transpose(2, 0,
                                             1)).float()[np.newaxis, :, :, :]
                exec_model.set_ground_truth_visitation_d(mask_pred_g)

                # Create a batch axis for pytorch
                #mask_pred_g = affine2d(mask_pred, affine_pred_to_g_t[np.newaxis, :, :])

                mask_pred_np[:, :, 0] -= mask_pred_np[:, :, 0].min()
                mask_pred_np[:, :, 0] /= (mask_pred_np[:, :, 0].max() + 1e-9)
                mask_pred_np[:, :, 0] *= 2.0
                mask_pred_np[:, :, 1] -= mask_pred_np[:, :, 1].min()
                mask_pred_np[:, :, 1] /= (mask_pred_np[:, :, 1].max() + 1e-9)

                presenter = Presenter()
                presenter.show_image(mask_pred_g_np,
                                     "mask_pred_g",
                                     torch=False,
                                     waitkey=1,
                                     scale=4)
                #import matplotlib.pyplot as plt
                #print("image.data shape:", image.data.cpu().numpy().shape)
                #plt.imshow(image.data.squeeze().permute(1,2,0).cpu().numpy())
                #plt.show()
                # presenter.show_image(image.data, "mask_pred_g", torch=False, waitkey=1, scale=4)
                #import pdb; pdb.set_trace()
                pred_viz_np = presenter.overlaid_image(image.data,
                                                       mask_pred_np,
                                                       channel=0)
                # TODO: Don't show labels
                # TODO: OpenCV colours
                #label_mask_np = p.data.cpu().numpy()[0].transpose(1,2,0)
                labl_viz_np = presenter.overlaid_image(image.data,
                                                       label_mask.data,
                                                       channel=0)
                viz_img_np = np.concatenate((pred_viz_np, labl_viz_np), axis=1)
                viz_img_np = pred_viz_np

                viz_img = presenter.overlay_text(viz_img_np,
                                                 instruction_dbg_str)
                cv2.imshow("interactive viz", viz_img)
                cv2.waitKey(100)

                rollout_model(exec_model, env, env_ids[0][s], set_idxs[0][s],
                              seg_idxs[0][s], tok_instruction)
                write_instruction("")
Example #10
    def get_action(self, state, instruction):
        """
        Given a DroneState (from PomdpInterface) and instruction, produce a numpy 4D action (x, y, theta, pstop)
        :param state: DroneState object with the raw image from the simulator
        :param instruction: Instruction tokenized according to the corpus
        #TODO: Absorb corpus within model
        :return: numpy array [x, y, theta, pstop]
        """
        # TODO: Simplify this
        self.eval()
        images_np_pure = state.image
        state_np = state.state

        #print("Act: " + debug_untokenize_instruction(instruction))

        images_np = standardize_image(images_np_pure)
        image_fpv = Variable(none_padded_seq_to_tensor([images_np]))
        state = Variable(none_padded_seq_to_tensor([state_np]))
        # Add the batch dimension

        first_step = True
        if instruction == self.prev_instruction:
            first_step = False
        self.prev_instruction = instruction
        instruction_str = debug_untokenize_instruction(instruction)

        # TODO: Move this to PomdpInterface (for now it's here because this is already visualizing the maps)
        if first_step:
            if self.rviz is not None:
                self.rviz.publish_instruction_text(
                    "instruction", debug_untokenize_instruction(instruction))

        img_in_t = image_fpv
        img_in_t.volatile = True

        instr_len = [len(instruction)] if instruction is not None else None
        instruction = torch.LongTensor(instruction).unsqueeze(0)
        instruction = cuda_var(instruction, self.is_cuda, self.cuda_device)

        state.volatile = True

        if self.is_cuda:
            if img_in_t is not None:
                img_in_t = img_in_t.cuda(self.cuda_device)
            state = state.cuda(self.cuda_device)

        step_enc = None
        plan_now = None

        self.seq_step += 1

        action = self(img_in_t,
                      state,
                      instruction,
                      instr_len,
                      plan=plan_now,
                      pos_enc=step_enc)

        passive_mode_debug_projections = True
        if passive_mode_debug_projections:
            self.show_landmark_locations(loop=False, states=state)
            self.reset()

        # Run auxiliary objectives for debugging purposes (e.g. to compute classification predictions)
        if self.params.get("run_auxiliaries_at_test_time"):
            _, _ = self.aux_losses.calculate_aux_loss(self.tensor_store,
                                                      reduce_average=True)
            overlaid = self.get_overlaid_classification_results(
                whole_batch=False)

        # Save materials for analysis and presentation
        if self.params["write_figures"]:
            self.save_viz(images_np_pure, instruction_str)

        output_action = action.squeeze().data.cpu().numpy()
        stop_prob = output_action[3]
        output_stop = 1 if stop_prob > self.params["stop_p"] else 0
        output_action[3] = output_stop

        return output_action
Example #11
    def sup_loss_on_batch(self, batch, eval=False, viz=False):

        if eval:
            self.eval()
        else:
            self.train()

        images = cuda_var(batch["images"], self.is_cuda, self.cuda_device)
        instructions = cuda_var(batch["instr"], self.is_cuda, self.cuda_device)
        instruction_masks = cuda_var(batch["instr_mask"], self.is_cuda,
                                     self.cuda_device)
        label_masks = cuda_var(batch["traj_labels"], self.is_cuda,
                               self.cuda_device)

        # Each of the above is a list of lists of tensors, where the outer list runs over the batch and the inner
        # list over the segments. Loop through and accumulate loss sequentially for each example and each segment.
        # Reset model state (embedding etc.) between batches, but not between segments.
        # We don't process the batch in batched form, because the varying number of segments per example makes it complicated.

        batch_size = len(images)
        total_class_loss = Variable(empty_float_tensor([1], self.is_cuda,
                                                       self.cuda_device),
                                    requires_grad=True)
        total_ground_loss = Variable(empty_float_tensor([1], self.is_cuda,
                                                        self.cuda_device),
                                     requires_grad=True)
        count = 0

        label_masks = self.label_pool(label_masks)
        mask_pred, features, emb_loss = self(images, instructions,
                                             instruction_masks)

        if BCE:
            mask_pred_flat = mask_pred.view(-1, 1)
            label_masks_flat = label_masks - torch.min(label_masks)
            label_masks_flat = label_masks_flat / (
                torch.max(label_masks_flat) + 1e-9)
            label_masks_flat = label_masks_flat.view(-1, 1).clamp(0, 1)
            main_loss = self.mask_loss(mask_pred_flat, label_masks_flat)

        elif NLL:
            mask_pred_1 = F.softmax(mask_pred, 1, _stacklevel=5)
            mask_pred_2 = 1 - mask_pred_1
            mask_pred_1 = mask_pred_1.unsqueeze(1)
            mask_pred_2 = mask_pred_2.unsqueeze(1)
            mask_pred = torch.cat((mask_pred_1, mask_pred_2), dim=1)
            label_masks = label_masks.clamp(0, 1)
            if self.is_cuda:
                label_masks = label_masks.type(torch.cuda.LongTensor)
            else:
                label_masks = label_masks.type(torch.LongTensor)
            main_loss = self.mask_loss(mask_pred, label_masks)

        elif CE:
            # Crossentropy2D internally applies logsoftmax to mask_pred,
            # but labels are already assumed to be a valid probability distribution, so no softmax is applied
            main_loss = self.mask_loss(mask_pred, label_masks)
            # So for nice plotting, we must manually do it
            mask_pred = self.spatialsoftmax(mask_pred)
        else:
            main_loss = self.mask_loss(mask_pred, label_masks)

        # sum emb loss if batch size > 1
        if type(emb_loss) == tuple:
            emb_loss = sum(emb_loss)

        # Extract the feature vectors corresponding to every landmark's location in the map
        # Apply a linear layer to classify which of the 64 landmarks it is
        # The landmark positions have to be divided by the same factor as the ResNet scaling factor
        lcount = 0
        for i in range(batch_size):
            if self.class_loss and len(batch["lm_pos"][i]) > 0:
                lcount += 1
                landmark_pos = cuda_var(batch["lm_pos"][i], self.is_cuda,
                                        self.cuda_device)
                landmark_indices = cuda_var(batch["lm_indices"][i],
                                            self.is_cuda, self.cuda_device)
                landmark_coords = (landmark_pos / 8).long()
                lm_features = self.gather2d(features[i:i + 1, 0:32],
                                            landmark_coords)
                lm_pred = self.aux_class_linear(lm_features)
                class_loss = self.aux_loss(lm_pred, landmark_indices)
                total_class_loss = total_class_loss + class_loss

            if self.ground_loss and len(batch["lm_pos"][i]) > 0:
                landmark_pos = cuda_var(batch["lm_pos"][i], self.is_cuda,
                                        self.cuda_device)
                landmark_mentioned = cuda_var(batch["lm_mentioned"][i],
                                              self.is_cuda, self.cuda_device)
                landmark_coords = (landmark_pos / 8).long()
                g_features = self.gather2d(features[i:i + 1, 32:35],
                                           landmark_coords)
                lm_pred = self.aux_ground_linear(g_features)
                ground_loss = self.aux_loss(lm_pred, landmark_mentioned)
                total_ground_loss = total_ground_loss + ground_loss

        total_class_loss = total_class_loss / (lcount + 1e-9)
        total_ground_loss = total_ground_loss / (lcount + 1e-9)
        count += 1

        # Just visualization and debugging code
        if self.get_iter() % 50 == 0:
            presenter = Presenter()
            pred_viz_np = presenter.overlaid_image(images[0].data,
                                                   mask_pred[0].data)
            labl_viz_np = presenter.overlaid_image(images[0].data,
                                                   label_masks[0].data)
            comp = np.concatenate((pred_viz_np, labl_viz_np), axis=1)
            presenter.show_image(comp, "path_pred")

            if hasattr(self.sentence_embedding, "save_att_map"):
                self.sentence_embedding.save_att_map(self.get_iter(), i)

        total_loss = main_loss + 0.1 * total_class_loss + 0.001 * emb_loss + 0.1 * total_ground_loss
        total_loss = total_loss / (count + 1e-9)

        self.write_summaires("eval" if eval else "train", self.get_iter(),
                             total_loss, main_loss, emb_loss, total_class_loss,
                             total_ground_loss)
        self.inc_iter()

        return total_loss
Example #12
def evaluate():
    P.initialize_experiment()

    model, model_loaded = load_model()
    eval_envs = get_correct_eval_env_id_list()

    model.eval()
    dataset_name = P.get_current_parameters().get("Data").get("dataset_name")
    dataset = model.get_dataset(data=None,
                                envs=eval_envs,
                                dataset_name=dataset_name,
                                dataset_prefix="supervised",
                                eval=True,
                                seg_level=False)
    dataloader = DataLoader(dataset,
                            collate_fn=dataset.collate_fn,
                            batch_size=1,
                            shuffle=False,
                            num_workers=4,
                            pin_memory=True,
                            timeout=0)

    count = 0
    success = 0
    total_dist = 0

    for batch in dataloader:
        if batch is None:
            print("None batch!")
            continue

        images = batch["images"]
        instructions = batch["instr"]
        label_masks = batch["traj_labels"]

        # Each of the above is a list of lists of tensors, where the outer list runs over the batch and the inner
        # list over the segments. Loop through and accumulate loss sequentially for each example and each segment.
        # Reset model state (embedding etc.) between batches, but not between segments.
        # We don't process the batch in batched form, because the varying number of segments per example makes it complicated.
        # TODO: This code is outdated and wrongly discretizes the goal location. Grab the fixed version from the old branch.

        batch_size = len(images)
        print("batch: ", count)
        print("successes: ", success)

        for i in range(batch_size):
            num_segments = len(instructions[i])

            for s in range(num_segments):
                instruction = cuda_var(instructions[i][s], model.is_cuda,
                                       model.cuda_device)
                instruction_mask = torch.ones_like(instruction)
                image = cuda_var(images[i][s], model.is_cuda,
                                 model.cuda_device)
                label_mask = cuda_var(label_masks[i][s], model.is_cuda,
                                      model.cuda_device)

                label_mask = model.label_pool(label_mask)

                goal_mask_l = label_mask[0, 1, :, :]
                goal_mask_l_np = goal_mask_l.data.cpu().numpy()
                goal_mask_l_flat = np.reshape(goal_mask_l_np, [-1])
                max_index_l = np.argmax(goal_mask_l_flat)
                argmax_loc_l = np.asarray([
                    int(max_index_l / goal_mask_l_np.shape[1]),
                    int(max_index_l % goal_mask_l_np.shape[1])
                ])

                if np.sum(goal_mask_l_np) < 0.01:
                    continue

                mask_pred, features, emb_loss = model(image, instruction,
                                                      instruction_mask)
                goal_mask = mask_pred[0, 1, :, :]
                goal_mask_np = goal_mask.data.cpu().numpy()
                goal_mask_flat = np.reshape(goal_mask_np, [-1])
                max_index = np.argmax(goal_mask_flat)

                argmax_loc = np.asarray([
                    int(max_index / goal_mask_np.shape[1]),
                    int(max_index % goal_mask_np.shape[1])
                ])
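                # (Equivalent: argmax_loc = np.asarray(np.unravel_index(max_index, goal_mask_np.shape)))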

                dist = np.linalg.norm(argmax_loc - argmax_loc_l)
                if dist < OK_DIST:
                    success += 1
                count += 1
                total_dist += dist

    print("Correct goal predictions: ", success)
    print("Total evaluations: ", count)
    print("total dist: ", total_dist)
    print("avg dist: ", total_dist / float(count))
    print("success rate: ", success / float(count))
Example #13
 def reset(self):
     self.last_h = cuda_var(torch.zeros(1, 1, self.hidden_size),
                            self.is_cuda, self.cuda_device)
     self.last_c = cuda_var(torch.zeros(1, 1, self.hidden_size),
                            self.is_cuda, self.cuda_device)
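last_h and last_c follow the shape nn.LSTM expects for its hidden state, (num_layers * num_directions, batch, hidden_size), here with one layer and batch size 1. A minimal usage sketch under that assumption (the lstm attribute and x are illustrative, not from this snippet):

# x: [seq_len, 1, input_size] input for a batch of one.
output, (self.last_h, self.last_c) = self.lstm(x, (self.last_h, self.last_c))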
Example #14
    def get_action(self, state, instruction):
        """
        Given a DroneState (from PomdpInterface) and instruction, produce a numpy 4D action (x, y, theta, pstop)
        :param state: DroneState object with the raw image from the simulator
        :param instruction: Instruction tokenized according to the corpus
        #TODO: Absorb corpus within model
        :return: numpy array [x, y, theta, pstop]
        """
        # TODO: Simplify this
        self.eval()
        images_np_pure = state.image
        state_np = state.state

        #print("Act: " + debug_untokenize_instruction(instruction))

        images_np = standardize_image(images_np_pure)
        image_fpv = Variable(none_padded_seq_to_tensor([images_np]))
        state = Variable(none_padded_seq_to_tensor([state_np]))
        # Add the batch dimension

        first_step = True
        if instruction == self.prev_instruction:
            first_step = False
        self.prev_instruction = instruction
        instruction_str = debug_untokenize_instruction(instruction)

        # TODO: Move this to PomdpInterface (for now it's here because this is already visualizing the maps)
        if first_step:
            if self.rviz is not None:
                self.rviz.publish_instruction_text(
                    "instruction", debug_untokenize_instruction(instruction))
        #if first_step:
        #    say(debug_untokenize_instruction(instruction))

        img_in_t = image_fpv
        img_in_t.volatile = True

        instr_len = [len(instruction)] if instruction is not None else None
        instruction = torch.LongTensor(instruction).unsqueeze(0)
        instruction = cuda_var(instruction, self.is_cuda, self.cuda_device)

        state.volatile = True

        if self.is_cuda:
            if img_in_t is not None:
                img_in_t = img_in_t.cuda(self.cuda_device)
            state = state.cuda(self.cuda_device)

        step_enc = None
        plan_now = None

        self.seq_step += 1

        action = self(img_in_t,
                      state,
                      instruction,
                      instr_len,
                      plan=plan_now,
                      pos_enc=step_enc)

        # Save materials for analysis and presentation
        if self.params["write_figures"]:
            self.save_viz(images_np_pure, instruction_str)

        output_action = action.squeeze().data.cpu().numpy()
        stop_prob = output_action[3]
        print(f"P(STOP): {stop_prob}")
        output_stop = 1 if stop_prob > self.params["stop_p"] else 0
        output_action[3] = output_stop

        return output_action