Exemplo n.º 1
0
def get_pool_loc(ims,
                 image_id,
                 flag_,
                 size=(7, 7),
                 spatial_scale=1,
                 batch_size=1):
    """Adaptive-max-pool every person and object ROI to a fixed ``size`` grid.

    For each of the first ``batch_size`` entries, the detections and the
    attention maps of the image are fetched through ``helpers_pre``, the
    boxes are multiplied by the extents of ``ims[batch]`` and the
    corresponding crops are pooled.

    Args:
        ims: Indexable of per-image tensors; dimensions 1 and 2 of each
            entry are used as the box scale factors and are the ones that
            get cropped.  (presumably channel-first feature maps -- TODO
            confirm the H/W order against the caller.)
        image_id: Indexable of image ids, each convertible with ``int``.
        flag_: Indexable whose ``[batch][0]`` entry encodes the phase:
            ``0`` selects ``'train'`` and ``2`` selects ``'test'``.  Any
            other value keeps the flag of the previous entry (``'train'``
            for the first one).
        size: Output size handed to ``nn.AdaptiveMaxPool2d``.
        spatial_scale: Accepted for interface compatibility; its value is
            never read because the scale is derived from the tensor size.
        batch_size: Number of entries of ``ims`` to process.

    Returns:
        A 4-tuple ``(persons, objects, spatial_locs, union_boxes)``:
        stacked pooled person crops, stacked pooled object crops, a list
        of ``[x1, y1, x2, y2, width, height]`` entries (persons first,
        then objects, image by image) and the concatenated attention maps
        as a float tensor moved with ``.cuda()``.
    """
    pool = nn.AdaptiveMaxPool2d(size)

    spatial_locs = []
    union_box_out = []
    pers_out = []
    objs_out = []

    def pool_rois(feature_map, rois, sink):
        # Pool each ROI of `rois` into `sink` and log its geometry.
        for roi in rois:
            x1, y1, x2, y2 = (int(roi[k]) for k in range(4))
            # The slice is inclusive of the far edge, hence the "+ 1".
            crop = feature_map[..., y1:y2 + 1, x1:x2 + 1]
            sink.append(pool(crop))
            spatial_locs.append([x1, y1, x2, y2, x2 - x1, y2 - y1])

    # The flag deliberately lives outside the loop: codes other than 0 / 2
    # leave whatever value the previous entry selected.
    flag = 'train'
    for batch in range(batch_size):
        this_image = int(image_id[batch])

        phase_code = int(flag_[batch][0])
        if phase_code == 0:
            flag = 'train'
        elif phase_code == 2:
            flag = 'test'

        detections = helpers_pre.get_compact_detections(this_image, flag)
        roi_pers = detections['person_bbx']
        roi_objs = detections['objects_bbx']
        union_box = helpers_pre.get_attention_maps(this_image, flag)
        union_box_out.append(torch.tensor(union_box).cuda().float())

        feature_map = ims[batch]
        extents = feature_map.size()
        # Boxes are scaled by (dim 1, dim 2, dim 1, dim 2) of the tensor.
        scale = [extents[1], extents[2], extents[1], extents[2]]
        scaled_pers = roi_pers * scale
        scaled_objs = roi_objs * scale

        pool_rois(feature_map, scaled_pers, pers_out)
        pool_rois(feature_map, scaled_objs, objs_out)

    return (torch.stack(pers_out), torch.stack(objs_out), spatial_locs,
            torch.cat(union_box_out))
Exemplo n.º 2
0
def train_test(
    model,
    optimizer,
    scheduler,
    dataloader,
    number_of_epochs,
    break_point,
    saving_epoch,
    folder_name,
    batch_size,
    infr,
    start_epoch,
    mean_best,
    visualize,
):
    """Run the train / val / test loop, score each phase and save results.

    Args:
        model: Network called as ``model(inputs, pairs_info, pairs_info,
            image_id, nav, phase)``; the first four entries of its output
            are used as logits of four prediction streams.
        optimizer: Optimizer; stepped after every 'train' and 'val' batch.
        scheduler: LR scheduler; stepped once at the start of every epoch.
        dataloader: Mapping from phase name to an iterable of batches.
            Each batch is indexed 0..7 (inputs, labels, single labels,
            image ids, pair info, ambiguity score, class distribution,
            class bias).
        number_of_epochs: Number of epochs to run in normal mode.
        break_point: The batch loop of a phase stops once ``break_point``
            batches have been processed.
        saving_epoch: Period (in epochs) for periodic checkpoints and for
            dumping the test detections.
        folder_name: Output directory; created if it does not exist.
        batch_size: Not read anywhere in this function.
        infr: ``'t'`` runs inference only (no best/periodic checkpoints,
            a single epoch); anything else trains.
        start_epoch: Index of the first epoch.
        mean_best: Best test score so far; a higher score triggers the
            "best" checkpoint.
        visualize: ``'f'`` disables visualization; otherwise the name of
            the single phase ('train', 'val' or 'test') to visualize.

    Returns:
        None.  Checkpoints and pickles are written under ``folder_name``
        and progress is printed.
    """
    # NOTE(review): neither global is referenced anywhere in this function.
    global positive_count, negative_count

    #### Creating the folder where the results would be stored##########

    try:
        os.mkdir(folder_name)
    except OSError as exc:
        # An already existing folder is fine; anything else is re-raised.
        if exc.errno != errno.EEXIST:
            raise
        pass
    file_name = folder_name + '/' + 'result.pickle'

    ####################################################################

    loss_epoch_train = []
    loss_epoch_val = []
    loss_epoch_test = []
    initial_time = time.time()
    result = []

    # NOTE(review): these five lists are only used by commented-out code.
    lst_feat_vis = []
    lst_feat_int = []
    lst_feat_grp = []
    lst_feat_att = []
    lst_label = []

    # #### Freeing out the cache memories from gpus and declaring the phases######

    torch.cuda.empty_cache()
    phases = ['train', 'val', 'test']

    if infr == 't' and visualize == 'f':  # ## If running from a pretrained model only for saving best result ######
        # Re-run exactly one epoch: the one just before `start_epoch`.
        start_epoch = start_epoch - 1
        phases = ['test']
        end_of_epochs = start_epoch + 1
        print('Only doing testing for storing result from a model')
    elif visualize != 'f':

        if visualize not in phases:
            print(
                'ERROR! Asked to show result from a unknown set.The choice should be among train,val,test'
            )
            return
        else:
            # Visualization runs a single epoch over the requested phase.
            phases = [visualize]
            end_of_epochs = start_epoch + 1
            print('Only showing predictions from a model')
    else:
        end_of_epochs = start_epoch + number_of_epochs

    # #### Starting the Epochs#############

    #----Save seed model-------------------------
    # The seed checkpoint is written in every mode, including inference.
    save_checkpoint(
        {
            'epoch': 'seed',
            'state_dict': model.state_dict(),
            'mean_best': mean_best,
            'optimizer': optimizer.state_dict(),
            'scheduler': scheduler.state_dict(),
        },
        filename=folder_name + '/' + 'seed_' + 'checkpoint.pth.tar')
    #--------------------------------------------
    for epoch in range(start_epoch, end_of_epochs):
        # NOTE(review): the scheduler is stepped before any optimizer.step()
        # of the epoch -- confirm this ordering is intended for the PyTorch
        # version in use.
        scheduler.step()
        print('Epoch {}/{}'.format(epoch + 1, end_of_epochs))
        print('-' * 10)

        print('Lr: {}'.format(scheduler.get_lr()))

        initial_time_epoch = time.time()

        for phase in phases:
            # NOTE(review): 'val' also puts the model in training mode, and
            # further down 'val' batches are back-propagated as well.
            if phase == 'train':
                model.train()
            elif phase == 'val':
                model.train()
            else:
                model.eval()

            print('In {}'.format(phase))
            # NOTE(review): detections_train / detections_val are never used.
            detections_train = []
            detections_val = []
            detections_test = []

            # Accumulators start with one dummy row of ones; consumers slice
            # it off with [1:].  The *_class arrays are never read.
            true_scores_class = np.ones([1, 80], dtype=int)
            true_scores = np.ones([1, 29], dtype=int)
            true_scores_single = np.ones([1, 1], dtype=int)
            predicted_scores = np.ones([1, 29], dtype=float)
            predicted_scores_single = np.ones([1, 1], dtype=float)
            predicted_scores_class = np.ones([1, 80], dtype=float)
            acc_epoch = 0
            iteration = 1

            torch.cuda.empty_cache()

            # ###Starting the iterations##################

            for (iterr, i) in enumerate(tqdm(dataloader[phase])):
                # Release cached GPU memory every 20 batches.
                if iterr % 20 == 0:
                    torch.cuda.empty_cache()

                inputs = i[0].to(device)
                labels = i[1].to(device)
                labels_single = i[2].to(device)
                image_id = i[3]
                pairs_info = i[4]
                ambiguity_score = i[5]
                class_dist = i[6]
                class_bias = i[7]
                minbatch_size = len(pairs_info)

                optimizer.zero_grad()
                # `nav` carries one [phase code, epoch] row per image:
                # 0 = train, 1 = val, 2 = test.
                if phase == 'train':
                    nav = torch.tensor([[0, epoch]] * minbatch_size).to(device)
                elif phase == 'val':
                    nav = torch.tensor([[1, epoch]] * minbatch_size).to(device)
                else:
                    nav = torch.tensor([[2, epoch]] * minbatch_size).to(device)

            # import pdb;pdb.set_trace()........

                true = labels.data.cpu().numpy()
                true_single = labels_single.data.cpu().numpy()

                # Gradients are tracked for both 'train' and 'val'.
                with torch.set_grad_enabled(phase == 'train'
                                            or phase == 'val'):
                    model_out = model(
                        inputs,
                        pairs_info,
                        pairs_info,
                        image_id,
                        nav,
                        phase,
                    )
                    outputs = model_out[0]
                    outputs_single = model_out[1]
                    outputs_combine = model_out[2]
                    outputs_gem = model_out[3]

                    # if infr == 't' and phase == 'test':
                    # if phase == 'train' or phase == 'val':
                    #     # out_feat_vis = model_out[4].cpu().numpy()
                    #     # out_feat_int = model_out[5].cpu().numpy()
                    #     # out_feat_grp = model_out[6].cpu().numpy()
                    #     # out_feat_att = model_out[7].cpu().numpy()
                    #     out_labels = labels.cpu().numpy()

                    #     elim_label = labels.sum(1).cpu().numpy()
                    #     elim_idx = np.where(elim_label==0)[0]

                    #     # out_feat_vis = np.delete(out_feat_vis, elim_idx, axis=0)
                    #     # out_feat_int = np.delete(out_feat_int, elim_idx, axis=0)
                    #     # out_feat_grp = np.delete(out_feat_grp, elim_idx, axis=0)
                    #     # out_feat_att = np.delete(out_feat_att, elim_idx, axis=0)
                    #     out_label = np.delete(out_labels, elim_idx, axis=0)

                    #     # lst_feat_vis.append(out_feat_vis)
                    #     # lst_feat_int.append(out_feat_int)
                    #     # lst_feat_grp.append(out_feat_grp)
                    #     # lst_feat_att.append(out_feat_att)
                    #     lst_label.append(out_label)
                    #     print(len(lst_label))

                    # outputs_pose=model_out[7]

                    # Per-stream probabilities as numpy arrays.
                    predicted_HOI = sigmoid(outputs).data.cpu().numpy()
                    predicted_HOI_combine = \
                        sigmoid(outputs_combine).data.cpu().numpy()
                    predicted_single = \
                        sigmoid(outputs_single).data.cpu().numpy()
                    predicted_gem = \
                        sigmoid(outputs_gem).data.cpu().numpy()
                    # Kept before the streams are multiplied together below.
                    predicted_HOI_pair = predicted_HOI

                    # predicted_HOI_pose=sigmoid(outputs_pose).data.cpu().numpy()

                    # NOTE(review): start_index, pers_index and start_no_obj
                    # are never read; start_pers / start_obj / start_tot are
                    # only accumulated.
                    start_index = 0
                    start_obj = 0
                    start_pers = 0
                    start_tot = 0
                    pers_index = 1
                    # The *_extended arrays begin with a dummy zero row that
                    # is dropped with [1:] wherever they are consumed.
                    persons_score_extended = np.zeros([1, 1])
                    objects_score_extended = np.zeros([1, 1])
                    class_ids_extended = np.zeros([1, 1])
                    persons_np_extended = np.zeros([1, 4])
                    objects_np_extended = np.zeros([1, 4])
                    start_no_obj = 0
                    class_ids_total = []

                    # ############ Extending Person and Object Boxes and confidence scores to Multiply with all Pairs##########

                    for batch in range(len(pairs_info)):

                        # NOTE(review): these three lists (and the 1.0
                        # appended below) are overwritten by the tuple
                        # assignment that follows.
                        persons_score = []
                        objects_score = []
                        class_ids = []
                        objects_score.append(float(1))

                        this_image = int(image_id[batch])
                        scores_total = \
                            helpers_pre.get_compact_detections(this_image,
                                phase)
                        (persons_score, objects_score, persons_np,
                         objects_np, class_ids) = \
                            (scores_total['person_bbx_score'],
                             scores_total['objects_bbx_score'],
                             scores_total['person_bbx'],
                             scores_total['objects_bbx'],
                             scores_total['class_id_objects'])
                        # Person data is expanded with `extend` using
                        # pairs_info[batch][1]; object data with
                        # `extend_object` using pairs_info[batch][0].
                        temp_scores = \
                            extend(np.array(persons_score).reshape(len(persons_score),
                                   1), int(pairs_info[batch][1]))
                        persons_score_extended = \
                            np.concatenate([persons_score_extended,
                                temp_scores])
                        temp_scores = extend(persons_np,
                                             int(pairs_info[batch][1]))
                        persons_np_extended = \
                            np.concatenate([persons_np_extended,
                                temp_scores])
                        temp_scores = \
                            extend_object(np.array(objects_score).reshape(len(objects_score),
                                1), int(pairs_info[batch][0]))
                        objects_score_extended = \
                            np.concatenate([objects_score_extended,
                                temp_scores])
                        temp_scores = extend_object(objects_np,
                                                    int(pairs_info[batch][0]))
                        objects_np_extended = \
                            np.concatenate([objects_np_extended,
                                temp_scores])
                        temp_scores = \
                            extend_object(np.array(class_ids).reshape(len(class_ids),
                                1), int(pairs_info[batch][0]))
                        class_ids_extended = \
                            np.concatenate([class_ids_extended,
                                temp_scores])
                        class_ids_total.append(class_ids)

                        start_pers = start_pers \
                            + int(pairs_info[batch][0])
                        start_obj = start_obj \
                            + int(pairs_info[batch][1])
                        start_tot = start_tot \
                            + int(pairs_info[batch][1]) \
                            * int(pairs_info[batch][0])

                    # ##################################################################################################################

                # ### Applying LIS#######

                    persons_score_extended = \
                        LIS(persons_score_extended, 8.3, 12, 10)
                    objects_score_extended = \
                        LIS(objects_score_extended, 8.3, 12, 10)

                    # #################################

                    # #### Multiplying the score from different streams along with the prior function from ican##########

                    predicted_HOI = predicted_HOI \
                        * predicted_HOI_combine * predicted_single \
                        * predicted_gem * objects_score_extended[1:] \
                        * persons_score_extended[1:]
                    loss_mask = \
                        prior.apply_prior(class_ids_extended[1:],
                            predicted_HOI)
                    predicted_HOI = loss_mask * predicted_HOI

                    # ### Calculating Loss############

                    N_b = minbatch_size * 29  # *int(total_elements[0])#*29 #pairs_info[1]*pairs_info[2]*pairs_info[3]
                    # NOTE(review): the mask is moved with .cuda() while the
                    # inputs use `device` -- confirm both name the same GPU.
                    hum_obj_mask = \
                        torch.Tensor(objects_score_extended[1:]
                            * persons_score_extended[1:]
                            * loss_mask).cuda()
                    # if epoch < 12:
                    # lossf = torch.sum(loss_com_combine(sigmoid(outputs)
                    #         * sigmoid(outputs_combine)
                    #         * sigmoid(outputs_single) * hum_obj_mask
                    #         * sigmoid(outputs_gem), labels.float())) \
                    #     / N_b

                    #----------------Modified Loss---------------------------------
                    # Binary Cross Entropy
                    # else:
                    # lossf = torch.sum(
                    #         loss_com_balanced(
                    #             {'outputs':outputs, 'outputs_combine':outputs_combine,
                    #             'outputs_single':outputs_single, 'outputs_gem':outputs_gem,
                    #             'hum_obj_mask':hum_obj_mask},
                    #             labels.float(),
                    #             ambiguity_score,
                    #             class_dist,
                    #             class_bias)) \
                    #         / N_b
                    # Focal Loss
                    lossf = torch.sum(loss_com_focal_balanced(sigmoid(outputs)
                            * sigmoid(outputs_combine)
                            * sigmoid(outputs_single) * hum_obj_mask
                            * sigmoid(outputs_gem), labels.float())) \
                        / N_b
                    #--------------------------------------------------------------
                    lossc = lossf.item()

                    # acc_epoch is the running sum of per-batch losses.
                    acc_epoch += lossc
                    iteration += 1
                    if phase == 'train' or phase == 'val':  # ### Flowing the loss backwards#########
                        lossf.backward()
                        optimizer.step()

                # ##########################################################

                    # Drop references to the per-batch tensors; the numpy
                    # copies (`true`, `predicted_*`) are still used below.
                    del lossf
                    del model_out
                    del inputs
                    del outputs
                    del labels
                    del labels_single
                    del ambiguity_score
                    del class_dist
                    del class_bias

            # ###### If we want to do Visualization#########....

                if visualize != 'f':
                    viss.visual(
                        image_id,
                        phase,
                        pairs_info,
                        predicted_HOI,
                        predicted_single,
                        objects_score_extended[1:],
                        persons_score_extended[1:],
                        predicted_HOI_combine,
                        predicted_HOI_pair,
                        true,
                    )

            # ####################################################################

            # #### Preparing for Storing Results##########....

                predicted_scores = np.concatenate(
                    (predicted_scores, predicted_HOI), axis=0)
                true_scores = np.concatenate((true_scores, true), axis=0)
                predicted_scores_single = \
                    np.concatenate((predicted_scores_single,
                                   predicted_single), axis=0)
                true_scores_single = \
                    np.concatenate((true_scores_single, true_single),
                                   axis=0)

                # ############################################

                # ### Storing the result in V-COCO Format##########

                if phase == 'test':
                    if (epoch + 1) % saving_epoch == 0 or infr == 't':
                        all_scores = filtering(
                            predicted_HOI,
                            true,
                            persons_np_extended[1:],
                            objects_np_extended[1:],
                            predicted_single,
                            pairs_info,
                            image_id,
                        )

                        # prep.infer_format(image_id,all_scores,phase,detections_test,pairs_info)

                        # `detections_test` is passed in here and pickled
                        # after the phase finishes.
                        proper.infer_format(image_id, all_scores, phase,
                                            detections_test, pairs_info)

            # #####################################################........

            # # Breaking in particular number of epoch####

                # `iteration` starts at 1, so this fires after `break_point`
                # batches have been processed.
                if iteration == break_point + 1:

                    break

            # ############################################

            # Score the phase; [1:] drops the dummy first row of each
            # accumulator.
            if phase == 'train':
                loss_epoch_train.append(acc_epoch)
                (AP, AP_single) = ap.class_AP(predicted_scores[1:, :],
                                              true_scores[1:, :],
                                              predicted_scores_single[1:],
                                              true_scores_single[1:])
                AP_train = pd.DataFrame(AP,
                                        columns=['Name_TRAIN', 'Score_TRAIN'])
                AP_train_single = pd.DataFrame(
                    AP_single, columns=['Name_TRAIN', 'Score_TRAIN'])
            elif phase == 'val':

                loss_epoch_val.append(acc_epoch)
                (AP, AP_single) = ap.class_AP(predicted_scores[1:, :],
                                              true_scores[1:, :],
                                              predicted_scores_single[1:],
                                              true_scores_single[1:])
                AP_val = pd.DataFrame(AP, columns=['Name_VAL', 'Score_VAL'])
                AP_val_single = pd.DataFrame(AP_single,
                                             columns=['Name_VAL', 'Score_VAL'])
            elif phase == 'test':

                loss_epoch_test.append(acc_epoch)
                (AP, AP_single) = ap.class_AP(predicted_scores[1:, :],
                                              true_scores[1:, :],
                                              predicted_scores_single[1:],
                                              true_scores_single[1:])
                AP_test = pd.DataFrame(AP, columns=['Name_TEST', 'Score_TEST'])
                AP_test_single = pd.DataFrame(
                    AP_single, columns=['Name_TEST', 'Score_TEST'])
                if (epoch + 1) % saving_epoch == 0 or infr == 't':
                    file_name_p = folder_name + '/' \
                        + 'test{}.pickle'.format(epoch + 1)
                    with open(file_name_p, 'wb') as handle:
                        pickle.dump(detections_test, handle)

            # Save feature
            # if infr == 't' and phase == 'test':
            # np.save(folder_name+'/feat_vis', np.vstack(lst_feat_vis))
            # np.save(folder_name+'/feat_int', np.vstack(lst_feat_int))
            # np.save(folder_name+'/feat_grp', np.vstack(lst_feat_grp))
            # np.save(folder_name+'/feat_att', np.vstack(lst_feat_att))
        # np.save(folder_name+'/labels_train', np.vstack(lst_label))
        # print('Saved.')

        # ##### Saving the Model###########
        # Row 29, column 1 of the test table is used as the model-selection
        # score (presumably the mean AP row -- TODO confirm against
        # ap.class_AP).
        # NOTE(review): AP_test is only bound when 'test' is in `phases`;
        # visualizing 'train' or 'val' reaches this line with it unbound.
        mean = AP_test.to_records(index=False)[29][1]

        # ###Best Model######

        if mean > mean_best and infr != 't':
            mean_best = mean
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'state_dict': model.state_dict(),
                    'mean_best': mean_best,
                    'optimizer': optimizer.state_dict(),
                    'scheduler': scheduler.state_dict(),
                },
                filename=folder_name + '/' + 'bestcheckpoint.pth.tar')

    # ##############################

        if (epoch + 1) % saving_epoch == 0 and infr != 't':

            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'state_dict': model.state_dict(),
                    'mean_best': mean_best,
                    'optimizer': optimizer.state_dict(),
                    'scheduler': scheduler.state_dict(),
                },
                filename=folder_name + '/' + str(epoch + 1) +
                'checkpoint.pth.tar')

    # ####################################

        # NOTE(review): `result` is appended only in the inference branch
        # but pickled only in the training branch, so the list written to
        # result.pickle is always empty as the code stands.
        if infr == 't':

            AP_final = pd.concat([AP_test], axis=1)
            AP_final_single = pd.concat([AP_test_single], axis=1)
            result.append(AP_final)
        else:
            AP_final = pd.concat([AP_train, AP_val, AP_test], axis=1)
            AP_final_single = pd.concat(
                [AP_train_single, AP_val_single, AP_test_single], axis=1)

            # #### This file will store each epoch result in a pickle format####

            with open(file_name, 'wb') as handle:
                pickle.dump(result, handle)
        time_elapsed = time.time() - initial_time_epoch
        print('APs in EPOCH:{}'.format(epoch + 1))
        print(AP_final)
        print(AP_final_single)
        # When only the test phase ran, the train/val loss lists are empty
        # and the lookup fails, so only the test loss is printed.
        # NOTE(review): the bare `except:` also hides unrelated errors.
        try:
            print('Loss_train:{},Loss_validation:{},Loss_test:{}'.format(
                loss_epoch_train[epoch - start_epoch],
                loss_epoch_val[epoch - start_epoch],
                loss_epoch_test[epoch - start_epoch]))
        except:
            print('Loss_test:{}'.format(loss_epoch_test[epoch - start_epoch]))

        print('This epoch completes in {:.0f}m {:.06f}s'.format(
            time_elapsed // 60, time_elapsed % 60))
        if infr == 't':
            break

    time_elapsed = time.time() - initial_time
    # NOTE(review): the seconds field uses '{:0f}' (six decimals) where the
    # other fields use '{:.0f}' -- looks like a typo.
    print('The whole process runs for {:.0f}h {:.0f}m {:0f}s'.format(
        time_elapsed // 3600, time_elapsed % 3600 // 60,
        time_elapsed % 3600 % 60 % 60))
    return
Exemplo n.º 3
0
def visual(image_id, flag, pairs_info, score_HOI, score_interact,
           score_obj_box, score_per_box, score_REL, score_HOI_pair,
           ground_truth):
    """Show each person-object pair of a batch with its top-5 verb scores.

    For every image, the detections are fetched through ``helpers_pre``,
    the boxes are scaled by the ``W`` / ``H`` values stored in the
    detection JSON and, for each (person, object) pair, both boxes are
    drawn on the image (person in green, object in red) while the five
    highest ground-truth and predicted verb scores are printed.  The
    window blocks until a key is pressed; ESC closes it and stops the
    visualization.

    Args:
        image_id: Sequence of image ids, each convertible with ``int``.
        flag: Data split, one of ``'train'``, ``'val'`` or ``'test'``.
        pairs_info: Per image, ``[0]`` and ``[1]`` hold the two counts
            whose product is the number of pairs of that image.
        score_HOI: Per-pair prediction scores for the whole batch, laid
            out image after image.
        score_interact, score_obj_box, score_per_box, score_REL,
        score_HOI_pair: Accepted for interface compatibility; not read.
        ground_truth: Per-pair labels with the same layout as
            ``score_HOI``.

    Raises:
        ValueError: If ``flag`` is not a known split.
        FileNotFoundError: If a pair has to be drawn but the image could
            not be read.
    """
    esc_key = 27

    def draw_box(canvas, box, color):
        # Same arithmetic as before: corner plus truncated width/height.
        x, y = int(box[0]), int(box[1])
        w, h = int(box[2] - box[0]), int(box[3] - box[1])
        cv2.rectangle(canvas, (x, y), (x + w, y + h), color, 3)

    def top_five(scores):
        # Five highest entries, best first, rounded to two decimals.
        return [(ID2VERB[ind], float("%.2f" % scores[ind]))
                for ind in np.argsort(scores)[-5:][::-1]]

    start = 0
    for batch in range(len(image_id)):
        this_image = int(image_id[batch])
        a = helpers_pre.get_compact_detections(this_image, flag)
        person_bbxn = a['person_bbx']
        obj_bbxn = a['objects_bbx']

        # Slice this image's pairs out of the flat batch-level arrays.
        increment = int(pairs_info[batch][0]) * int(pairs_info[batch][1])
        ground_truth_this_batch = ground_truth[start:start + increment]
        score_HOI_this_batch = score_HOI[start:start + increment]
        start += increment

        if flag == 'train':
            cur_obj_path_s = OBJ_PATH_train_s + "COCO_train2014_%.12i.json" % (
                this_image)
            # Fixed: the separator between the folder and the file name was
            # missing here (the 'val' and 'test' branches had it).
            image_dir_s = image_dir_train + '/train2014/COCO_train2014_%.12i.jpg' % (
                this_image)
        elif flag == 'test':
            cur_obj_path_s = OBJ_PATH_test_s + "COCO_val2014_%.12i.json" % (
                this_image)
            image_dir_s = image_dir_test + '/val2014/COCO_val2014_%.12i.jpg' % (
                this_image)
        elif flag == 'val':
            # Validation images come from the train2014 files.
            cur_obj_path_s = OBJ_PATH_train_s + "COCO_train2014_%.12i.json" % (
                this_image)
            image_dir_s = image_dir_val + '/train2014/COCO_train2014_%.12i.jpg' % (
                this_image)
        else:
            raise ValueError(
                "flag must be 'train', 'val' or 'test', got {!r}".format(flag))

        with open(cur_obj_path_s) as fp:
            detections = json.load(fp)
        scale = np.array(
            [detections['W'], detections['H'],
             detections['W'], detections['H']],
            dtype=float)
        person_bbx = scale * person_bbxn
        obj_bbx = scale * obj_bbxn

        # Read the image once per batch entry; every pair draws on a copy.
        base_img = cv2.imread(image_dir_s, 3)

        start_index = 0
        for person_box in person_bbx:
            for object_box in obj_bbx:
                ground_truth_this_sample = ground_truth_this_batch[start_index]
                score_HOI_this_sample = score_HOI_this_batch[start_index]
                start_index += 1
                print(score_HOI_this_sample)

                pred = [
                    ('GROUND_TRUTH', top_five(ground_truth_this_sample)),
                    ('TOTAL_PREDICTION', top_five(score_HOI_this_sample)),
                ]
                prediction = pd.DataFrame(pred, columns=['Name', 'Prediction'])

                # cv2.imread signals failure by returning None.
                if base_img is None:
                    raise FileNotFoundError(
                        'could not read image {}'.format(image_dir_s))
                img = base_img.copy()
                draw_box(img, person_box, (0, 255, 0))
                draw_box(img, object_box, (0, 0, 255))

                print('\nPredictions (Five Highest Confidence Class):\n{}\n'.
                      format(prediction))

                cv2.imshow('image', img)
                if cv2.waitKey(0) == esc_key:
                    # ESC ends the visualization instead of merely closing
                    # a window that the next pair would reopen.
                    cv2.destroyAllWindows()
                    return

    cv2.destroyAllWindows()
Exemplo n.º 4
0
def train_test(model, optimizer, scheduler, dataloader, number_of_epochs,
               break_point, saving_epoch, folder_name, batch_size, infr,
               start_epoch, mean_best, visualize):
    """Train and evaluate ``model``, run test-only inference, or visualise.

    The mode is selected by ``infr`` and ``visualize``:

    * ``infr == 't'`` and ``visualize == 'f'``: one pass over the ``'test'``
      loader; the collected detections are pickled to ``test.pickle``.
    * ``visualize`` equal to ``'train'`` or ``'test'``: one pass over that
      loader, handing every batch to ``viss.visual``.
    * otherwise: ``number_of_epochs`` epochs of ``'train'`` followed by
      ``'test'``, saving the best and the periodic checkpoints.

    Args:
        model: Called as ``model(inputs, pairs_info, pairs_info, image_id,
            nav, phase)``; its first four outputs are passed through
            ``sigmoid`` and multiplied together.
        optimizer: Stepped after every batch of the ``'train'`` phase.
        scheduler: Stepped once at the start of every epoch.
        dataloader: Mapping from phase name to an iterable of batches. A
            batch is indexed as ``inputs, labels, labels_single, image_id,
            pairs_info`` (positions 0-4).
        number_of_epochs: Number of epochs in the normal training mode.
        break_point: A phase is cut short after this many iterations.
        saving_epoch: Checkpoints/detections are written every
            ``saving_epoch`` epochs.
        folder_name: Output directory; created if it does not exist.
        batch_size: Unused; kept so existing callers keep working.
        infr: ``'t'`` for inference only (no checkpoints are written).
        start_epoch: Index of the first epoch.
        mean_best: Best test score so far; a higher score triggers saving
            ``bestcheckpoint.pth.tar``.
        visualize: ``'f'`` to disable visualisation, otherwise the phase
            (``'train'`` or ``'test'``) whose predictions are shown.

    Returns:
        None.
    """
    #### Creating the folder where the results would be stored##########
    try:
        os.mkdir(folder_name)
    except OSError as exc:
        # An already existing folder is fine; anything else is a real error.
        if exc.errno != errno.EEXIST:
            raise
    file_name = folder_name + '/' + 'result.pickle'
    ####################################################################

    loss_epoch_train = []
    loss_epoch_test = []
    initial_time = time.time()
    result = []

    ##### Freeing out the cache memories from gpus and declaring the phases######
    torch.cuda.empty_cache()
    phases = ['train', 'test']

    if infr == 't' and visualize == 'f':  ### If running from a pretrained model only for saving best result ######
        start_epoch = start_epoch - 1
        phases = ['test']
        end_of_epochs = start_epoch + 1
        print('Only doing testing for storing result from a model')

    elif visualize != 'f':
        if visualize not in phases:
            print(
                "ERROR! Asked to show result from a unknown set.The choice should be among train,val,test"
            )
            return
        else:
            phases = [visualize]
            end_of_epochs = start_epoch + 1
            print('Only showing predictions from a model')
    else:
        end_of_epochs = start_epoch + number_of_epochs

    ##### Starting the Epochs#############
    for epoch in range(start_epoch, end_of_epochs):
        # NOTE(review): PyTorch >= 1.1 expects scheduler.step() to be called
        # after optimizer.step(); the call is left at the start of the epoch
        # so the learning-rate schedule of existing checkpoints is unchanged.
        scheduler.step()
        print('Epoch {}/{}'.format(epoch + 1, end_of_epochs))
        print('-' * 10)
        initial_time_epoch = time.time()

        # Only the phases that actually run fill these in; everything after
        # the phase loop must cope with a phase having been skipped.
        AP_train = AP_train_single = None
        AP_test = AP_test_single = None

        for phase in phases:
            if phase == 'train':
                model.train()
            else:
                model.eval()

            print('In {}'.format(phase))
            detections_test = {}

            # The accumulators start with one placeholder row that is sliced
            # off again ([1:]) before the AP computation.
            true_scores = np.ones([1, no_of_classes], dtype=int)
            true_scores_single = np.ones([1, 1], dtype=int)
            predicted_scores = np.ones([1, no_of_classes], dtype=float)
            predicted_scores_single = np.ones([1, 1], dtype=float)
            acc_epoch = 0
            iteration = 1

            torch.cuda.empty_cache()
            ####Starting the iterations##################
            for iterr, i in enumerate(tqdm(dataloader[phase])):
                if iterr % 20 == 0:
                    torch.cuda.empty_cache()

                inputs = i[0].to(device)
                labels = i[1].to(device)
                labels_single = i[2].to(device)
                image_id = i[3]
                pairs_info = i[4]
                minbatch_size = len(pairs_info)

                optimizer.zero_grad()
                # First column of nav encodes the phase (0 = train,
                # 2 = anything else), second column the epoch.
                if phase == 'train':
                    nav = torch.tensor([[0, epoch]] * minbatch_size).to(device)
                else:
                    nav = torch.tensor([[2, epoch]] * minbatch_size).to(device)

                true = (labels.data).cpu().numpy()
                true_single = (labels_single.data).cpu().numpy()

                with torch.set_grad_enabled(phase == 'train'
                                            or phase == 'val'):
                    model_out = model(inputs, pairs_info, pairs_info, image_id,
                                      nav, phase)
                    outputs = model_out[0]
                    outputs_single = model_out[1]
                    outputs_combine = model_out[2]
                    outputs_gem = model_out[3]

                    predicted_HOI = sigmoid(outputs).data.cpu().numpy()
                    predicted_HOI_combine = sigmoid(
                        outputs_combine).data.cpu().numpy()
                    predicted_single = sigmoid(
                        outputs_single).data.cpu().numpy()
                    predicted_gem = sigmoid(outputs_gem).data.cpu().numpy()
                    # Keep the raw first-branch scores for the visualiser
                    # before predicted_HOI is rescaled below.
                    predicted_HOI_pair = predicted_HOI

                    # Same placeholder-first-row convention as above.
                    persons_score_extended = np.zeros([1, 1])
                    objects_score_extended = np.zeros([1, 1])
                    class_ids_extended = np.zeros([1, 1])
                    persons_np_extended = np.zeros([1, 4])
                    objects_np_extended = np.zeros([1, 4])

                    ############# Extending Person and Object Boxes and confidence scores to Multiply with all Pairs##########
                    for batch in range(len(pairs_info)):
                        this_image = int(image_id[batch])
                        scores_total = helpers_pre.get_compact_detections(
                            this_image, phase)
                        persons_score = scores_total['person_bbx_score']
                        objects_score = scores_total['objects_bbx_score']
                        persons_np = scores_total['person_bbx']
                        objects_np = scores_total['objects_bbx']
                        class_ids = scores_total['class_id_objects']

                        # Person rows are extended with pairs_info[batch][1]
                        # and object rows with pairs_info[batch][0].
                        count_for_persons = int(pairs_info[batch][1])
                        count_for_objects = int(pairs_info[batch][0])

                        temp_scores = extend(
                            np.array(persons_score).reshape(
                                len(persons_score), 1), count_for_persons)
                        persons_score_extended = np.concatenate(
                            [persons_score_extended, temp_scores])

                        temp_scores = extend(persons_np, count_for_persons)
                        persons_np_extended = np.concatenate(
                            [persons_np_extended, temp_scores])

                        temp_scores = extend_object(
                            np.array(objects_score).reshape(
                                len(objects_score), 1), count_for_objects)
                        objects_score_extended = np.concatenate(
                            [objects_score_extended, temp_scores])

                        temp_scores = extend_object(objects_np,
                                                    count_for_objects)
                        objects_np_extended = np.concatenate(
                            [objects_np_extended, temp_scores])

                        temp_scores = extend_object(
                            np.array(class_ids).reshape(len(class_ids), 1),
                            count_for_objects)
                        class_ids_extended = np.concatenate(
                            [class_ids_extended, temp_scores])
                    ###################################################################################################################

                    #### Applying LIS#######
                    persons_score_extended = LIS(persons_score_extended, 8.3,
                                                 12, 10)
                    objects_score_extended = LIS(objects_score_extended, 8.3,
                                                 12, 10)
                    ##################################

                    predicted_HOI = (predicted_HOI * predicted_HOI_combine *
                                     predicted_single * predicted_gem *
                                     objects_score_extended[1:] *
                                     persons_score_extended[1:])

                    # Select the mask row that belongs to each pair's object
                    # class id.
                    index_mask = class_ids_extended[1:].reshape(
                        len(class_ids_extended[1:]), ).astype('int32')
                    loss_mask = mask_t[index_mask]
                    predicted_HOI = loss_mask * predicted_HOI

                    #### Calculating Loss############
                    N_b = minbatch_size * no_of_classes
                    # Use the same device as the model outputs instead of the
                    # current default CUDA device.
                    hum_obj_mask = torch.Tensor(objects_score_extended[1:] *
                                                persons_score_extended[1:] *
                                                loss_mask).to(device)
                    lossf = torch.sum(
                        loss_com_combine(
                            sigmoid(outputs) * sigmoid(outputs_combine) *
                            sigmoid(outputs_single) * hum_obj_mask *
                            sigmoid(outputs_gem), labels.float())) / N_b
                    lossc = lossf.item()

                    acc_epoch += lossc
                    iteration += 1
                    if phase == 'train' or phase == 'val':  #### Flowing the loss backwards#########
                        lossf.backward()
                        optimizer.step()

                    ###########################################################
                    # Drop references early so GPU memory can be reclaimed.
                    del lossf
                    del model_out
                    del inputs
                    del outputs
                    del labels

                ####### If we want to do Visualization#########
                if visualize != 'f':
                    viss.visual(image_id, phase, pairs_info, predicted_HOI,
                                predicted_single, objects_score_extended[1:],
                                persons_score_extended[1:],
                                predicted_HOI_combine, predicted_HOI_pair,
                                true)
                #####################################################################

                ##### Preparing for Storing Results##########
                predicted_scores = np.concatenate(
                    (predicted_scores, predicted_HOI), axis=0)
                true_scores = np.concatenate((true_scores, true), axis=0)
                predicted_scores_single = np.concatenate(
                    (predicted_scores_single, predicted_single), axis=0)
                true_scores_single = np.concatenate(
                    (true_scores_single, true_single), axis=0)
                #############################################

                #### Storing the result in V-COCO Format##########
                if phase == 'test':
                    if (epoch + 1) % saving_epoch == 0 or infr == 't':
                        all_scores = filtering(predicted_HOI, true,
                                               persons_np_extended[1:],
                                               objects_np_extended[1:],
                                               predicted_single, pairs_info,
                                               image_id,
                                               class_ids_extended[1:])
                        proper.infer_format(image_id, all_scores, phase,
                                            detections_test, pairs_info)
                ######################################################

                ## Breaking in particular number of epoch####
                if iteration == break_point + 1:
                    break
            #############################################

            if phase == 'train':
                loss_epoch_train.append((acc_epoch))
                AP, AP_single = ap.class_AP(predicted_scores[1:, :],
                                            true_scores[1:, :],
                                            predicted_scores_single[1:, ],
                                            true_scores_single[1:, ])
                AP_train = pd.DataFrame(AP,
                                        columns=['Name_TRAIN', 'Score_TRAIN'])
                AP_train_single = pd.DataFrame(
                    AP_single, columns=['Name_TRAIN', 'Score_TRAIN'])

            elif phase == 'test':
                loss_epoch_test.append((acc_epoch))
                AP, AP_single = ap.class_AP(predicted_scores[1:, :],
                                            true_scores[1:, :],
                                            predicted_scores_single[1:, ],
                                            true_scores_single[1:, ])
                AP_test = pd.DataFrame(AP, columns=['Name_TEST', 'Score_TEST'])
                AP_test_single = pd.DataFrame(
                    AP_single, columns=['Name_TEST', 'Score_TEST'])
                if (epoch + 1) % saving_epoch == 0 or infr == 't':
                    file_name_p = folder_name + '/' + 'test.pickle'
                    with open(file_name_p, 'wb') as handle:
                        pickle.dump(detections_test, handle)

        ###### Saving the Model###########
        # The best-model criterion needs the test AP table, which only exists
        # when the 'test' phase ran in this epoch.
        if AP_test is not None:
            # Row `no_of_classes` of the AP table is used as the score
            # (presumably the mean over classes -- confirm in ap.class_AP).
            mean = AP_test.to_records(index=False)[no_of_classes][1]
            ####Best Model######
            if mean > mean_best and infr != 't':
                mean_best = mean
                save_checkpoint(
                    {
                        'epoch': epoch + 1,
                        'state_dict': model.state_dict(),
                        'mean_best': mean_best,
                        'optimizer': optimizer.state_dict(),
                        'scheduler': scheduler.state_dict()
                    },
                    filename=folder_name + '/' + 'bestcheckpoint.pth.tar')
        ###############################

        if (epoch + 1) % saving_epoch == 0 and infr != 't':
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'state_dict': model.state_dict(),
                    'mean_best': mean_best,
                    'optimizer': optimizer.state_dict(),
                    'scheduler': scheduler.state_dict()
                },
                filename=folder_name + '/' + str(epoch + 1) +
                'checkpoint.pth.tar')
        #####################################

        # Report whatever was computed: train and test side by side in the
        # normal mode, a single table when only one phase ran.
        AP_final = pd.concat(
            [table for table in (AP_train, AP_test) if table is not None],
            axis=1)
        AP_final_single = pd.concat(
            [
                table for table in (AP_train_single, AP_test_single)
                if table is not None
            ],
            axis=1)
        result.append(AP_final)
        if infr != 't':
            ##### This file will store each epoch result in a pickle format####
            with open(file_name, 'wb') as handle:
                pickle.dump(result, handle)

        time_elapsed = time.time() - initial_time_epoch
        print('APs in EPOCH:{}'.format(epoch + 1))
        print(AP_final)
        print(AP_final_single)

        # Exactly one loss value is appended per phase and epoch, so the
        # current epoch lives at this offset.
        epoch_offset = epoch - start_epoch
        if 'train' in phases and 'test' in phases:
            print('Loss_train:{},Loss_test:{}'.format(
                loss_epoch_train[epoch_offset],
                loss_epoch_test[epoch_offset]))
        elif 'test' in phases:
            print('Loss_test:{}'.format(loss_epoch_test[epoch_offset]))
        else:
            print('Loss_train:{}'.format(loss_epoch_train[epoch_offset]))

        print('This epoch completes in {:.0f}m {:.06f}s'.format(
            time_elapsed // 60, time_elapsed % 60))
        if infr == 't':
            break

    time_elapsed = time.time() - initial_time
    print('The whole process runs for {:.0f}h {:.0f}m {:0f}s'.format(
        time_elapsed // 3600, (time_elapsed % 3600) // 60,
        ((time_elapsed % 3600) % 60) % 60))
    return
Exemplo n.º 5
0
def train_test(model, optimizer, scheduler, dataloader, num_epochs, batch_size,
               start_epoch, mean_best):
    """Run ``num_epochs`` epochs of train / val / test and print test APs.

    Every epoch iterates the ``'train'``, ``'val'`` and ``'test'`` loaders in
    that order. Gradients are enabled and the optimizer is stepped for both
    ``'train'`` and ``'val'``; ``'test'`` runs in eval mode and feeds the AP
    computation. The per-step loss is logged to a TensorBoard
    ``SummaryWriter`` under ``runs/VSGNet_exp_VSG``.

    Args:
        model: Called as ``model(inputs, pairs_info, pairs_info, image_id,
            nav, phase)``; its first four outputs are passed through
            ``sigmoid`` and multiplied together.
        optimizer: Stepped after every ``'train'`` and ``'val'`` batch.
        scheduler: Accepted but never stepped by this function.
        dataloader: Mapping from phase name to an iterable of batches. A
            batch is indexed as ``inputs, labels, labels_single, image_id,
            pairs_info`` (positions 0-4).
        num_epochs: Number of epochs to run.
        batch_size: Unused; kept so existing callers keep working.
        start_epoch: Index of the first epoch.
        mean_best: Unused; kept so existing callers keep working.

    Returns:
        None.
    """
    writer = SummaryWriter('runs/VSGNet_exp_VSG')

    # Per-epoch test AP tables; collected but not returned or saved here.
    result = []

    torch.cuda.empty_cache()
    phases = ['train', 'val', 'test']

    end_epoch = start_epoch + num_epochs

    # Global optimisation-step counter used as the TensorBoard x-axis.
    iteration = 0

    try:
        for epoch in range(start_epoch, end_epoch):
            print('Epoch {}/{}'.format(epoch + 1, end_epoch))
            print('-' * 10)

            for phase in phases:
                if phase == 'train':
                    model.train()
                elif phase == 'val':
                    model.train()
                else:
                    model.eval()

                print('In {}'.format(phase))

                detections_test = []

                # The accumulators start with one placeholder row that is
                # sliced off again ([1:]) before the AP computation.
                true_scores = np.ones([1, 29], dtype=int)
                true_scores_single = np.ones([1, 1], dtype=int)
                predicted_scores = np.ones([1, 29], dtype=float)
                predicted_scores_single = np.ones([1, 1], dtype=float)

                acc_epoch = 0

                for iterr, i in enumerate(tqdm(dataloader[phase])):
                    if iterr % 20 == 0:
                        torch.cuda.empty_cache()

                    inputs = i[0].to(device)
                    labels = i[1].to(device)
                    labels_single = i[2].to(device)
                    image_id = i[3]
                    pairs_info = i[4]
                    minibatch_size = len(pairs_info)

                    optimizer.zero_grad()

                    # First column of nav encodes the phase (0 = train,
                    # 1 = val, 2 = test), second column the epoch.
                    if phase == 'train':
                        nav = torch.tensor([[0, epoch]] *
                                           minibatch_size).to(device)
                    elif phase == 'val':
                        nav = torch.tensor([[1, epoch]] *
                                           minibatch_size).to(device)
                    else:
                        nav = torch.tensor([[2, epoch]] *
                                           minibatch_size).to(device)

                    true = (labels.data).cpu().numpy()
                    true_single = (labels_single.data).cpu().numpy()

                    with torch.set_grad_enabled(phase == 'train'
                                                or phase == 'val'):
                        model_out = model(inputs, pairs_info, pairs_info,
                                          image_id, nav, phase)
                        i_ho = model_out[0]
                        p_Ref = model_out[1]
                        p_Att = model_out[2]
                        p_Graph = model_out[3]

                        predicted_HOI = sigmoid(p_Ref).data.cpu().numpy()
                        predicted_single = sigmoid(i_ho).data.cpu().numpy()
                        predicted_HOI_Att = sigmoid(p_Att).data.cpu().numpy()
                        predicted_HOI_Graph = sigmoid(
                            p_Graph).data.cpu().numpy()

                        # Same placeholder-first-row convention as above.
                        persons_score_extended = np.zeros([1, 1])
                        objects_score_extended = np.zeros([1, 1])
                        class_ids_extended = np.zeros([1, 1])
                        persons_np_extended = np.zeros([1, 4])
                        objects_np_extended = np.zeros([1, 4])

                        # Extend person/object boxes and confidence scores so
                        # they can be multiplied with the per-pair scores.
                        for batch in range(len(pairs_info)):
                            this_image = int(image_id[batch])
                            scores_total = helpers_pre.get_compact_detections(
                                this_image, phase)
                            persons_score = scores_total['person_bbx_score']
                            objects_score = scores_total['objects_bbx_score']
                            persons_np = scores_total['person_bbx']
                            objects_np = scores_total['objects_bbx']
                            class_ids = scores_total['class_id_objects']

                            # Person rows are extended with
                            # pairs_info[batch][1] and object rows with
                            # pairs_info[batch][0].
                            count_for_persons = int(pairs_info[batch][1])
                            count_for_objects = int(pairs_info[batch][0])

                            temp_scores = extend_person(
                                np.array(persons_score).reshape(
                                    len(persons_score), 1), count_for_persons)
                            persons_score_extended = np.concatenate(
                                [persons_score_extended, temp_scores])

                            temp_scores = extend_person(
                                persons_np, count_for_persons)
                            persons_np_extended = np.concatenate(
                                [persons_np_extended, temp_scores])

                            temp_scores = extend_object(
                                np.array(objects_score).reshape(
                                    len(objects_score), 1), count_for_objects)
                            objects_score_extended = np.concatenate(
                                [objects_score_extended, temp_scores])

                            temp_scores = extend_object(
                                objects_np, count_for_objects)
                            objects_np_extended = np.concatenate(
                                [objects_np_extended, temp_scores])

                            temp_scores = extend_object(
                                np.array(class_ids).reshape(
                                    len(class_ids), 1), count_for_objects)
                            class_ids_extended = np.concatenate(
                                [class_ids_extended, temp_scores])

                        persons_score_extended = LIS(persons_score_extended,
                                                     8.3, 12, 10)
                        objects_score_extended = LIS(objects_score_extended,
                                                     8.3, 12, 10)

                        predicted_HOI = (predicted_HOI * predicted_single *
                                         predicted_HOI_Att *
                                         predicted_HOI_Graph *
                                         objects_score_extended[1:] *
                                         persons_score_extended[1:])
                        loss_mask = prior.apply_prior(class_ids_extended[1:],
                                                      predicted_HOI)
                        predicted_HOI = loss_mask * predicted_HOI

                        N_b = minibatch_size * 29
                        # Use the same device as the model outputs instead of
                        # the current default CUDA device.
                        hum_obj_mask = torch.Tensor(
                            objects_score_extended[1:] *
                            persons_score_extended[1:] *
                            loss_mask).to(device)

                        lossf = torch.sum(
                            criterion(
                                sigmoid(i_ho) * sigmoid(p_Ref) *
                                sigmoid(p_Att) * sigmoid(p_Graph) *
                                hum_obj_mask, labels.float())) / N_b
                        lossc = lossf.item()

                        acc_epoch += lossc
                        if phase == 'train' or phase == 'val':
                            lossf.backward()
                            optimizer.step()
                            iteration += 1

                            writer.add_scalar('training loss', lossc,
                                              iteration)

                        # Drop references early so GPU memory can be
                        # reclaimed.
                        del lossf
                        del model_out
                        del inputs
                        del labels

                    # Preparing for storing results.
                    predicted_scores = np.concatenate(
                        (predicted_scores, predicted_HOI), axis=0)
                    true_scores = np.concatenate((true_scores, true), axis=0)
                    predicted_scores_single = np.concatenate(
                        (predicted_scores_single, predicted_single), axis=0)
                    true_scores_single = np.concatenate(
                        (true_scores_single, true_single), axis=0)

                    if phase == 'test':
                        all_scores = filtering(predicted_HOI, true,
                                               persons_np_extended[1:],
                                               objects_np_extended[1:],
                                               predicted_single, pairs_info,
                                               image_id)
                        proper.infer_format(image_id, all_scores, phase,
                                            detections_test, pairs_info)

                if phase == 'test':
                    AP, AP_single = ap.class_AP(predicted_scores[1:, :],
                                                true_scores[1:, :],
                                                predicted_scores_single[1:, ],
                                                true_scores_single[1:, ])
                    AP_test = pd.DataFrame(
                        AP, columns=['Name_TEST', 'Score_TEST'])
                    AP_test_single = pd.DataFrame(
                        AP_single, columns=['Name_TEST', 'Score_TEST'])

            AP_final = pd.concat([AP_test], axis=1)
            AP_final_single = pd.concat([AP_test_single], axis=1)
            result.append(AP_final)

            print('APs in epoch {}'.format(epoch + 1))
            print(AP_final)
            print(AP_final_single)
    finally:
        # Flush and release the event file even when training is interrupted,
        # otherwise buffered scalars can be lost.
        writer.close()
Exemplo n.º 6
0
def visual(
    image_id,
    flag,
    pairs_info,
    score_HOI,
    score_interact,
    score_obj_box,
    score_per_box,
    score_REL,
    score_HOI_pair,
    ground_truth,
):
    """Show ground truth and top-5 predictions for every person/object pair.

    For each image in the batch the person box (green) and object box (red)
    of one pair are drawn on a fresh copy of the image, the five
    highest-scoring ground-truth and predicted verbs are printed, and the
    function blocks on a key press before moving on to the next pair.
    Pressing ESC closes the window and stops the visualisation for this call.

    Args:
        image_id: Indexable of image numbers, one per batch entry.
        flag: ``'train'``, ``'test'`` or ``'val'``; selects the detection
            JSON folder and the image folder.
        pairs_info: ``pairs_info[b][0]`` / ``pairs_info[b][1]`` are the person
            and object counts of batch entry ``b``.
        score_HOI: Per-pair verb scores, rows ordered image by image.
        score_interact: Unused.
        score_obj_box: Unused.
        score_per_box: Unused.
        score_REL: Unused.
        score_HOI_pair: Unused.
        ground_truth: Per-pair verb labels, same row layout as ``score_HOI``.

    Raises:
        ValueError: If ``flag`` is not one of the supported values.
    """
    start = 0
    for batch in range(len(image_id)):
        this_image = int(image_id[batch])

        # Resolve the paths first so that an unsupported flag fails with a
        # clear message instead of an unbound-variable error further down.
        if flag == 'train':
            cur_obj_path_s = OBJ_PATH_train_s \
                + 'COCO_train2014_%.12i.json' % this_image
            image_dir_s = image_dir_train + '/COCO_train2014_%.12i.jpg' \
                % this_image
        elif flag == 'test':
            cur_obj_path_s = OBJ_PATH_test_s \
                + 'COCO_val2014_%.12i.json' % this_image
            image_dir_s = image_dir_test + '/COCO_val2014_%.12i.jpg' \
                % this_image
        elif flag == 'val':
            cur_obj_path_s = OBJ_PATH_train_s \
                + 'COCO_train2014_%.12i.json' % this_image
            image_dir_s = image_dir_val + '/COCO_train2014_%.12i.jpg' \
                % this_image
        else:
            raise ValueError(
                "unknown flag {!r}; expected 'train', 'test' or 'val'".format(
                    flag))

        a = helpers_pre.get_compact_detections(this_image, flag)
        person_bbxn = a['person_bbx']
        obj_bbxn = a['objects_bbx']

        # Rows of this image inside the flattened per-pair arrays.
        this_batch_pers = int(pairs_info[batch][0])
        this_batch_objs = int(pairs_info[batch][1])
        increment = this_batch_pers * this_batch_objs
        ground_truth_this_batch = ground_truth[start:start + increment]
        score_HOI_this_batch = score_HOI[start:start + increment]
        start += increment

        with open(cur_obj_path_s) as fp:
            detections = json.load(fp)
        img_H = detections['H']
        img_W = detections['W']

        # Scale the boxes by the image width/height stored with the
        # detections.
        scale = np.array([img_W, img_H, img_W, img_H], dtype=float)
        person_bbx = scale * person_bbxn
        obj_bbx = scale * obj_bbxn

        # Pairs are laid out person-major: all objects of the first person,
        # then all objects of the second person, and so on.
        start_index = 0
        for person_box in person_bbx:
            for object_box in obj_bbx:
                ground_truth_this_sample = \
                    ground_truth_this_batch[start_index]
                score_HOI_this_sample = \
                    score_HOI_this_batch[start_index]
                print(score_HOI_this_sample)
                pred = [('GROUND_TRUTH', [
                    (ID2VERB[ind],
                     float('%.2f' % ground_truth_this_sample[ind]))
                    for ind in np.argsort(ground_truth_this_sample)[-5:][::-1]
                ])]
                pred.append(('TOTAL_PREDICTION', [
                    (ID2VERB[ind], float('%.2f' % score_HOI_this_sample[ind]))
                    for ind in np.argsort(score_HOI_this_sample)[-5:][::-1]
                ]))
                prediction = pd.DataFrame(pred, columns=['Name', 'Prediction'])

                # Reload the image for every pair so boxes from the previous
                # pair do not pile up.
                img = cv2.imread(image_dir_s, 3)
                (x, y, w, h) = (int(person_box[0]), int(person_box[1]),
                                int(person_box[2] - person_box[0]),
                                int(person_box[3] - person_box[1]))
                cv2.rectangle(img, (x, y), (x + w, y + h), (0, 255, 0), 3)
                (x, y, w, h) = (int(object_box[0]), int(object_box[1]),
                                int(object_box[2] - object_box[0]),
                                int(object_box[3] - object_box[1]))
                cv2.rectangle(img, (x, y), (x + w, y + h), (0, 0, 255), 3)

                print(this_image)
                print('''Predictions (Five Highest Confidence Class):{}'''.
                      format(prediction))
                cv2.imshow('image', img)
                start_index += 1
                k = cv2.waitKey(0)
                if k == 27:  # ESC: close the window and stop visualising
                    cv2.destroyAllWindows()
                    return

    cv2.destroyAllWindows()
Exemplo n.º 7
0
def _pool_rois(feature_map, rois, pool):
    """Crop every ROI out of ``feature_map`` and pool each crop.

    Args:
        feature_map: Tensor whose second-to-last dimension is sliced with
            the ROI's y range and whose last dimension is sliced with the
            ROI's x range.
        rois: Iterable of boxes. The first four entries of each box are
            truncated with ``int`` and read as ``x1, y1, x2, y2``.
        pool: Callable applied to each crop (the caller passes an
            ``nn.AdaptiveMaxPool2d`` instance).

    Returns:
        A ``(pooled, locs)`` pair of equally long lists: the pooled crops,
        and ``[x1, y1, x2, y2, x2 - x1, y2 - y1]`` for each ROI.
    """
    pooled = []
    locs = []
    for roi in rois:
        x1, y1, x2, y2 = (int(roi[i]) for i in range(4))
        locs.append([x1, y1, x2, y2, x2 - x1, y2 - y1])
        # Both box corners are inclusive, hence the "+ 1" on the upper bounds.
        crop = feature_map[..., y1:(y2 + 1), x1:(x2 + 1)]
        pooled.append(pool(crop))
    return pooled, locs


def get_pool_loc(ims,
                 image_id,
                 flag_,
                 size=(7, 7),
                 spatial_scale=1,
                 batch_size=1):
    """Pool person and object ROI features out of a batch of feature maps.

    For each of the first ``batch_size`` samples, the detections and the
    attention (union-box) maps are looked up through ``helpers_pre``, the
    boxes are scaled by the feature-map size, and every box is cropped out
    of the sample's feature map and adaptively max-pooled to ``size``.

    Args:
        ims: Indexable batch of feature maps; ``ims[b]`` must have at least
            three dimensions.
        image_id: Indexable of per-sample image ids (converted with ``int``).
        flag_: Indexable where ``int(flag_[b][0])`` selects the split:
            ``0`` -> ``'train'``, ``2`` -> ``'test'``.
        size: Output size handed to ``nn.AdaptiveMaxPool2d``.
        spatial_scale: Accepted for interface compatibility only; the value
            is never used (the scale is derived from each feature map).
        batch_size: Number of leading samples to process.

    Returns:
        A 4-tuple ``(persons, objects, spatial_locs, union_boxes)``:
        ``persons`` and ``objects`` are the stacked pooled crops,
        ``spatial_locs`` is a list of ``[x1, y1, x2, y2, w, h]`` entries
        (persons first, then objects, per sample), and ``union_boxes`` is
        the concatenation of the per-sample attention maps as float CUDA
        tensors.

    Note:
        The attention maps are moved to the GPU with ``.cuda()``, so a CUDA
        device is required. ``torch.stack`` / ``torch.cat`` are called on
        the collected lists, so at least one person ROI, one object ROI and
        one sample must be present.
    """
    spatial_locs = []
    union_box_out = []
    pers_out = []
    objs_out = []

    flag = 'train'
    # Adaptive max pooling: every crop is reduced to a `size` spatial grid.
    max_pool = nn.AdaptiveMaxPool2d(size)

    for batch in range(batch_size):
        this_image = int(image_id[batch])

        # Any other code leaves `flag` at whatever the previous sample used.
        split_code = int(flag_[batch][0])
        if split_code == 0:
            flag = 'train'
        elif split_code == 2:
            flag = 'test'

        # Per the original author's note the returned dict also carries
        # 'person_bbx_score', 'objects_bbx_score' and 'class_id_objects';
        # only the two box arrays are used here.
        detections = helpers_pre.get_compact_detections(this_image, flag)
        roi_pers = detections['person_bbx']
        roi_objs = detections['objects_bbx']
        # Union boxes of the person/object pairs (per the original comment).
        union_box = helpers_pre.get_attention_maps(this_image, flag)
        union_box_out.append(torch.tensor(union_box).cuda().float())

        image_this_batch = ims[batch]
        dim1 = image_this_batch.size()[1]
        dim2 = image_this_batch.size()[2]
        # Scale the boxes to feature-map coordinates.
        # NOTE(review): x coordinates are scaled by the size of dimension 1
        # but later slice dimension 2 (and vice versa for y). The two agree
        # only for square feature maps -- confirm inputs are always square.
        box_scale = [dim1, dim2, dim1, dim2]
        roi_pers = roi_pers * box_scale
        roi_objs = roi_objs * box_scale

        # Persons first, then objects, so `spatial_locs` keeps that order.
        pooled, locs = _pool_rois(image_this_batch, roi_pers, max_pool)
        pers_out.extend(pooled)
        spatial_locs.extend(locs)

        pooled, locs = _pool_rois(image_this_batch, roi_objs, max_pool)
        objs_out.extend(pooled)
        spatial_locs.extend(locs)

    return (torch.stack(pers_out), torch.stack(objs_out), spatial_locs,
            torch.cat(union_box_out))