Example #1
0
def main():
    """Build train/test sets, then train and evaluate a siamese descriptor net.

    Relies on the module-level config object ``P`` and the shared ``labels``
    list (label index = position in that list).
    """
    # training and test sets
    train_set_full = get_images_labels(P.dataset_full, P.match_labels)
    test_set_full = get_images_labels(P.dataset_full + '/test', P.match_labels)

    # we have to give a number to each label, so we need a list here:
    # each label's number is its (stable, sorted) index in `labels`
    labels.extend(sorted({lab for _, lab in train_set_full}))

    log(P, 'Loading and transforming train/test sets.')

    # when pre-processing is disabled, use an identity transform
    identity = transforms.Compose([])
    train_pre_f = P.train_trans if P.train_pre_proc else identity
    test_pre_f = P.test_trans if P.test_pre_proc else identity

    train_set = []
    test_train_set = []
    test_set = []
    # each entry is (transformed image, label, original path)
    for path, lab in train_set_full:
        image = imread_rgb(path)
        train_set.append((train_pre_f(image), lab, path))
        test_train_set.append((test_pre_f(image), lab, path))

    for path, lab in test_set_full:
        # keep only test images whose label was seen during training
        if lab in labels:
            image = imread_rgb(path)
            test_set.append((test_pre_f(image), lab, path))

    siam_net = get_siamese_net()
    # optimize only the parameters that are not frozen
    trainable = (p for p in siam_net.parameters() if p.requires_grad)
    optimizer = optim.SGD(trainable,
                          lr=P.train_lr,
                          momentum=P.train_momentum,
                          weight_decay=P.train_weight_decay)
    criterion = TripletLoss(P.triplet_margin, P.train_loss_avg)
    criterion2 = nn.CrossEntropyLoss(size_average=P.train_loss2_avg)
    testset_tuple = (test_set, test_train_set)

    score = 0
    if P.test_upfront:
        log(P, 'Upfront testing of descriptor model')
        score = test_print_descriptor(train_type, P, siam_net, testset_tuple,
                                      get_embeddings)
    if P.train:
        log(P, 'Starting region-descriptor training')
        train_siam_triplets_pos_couples(siam_net,
                                        train_set,
                                        testset_tuple,
                                        criterion,
                                        criterion2,
                                        optimizer,
                                        best_score=score)
        log(P, 'Finished region-descriptor training')
    if P.test_descriptor_net:
        log(P, 'Testing as descriptor')
        # set best score high enough such that it will never be saved
        test_print_descriptor(train_type,
                              P,
                              siam_net,
                              testset_tuple,
                              get_embeddings,
                              best_score=len(test_set) + 1)
Example #2
0
def main():
    """Build train/test sets, train a classification net, and evaluate it
    both as a classifier and as a descriptor.

    Relies on the module-level config object ``P`` and the shared ``labels``
    list (label index = position in that list).
    """
    # training and test sets
    train_set_full = get_images_labels(P.dataset_full, P.match_labels)
    test_set_full = get_images_labels(P.dataset_full + '/test', P.match_labels)

    # we have to give a number to each label, so we need a list here:
    # each label's number is its (stable, sorted) index in `labels`
    unique_labels = set(lab for _, lab in train_set_full)
    labels.extend(sorted(unique_labels))

    log(P, 'Loading and transforming train/test sets.')

    # open the images (and transform already if possible)
    # do that only if it fits in memory !
    no_op = transforms.Compose([])
    train_pre_f = P.train_trans if P.train_pre_proc else no_op
    test_pre_f = P.test_trans if P.test_pre_proc else no_op

    train_set, test_train_set, test_set = [], [], []
    # each entry is (transformed image, label, original path)
    for path, lab in train_set_full:
        img = imread_rgb(path)
        train_set.append((train_pre_f(img), lab, path))
        test_train_set.append((test_pre_f(img), lab, path))

    for path, lab in test_set_full:
        # keep only test images whose label was seen during training
        if lab in labels:
            img = imread_rgb(path)
            test_set.append((test_pre_f(img), lab, path))

    class_net = get_class_net()
    # optimize only the parameters that are not frozen
    trainable = (p for p in class_net.parameters() if p.requires_grad)
    optimizer = optim.SGD(trainable,
                          lr=P.train_lr,
                          momentum=P.train_momentum,
                          weight_decay=P.train_weight_decay)
    criterion = nn.CrossEntropyLoss(size_average=P.train_loss_avg)
    testset_tuple = (test_set, test_train_set)

    score = 0
    if P.test_upfront:
        log(P, 'Upfront testing of classification model')
        score = test_print_classif(train_type, P, class_net, testset_tuple,
                                   test_classif_net)
    if P.train:
        log(P, 'Starting classification training')
        train_classif(class_net,
                      train_set,
                      testset_tuple,
                      criterion,
                      optimizer,
                      best_score=score)
        log(P, 'Finished classification training')
    if P.test_descriptor_net:
        log(P, 'Testing as descriptor')
        test_print_descriptor(train_type, P, class_net, testset_tuple,
                              get_embeddings)
Example #3
0
 def trans_dataset(dataset, pre_procs, trans, filters=None):
     """Open every image of `dataset` and build one output list per transform.

     dataset: iterable of (image path, label) pairs.
     pre_procs: per-output flags; when falsy, the raw image is kept untransformed.
     trans: per-output transform callables applied to the opened image.
     filters: optional per-output predicates f(path, label); defaults to
     accepting everything. Returns a list of lists of (image, label, path).
     """
     if filters is None:
         filters = [lambda x, y: True for _ in pre_procs]
     results = [[] for _ in pre_procs]
     for path, label in dataset:
         # open once, share the pixel data across all outputs
         opened = imread_rgb(path)
         for bucket, do_pre, transform, keep in zip(results, pre_procs,
                                                    trans, filters):
             if not keep(path, label):
                 continue
             entry = transform(opened) if do_pre else opened
             bucket.append((entry, label, path))
     return results
# Script: compute per-channel (RGB) mean and std over a whole image dataset.
# file to write the mean and std values to
out_path = 'data/CLICIDE_224sq_train_ms.txt'
# function to match labels, this is not necessary here
match_labels = match_label_video
# if the image size is constant, indicate it in format (C, H, W)
# if the image size is not constant, use None here
image_size = (3, 224, 224)
dataset_full = get_images_labels(dataset_path, match_labels)

# per-channel accumulators for the 3 RGB channels
mean = [0., 0., 0.]
std = [0., 0., 0.]
size = len(dataset_full)
if image_size is not None:
    # constant size: stack every image into one (N, C, H, W) tensor and
    # let torch compute the per-channel statistics directly.
    # NOTE(review): ToTensor scales pixel values to [0, 1], consistent with
    # the /255. normalization used in the variable-size branch below.
    T = torch.Tensor(size, *(image_size))
    for i, (im, _) in enumerate(dataset_full):
        T[i] = transforms.ToTensor()(imread_rgb(im))
    for i in range(3):
        mean[i] = T[:, i, :, :].mean()
        std[i] = T[:, i, :, :].std()
else:
    # cannot take mean/std of whole dataset tensor.
    # need to compute mean of all pixels and std afterwards, pixel by pixel
    dataset_open = []
    for im, _ in dataset_full:
        im_o = imread_rgb(im) / 255.  # cv2 images are 0-255, torch tensors are 0-1
        # pixel count of this image; used to weight its contribution
        im_size = im_o.shape[0] * im_o.shape[1]
        dataset_open.append((im_o, im_size))
        for i in range(3):
            # accumulate this image's channel sum, normalized so that after
            # the loop mean[i] is the mean over all pixels of all images.
            # NOTE(review): weighting by (im_size * size) gives each IMAGE
            # equal weight, not each pixel — presumably intentional; verify.
            mean[i] += np.sum(im_o[:, :, i]) / (im_size * size)
    # second pass over the opened images to accumulate std
    # (loop body continues beyond this chunk)
    for im_o, im_size in dataset_open:
        for i in range(3):