def main():
    """Script entry point: load the datasets, then train/test the siamese
    region-descriptor network according to the flags carried by P.

    Relies on module-level state: P (config), labels (shared label list),
    train_type, and the helper functions imported at file level.
    """
    # training and test sets
    train_set_full = get_images_labels(P.dataset_full, P.match_labels)
    test_set_full = get_images_labels(P.dataset_full + '/test', P.match_labels)

    # every label gets a number: its index in this sorted, de-duplicated list
    labels.extend(sorted({lab for _, lab in train_set_full}))

    log(P, 'Loading and transforming train/test sets.')
    train_set = []
    test_train_set = []
    test_set = []
    # when pre-processing is disabled, fall back to an identity transform
    train_pre_f = P.train_trans if P.train_pre_proc else transforms.Compose([])
    test_pre_f = P.test_trans if P.test_pre_proc else transforms.Compose([])
    for path, lab in train_set_full:
        opened = imread_rgb(path)
        train_set.append((train_pre_f(opened), lab, path))
        test_train_set.append((test_pre_f(opened), lab, path))
    for path, lab in test_set_full:
        # skip test images whose label never appears in the training set
        if lab in labels:
            opened = imread_rgb(path)
            test_set.append((test_pre_f(opened), lab, path))

    siam_net = get_siamese_net()
    trainable_params = (p for p in siam_net.parameters() if p.requires_grad)
    optimizer = optim.SGD(trainable_params,
                          lr=P.train_lr,
                          momentum=P.train_momentum,
                          weight_decay=P.train_weight_decay)
    criterion = TripletLoss(P.triplet_margin, P.train_loss_avg)
    criterion2 = nn.CrossEntropyLoss(size_average=P.train_loss2_avg)
    testset_tuple = (test_set, test_train_set)

    score = 0
    if P.test_upfront:
        log(P, 'Upfront testing of descriptor model')
        score = test_print_descriptor(train_type, P, siam_net, testset_tuple,
                                      get_embeddings)
    if P.train:
        log(P, 'Starting region-descriptor training')
        train_siam_triplets_pos_couples(siam_net, train_set, testset_tuple,
                                        criterion, criterion2, optimizer,
                                        best_score=score)
        log(P, 'Finished region-descriptor training')
    if P.test_descriptor_net:
        log(P, 'Testing as descriptor')
        # set best score high enough such that it will never be saved
        test_print_descriptor(train_type, P, siam_net, testset_tuple,
                              get_embeddings, best_score=len(test_set) + 1)
def main():
    """Script entry point: load the datasets, then train/test the image
    classification network according to the flags carried by P.

    Relies on module-level state: P (config), labels (shared label list),
    train_type, and the helper functions imported at file level.
    """
    # training and test sets
    full_train = get_images_labels(P.dataset_full, P.match_labels)
    full_test = get_images_labels(P.dataset_full + '/test', P.match_labels)

    # we have to give a number to each label; a sorted list gives each
    # distinct label a stable index
    distinct = sorted(set(lab for _, lab in full_train))
    labels.extend(distinct)

    log(P, 'Loading and transforming train/test sets.')
    # open the images (and transform already if possible)
    # do that only if it fits in memory !
    train_set, test_train_set, test_set = [], [], []
    pre_train = P.train_trans if P.train_pre_proc else transforms.Compose([])
    pre_test = P.test_trans if P.test_pre_proc else transforms.Compose([])
    for path, lab in full_train:
        opened = imread_rgb(path)
        train_set.append((pre_train(opened), lab, path))
        test_train_set.append((pre_test(opened), lab, path))
    for path, lab in full_test:
        # only keep test images whose label was seen during training
        if lab in labels:
            opened = imread_rgb(path)
            test_set.append((pre_test(opened), lab, path))

    class_net = get_class_net()
    optimizer = optim.SGD(
        (p for p in class_net.parameters() if p.requires_grad),
        lr=P.train_lr,
        momentum=P.train_momentum,
        weight_decay=P.train_weight_decay)
    criterion = nn.CrossEntropyLoss(size_average=P.train_loss_avg)
    testset_tuple = (test_set, test_train_set)

    if P.test_upfront:
        log(P, 'Upfront testing of classification model')
        score = test_print_classif(train_type, P, class_net, testset_tuple,
                                   test_classif_net)
    else:
        score = 0
    if P.train:
        log(P, 'Starting classification training')
        train_classif(class_net, train_set, testset_tuple, criterion,
                      optimizer, best_score=score)
        log(P, 'Finished classification training')
    if P.test_descriptor_net:
        log(P, 'Testing as descriptor')
        test_print_descriptor(train_type, P, class_net, testset_tuple,
                              get_embeddings)
def trans_dataset(dataset, pre_procs, trans, filters=None):
    """Open each image of a dataset and build one output list per transform.

    dataset: iterable of (image_path, label) pairs.
    pre_procs: per-output flags; when falsy, the image is kept untransformed.
    trans: per-output transforms applied to the opened image when the
        matching pre_procs flag is set.
    filters: optional per-output predicates (image_path, label) -> bool
        selecting which samples go into each list; defaults to accepting all.
    Returns a list (one entry per pre_proc) of lists of
    (image, label, image_path) tuples.
    """
    if filters is None:
        # no filtering requested: accept every sample in every output
        filters = [lambda im, lab: True for _ in pre_procs]
    outs = [[] for _ in pre_procs]
    for path, lab in dataset:
        # open once, reuse for every output list
        opened = imread_rgb(path)
        for out, do_trans, transform, keep in zip(outs, pre_procs, trans,
                                                  filters):
            if keep(path, lab):
                image = transform(opened) if do_trans else opened
                out.append((image, lab, path))
    return outs
# file to write the mean and std values to out_path = 'data/CLICIDE_224sq_train_ms.txt' # function to match labels, this is not necessary here match_labels = match_label_video # if the image size is constant, indicate it in format (C, H, W) # if the image size is not constant, use None here image_size = (3, 224, 224) dataset_full = get_images_labels(dataset_path, match_labels) mean = [0., 0., 0.] std = [0., 0., 0.] size = len(dataset_full) if image_size is not None: T = torch.Tensor(size, *(image_size)) for i, (im, _) in enumerate(dataset_full): T[i] = transforms.ToTensor()(imread_rgb(im)) for i in range(3): mean[i] = T[:, i, :, :].mean() std[i] = T[:, i, :, :].std() else: # cannot take mean/std of whole dataset tensor. # need to compute mean of all pixels and std afterwards, pixel by pixel dataset_open = [] for im, _ in dataset_full: im_o = imread_rgb(im) / 255. # cv2 images are 0-255, torch tensors are 0-1 im_size = im_o.shape[0] * im_o.shape[1] dataset_open.append((im_o, im_size)) for i in range(3): mean[i] += np.sum(im_o[:, :, i]) / (im_size * size) for im_o, im_size in dataset_open: for i in range(3):