def transform(zip_file, save_dir=None):
    """Rename the images and write the train/val/test partition file.

    ``save_images`` performs the directory refactoring / image renaming and
    returns the raw train-test split.  That split is then divided into
    train, val and test partitions, which are pickled to
    ``<save_dir>/partitions.pkl``.

    Args:
        zip_file: Forwarded unchanged to ``save_images``.
        save_dir: Directory receiving ``train_test_split.pkl`` and
            ``partitions.pkl``.  It is handed straight to ``osp.join``, so
            a real path is needed in practice despite the ``None`` default.
    """
    split_path = osp.join(save_dir, 'train_test_split.pkl')
    split = save_images(zip_file, save_dir, split_path)

    # Ids are sorted so the id-to-label mapping is the same on every machine.
    all_ids = sorted(
        {parse_new_im_name(name, 'id') for name in split['trainval_im_names']})
    trainval_ids2labels = {pid: label for label, pid in enumerate(all_ids)}

    # Split trainval into train / val-query / val-gallery.
    tv_parts = partition_train_val_set(
        split['trainval_im_names'], parse_new_im_name, num_val_ids=100)
    train_im_names = tv_parts['train_im_names']

    # Same deterministic labelling, restricted to the train ids.
    train_ids = sorted(
        {parse_new_im_name(name, 'id') for name in train_im_names})
    train_ids2labels = {pid: label for label, pid in enumerate(train_ids)}

    # Marks tell which set an image belongs to:
    #   0 = query, 1 = gallery, 2 = multi query.
    # This variant only emits marks 0 and 1.
    val_query = list(tv_parts['val_query_im_names'])
    val_gallery = list(tv_parts['val_gallery_im_names'])
    val_im_names = val_query + val_gallery
    val_marks = [0] * len(val_query) + [1] * len(val_gallery)

    test_query = list(split['q_im_names'])
    test_gallery = list(split['gallery_im_names'])
    test_im_names = test_query + test_gallery
    test_marks = [0] * len(test_query) + [1] * len(test_gallery)

    result = {
        'trainval_im_names': split['trainval_im_names'],
        'trainval_ids2labels': trainval_ids2labels,
        'train_im_names': train_im_names,
        'train_ids2labels': train_ids2labels,
        'val_im_names': val_im_names,
        'val_marks': val_marks,
        'test_im_names': test_im_names,
        'test_marks': test_marks,
    }
    partition_file = osp.join(save_dir, 'partitions.pkl')
    save_pickle(result, partition_file)
    print('Partition file saved to {}'.format(partition_file))
def transform(zip_file, save_dir=None):
    """Rename the images and write the train/val/test partition file.

    The raw split returned by ``save_images`` is turned into train, val and
    test partitions (the test part includes the multi-query images) and
    pickled to ``<save_dir>/partitions.pkl``.

    Args:
        zip_file: Forwarded unchanged to ``save_images``.
        save_dir: Directory receiving ``train_test_split.pkl`` and
            ``partitions.pkl``.  It is handed straight to ``osp.join``, so
            a real path is needed in practice despite the ``None`` default.
    """
    split_pkl = osp.join(save_dir, 'train_test_split.pkl')
    raw_split = save_images(zip_file, save_dir, split_pkl)
    trainval_names = raw_split['trainval_im_names']

    # Sorting makes the id-to-label mapping independent of the machine.
    trainval_ids = sorted(
        {parse_new_im_name(im_name, 'id') for im_name in trainval_names})
    trainval_ids2labels = {
        pid: label for label, pid in enumerate(trainval_ids)}

    # Split trainval into train / val-query / val-gallery.
    parts = partition_train_val_set(
        trainval_names, parse_new_im_name, num_val_ids=100)
    train_im_names = parts['train_im_names']

    # Sorted for the same reason as the trainval ids above.
    train_ids = sorted(
        {parse_new_im_name(im_name, 'id') for im_name in train_im_names})
    train_ids2labels = {pid: label for label, pid in enumerate(train_ids)}

    # Each image gets a mark naming the set it comes from:
    #   0 = query, 1 = gallery, 2 = multi query.
    # The (mark, key) pairs below are listed in the order the images are
    # concatenated.
    val_im_names, val_marks = [], []
    for mark, key in ((0, 'val_query_im_names'), (1, 'val_gallery_im_names')):
        names = list(parts[key])
        val_im_names += names
        val_marks += [mark] * len(names)

    test_im_names, test_marks = [], []
    for mark, key in ((0, 'q_im_names'),
                      (2, 'mq_im_names'),
                      (1, 'gallery_im_names')):
        names = list(raw_split[key])
        test_im_names += names
        test_marks += [mark] * len(names)

    output = {
        'trainval_im_names': raw_split['trainval_im_names'],
        'trainval_ids2labels': trainval_ids2labels,
        'train_im_names': train_im_names,
        'train_ids2labels': train_ids2labels,
        'val_im_names': val_im_names,
        'val_marks': val_marks,
        'test_im_names': test_im_names,
        'test_marks': test_marks,
    }
    partition_file = osp.join(save_dir, 'partitions.pkl')
    save_pickle(output, partition_file)
    print('Partition file saved to {}'.format(partition_file))
def _updatePartition(self, img_fnames):
    """Add new image names to the stored partition and save the result.

    Loads ``partitions.pkl`` from
    ``<update_trainingset_path>/<dataset_nm>/original``, appends
    ``img_fnames`` to its trainval image names, recomputes the train/val
    partitions and both id-to-label maps, and writes the outcome to
    ``<update_trainingset_path>/<dataset_nm>/new/partitions.pkl``.  The test
    names and marks are copied from the old partition untouched.

    Args:
        img_fnames: Iterable of image names to add to the trainval set.

    Returns:
        Path of the newly written partition pickle.
    """
    dataset_dir = self.dataset_nm

    # --- Load the existing partition ---
    old_dir = osp.join(self.update_trainingset_path, dataset_dir, 'original')
    old_pkl = osp.join(old_dir, 'partitions.pkl')
    previous = load_pickle(old_pkl)

    # --- Trainval: old names plus the new ones, in sorted order ---
    merged_names = previous['trainval_im_names'] + list(img_fnames)
    merged_names.sort()
    merged_ids = sorted(
        {parse_new_im_name(name, 'id') for name in merged_names})
    trainval_ids2labels = {pid: label for label, pid in enumerate(merged_ids)}

    split = partition_train_val_set(
        merged_names, parse_new_im_name, num_val_ids=100)

    # --- Train ---
    train_im_names = split['train_im_names']
    train_ids = sorted(
        {parse_new_im_name(name, 'id') for name in train_im_names})
    train_ids2labels = {pid: label for label, pid in enumerate(train_ids)}

    # --- Val: query images are marked 0, gallery images 1 ---
    val_query = list(split['val_query_im_names'])
    val_gallery = list(split['val_gallery_im_names'])
    val_im_names = val_query + val_gallery
    val_marks = [0] * len(val_query) + [1] * len(val_gallery)

    # --- Save ---
    updated = {
        'trainval_im_names': merged_names,
        'trainval_ids2labels': trainval_ids2labels,
        'train_im_names': train_im_names,
        'train_ids2labels': train_ids2labels,
        'val_im_names': val_im_names,
        'val_marks': val_marks,
        'test_im_names': previous['test_im_names'],
        'test_marks': previous['test_marks'],
    }
    new_dir = osp.join(self.update_trainingset_path, dataset_dir, 'new')
    new_pkl = osp.join(new_dir, 'partitions.pkl')
    save_pickle(updated, new_pkl)
    return new_pkl
def transform(original_file, save_dir=None):
    """Rename images and write the anchor/positive/negative partition file.

    ``save_images`` performs the directory refactoring / image renaming and
    returns the raw train-test split.  The trainval names are partitioned
    into train and val, the triplet lists stored in
    ``<save_dir>/anchor_positive_negative_split_2.pkl`` are reordered by
    ``mess_up_apn`` for the trainval set and filtered down to the train
    anchors for the train set, and everything is pickled to
    ``<save_dir>/new_shuffle_apn_partitions_2.pkl``.

    Args:
        original_file: Forwarded unchanged to ``save_images``.
        save_dir: Directory holding the triplet pickle and receiving
            ``train_split.pkl`` and the partition file.  It is handed
            straight to ``osp.join``, so a real path is needed in practice
            despite the ``None`` default.

    Raises:
        IndexError: If a train image name does not occur among the anchors.
    """
    train_test_split_file = osp.join(save_dir, 'train_split.pkl')
    train_test_split = save_images(original_file, save_dir,
                                   train_test_split_file)

    # Ids are sorted so the id-to-label mapping is the same on every machine.
    trainval_ids = sorted({
        parse_new_im_name(n, 'id')
        for n in train_test_split['trainval_im_names']
    })
    trainval_ids2labels = {pid: label for label, pid in enumerate(trainval_ids)}

    # Split trainval into train / val-query / val-gallery.
    partitions = partition_train_val_set(
        train_test_split['trainval_im_names'], parse_new_im_name,
        num_val_ids=100)
    train_im_names = partitions['train_im_names']

    # Same deterministic labelling, restricted to the train ids.
    train_ids = sorted(
        {parse_new_im_name(n, 'id') for n in train_im_names})
    train_ids2labels = {pid: label for label, pid in enumerate(train_ids)}

    # Load the pre-computed anchor / positive / negative triplets.
    apn_pkl_file = osp.join(save_dir, 'anchor_positive_negative_split_2.pkl')
    apn_pkl = load_pickle(apn_pkl_file)
    anchor_list = apn_pkl['anchor']
    positive_list = apn_pkl['positive']
    negative_list = apn_pkl['negative']

    # Map every anchor name to the index of its FIRST occurrence.  This is
    # an O(1)-per-lookup replacement for scanning the whole anchor array
    # with np.where for each train image.  It is built before mess_up_apn
    # runs, at the point where the anchors used to be snapshotted.
    first_anchor_index = {}
    for position, anchor_name in enumerate(anchor_list):
        first_anchor_index.setdefault(anchor_name, position)

    # Reorder the trainval triplets.
    # NOTE(review): presumably a joint shuffle of the three lists - confirm
    # against mess_up_apn.
    new_tv_anchor_names, new_tv_positive_names, new_tv_negative_names = \
        mess_up_apn(anchor_list, positive_list, negative_list)

    # Keep only the triplets whose anchor is a train image, in the order of
    # train_im_names.  Iterating directly works for lists and arrays alike.
    train_anchor_names = []
    train_positive_names = []
    train_negative_names = []
    for im_name in train_im_names:
        try:
            ind = first_anchor_index[im_name]
        except KeyError:
            # IndexError is what the previous np.where-based lookup raised
            # for a missing name; keep the type, improve the message.
            raise IndexError(
                'train image {!r} not found in anchor list'.format(im_name))
        train_anchor_names.append(anchor_list[ind])
        train_positive_names.append(positive_list[ind])
        train_negative_names.append(negative_list[ind])

    # Marks tell which set an image belongs to:
    #   0 = query, 1 = gallery, 2 = multi query.
    val_query = list(partitions['val_query_im_names'])
    val_gallery = list(partitions['val_gallery_im_names'])
    val_im_names = val_query + val_gallery
    val_marks = [0] * len(val_query) + [1] * len(val_gallery)

    test_query = list(train_test_split['q_im_names'])
    test_multi_query = list(train_test_split['mq_im_names'])
    test_gallery = list(train_test_split['gallery_im_names'])
    test_im_names = test_query + test_multi_query + test_gallery
    test_marks = ([0] * len(test_query)
                  + [2] * len(test_multi_query)
                  + [1] * len(test_gallery))

    partitions = {
        'trainval_anchor_im_names': new_tv_anchor_names,
        'trainval_positive_im_names': new_tv_positive_names,
        'trainval_negative_im_names': new_tv_negative_names,
        'trainval_ids2labels': trainval_ids2labels,
        'train_anchor_im_names': train_anchor_names,
        'train_positive_im_names': train_positive_names,
        'train_negative_im_names': train_negative_names,
        'train_ids2labels': train_ids2labels,
        'val_im_names': val_im_names,
        'val_marks': val_marks,
        'test_im_names': test_im_names,
        'test_marks': test_marks,
    }
    partition_file = osp.join(save_dir, 'new_shuffle_apn_partitions_2.pkl')
    save_pickle(partitions, partition_file)
    print('Partition file saved to {}'.format(partition_file))