def get_sample(self, ptr):
    """Load, preprocess and return one test sample.

    Args:
      ptr: integer index into ``self.im_names``.

    Returns:
      Tuple ``(im, im_id, cam, im_name, mark)``; ``mark`` denotes whether
      the image is from the query, gallery, or multi-query set.
    """
    im_name = self.im_names[ptr]
    im_path = osp.join(self.im_dir, im_name)
    im = plt.imread(im_path)
    im, _ = self.pre_process_im(im)
    # Reuse the already-fetched name instead of indexing the list again;
    # `im_id` also avoids shadowing the builtin `id`.
    im_id = parse_im_name(im_name, 'id')
    cam = parse_im_name(im_name, 'cam')
    # Denoting whether the im is from query, gallery, or multi query set.
    mark = self.marks[ptr]
    return im, im_id, cam, im_name, mark
def split_train_test_set(img_dir, save_pkl_dir):
    """Randomly split identities into train/test sets and save the partition.

    Args:
      img_dir: directory whose file names encode person id and camera id
        (decoded via the module-level ``parse_im_name``).
      save_pkl_dir: path the partition pickle is written to.

    Side effects:
      Writes a pickle with 'detected'/'labeled' keys, each holding
      train/query/gallery image-name arrays (the same split is reused
      for both image types).

    NOTE(review): relies on module-level ``total_id_num``, ``test_id_num``
    and ``cam_num`` being defined — confirm before calling.
    """
    train_im_names = []
    query_im_names = []
    query_ids = []

    file_list = os.listdir(img_dir)
    file_list.sort()

    test_id = random.sample(range(total_id_num), test_id_num)
    test_id.sort()
    test_id_set = set(test_id)  # O(1) membership instead of list scans
    # Get the ids of training/testing identities in the form 'XXXXXXXX'.
    test_ids = ['{0:08}'.format(i) for i in test_id]
    train_ids = ['{0:08}'.format(i) for i in range(total_id_num)
                 if i not in test_id_set]

    # Group files by their zero-padded id once, instead of rescanning the
    # whole file list for every id (was O(ids * files)).
    files_by_id = {}
    for f in file_list:
        files_by_id.setdefault(
            '{0:08}'.format(parse_im_name(f, 'id')), []).append(f)

    # Generate training set and test set.
    for tr in train_ids:
        train_im_names.extend(files_by_id.get(tr, []))
    test_im_names = []
    for te in test_ids:
        test_im_names.extend(files_by_id.get(te, []))

    # Generate query pairs in the test set: two images of the same id
    # taken by two different cameras, one pair per id.
    item = 0
    while item < test_id_num:
        i_1, i_2 = random.sample(test_im_names, 2)
        if parse_im_name(i_1, 'id') == parse_im_name(i_2, 'id') \
                and parse_im_name(i_1, 'id') not in query_ids:
            if parse_im_name(i_1, 'cam') != parse_im_name(i_2, 'cam'):
                item += 1
                query_im_names.append(i_1)
                query_im_names.append(i_2)
                query_ids.append(parse_im_name(i_1, 'id'))

    # Generate gallery in the test set: every test image not used as query.
    query_set = set(query_im_names)  # avoid O(n) list lookup per image
    gallery_im_names = [f for f in test_im_names if f not in query_set]

    train_im_names.sort()
    gallery_im_names.sort()
    query_im_names.sort()

    partitions = {'gallery_im_names': np.asarray(gallery_im_names),
                  'query_im_names': np.asarray(query_im_names),
                  'train_im_names': np.asarray(train_im_names)}
    partitions = {'detected': partitions, 'labeled': partitions}
    save_pickle(partitions, save_pkl_dir)
    print("Splitting operation has been done!")
def __init__(
        self,
        im_dir=None,
        im_names=None,
        ids2labels=None,
        ids_per_batch=None,
        ims_per_id=None,
        **kwargs):
    """Training set grouping image indices by person id.

    Args:
      im_dir: the im dir of all images.
      im_names: image file names; person ids are parsed from these.
      ids2labels: mapping from person id to training label.
      ids_per_batch: number of distinct ids per batch (used as the
        parent class's batch size).
      ims_per_id: number of images sampled per id.
      **kwargs: forwarded to the parent constructor.
    """
    # The im dir of all images
    self.im_dir = im_dir
    self.im_names = im_names
    self.ids2labels = ids2labels
    self.ids_per_batch = ids_per_batch
    self.ims_per_id = ims_per_id

    im_ids = [parse_im_name(name, 'id') for name in im_names]
    self.ids_to_im_inds = defaultdict(list)
    # `im_id` avoids shadowing the builtin `id`.
    for ind, im_id in enumerate(im_ids):
        self.ids_to_im_inds[im_id].append(ind)
    # Materialize as a list: on Python 3 `dict.keys()` is a view that
    # cannot be indexed or random.sample'd, which breaks downstream use.
    self.ids = list(self.ids_to_im_inds)

    super(TrainSet, self).__init__(
        dataset_size=len(self.ids),
        batch_size=ids_per_batch,
        **kwargs)
def move_ims(ori_im_paths, new_im_dir, parse_im_name, new_im_name_tmpl,
             new_start_id):
    """Rename and copy images to a new directory.

    Note: despite the name, images are *copied* (``shutil.copy``), not moved.

    Args:
      ori_im_paths: paths of the original images.
      new_im_dir: destination directory (must already exist).
      parse_im_name: callable ``(im_name, 'id'|'cam') -> int``.
      new_im_name_tmpl: format template taking ``(id, cam, running_index)``.
      new_start_id: ids are relabeled contiguously starting at this value.

    Returns:
      ``(new_im_names, id_mapping)``: the new file names (in input order)
      and the old-id -> new-id mapping.
    """
    ids = [parse_im_name(osp.basename(p), 'id') for p in ori_im_paths]
    cams = [parse_im_name(osp.basename(p), 'cam') for p in ori_im_paths]
    unique_ids = sorted(set(ids))
    id_mapping = dict(
        zip(unique_ids, range(new_start_id, new_start_id + len(unique_ids))))
    new_im_names = []
    cnt = defaultdict(int)  # running per-(id, cam) image index
    # `im_id` avoids shadowing the builtin `id`.
    for im_path, im_id, cam in zip(ori_im_paths, ids, cams):
        new_id = id_mapping[im_id]
        # Use the current count as the zero-based index, then bump it.
        new_im_name = new_im_name_tmpl.format(new_id, cam, cnt[(new_id, cam)])
        cnt[(new_id, cam)] += 1
        shutil.copy(im_path, ospj(new_im_dir, new_im_name))
        new_im_names.append(new_im_name)
    return new_im_names, id_mapping
def move_ims(
        ori_im_paths, new_im_dir, parse_im_name, new_im_name_tmpl,
        new_start_id):
    """Rename and move images to new directory.

    Ids found in the original names are relabeled to a contiguous range
    starting at ``new_start_id``; each image is copied into ``new_im_dir``
    under the name produced by ``new_im_name_tmpl``.
    Returns the new names (input order) and the old->new id mapping.
    """
    base_names = [osp.basename(p) for p in ori_im_paths]
    ids = [parse_im_name(n, 'id') for n in base_names]
    cams = [parse_im_name(n, 'cam') for n in base_names]
    # Contiguous relabeling of the sorted unique ids.
    id_mapping = {old: new_start_id + offset
                  for offset, old in enumerate(sorted(set(ids)))}
    new_im_names = []
    seen = defaultdict(int)
    for src_path, old_id, cam in zip(ori_im_paths, ids, cams):
        mapped_id = id_mapping[old_id]
        key = (mapped_id, cam)
        seen[key] += 1
        # Zero-based running index per (id, cam) pair.
        dst_name = new_im_name_tmpl.format(mapped_id, cam, seen[key] - 1)
        shutil.copy(src_path, ospj(new_im_dir, dst_name))
        new_im_names.append(dst_name)
    return new_im_names, id_mapping
def __init__(self, im_dir=None, im_names=None, ids2labels=None,
             ids_per_batch=None, ims_per_id=None, **kwargs):
    """Set up the training set and index images by person id.

    ``ids_per_batch`` doubles as the parent class's batch size; all
    remaining keyword arguments are passed straight through.
    """
    # The im dir of all images
    self.im_dir = im_dir
    self.im_names = im_names
    self.ids2labels = ids2labels
    self.ids_per_batch = ids_per_batch
    self.ims_per_id = ims_per_id

    # Map each person id to the positions of its images in `im_names`.
    self.ids_to_im_inds = defaultdict(list)
    for ind, person_id in enumerate(
            parse_im_name(name, 'id') for name in im_names):
        self.ids_to_im_inds[person_id].append(ind)
    self.ids = self.ids_to_im_inds.keys()

    super(TrainSet, self).__init__(
        dataset_size=len(self.ids), batch_size=ids_per_batch, **kwargs)
def transform(zip_file, train_test_partition_file, save_dir=None):
    """Save images and partition the train/val/test set.
    """
    print("Extracting zip file")
    root = osp.dirname(osp.abspath(zip_file))
    if save_dir is None:
        save_dir = root
    may_make_dir(save_dir)
    with ZipFile(zip_file) as zf:
        zf.extractall(path=save_dir)
    print("Extracting zip file done")

    mat_file = osp.join(save_dir, osp.basename(zip_file)[:-4], 'cuhk-03.mat')
    save_images(mat_file, save_dir, new_im_name_tmpl)

    # Guard clause: the train/test partition must already exist on disk.
    if not osp.exists(train_test_partition_file):
        raise RuntimeError('Train/test partition file should be provided.')
    train_test_partition = load_pickle(train_test_partition_file)

    for im_type in ['detected', 'labeled']:
        trainval_im_names = train_test_partition[im_type]['train_im_names']
        # Sort ids, so that id-to-label mapping remains the same when
        # running the code on different machines.
        trainval_ids = sorted(
            set(parse_im_name(name, 'id') for name in trainval_im_names))
        trainval_ids2labels = dict(zip(trainval_ids, range(len(trainval_ids))))

        train_val_partition = partition_train_val_set(
            trainval_im_names, parse_im_name, num_val_ids=100)
        train_im_names = train_val_partition['train_im_names']
        # Same machine-independent sorting for the train-only ids.
        train_ids = sorted(
            set(parse_im_name(name, 'id') for name in train_im_names))
        train_ids2labels = dict(zip(train_ids, range(len(train_ids))))

        # A mark is used to denote whether the image is from
        # query (mark == 0), gallery (mark == 1), or multi query (mark == 2).
        val_query = list(train_val_partition['val_query_im_names'])
        val_gallery = list(train_val_partition['val_gallery_im_names'])
        val_im_names = val_query + val_gallery
        val_marks = [0] * len(val_query) + [1] * len(val_gallery)

        test_query = list(train_test_partition[im_type]['query_im_names'])
        test_gallery = list(train_test_partition[im_type]['gallery_im_names'])
        test_im_names = test_query + test_gallery
        test_marks = [0] * len(test_query) + [1] * len(test_gallery)

        partitions = {
            'trainval_im_names': trainval_im_names,
            'trainval_ids2labels': trainval_ids2labels,
            'train_im_names': train_im_names,
            'train_ids2labels': train_ids2labels,
            'val_im_names': val_im_names,
            'val_marks': val_marks,
            'test_im_names': test_im_names,
            'test_marks': test_marks,
        }
        partition_file = osp.join(save_dir, im_type, 'partitions.pkl')
        save_pickle(partitions, partition_file)
        print('Partition file for "{}" saved to {}'.format(
            im_type, partition_file))
def create_dataset(
        name='market1501',
        part='trainval',
        **kwargs):
    """Create the dataset object for one split of one dataset.

    Args:
      name: which dataset: 'market1501', 'cuhk03', 'duke' or 'combined'.
      part: which split: 'trainval', 'train', 'val' or 'test'.
      **kwargs: forwarded to the TrainSet/TestSet constructor.

    Returns:
      A TrainSet for 'trainval'/'train', a TestSet for 'val'/'test'.
    """
    assert name in ['market1501', 'cuhk03', 'duke', 'combined'], \
        "Unsupported Dataset {}".format(name)
    assert part in ['trainval', 'train', 'val', 'test'], \
        "Unsupported Dataset Part {}".format(part)

    ########################################
    # Specify Directory and Partition File #
    ########################################

    if name == 'market1501':
        im_dir = ospeu('~/Dataset/market1501/images')
        partition_file = ospeu('~/Dataset/market1501/partitions.pkl')
    elif name == 'cuhk03':
        im_type = ['detected', 'labeled'][0]
        im_dir = ospeu(ospj('~/Dataset/cuhk03', im_type, 'images'))
        partition_file = ospeu(
            ospj('~/Dataset/cuhk03', im_type, 'partitions.pkl'))
    elif name == 'duke':
        im_dir = ospeu('~/Dataset/duke/images')
        partition_file = ospeu('~/Dataset/duke/partitions.pkl')
    elif name == 'combined':
        assert part in ['trainval'], \
            "Only trainval part of the combined dataset is available now."
        im_dir = ospeu('~/Dataset/market1501_cuhk03_duke/trainval_images')
        partition_file = ospeu(
            '~/Dataset/market1501_cuhk03_duke/partitions.pkl')

    ##################
    # Create Dataset #
    ##################

    # Use standard Market1501 CMC settings for all datasets here.
    cmc_kwargs = dict(separate_camera_set=False,
                      single_gallery_shot=False,
                      first_match_break=True)

    partitions = load_pickle(partition_file)
    im_names = partitions['{}_im_names'.format(part)]

    marks = None  # only defined for query/gallery style parts
    if part in ['trainval', 'train']:
        ids2labels = partitions['{}_ids2labels'.format(part)]
        ret_set = TrainSet(
            im_dir=im_dir,
            im_names=im_names,
            ids2labels=ids2labels,
            **kwargs)
        num_ids = len(ids2labels)
    else:  # 'val' or 'test'
        marks = partitions['{}_marks'.format(part)]
        kwargs.update(cmc_kwargs)
        ret_set = TestSet(
            im_dir=im_dir,
            im_names=im_names,
            marks=marks,
            **kwargs)
        num_ids = len(set(parse_im_name(n, 'id') for n in im_names))

    # Print dataset information
    print('-' * 40)
    print('{} {} set'.format(name, part))
    print('-' * 40)
    print('NO. Images: {}'.format(len(im_names)))
    print('NO. IDs: {}'.format(num_ids))
    # Query/gallery counts only exist for 'val'/'test'; guard explicitly
    # instead of relying on a bare `except:` to swallow the NameError.
    if marks is not None:
        marks_arr = np.array(marks)
        print('NO. Query Images: {}'.format(np.sum(marks_arr == 0)))
        print('NO. Gallery Images: {}'.format(np.sum(marks_arr == 1)))
        print('NO. Multi-query Images: {}'.format(np.sum(marks_arr == 2)))
    print('-' * 40)

    return ret_set
def create_dataset(name='market1501', part='trainval', **kwargs):
    """Build and return the dataset object for the requested split.

    Resolves the image directory and partition pickle for `name`, then
    instantiates a TrainSet ('trainval'/'train') or TestSet ('val'/'test')
    and prints a short summary.
    """
    assert name in ['market1501', 'cuhk03', 'duke', 'combined'], \
        "Unsupported Dataset {}".format(name)
    assert part in ['trainval', 'train', 'val', 'test'], \
        "Unsupported Dataset Part {}".format(part)

    ########################################
    # Specify Directory and Partition File #
    ########################################

    if name == 'market1501':
        im_dir = ospeu('~/Dataset/market1501/images')
        partition_file = ospeu('~/Dataset/market1501/partitions.pkl')
    elif name == 'cuhk03':
        im_type = ['detected', 'labeled'][0]
        im_dir = ospeu(ospj('~/Dataset/cuhk03', im_type, 'images'))
        partition_file = ospeu(
            ospj('~/Dataset/cuhk03', im_type, 'partitions.pkl'))
    elif name == 'duke':
        im_dir = ospeu('~/Dataset/duke/images')
        partition_file = ospeu('~/Dataset/duke/partitions.pkl')
    elif name == 'combined':
        assert part in ['trainval'], \
            "Only trainval part of the combined dataset is available now."
        im_dir = ospeu('~/Dataset/market1501_cuhk03_duke/trainval_images')
        partition_file = ospeu(
            '~/Dataset/market1501_cuhk03_duke/partitions.pkl')

    ##################
    # Create Dataset #
    ##################

    # Use standard Market1501 CMC settings for all datasets here.
    cmc_kwargs = dict(
        separate_camera_set=False,
        single_gallery_shot=False,
        first_match_break=True)

    partitions = load_pickle(partition_file)
    im_names = partitions['{}_im_names'.format(part)]

    if part in ('trainval', 'train'):
        ids2labels = partitions['{}_ids2labels'.format(part)]
        ret_set = TrainSet(
            im_dir=im_dir, im_names=im_names, ids2labels=ids2labels, **kwargs)
        num_ids = len(ids2labels)
    else:
        marks = partitions['{}_marks'.format(part)]
        kwargs.update(cmc_kwargs)
        ret_set = TestSet(
            im_dir=im_dir, im_names=im_names, marks=marks, **kwargs)
        num_ids = len(list(set([parse_im_name(n, 'id') for n in im_names])))
        num_query = np.sum(np.array(marks) == 0)
        num_gallery = np.sum(np.array(marks) == 1)
        num_multi_query = np.sum(np.array(marks) == 2)

    # Print dataset information
    print('-' * 40)
    print('{} {} set'.format(name, part))
    print('-' * 40)
    print('NO. Images: {}'.format(len(im_names)))
    print('NO. IDs: {}'.format(num_ids))
    try:
        print('NO. Query Images: {}'.format(num_query))
        print('NO. Gallery Images: {}'.format(num_gallery))
        print('NO. Multi-query Images: {}'.format(num_multi_query))
    except:  # the count variables are undefined for train parts by design
        pass
    print('-' * 40)
    return ret_set
def transform(zip_file, train_test_partition_file, save_dir=None):
    """Save images and partition the train/val/test set.
    """
    def _sorted_unique_ids(im_names):
        # Sort ids, so that id-to-label mapping remains the same when
        # running the code on different machines.
        unique = list(set([parse_im_name(n, 'id') for n in im_names]))
        unique.sort()
        return unique

    print("Extracting zip file")
    extract_root = osp.dirname(osp.abspath(zip_file))
    if save_dir is None:
        save_dir = extract_root
    may_make_dir(save_dir)
    with ZipFile(zip_file) as archive:
        archive.extractall(path=save_dir)
    print("Extracting zip file done")

    mat_file = osp.join(save_dir, osp.basename(zip_file)[:-4], 'cuhk-03.mat')
    save_images(mat_file, save_dir, new_im_name_tmpl)

    if osp.exists(train_test_partition_file):
        train_test_partition = load_pickle(train_test_partition_file)
    else:
        raise RuntimeError('Train/test partition file should be provided.')

    for im_type in ['detected', 'labeled']:
        split = train_test_partition[im_type]
        trainval_im_names = split['train_im_names']
        trainval_ids = _sorted_unique_ids(trainval_im_names)
        trainval_ids2labels = dict(
            zip(trainval_ids, range(len(trainval_ids))))

        train_val_partition = partition_train_val_set(
            trainval_im_names, parse_im_name, num_val_ids=100)
        train_im_names = train_val_partition['train_im_names']
        train_ids = _sorted_unique_ids(train_val_partition['train_im_names'])
        train_ids2labels = dict(zip(train_ids, range(len(train_ids))))

        # A mark is used to denote whether the image is from
        # query (mark == 0), gallery (mark == 1), or multi query (mark == 2).
        val_marks = ([0, ] * len(train_val_partition['val_query_im_names'])
                     + [1, ] * len(train_val_partition['val_gallery_im_names']))
        val_im_names = (list(train_val_partition['val_query_im_names'])
                        + list(train_val_partition['val_gallery_im_names']))
        test_im_names = (list(split['query_im_names'])
                         + list(split['gallery_im_names']))
        test_marks = ([0, ] * len(split['query_im_names'])
                      + [1, ] * len(split['gallery_im_names']))

        partitions = {'trainval_im_names': trainval_im_names,
                      'trainval_ids2labels': trainval_ids2labels,
                      'train_im_names': train_im_names,
                      'train_ids2labels': train_ids2labels,
                      'val_im_names': val_im_names,
                      'val_marks': val_marks,
                      'test_im_names': test_im_names,
                      'test_marks': test_marks}
        partition_file = osp.join(save_dir, im_type, 'partitions.pkl')
        save_pickle(partitions, partition_file)
        print('Partition file for "{}" saved to {}'.format(im_type,
                                                           partition_file))