def save_images(zip_file, save_dir=None, train_test_split_file=None):
    """Unpack the archive, gather its images in one folder and save the split.

    The zip is extracted into `save_dir`; when `save_dir` is None the
    directory containing `zip_file` is used. Image paths found under the
    `bounding_box_train`, `bounding_box_test` and `query` sub-folders of the
    extracted directory are passed to `move_ims` together with the
    destination `<save_dir>/images`. The names it returns are sliced back
    into the three subsets, pickled to `train_test_split_file` and returned
    as a dict with keys `trainval_im_names`, `gallery_im_names` and
    `q_im_names`.
    """
    print("Extracting zip file")
    zip_parent = osp.dirname(osp.abspath(zip_file))
    if save_dir is None:
        save_dir = zip_parent
    may_make_dir(save_dir)
    with ZipFile(zip_file) as archive:
        archive.extractall(path=save_dir)
    print("Extracting zip file done")

    target_dir = osp.join(save_dir, 'images')
    may_make_dir(target_dir)
    # The extracted folder name is the zip's base name minus its last four
    # characters (i.e. a '.zip'-style extension).
    extracted_dir = osp.join(save_dir, osp.basename(zip_file)[:-4])

    # (key in the resulting split, sub-folder it is read from)
    subsets = (
        ('trainval_im_names', 'bounding_box_train'),
        ('gallery_im_names', 'bounding_box_test'),
        ('q_im_names', 'query'),
    )
    all_paths = []
    counts = []
    for _, folder in subsets:
        folder_paths = get_im_names(
            osp.join(extracted_dir, folder), return_path=True, return_np=False)
        folder_paths.sort()
        all_paths.extend(folder_paths)
        counts.append(len(folder_paths))

    im_names = move_ims(
        all_paths, target_dir, parse_original_im_name, new_im_name_tmpl)

    # Cumulative counts give the slice boundaries of each subset.
    bounds = np.cumsum([0] + counts)
    split = {
        key: im_names[bounds[pos]:bounds[pos + 1]]
        for pos, (key, _) in enumerate(subsets)
    }
    save_pickle(split, train_test_split_file)
    print('Saving images done.')
    return split
def save_images(zip_file, save_dir=None, train_test_split_file=None):
    """Extract `zip_file`, move its images to `<save_dir>/images`, save the split.

    `save_dir` defaults to the directory holding `zip_file`. The three
    sub-folders `bounding_box_train`, `bounding_box_test` and `query` of the
    extracted directory are listed (sorted), all paths are handed to
    `move_ims`, and the returned names are divided into
    `trainval_im_names`, `gallery_im_names` and `q_im_names`. The resulting
    dict is pickled to `train_test_split_file` and returned.
    """
    print("Extracting zip file")
    default_dir = osp.dirname(osp.abspath(zip_file))
    save_dir = default_dir if save_dir is None else save_dir
    may_make_dir(save_dir)
    with ZipFile(zip_file) as zf:
        zf.extractall(path=save_dir)
    print("Extracting zip file done")

    images_dir = osp.join(save_dir, 'images')
    may_make_dir(images_dir)
    # Extracted folder = zip base name with its last 4 characters removed.
    unpacked_dir = osp.join(save_dir, osp.basename(zip_file)[:-4])

    collected = []
    sizes = []
    for sub_dir in ('bounding_box_train', 'bounding_box_test', 'query'):
        listing = get_im_names(
            osp.join(unpacked_dir, sub_dir), return_path=True, return_np=False)
        listing.sort()
        collected.extend(listing)
        sizes.append(len(listing))

    im_names = move_ims(
        collected, images_dir, parse_original_im_name, new_im_name_tmpl)

    # Pair every key with the [start, stop) range of its subset.
    offsets = np.cumsum([0] + sizes)
    split_keys = ['trainval_im_names', 'gallery_im_names', 'q_im_names']
    split = dict()
    for key, start, stop in zip(split_keys, offsets[:-1], offsets[1:]):
        split[key] = im_names[start:stop]

    save_pickle(split, train_test_split_file)
    print('Saving images done.')
    return split
def combine_trainval_sets(im_dirs, partition_files, save_dir):
    """Merge the trainval images of several datasets into one set.

    For each (image dir, partition file) pair, the `trainval_im_names`
    listed in the partition pickle are moved via `move_ims` into
    `<save_dir>/trainval_images`, with ids starting at the running offset so
    ids from different datasets do not collide. A new `partitions.pkl`
    holding the merged names and an identity id-to-label mapping is written
    to `save_dir`.
    """
    merged_im_dir = ospj(save_dir, 'trainval_images')
    may_make_dir(merged_im_dir)

    merged_names = []
    next_id = 0
    for src_dir, src_partition_file in zip(im_dirs, partition_files):
        src_partitions = load_pickle(src_partition_file)
        src_paths = sorted(
            ospj(src_dir, name)
            for name in src_partitions['trainval_im_names'])
        moved_names, id_mapping = move_ims(
            src_paths, merged_im_dir, parse_im_name, new_im_name_tmpl,
            next_id)
        # Advance the offset by the number of ids this dataset contributed.
        next_id += len(id_mapping)
        merged_names.extend(moved_names)

    merged_partitions = {
        'trainval_im_names': merged_names,
        # Ids are already 0..next_id-1, so each id is its own label.
        'trainval_ids2labels': {i: i for i in range(next_id)},
    }
    out_file = ospj(save_dir, 'partitions.pkl')
    save_pickle(merged_partitions, out_file)
    print('Partition file saved to {}'.format(out_file))
def combine_trainval_sets(im_dirs, partition_files, save_dir):
    """Combine the trainval splits of multiple datasets under `save_dir`.

    Each dataset's `trainval_im_names` (read from its partition pickle) are
    resolved against its image directory, sorted, and passed to `move_ims`
    with the current id offset. The merged image names and an identity
    id-to-label dict are saved to `<save_dir>/partitions.pkl`.
    """
    dest_dir = ospj(save_dir, 'trainval_images')
    may_make_dir(dest_dir)

    all_names = []
    id_offset = 0
    for dataset_dir, dataset_partition_file in zip(im_dirs, partition_files):
        names = load_pickle(dataset_partition_file)['trainval_im_names']
        paths = [ospj(dataset_dir, n) for n in names]
        paths.sort()
        renamed, id_mapping = move_ims(
            paths, dest_dir, parse_im_name, new_im_name_tmpl, id_offset)
        id_offset += len(id_mapping)
        all_names += renamed

    # After the loop `id_offset` equals the total number of ids.
    label_ids = range(id_offset)
    combined = {
        'trainval_im_names': all_names,
        'trainval_ids2labels': dict(zip(label_ids, label_ids)),
    }
    combined_file = ospj(save_dir, 'partitions.pkl')
    save_pickle(combined, combined_file)
    print('Partition file saved to {}'.format(combined_file))
def save_images(mat_file, save_dir, new_im_name_tmpl):
    """Dump the images stored in an HDF5 `.mat` file to image files.

    Images referenced by the `labeled` and `detected` groups are written to
    `<save_dir>/labeled/images` and `<save_dir>/detected/images`; the union
    of both is written to `<save_dir>/all/images`.

    Args:
        mat_file: path of the `.mat` file, opened read-only with h5py.
        save_dir: root directory under which the three image folders are
            created.
        new_im_name_tmpl: format string filled with
            `(person id, camera index, image index)` to build each file name.
    """

    def deref(mat, ref):
        # Resolve an HDF5 object reference, read it fully and transpose it.
        return mat[ref][:].T

    def dump(mat, refs, pid, cam, im_dir):
        """Save the images of a person under one camera."""
        for i, ref in enumerate(refs):
            im = deref(mat, ref)
            # Stop at the first empty / non-image slot of this person.
            if im.size == 0 or im.ndim < 2:
                break
            fname = new_im_name_tmpl.format(pid, cam, i)
            imsave(osp.join(im_dir, fname), im)

    # Open the file in a `with` block so the handle is always released,
    # even if saving an image fails part-way through.
    with h5py.File(mat_file, 'r') as mat:
        labeled_im_dir = osp.join(save_dir, 'labeled/images')
        detected_im_dir = osp.join(save_dir, 'detected/images')
        all_im_dir = osp.join(save_dir, 'all/images')
        may_make_dir(labeled_im_dir)
        may_make_dir(detected_im_dir)
        may_make_dir(all_im_dir)

        # loop through camera pairs
        pid = 0
        for labeled, detected in zip(mat['labeled'][0], mat['detected'][0]):
            labeled, detected = deref(mat, labeled), deref(mat, detected)
            assert labeled.shape == detected.shape
            # loop through ids in a camera pair
            for i in range(labeled.shape[0]):
                # We don't care about whether different persons are under
                # same cameras, we only care about the same person being
                # under different cameras or not.
                # Columns 0-4 go to camera index 0, the rest to index 1.
                dump(mat, labeled[i, :5], pid, 0, labeled_im_dir)
                dump(mat, labeled[i, 5:], pid, 1, labeled_im_dir)
                dump(mat, detected[i, :5], pid, 0, detected_im_dir)
                dump(mat, detected[i, 5:], pid, 1, detected_im_dir)
                dump(mat, chain(detected[i, :5], labeled[i, :5]),
                     pid, 0, all_im_dir)
                dump(mat, chain(detected[i, 5:], labeled[i, 5:]),
                     pid, 1, all_im_dir)
                pid += 1
                if pid % 100 == 0:
                    # ANSI escapes: move the cursor up one line and clear it,
                    # so the progress message overwrites the previous one.
                    sys.stdout.write('\033[F\033[K')
                    # NOTE(review): 1467 is hard-coded; presumably the total
                    # number of person ids in the file -- confirm.
                    print('Saving images {}/{}'.format(pid, 1467))
def save_images(zip_file, save_dir=None, train_test_split_file=None):
    """Extract the zip, collect the used images and save the four-way split.

    The archive is unpacked into `save_dir` (default: the zip's own
    directory). Four sub-folders of the extracted directory are listed:

    * `bounding_box_train` -> `trainval_im_names`
    * `bounding_box_test`  -> `gallery_im_names` (files whose name starts
      with '-1' are dropped)
    * `query`              -> `q_im_names`
    * `gt_bbox`            -> `mq_im_names` (only images whose (id, cam)
      pair also occurs in `query`)

    All paths are passed to `move_ims` with destination `<save_dir>/images`;
    the returned names are sliced per subset, pickled to
    `train_test_split_file` and returned.
    """
    print("Extracting zip file")
    zip_parent = osp.dirname(osp.abspath(zip_file))
    if save_dir is None:
        save_dir = zip_parent
    may_make_dir(osp.abspath(save_dir))
    with ZipFile(zip_file) as archive:
        archive.extractall(path=save_dir)
    print("Extracting zip file done")

    new_im_dir = osp.join(save_dir, 'images')
    may_make_dir(osp.abspath(new_im_dir))
    # Extracted folder = zip base name without its last 4 characters.
    raw_dir = osp.join(save_dir, osp.basename(zip_file)[:-4])

    def list_sorted(folder):
        # Sorted image paths of one sub-folder of the extracted directory.
        paths = get_im_names(
            osp.join(raw_dir, folder), return_path=True, return_np=False)
        paths.sort()
        return paths

    def id_and_cam(path):
        # (id, cam) pair parsed from the original file name.
        base = osp.basename(path)
        return (parse_original_im_name(base, 'id'),
                parse_original_im_name(base, 'cam'))

    train_paths = list_sorted('bounding_box_train')
    gallery_paths = [
        p for p in list_sorted('bounding_box_test')
        if not osp.basename(p).startswith('-1')
    ]
    query_paths = list_sorted('query')
    query_ids_cams = {id_and_cam(p) for p in query_paths}
    # Only gather images for those ids and cams used in testing.
    mq_paths = [
        p for p in list_sorted('gt_bbox') if id_and_cam(p) in query_ids_cams
    ]

    groups = [train_paths, gallery_paths, query_paths, mq_paths]
    im_paths = [p for group in groups for p in group]
    nums = [len(group) for group in groups]

    im_names = move_ims(
        im_paths, new_im_dir, parse_original_im_name, new_im_name_tmpl)

    keys = [
        'trainval_im_names', 'gallery_im_names', 'q_im_names', 'mq_im_names'
    ]
    # Cumulative counts are the slice boundaries of each subset.
    inds = np.cumsum(np.array([0] + nums))
    split = {k: im_names[inds[i]:inds[i + 1]] for i, k in enumerate(keys)}
    save_pickle(split, train_test_split_file)
    print('Saving images done.')
    return split
def transform(zip_file, train_test_partition_file, save_dir=None):
    """Save images and build the train/val/test partition files.

    The zip is extracted into `save_dir` (default: the zip's directory) and
    the images inside its `cuhk-03.mat` are written out via `save_images`.
    Then, for each of the 'detected' and 'labeled' image types, the
    train/test partition loaded from `train_test_partition_file` is combined
    with a train/val split (100 validation ids) and pickled to
    `<save_dir>/<im_type>/partitions.pkl`.

    Raises:
        RuntimeError: if `train_test_partition_file` does not exist.
    """
    print("Extracting zip file")
    zip_parent = osp.dirname(osp.abspath(zip_file))
    if save_dir is None:
        save_dir = zip_parent
    may_make_dir(save_dir)
    with ZipFile(zip_file) as archive:
        archive.extractall(path=save_dir)
    print("Extracting zip file done")

    # Extracted folder = zip base name without its last 4 characters.
    mat_file = osp.join(save_dir, osp.basename(zip_file)[:-4], 'cuhk-03.mat')
    save_images(mat_file, save_dir, new_im_name_tmpl)

    if not osp.exists(train_test_partition_file):
        raise RuntimeError('Train/test partition file should be provided.')
    train_test_partition = load_pickle(train_test_partition_file)

    for im_type in ['detected', 'labeled']:
        type_partition = train_test_partition[im_type]

        trainval_im_names = type_partition['train_im_names']
        # Sort ids, so that id-to-label mapping remains the same when running
        # the code on different machines.
        trainval_ids = sorted(
            {parse_im_name(n, 'id') for n in trainval_im_names})
        trainval_ids2labels = {
            pid: label for label, pid in enumerate(trainval_ids)
        }

        train_val_partition = partition_train_val_set(
            trainval_im_names, parse_im_name, num_val_ids=100)
        train_im_names = train_val_partition['train_im_names']
        # Sorted for the same reproducibility reason as above.
        train_ids = sorted({parse_im_name(n, 'id') for n in train_im_names})
        train_ids2labels = {pid: label for label, pid in enumerate(train_ids)}

        # A mark is used to denote whether the image is from
        #   query (mark == 0), or
        #   gallery (mark == 1), or
        #   multi query (mark == 2) set
        val_query = train_val_partition['val_query_im_names']
        val_gallery = train_val_partition['val_gallery_im_names']
        val_marks = [0] * len(val_query) + [1] * len(val_gallery)
        val_im_names = list(val_query) + list(val_gallery)

        test_query = type_partition['query_im_names']
        test_gallery = type_partition['gallery_im_names']
        test_im_names = list(test_query) + list(test_gallery)
        test_marks = [0] * len(test_query) + [1] * len(test_gallery)

        partitions = {
            'trainval_im_names': trainval_im_names,
            'trainval_ids2labels': trainval_ids2labels,
            'train_im_names': train_im_names,
            'train_ids2labels': train_ids2labels,
            'val_im_names': val_im_names,
            'val_marks': val_marks,
            'test_im_names': test_im_names,
            'test_marks': test_marks,
        }
        partition_file = osp.join(save_dir, im_type, 'partitions.pkl')
        save_pickle(partitions, partition_file)
        print('Partition file for "{}" saved to {}'.format(
            im_type, partition_file))
def save_im(im, save_path):
    """Write a channel-first image to `save_path`.

    `im` has shape [3, H, W]; its axes are reordered to [H, W, 3] before it
    is converted with `Image.fromarray` and saved. The parent directory of
    `save_path` is passed to `may_make_dir` first.
    """
    parent_dir = ospdn(save_path)
    may_make_dir(parent_dir)
    # Move the channel axis last: [3, H, W] -> [H, W, 3].
    hwc_im = im.transpose(1, 2, 0)
    pil_im = Image.fromarray(hwc_im)
    pil_im.save(save_path)
def transform(zip_file, train_test_partition_file, save_dir=None):
    """Extract the dataset, save its images and write partition pickles.

    Steps: unpack `zip_file` into `save_dir` (default: directory of the
    zip), call `save_images` on the contained `cuhk-03.mat`, load the given
    train/test partition, and for both the 'detected' and 'labeled' image
    types write `<save_dir>/<im_type>/partitions.pkl` containing the
    trainval/train/val/test image names, id-to-label mappings and
    query/gallery marks.

    Raises:
        RuntimeError: if `train_test_partition_file` does not exist.
    """
    print("Extracting zip file")
    default_dir = osp.dirname(osp.abspath(zip_file))
    save_dir = default_dir if save_dir is None else save_dir
    may_make_dir(save_dir)
    with ZipFile(zip_file) as zf:
        zf.extractall(path=save_dir)
    print("Extracting zip file done")

    # The .mat file lives in the folder named after the zip (base name
    # minus its last 4 characters).
    unpacked_name = osp.basename(zip_file)[:-4]
    mat_file = osp.join(save_dir, unpacked_name, 'cuhk-03.mat')
    save_images(mat_file, save_dir, new_im_name_tmpl)

    if not osp.exists(train_test_partition_file):
        raise RuntimeError('Train/test partition file should be provided.')
    train_test_partition = load_pickle(train_test_partition_file)

    def ids_to_labels(im_names):
        # Sort ids, so that id-to-label mapping remains the same when running
        # the code on different machines.
        unique_ids = sorted(set(parse_im_name(n, 'id') for n in im_names))
        return dict(zip(unique_ids, range(len(unique_ids))))

    for im_type in ('detected', 'labeled'):
        per_type = train_test_partition[im_type]

        trainval_im_names = per_type['train_im_names']
        trainval_ids2labels = ids_to_labels(trainval_im_names)

        train_val_partition = partition_train_val_set(
            trainval_im_names, parse_im_name, num_val_ids=100)
        train_im_names = train_val_partition['train_im_names']
        train_ids2labels = ids_to_labels(train_im_names)

        # A mark is used to denote whether the image is from
        #   query (mark == 0), or
        #   gallery (mark == 1), or
        #   multi query (mark == 2) set
        val_q = train_val_partition['val_query_im_names']
        val_g = train_val_partition['val_gallery_im_names']
        val_marks = [0, ] * len(val_q) + [1, ] * len(val_g)
        val_im_names = list(val_q) + list(val_g)

        test_q = per_type['query_im_names']
        test_g = per_type['gallery_im_names']
        test_im_names = list(test_q) + list(test_g)
        test_marks = [0, ] * len(test_q) + [1, ] * len(test_g)

        partitions = dict()
        partitions['trainval_im_names'] = trainval_im_names
        partitions['trainval_ids2labels'] = trainval_ids2labels
        partitions['train_im_names'] = train_im_names
        partitions['train_ids2labels'] = train_ids2labels
        partitions['val_im_names'] = val_im_names
        partitions['val_marks'] = val_marks
        partitions['test_im_names'] = test_im_names
        partitions['test_marks'] = test_marks

        partition_file = osp.join(save_dir, im_type, 'partitions.pkl')
        save_pickle(partitions, partition_file)
        print('Partition file for "{}" saved to {}'.format(
            im_type, partition_file))
# Remaining command-line options: Duke image dir / partition file and the
# output directory of the combined dataset.
parser.add_argument(
    '--duke_im_dir',
    type=str,
    default=ospeu('/mnt/data/dataset/duke/images'))
parser.add_argument(
    '--duke_partition_file',
    type=str,
    default=ospeu('/mnt/data/dataset/duke/partitions.pkl'))
parser.add_argument(
    '--save_dir',
    type=str,
    default=ospeu('/mnt/data/dataset/pcb/market1501_cuhk03_duke'))

args = parser.parse_args()

# Expand '~' and make every path absolute before use. The three datasets are
# listed in the same order in both lists so they pair up in the combiner.
im_dirs = [
    ospap(ospeu(raw_path))
    for raw_path in (
        args.market1501_im_dir,
        args.cuhk03_im_dir,
        args.duke_im_dir,
    )
]
partition_files = [
    ospap(ospeu(raw_path))
    for raw_path in (
        args.market1501_partition_file,
        args.cuhk03_partition_file,
        args.duke_partition_file,
    )
]

save_dir = ospap(ospeu(args.save_dir))
may_make_dir(save_dir)
combine_trainval_sets(im_dirs, partition_files, save_dir)
def save_images(original_file, save_dir=None, train_test_split_file=None):
    """Collect the used PNG images of an already-extracted dataset directory.

    `original_file` is used directly as the raw dataset directory (no zip
    extraction). `save_dir` defaults to the parent directory of
    `original_file`. PNG images are listed from `bounding_box_train`,
    `bounding_box_test` (names starting with '-1' dropped), `query` and
    `gt_bbox` (only (id, cam) pairs present in `query`). After the training
    folder is listed, `anchor_positive_negative_2` is called on the paths
    gathered so far. All paths are finally passed to `move_ims_2`, and the
    returned names are split into `trainval_im_names`, `gallery_im_names`,
    `q_im_names` and `mq_im_names`, pickled to `train_test_split_file` and
    returned. Progress information is printed along the way.
    """
    parent_dir = osp.dirname(osp.abspath(original_file))
    if save_dir is None:
        save_dir = parent_dir
    may_make_dir(osp.abspath(save_dir))

    new_im_dir = osp.join(save_dir, 'images')
    may_make_dir(osp.abspath(new_im_dir))

    raw_dir = osp.abspath(original_file)
    print('raw_dir: ', raw_dir)

    im_paths = []
    nums = []

    def list_pngs(folder):
        # Sorted paths of the '*.png' files in one sub-folder of raw_dir.
        found = get_im_names(
            osp.join(raw_dir, folder),
            pattern='*.png',
            return_path=True,
            return_np=False)
        found.sort()
        return found

    def register(folder, paths):
        # Append one subset to the running totals and report progress.
        im_paths.extend(paths)
        nums.append(len(paths))
        print('dir_name: ' + folder)
        print('nums: ', nums)

    def id_and_cam(path):
        base = osp.basename(path)
        return (parse_original_im_name(base, 'id'),
                parse_original_im_name(base, 'cam'))

    register('bounding_box_train', list_pngs('bounding_box_train'))

    # Create (anchor, positive, negative)
    anchor_positive_negative_2(im_paths, parse_original_im_name, save_dir)

    test_paths = [
        p for p in list_pngs('bounding_box_test')
        if not osp.basename(p).startswith('-1')
    ]
    register('bounding_box_test', test_paths)

    query_paths = list_pngs('query')
    query_ids_cams = {id_and_cam(p) for p in query_paths}
    register('query', query_paths)

    # Only gather images for those ids and cams used in testing.
    gt_paths = [
        p for p in list_pngs('gt_bbox') if id_and_cam(p) in query_ids_cams
    ]
    register('gt_bbox', gt_paths)

    im_names = move_ims_2(im_paths, parse_original_im_name, new_im_name_tmpl)

    split = dict()
    keys = [
        'trainval_im_names', 'gallery_im_names', 'q_im_names', 'mq_im_names'
    ]
    inds = [0] + nums
    print('inds: ', inds)
    # Cumulative counts are the slice boundaries of each subset.
    inds = np.cumsum(np.array(inds))
    print('inds: ', inds)
    print('enumerate(keys): ', enumerate(keys))
    for i, k in enumerate(keys):
        print('i,k: ', i, k)
        split[k] = im_names[inds[i]:inds[i + 1]]

    save_pickle(split, train_test_split_file)
    print('Saving images done.')
    return split
def save_images(zip_file, save_dir=None, train_test_split_file=None):
    """Unpack `zip_file`, move the used images and store the four-way split.

    `save_dir` falls back to the directory containing `zip_file`. Paths are
    gathered, in this order, from `bounding_box_train`, `bounding_box_test`
    (skipping names that start with '-1'), `query`, and `gt_bbox` (keeping
    only images whose (id, cam) pair appears in `query`). `move_ims` receives
    all paths plus the destination `<save_dir>/images`; its returned names
    are cut into `trainval_im_names`, `gallery_im_names`, `q_im_names` and
    `mq_im_names`, pickled to `train_test_split_file` and returned.
    """
    print("Extracting zip file")
    default_dir = osp.dirname(osp.abspath(zip_file))
    save_dir = default_dir if save_dir is None else save_dir
    may_make_dir(osp.abspath(save_dir))
    with ZipFile(zip_file) as zf:
        zf.extractall(path=save_dir)
    print("Extracting zip file done")

    images_dir = osp.join(save_dir, 'images')
    may_make_dir(osp.abspath(images_dir))
    # Extracted folder = zip base name with the last 4 characters removed.
    unpacked_dir = osp.join(save_dir, osp.basename(zip_file)[:-4])

    def parsed_key(path):
        # (id, cam) parsed from the original image file name.
        fname = osp.basename(path)
        return (parse_original_im_name(fname, 'id'),
                parse_original_im_name(fname, 'cam'))

    collected = []
    sizes = []
    query_keys = set()
    for sub_dir in ('bounding_box_train', 'bounding_box_test', 'query',
                    'gt_bbox'):
        listing = get_im_names(
            osp.join(unpacked_dir, sub_dir), return_path=True,
            return_np=False)
        listing.sort()
        if sub_dir == 'bounding_box_test':
            listing = [
                p for p in listing if not osp.basename(p).startswith('-1')
            ]
        elif sub_dir == 'query':
            query_keys = set(parsed_key(p) for p in listing)
        elif sub_dir == 'gt_bbox':
            # Only gather images for those ids and cams used in testing.
            listing = [p for p in listing if parsed_key(p) in query_keys]
        collected += list(listing)
        sizes.append(len(listing))

    im_names = move_ims(
        collected, images_dir, parse_original_im_name, new_im_name_tmpl)

    split_keys = [
        'trainval_im_names', 'gallery_im_names', 'q_im_names', 'mq_im_names'
    ]
    # Pair every key with the [start, stop) range of its subset.
    offsets = np.cumsum(np.array([0] + sizes))
    split = dict()
    for key, start, stop in zip(split_keys, offsets[:-1], offsets[1:]):
        split[key] = im_names[start:stop]

    save_pickle(split, train_test_split_file)
    print('Saving images done.')
    return split
default=ospeu('~/Dataset/duke/images')) parser.add_argument( '--duke_partition_file', type=str, default=ospeu('~/Dataset/duke/partitions.pkl') ) parser.add_argument( '--save_dir', type=str, default=ospeu('~/Dataset/market1501_cuhk03_duke') ) args = parser.parse_args() im_dirs = [ ospap(ospeu(args.market1501_im_dir)), ospap(ospeu(args.cuhk03_im_dir)), ospap(ospeu(args.duke_im_dir)) ] partition_files = [ ospap(ospeu(args.market1501_partition_file)), ospap(ospeu(args.cuhk03_partition_file)), ospap(ospeu(args.duke_partition_file)) ] save_dir = ospap(ospeu(args.save_dir)) may_make_dir(save_dir) combine_trainval_sets(im_dirs, partition_files, save_dir)
def save_images(data_dir, save_dir=None, train_test_split_file=None):
    """Rename and move all used images of several datasets to one directory.

    For every entry of the module-level `dataset` iterable, the directory
    `<data_dir>/<entry>` is scanned for four sub-folders:

    * `bounding_box_train` -> `trainval_im_names`
    * `bounding_box_test`  -> `gallery_im_names` (names starting with '-1'
      are dropped)
    * `query`              -> `q_im_names`
    * `gt_bbox`            -> `mq_im_names` (only images whose (id, cam)
      pair occurs in that same entry's `query` folder)

    Paths are grouped by subset across all entries, passed to `move_ims`
    with destination `<save_dir>/images`, and the returned names are sliced
    back per subset.

    Args:
        data_dir: directory containing one sub-directory per dataset entry.
        save_dir: output root; falls back to `data_dir` when None (the
            declared default previously crashed in `osp.join`).
        train_test_split_file: path the resulting split dict is pickled to.

    Returns:
        dict mapping the four subset keys to their lists of image names.
    """
    if save_dir is None:
        # The signature advertises None as default, so honour it instead of
        # failing with a TypeError when building the image directory path.
        save_dir = data_dir

    new_im_dir = osp.join(save_dir, 'images')
    may_make_dir(osp.abspath(new_im_dir))

    def list_sorted(raw_dir, folder):
        # Sorted image paths of one sub-folder of a dataset directory.
        paths = get_im_names(
            osp.join(raw_dir, folder), return_path=True, return_np=False)
        paths.sort()
        return paths

    def id_and_cam(path):
        # (id, cam) pair parsed from the original file name.
        base = osp.basename(path)
        return (parse_original_im_name(base, 'id'),
                parse_original_im_name(base, 'cam'))

    # Paths of each subset, accumulated over all dataset entries.
    bb_train = []
    bb_test = []
    query = []
    gt_bb = []
    for data in dataset:
        raw_dir = osp.join(data_dir, data)

        bb_train += list_sorted(raw_dir, 'bounding_box_train')

        bb_test += [
            p for p in list_sorted(raw_dir, 'bounding_box_test')
            if not osp.basename(p).startswith('-1')
        ]

        query_paths = list_sorted(raw_dir, 'query')
        query += query_paths
        q_ids_cams = {id_and_cam(p) for p in query_paths}

        # Only gather images for those ids and cams used in testing.
        gt_bb += [
            p for p in list_sorted(raw_dir, 'gt_bbox')
            if id_and_cam(p) in q_ids_cams
        ]

    groups = [bb_train, bb_test, query, gt_bb]
    im_paths = bb_train + bb_test + query + gt_bb
    nums = [len(group) for group in groups]

    im_names = move_ims(
        im_paths, new_im_dir, parse_original_im_name, new_im_name_tmpl)

    keys = [
        'trainval_im_names', 'gallery_im_names', 'q_im_names', 'mq_im_names'
    ]
    # Cumulative counts are the slice boundaries of each subset.
    inds = np.cumsum(np.array([0] + nums))
    split = {k: im_names[inds[i]:inds[i + 1]] for i, k in enumerate(keys)}
    save_pickle(split, train_test_split_file)
    print('Saving images done.')
    return split