def transform(zip_file, save_dir=None):
    """Refactor file directories, rename images and partition the train/val/test set.

    Images are renamed and gathered by `save_images`, which also writes
    'train_test_split.pkl' into `save_dir`. The resulting split is then
    turned into a partition dict that is pickled to 'partitions.pkl' in
    the same directory.

    Args:
        zip_file: path of the dataset archive; forwarded to `save_images`.
        save_dir: directory receiving both pickle files. When None, the
            directory containing `zip_file` is used, which is the same
            default `save_images` applies.
    """
    # Resolve the default here: the paths below are built from `save_dir`
    # before `save_images` gets a chance to apply its own default.
    if save_dir is None:
        save_dir = osp.dirname(osp.abspath(zip_file))

    def ids_to_labels(im_names):
        # Sort ids, so that id-to-label mapping remains the same when
        # running the code on different machines.
        ids = sorted({parse_new_im_name(name, 'id') for name in im_names})
        return dict(zip(ids, range(len(ids))))

    train_test_split_file = osp.join(save_dir, 'train_test_split.pkl')
    train_test_split = save_images(zip_file, save_dir, train_test_split_file)

    trainval_im_names = train_test_split['trainval_im_names']
    trainval_ids2labels = ids_to_labels(trainval_im_names)

    # partition train/val/test set
    # NOTE(review): num_val_ids=0 presumably leaves the validation set
    # empty -- confirm against partition_train_val_set.
    partitions = partition_train_val_set(
        trainval_im_names, parse_new_im_name, num_val_ids=0)
    train_im_names = partitions['train_im_names']
    train_ids2labels = ids_to_labels(train_im_names)

    # A mark is used to denote whether the image is from
    #   query (mark == 0), or
    #   gallery (mark == 1), or
    #   multi query (mark == 2) set
    # Only marks 0 and 1 are produced here.
    val_query = partitions['val_query_im_names']
    val_gallery = partitions['val_gallery_im_names']
    val_im_names = list(val_query) + list(val_gallery)
    val_marks = [0, ] * len(val_query) + [1, ] * len(val_gallery)

    test_query = train_test_split['q_im_names']
    test_gallery = train_test_split['gallery_im_names']
    test_im_names = list(test_query) + list(test_gallery)
    test_marks = [0, ] * len(test_query) + [1, ] * len(test_gallery)

    partitions = {'trainval_im_names': trainval_im_names,
                  'trainval_ids2labels': trainval_ids2labels,
                  'train_im_names': train_im_names,
                  'train_ids2labels': train_ids2labels,
                  'val_im_names': val_im_names,
                  'val_marks': val_marks,
                  'test_im_names': test_im_names,
                  'test_marks': test_marks}
    partition_file = osp.join(save_dir, 'partitions.pkl')
    save_pickle(partitions, partition_file)
    print('Partition file saved to {}'.format(partition_file))
def save_im_name_mapping(raw_dir, ori_to_new_im_name_file):
    """Pickle a mapping from original image names to new image names.

    Image names are collected from four sub-directories of `raw_dir`
    ('bounding_box_train', 'bounding_box_test', 'query', 'gt_bbox'),
    mapped with `map_im_names`, and the resulting dict is saved to
    `ori_to_new_im_name_file`. Some diagnostic statistics about the
    collected names are printed afterwards.

    Args:
        raw_dir: directory containing the four sub-directories.
        ori_to_new_im_name_file: destination passed to `save_pickle`.
    """

    def list_names(sub_dir):
        # Bare file names (no directory part) of the images in `sub_dir`.
        return get_im_names(osp.join(raw_dir, sub_dir),
                            return_path=False, return_np=False)

    im_names = []
    for sub_dir in ('bounding_box_train', 'bounding_box_test',
                    'query', 'gt_bbox'):
        names = list_names(sub_dir)
        names.sort()
        if sub_dir == 'bounding_box_test':
            # Filter out id -1
            names = [name for name in names if not name.startswith('-1')]
        # Images in different original directories may have same names,
        # so relative paths serve as the original image names.
        im_names.extend(osp.join(sub_dir, name) for name in names)

    new_im_names = map_im_names(im_names, parse_original_im_name,
                                new_im_name_tmpl)
    ori_to_new_im_name = dict(zip(im_names, new_im_names))
    save_pickle(ori_to_new_im_name, ori_to_new_im_name_file)
    print('File saved to {}'.format(ori_to_new_im_name_file))

    ##################
    # Just Some Info #
    ##################

    print('len(im_names)', len(im_names))
    print('len(set(im_names))', len(set(im_names)))
    print('len(set(new_im_names))', len(set(new_im_names)))
    print('len(ori_to_new_im_name)', len(ori_to_new_im_name))

    # Unfiltered listings of every directory, used only for the report.
    bounding_box_train_im_names = list_names('bounding_box_train')
    bounding_box_test_im_names = list_names('bounding_box_test')
    query_im_names = list_names('query')
    gt_bbox_im_names = list_names('gt_bbox')

    train_set = set(bounding_box_train_im_names)
    test_set = set(bounding_box_test_im_names)
    query_set = set(query_im_names)
    gt_set = set(gt_bbox_im_names)

    # Pairwise disjointness checks, reported in a fixed order.
    disjoint_checks = [
        ('set(bounding_box_train_im_names).isdisjoint(set(bounding_box_test_im_names))',
         train_set, test_set),
        ('set(bounding_box_train_im_names).isdisjoint(set(query_im_names))',
         train_set, query_set),
        ('set(bounding_box_train_im_names).isdisjoint(set(gt_bbox_im_names))',
         train_set, gt_set),
        ('set(bounding_box_test_im_names).isdisjoint(set(query_im_names))',
         test_set, query_set),
        ('set(bounding_box_test_im_names).isdisjoint(set(gt_bbox_im_names))',
         test_set, gt_set),
        ('set(query_im_names).isdisjoint(set(gt_bbox_im_names))',
         query_set, gt_set),
    ]
    for label, left, right in disjoint_checks:
        print(label, left.isdisjoint(right))

    print('len(query_im_names)', len(query_im_names))
    print('len(gt_bbox_im_names)', len(gt_bbox_im_names))
    print('len(set(query_im_names) & set(gt_bbox_im_names))',
          len(query_set & gt_set))
    print('len(set(query_im_names) | set(gt_bbox_im_names))',
          len(query_set | gt_set))
def save_images(zip_file, save_dir=None, train_test_split_file=None):
    """Rename and move all used images to a directory.

    Images are read from the 'bounding_box_train', 'bounding_box_test'
    and 'query' sub-directories of `save_dir`, moved into
    `save_dir`/images via `move_ims`, and the new names are split back
    into three groups.

    NOTE(review): another `save_images` is defined later in this source;
    if both live in the same module the later definition wins.

    Args:
        zip_file: path of the dataset archive. Only its directory is
            used here, as the default for `save_dir`.
        save_dir: working directory; defaults to the directory of
            `zip_file`.
        train_test_split_file: destination passed to `save_pickle`.

    Returns:
        dict with keys 'trainval_im_names', 'gallery_im_names' and
        'q_im_names', each holding the new names of that group.
    """
    print("Extracting zip file")
    zip_dir = osp.dirname(osp.abspath(zip_file))
    save_dir = zip_dir if save_dir is None else save_dir
    may_make_dir(save_dir)

    # Extraction of the archive is disabled: the raw sub-directories are
    # read directly from `save_dir`.
    new_im_dir = osp.join(save_dir, 'images')
    may_make_dir(new_im_dir)
    raw_dir = save_dir

    im_paths = []
    counts = []
    for sub_dir in ('bounding_box_train', 'bounding_box_test', 'query'):
        found = get_im_names(osp.join(raw_dir, sub_dir), pattern='*.jpg',
                             return_path=True, return_np=False)
        found.sort()
        im_paths.extend(found)
        counts.append(len(found))

    im_names = move_ims(im_paths, new_im_dir, parse_original_im_name,
                        new_im_name_tmpl)

    # Cumulative counts give the slice boundaries of each group.
    bounds = np.cumsum([0] + counts)
    keys = ['trainval_im_names', 'gallery_im_names', 'q_im_names']
    split = {key: im_names[start:stop]
             for key, start, stop in zip(keys, bounds[:-1], bounds[1:])}

    save_pickle(split, train_test_split_file)
    print('Saving images done.')
    return split
def save_images_rap2(zip_file, annotation_file, query_file, save_dir=None,
                     train_test_split_file=None):
    """Rename and move all the used images to a directory.

    The archive is extracted into `save_dir`, the train/test/query file
    names are obtained from `_get_im_names_rap2`, and the images are moved
    into `save_dir`/images via `_move_ims`.

    Args:
        zip_file: path of the dataset archive to extract.
        annotation_file: forwarded to `_get_im_names_rap2`.
        query_file: forwarded to `_get_im_names_rap2`.
        save_dir: extraction / output directory; defaults to the directory
            of `zip_file`.
        train_test_split_file: destination passed to `save_pickle`.

    Returns:
        dict with keys 'trainval_im_names', 'gallery_im_names' and
        'q_im_names', each holding the new names of that group.
    """
    print("Extracting zip file")
    root = osp.dirname(osp.abspath(zip_file))
    if save_dir is None:
        save_dir = root
    may_make_dir(save_dir)
    with ZipFile(zip_file) as z:
        z.extractall(path=save_dir)
    print("Extracting zip file done")

    new_im_dir = osp.join(save_dir, 'images')
    may_make_dir(new_im_dir)

    # Get fnames
    # NOTE(review): the meaning of the trailing argument `1` is not visible
    # here -- confirm against _get_im_names_rap2.
    img_fnames_train, img_fnames_test, img_fnames_query = \
        _get_im_names_rap2(annotation_file, query_file, 1)
    subsets = (img_fnames_train, img_fnames_test, img_fnames_query)
    for fnames in subsets:
        fnames.sort()
    im_paths = [fname for fnames in subsets for fname in fnames]
    nums = [fnames.shape[0] for fnames in subsets]

    # Move images
    # NOTE(review): source images are looked up next to the zip file
    # (`root`), not under `save_dir` where the archive was just extracted;
    # the two coincide only when `save_dir` is left as None. Confirm this
    # is intended.
    org_img_dir = osp.join(root, 'images-pedestrian')
    im_names = _move_ims(org_img_dir, im_paths, new_im_dir,
                         parse_original_im_name, new_im_name_tmpl)

    # Cumulative counts give the slice boundaries of each group.
    split = dict()
    keys = ['trainval_im_names', 'gallery_im_names', 'q_im_names']
    inds = np.cumsum([0] + nums)
    for i, k in enumerate(keys):
        split[k] = im_names[inds[i]:inds[i + 1]]

    save_pickle(split, train_test_split_file)
    print('Saving images done.')
    return split
def combine_trainval_sets(im_dirs, partition_files, save_dir):
    """Merge the trainval images of several datasets into one directory.

    For every (image directory, partition file) pair, the images listed
    under 'trainval_im_names' are moved into `save_dir`/trainval_images
    via `move_ims`. A combined 'partitions.pkl' is then written to
    `save_dir`.

    Args:
        im_dirs: image directories, one per dataset.
        partition_files: partition pickle files, paired with `im_dirs`.
        save_dir: directory receiving the images and the partition file.
    """
    new_im_dir = ospj(save_dir, 'trainval_images')
    may_make_dir(new_im_dir)

    combined_im_names = []
    # Running id offset handed to move_ims; presumably keeps the ids of
    # different datasets from colliding -- confirm against move_ims.
    next_start_id = 0
    for src_dir, src_partition_file in zip(im_dirs, partition_files):
        src_partitions = load_pickle(src_partition_file)
        src_paths = sorted(
            ospj(src_dir, name)
            for name in src_partitions['trainval_im_names'])
        moved_im_names, id_mapping = move_ims(
            src_paths, new_im_dir, parse_im_name, new_im_name_tmpl,
            next_start_id)
        next_start_id += len(id_mapping)
        combined_im_names.extend(moved_im_names)

    # Every new id in [0, next_start_id) is mapped to itself as its label.
    partitions = {
        'trainval_im_names': combined_im_names,
        'trainval_ids2labels': {i: i for i in range(next_start_id)},
    }
    partition_file = ospj(save_dir, 'partitions.pkl')
    save_pickle(partitions, partition_file)
    print('Partition file saved to {}'.format(partition_file))
def save_images(zip_file, save_dir=None, train_test_split_file=None):
    """Rename and move all used images to a directory.

    Images are read from the 'bounding_box_train', 'bounding_box_test',
    'query' and 'gt_bbox' sub-directories of `save_dir`, moved into
    `save_dir`/images via `move_ims`, and the new names are split back
    into four groups.

    NOTE(review): an earlier `save_images` is defined in this source; if
    both live in the same module this later definition overrides it.

    Args:
        zip_file: path of the dataset archive. Only its directory is
            used here, as the default for `save_dir`.
        save_dir: working directory; defaults to the directory of
            `zip_file`.
        train_test_split_file: destination passed to `save_pickle`.

    Returns:
        dict with keys 'trainval_im_names', 'gallery_im_names',
        'q_im_names' and 'mq_im_names', each holding the new names of
        that group.
    """
    print("Extracting zip file")
    zip_dir = osp.dirname(osp.abspath(zip_file))
    save_dir = zip_dir if save_dir is None else save_dir
    may_make_dir(osp.abspath(save_dir))

    # Extraction of the archive is disabled: the raw sub-directories are
    # read directly from `save_dir`.
    new_im_dir = osp.join(save_dir, 'images')
    may_make_dir(osp.abspath(new_im_dir))
    raw_dir = save_dir

    def sorted_paths(sub_dir):
        # Image paths found in `sub_dir`, sorted in place.
        paths = get_im_names(osp.join(raw_dir, sub_dir),
                             return_path=True, return_np=False)
        paths.sort()
        return paths

    def id_and_cam(path):
        # (id, cam) pair parsed from the file name of `path`.
        name = osp.basename(path)
        return (parse_original_im_name(name, 'id'),
                parse_original_im_name(name, 'cam'))

    trainval_paths = sorted_paths('bounding_box_train')

    # Gallery: drop files whose name starts with '-1'.
    gallery_paths = [p for p in sorted_paths('bounding_box_test')
                     if not osp.basename(p).startswith('-1')]

    query_paths = sorted_paths('query')
    q_ids_cams = {id_and_cam(p) for p in query_paths}

    # Only gather images for those ids and cams used in testing.
    mq_paths = [p for p in sorted_paths('gt_bbox')
                if id_and_cam(p) in q_ids_cams]

    groups = [trainval_paths, gallery_paths, query_paths, mq_paths]
    im_paths = [p for group in groups for p in group]
    nums = [len(group) for group in groups]

    im_names = move_ims(im_paths, new_im_dir, parse_original_im_name,
                        new_im_name_tmpl)

    # Cumulative counts give the slice boundaries of each group.
    bounds = np.cumsum(np.array([0] + nums))
    keys = [
        'trainval_im_names', 'gallery_im_names', 'q_im_names', 'mq_im_names'
    ]
    split = {key: im_names[start:stop]
             for key, start, stop in zip(keys, bounds[:-1], bounds[1:])}

    save_pickle(split, train_test_split_file)
    print('Saving images done.')
    return split