def _download_data(self): if osp.exists(self.dataset_dir): print("This dataset has been downloaded.") return mkdir_if_missing(self.dataset_dir) fpath = osp.join(self.dataset_dir, osp.basename(self.dataset_url)) print("Downloading iLIDS-VID dataset") urllib.urlretrieve(self.dataset_url, fpath) print("Extracting files") tar = tarfile.open(fpath) tar.extractall(path=self.dataset_dir) tar.close()
def _download_data(self): if osp.exists(self.dataset_dir): return print("Creating directory {}".format(self.dataset_dir)) mkdir_if_missing(self.dataset_dir) fpath = osp.join(self.dataset_dir, osp.basename(self.dataset_url)) print("Downloading DukeMTMC-reID dataset") urllib.request.urlretrieve(self.dataset_url, fpath) print("Extracting files") zip_ref = zipfile.ZipFile(fpath, 'r') zip_ref.extractall(self.dataset_dir) zip_ref.close()
def _download_data(self): if osp.exists(self.dataset_dir): print("This dataset has been downloaded.") return print("Creating directory {}".format(self.dataset_dir)) mkdir_if_missing(self.dataset_dir) fpath = osp.join(self.dataset_dir, osp.basename(self.dataset_url)) print("Downloading VIPeR dataset") urllib.urlretrieve(self.dataset_url, fpath) print("Extracting files") zip_ref = zipfile.ZipFile(fpath, 'r') zip_ref.extractall(self.dataset_dir) zip_ref.close()
def download_dataset(self, dataset_dir, dataset_url): """Downloads and extracts dataset. Args: dataset_dir (str): dataset directory. dataset_url (str): url to download dataset. """ if osp.exists(dataset_dir): return if dataset_url is None: raise RuntimeError( '{} dataset needs to be manually ' 'prepared, please follow the ' 'document to prepare this dataset'.format( self.__class__.__name__ ) ) print('Creating directory "{}"'.format(dataset_dir)) mkdir_if_missing(dataset_dir) fpath = osp.join(dataset_dir, osp.basename(dataset_url)) print( 'Downloading {} dataset to "{}"'.format( self.__class__.__name__, dataset_dir ) ) # download_url(dataset_url, fpath) print('Extracting "{}"'.format(fpath)) try: tar = tarfile.open(fpath) tar.extractall(path=dataset_dir) tar.close() except: zip_ref = zipfile.ZipFile(fpath, 'r') zip_ref.extractall(dataset_dir) zip_ref.close() print('{} dataset is ready'.format(self.__class__.__name__))
def __init__(self, cfg): """ All Saver based on two dir: save_dir and load_dir. for train : save_dir = load_dir and every time This will make a new one. for test : save_dir = None , load_dir will automatic be stitched with run_id for uda : every time save_dir will be made. and load_dir will automatic be stitched with run_id and source dataset """ self.cfg = cfg self.save_dir = '' self.load_dir = '' dirname_list = os.path.dirname(__file__).split("/")[:-1] self.up_dir = "/".join(dirname_list) source_name, source_mid_name, target_name, target_mid_name = self._get_some_dir_name( cfg) if cfg.TEST.IF_ON: self.load_dir = self.get_load_dir(source_mid_name, source_name) self.save_dir = self.load_dir else: self.save_dir = self.get_save_dir(target_mid_name, target_name) if target_mid_name == 'direct': self.load_dir = self.save_dir else: self.load_dir = self.get_load_dir(source_mid_name, source_name) print(f"save dir: {self.save_dir}") print(f"load dir: {self.load_dir}") self.model_dir = join(self.save_dir, 'model') self.image_dir = join(self.save_dir, 'image') self.tensorboard_dir = join(self.save_dir, 'tensorboard') self.code_dir = join(self.save_dir, 'code') mkdir_if_missing(self.save_dir) mkdir_if_missing(self.model_dir) mkdir_if_missing(self.image_dir) mkdir_if_missing(self.tensorboard_dir) if not os.path.exists(self.code_dir): shutil.copytree(self.up_dir, self.code_dir, ignore=shutil.ignore_patterns('run'))
def _preprocess(self): """ This function is a bit complex and ugly, what it does is 1. Extract data from cuhk-03.mat and save as png images. 2. Create 20 classic splits. (Li et al. CVPR'14) 3. Create new split. (Zhong et al. CVPR'17) """ print( "Note: if root path is changed, the previously generated json files need " "to be re-generated (delete them first)") if osp.exists(self.imgs_labeled_dir) and \ osp.exists(self.imgs_detected_dir) and \ osp.exists(self.split_classic_det_json_path) and \ osp.exists(self.split_classic_lab_json_path) and \ osp.exists(self.split_new_det_json_path) and \ osp.exists(self.split_new_lab_json_path): return mkdir_if_missing(self.imgs_detected_dir) mkdir_if_missing(self.imgs_labeled_dir) print("Extract image data from {} and save as png".format(self.raw_mat_path)) mat = h5py.File(self.raw_mat_path, 'r') def _deref(ref): return mat[ref][:].T def _process_images(img_refs, campid, pid, save_dir): img_paths = [] # Note: some persons only have images for one view for imgid, img_ref in enumerate(img_refs): img = _deref(img_ref) if img.size == 0 or img.ndim < 3: continue viewid = 1 if imgid < 5 else 2 img_name = '{:01d}_{:03d}_{:01d}_{:02d}.png'.format(campid + 1, pid + 1, viewid, imgid + 1) img_path = osp.join(save_dir, img_name) if not osp.isfile(img_path): imsave(img_path, img) img_paths.append(img_path) return img_paths def _extract_img(name): print("Processing {} images (extract and save) ...".format(name)) meta_data = [] imgs_dir = self.imgs_detected_dir if name == 'detected' else self.imgs_labeled_dir for campid, camp_ref in enumerate(mat[name][0]): camp = _deref(camp_ref) num_pids = camp.shape[0] for pid in range(num_pids): img_paths = _process_images(camp[pid, :], campid, pid, imgs_dir) assert len(img_paths) > 0, "campid{}-pid{} has no images".format(campid, pid) meta_data.append((campid + 1, pid + 1, img_paths)) print("- done camera pair {} with {} identities".format(campid + 1, num_pids)) return meta_data meta_detected = _extract_img('detected') meta_labeled = _extract_img('labeled') def _extract_classic_split(meta_data, test_split): train, test = [], [] num_train_pids, num_test_pids = 0, 0 num_train_imgs, num_test_imgs = 0, 0 for i, (campid, pid, img_paths) in enumerate(meta_data): if [campid, pid] in test_split: for img_path in img_paths: camid = int(osp.basename(img_path).split('_')[2]) - 1 # make it 0-based test.append((img_path, num_test_pids, camid)) num_test_pids += 1 num_test_imgs += len(img_paths) else: for img_path in img_paths: camid = int(osp.basename(img_path).split('_')[2]) - 1 # make it 0-based train.append((img_path, num_train_pids, camid)) num_train_pids += 1 num_train_imgs += len(img_paths) return train, num_train_pids, num_train_imgs, test, num_test_pids, num_test_imgs print("Creating classic splits (# = 20) ...") splits_classic_det, splits_classic_lab = [], [] for split_ref in mat['testsets'][0]: test_split = _deref(split_ref).tolist() # create split for detected images train, num_train_pids, num_train_imgs, test, num_test_pids, num_test_imgs = \ _extract_classic_split(meta_detected, test_split) splits_classic_det.append({ 'train': train, 'query': test, 'gallery': test, 'num_train_pids': num_train_pids, 'num_train_imgs': num_train_imgs, 'num_query_pids': num_test_pids, 'num_query_imgs': num_test_imgs, 'num_gallery_pids': num_test_pids, 'num_gallery_imgs': num_test_imgs, }) # create split for labeled images train, num_train_pids, num_train_imgs, test, num_test_pids, num_test_imgs = \ _extract_classic_split(meta_labeled, test_split) splits_classic_lab.append({ 'train': train, 'query': test, 'gallery': test, 'num_train_pids': num_train_pids, 'num_train_imgs': num_train_imgs, 'num_query_pids': num_test_pids, 'num_query_imgs': num_test_imgs, 'num_gallery_pids': num_test_pids, 'num_gallery_imgs': num_test_imgs, }) write_json(splits_classic_det, self.split_classic_det_json_path) write_json(splits_classic_lab, self.split_classic_lab_json_path) def _extract_set(filelist, pids, pid2label, idxs, img_dir, relabel): tmp_set = [] unique_pids = set() for idx in idxs: img_name = filelist[idx][0] camid = int(img_name.split('_')[2]) - 1 # make it 0-based pid = pids[idx] if relabel: pid = pid2label[pid] img_path = osp.join(img_dir, img_name) tmp_set.append((img_path, int(pid), camid)) unique_pids.add(pid) return tmp_set, len(unique_pids), len(idxs) def _extract_new_split(split_dict, img_dir): train_idxs = split_dict['train_idx'].flatten() - 1 # index-0 pids = split_dict['labels'].flatten() train_pids = set(pids[train_idxs]) pid2label = {pid: label for label, pid in enumerate(train_pids)} query_idxs = split_dict['query_idx'].flatten() - 1 gallery_idxs = split_dict['gallery_idx'].flatten() - 1 filelist = split_dict['filelist'].flatten() train_info = _extract_set(filelist, pids, pid2label, train_idxs, img_dir, relabel=True) query_info = _extract_set(filelist, pids, pid2label, query_idxs, img_dir, relabel=False) gallery_info = _extract_set(filelist, pids, pid2label, gallery_idxs, img_dir, relabel=False) return train_info, query_info, gallery_info print("Creating new splits for detected images (767/700) ...") train_info, query_info, gallery_info = _extract_new_split( loadmat(self.split_new_det_mat_path), self.imgs_detected_dir, ) splits = [{ 'train': train_info[0], 'query': query_info[0], 'gallery': gallery_info[0], 'num_train_pids': train_info[1], 'num_train_imgs': train_info[2], 'num_query_pids': query_info[1], 'num_query_imgs': query_info[2], 'num_gallery_pids': gallery_info[1], 'num_gallery_imgs': gallery_info[2], }] write_json(splits, self.split_new_det_json_path) print("Creating new splits for labeled images (767/700) ...") train_info, query_info, gallery_info = _extract_new_split( loadmat(self.split_new_lab_mat_path), self.imgs_labeled_dir, ) splits = [{ 'train': train_info[0], 'query': query_info[0], 'gallery': gallery_info[0], 'num_train_pids': train_info[1], 'num_train_imgs': train_info[2], 'num_query_pids': query_info[1], 'num_query_imgs': query_info[2], 'num_gallery_pids': gallery_info[1], 'num_gallery_imgs': gallery_info[2], }] write_json(splits, self.split_new_lab_json_path)
def _preprocess(self): """ This function is a bit complex and ugly, what it does is 1. Extract data from cuhk-03.mat and save as png images. 2. Create 20 classic splits. (Li et al. CVPR'14) 3. Create new split. (Zhong et al. CVPR'17) """ print("Note: if root path is changed, the previously generated json files need to be re-generated (delete them first)") if osp.exists(self.imgs_labeled_dir) and \ osp.exists(self.imgs_detected_dir) and \ osp.exists(self.split_classic_det_json_path) and \ osp.exists(self.split_classic_lab_json_path) and \ osp.exists(self.split_new_det_json_path) and \ osp.exists(self.split_new_lab_json_path): return mkdir_if_missing(self.imgs_detected_dir) mkdir_if_missing(self.imgs_labeled_dir) print("Extract image data from {} and save as png".format(self.raw_mat_path)) mat = h5py.File(self.raw_mat_path, 'r') def _deref(ref): return mat[ref][:].T def _process_images(img_refs, campid, pid, save_dir): img_paths = [] # Note: some persons only have images for one view for imgid, img_ref in enumerate(img_refs): img = _deref(img_ref) # skip empty cell if img.size == 0 or img.ndim < 3: continue # images are saved with the following format, index-1 (ensure uniqueness) # campid: index of camera pair (1-5) # pid: index of person in 'campid'-th camera pair # viewid: index of view, {1, 2} # imgid: index of image, (1-10) viewid = 1 if imgid < 5 else 2 img_name = '{:01d}_{:03d}_{:01d}_{:02d}.png'.format(campid+1, pid+1, viewid, imgid+1) img_path = osp.join(save_dir, img_name) imsave(img_path, img) img_paths.append(img_path) return img_paths def _extract_img(name): print("Processing {} images (extract and save) ...".format(name)) meta_data = [] imgs_dir = self.imgs_detected_dir if name == 'detected' else self.imgs_labeled_dir for campid, camp_ref in enumerate(mat[name][0]): camp = _deref(camp_ref) num_pids = camp.shape[0] for pid in range(num_pids): img_paths = _process_images(camp[pid,:], campid, pid, imgs_dir) assert len(img_paths) > 0, "campid{}-pid{} has no images".format(campid, pid) meta_data.append((campid+1, pid+1, img_paths)) print("done camera pair {} with {} identities".format(campid+1, num_pids)) return meta_data meta_detected = _extract_img('detected') meta_labeled = _extract_img('labeled') def _extract_classic_split(meta_data, test_split): train, test = [], [] num_train_pids, num_test_pids = 0, 0 num_train_imgs, num_test_imgs = 0, 0 for i, (campid, pid, img_paths) in enumerate(meta_data): if [campid, pid] in test_split: for img_path in img_paths: camid = int(osp.basename(img_path).split('_')[2]) test.append((img_path, num_test_pids, camid)) num_test_pids += 1 num_test_imgs += len(img_paths) else: for img_path in img_paths: camid = int(osp.basename(img_path).split('_')[2]) train.append((img_path, num_train_pids, camid)) num_train_pids += 1 num_train_imgs += len(img_paths) return train, num_train_pids, num_train_imgs, test, num_test_pids, num_test_imgs print("Creating classic splits (# = 20) ...") splits_classic_det, splits_classic_lab = [], [] for split_ref in mat['testsets'][0]: test_split = _deref(split_ref).tolist() # create split for detected images train, num_train_pids, num_train_imgs, test, num_test_pids, num_test_imgs = \ _extract_classic_split(meta_detected, test_split) splits_classic_det.append({ 'train': train, 'query': test, 'gallery': test, 'num_train_pids': num_train_pids, 'num_train_imgs': num_train_imgs, 'num_query_pids': num_test_pids, 'num_query_imgs': num_test_imgs, 'num_gallery_pids': num_test_pids, 'num_gallery_imgs': num_test_imgs, }) # create split for labeled images train, num_train_pids, num_train_imgs, test, num_test_pids, num_test_imgs = \ _extract_classic_split(meta_labeled, test_split) splits_classic_lab.append({ 'train': train, 'query': test, 'gallery': test, 'num_train_pids': num_train_pids, 'num_train_imgs': num_train_imgs, 'num_query_pids': num_test_pids, 'num_query_imgs': num_test_imgs, 'num_gallery_pids': num_test_pids, 'num_gallery_imgs': num_test_imgs, }) write_json(splits_classic_det, self.split_classic_det_json_path) write_json(splits_classic_lab, self.split_classic_lab_json_path) def _extract_set(filelist, pids, pid2label, idxs, img_dir, relabel): tmp_set = [] unique_pids = set() for idx in idxs: img_name = filelist[idx][0] camid = int(img_name.split('_')[2]) pid = pids[idx] if relabel: pid = pid2label[pid] img_path = osp.join(img_dir, img_name) tmp_set.append((img_path, int(pid), camid)) unique_pids.add(pid) return tmp_set, len(unique_pids), len(idxs) def _extract_new_split(split_dict, img_dir): train_idxs = split_dict['train_idx'].flatten() - 1 # index-0 pids = split_dict['labels'].flatten() train_pids = set(pids[train_idxs]) pid2label = {pid: label for label, pid in enumerate(train_pids)} query_idxs = split_dict['query_idx'].flatten() - 1 gallery_idxs = split_dict['gallery_idx'].flatten() - 1 filelist = split_dict['filelist'].flatten() train_info = _extract_set(filelist, pids, pid2label, train_idxs, img_dir, relabel=True) query_info = _extract_set(filelist, pids, pid2label, query_idxs, img_dir, relabel=False) gallery_info = _extract_set(filelist, pids, pid2label, gallery_idxs, img_dir, relabel=False) return train_info, query_info, gallery_info print("Creating new splits for detected images (767/700) ...") train_info, query_info, gallery_info = _extract_new_split( loadmat(self.split_new_det_mat_path), self.imgs_detected_dir, ) splits = [{ 'train': train_info[0], 'query': query_info[0], 'gallery': gallery_info[0], 'num_train_pids': train_info[1], 'num_train_imgs': train_info[2], 'num_query_pids': query_info[1], 'num_query_imgs': query_info[2], 'num_gallery_pids': gallery_info[1], 'num_gallery_imgs': gallery_info[2], }] write_json(splits, self.split_new_det_json_path) print("Creating new splits for labeled images (767/700) ...") train_info, query_info, gallery_info = _extract_new_split( loadmat(self.split_new_lab_mat_path), self.imgs_labeled_dir, ) splits = [{ 'train': train_info[0], 'query': query_info[0], 'gallery': gallery_info[0], 'num_train_pids': train_info[1], 'num_train_imgs': train_info[2], 'num_query_pids': query_info[1], 'num_query_imgs': query_info[2], 'num_gallery_pids': gallery_info[1], 'num_gallery_imgs': gallery_info[2], }] write_json(splits, self.split_new_lab_json_path)
def preprocess_split(self): # This function is a bit complex and ugly, what it does is # 1. extract data from cuhk-03.mat and save as png images # 2. create 20 classic splits (Li et al. CVPR'14) # 3. create new split (Zhong et al. CVPR'17) if osp.exists(self.imgs_labeled_dir) \ and osp.exists(self.imgs_detected_dir) \ and osp.exists(self.split_classic_det_json_path) \ and osp.exists(self.split_classic_lab_json_path) \ and osp.exists(self.split_new_det_json_path) \ and osp.exists(self.split_new_lab_json_path): return import h5py from imageio import imwrite from scipy.io import loadmat mkdir_if_missing(self.imgs_detected_dir) mkdir_if_missing(self.imgs_labeled_dir) print('Extract image data from "{}" and save as png'.format( self.raw_mat_path)) mat = h5py.File(self.raw_mat_path, 'r') def _deref(ref): return mat[ref][:].T def _process_images(img_refs, campid, pid, save_dir): img_paths = [] # Note: some persons only have images for one view for imgid, img_ref in enumerate(img_refs): img = _deref(img_ref) if img.size == 0 or img.ndim < 3: continue # skip empty cell # images are saved with the following format, index-1 (ensure uniqueness) # campid: index of camera pair (1-5) # pid: index of person in 'campid'-th camera pair # viewid: index of view, {1, 2} # imgid: index of image, (1-10) viewid = 1 if imgid < 5 else 2 img_name = '{:01d}_{:03d}_{:01d}_{:02d}.png'.format( campid + 1, pid + 1, viewid, imgid + 1) img_path = osp.join(save_dir, img_name) if not osp.isfile(img_path): imwrite(img_path, img) img_paths.append(img_path) return img_paths def _extract_img(image_type): print('Processing {} images ...'.format(image_type)) meta_data = [] imgs_dir = self.imgs_detected_dir if image_type == 'detected' else self.imgs_labeled_dir for campid, camp_ref in enumerate(mat[image_type][0]): camp = _deref(camp_ref) num_pids = camp.shape[0] for pid in range(num_pids): img_paths = _process_images(camp[pid, :], campid, pid, imgs_dir) assert len( img_paths) > 0, 'campid{}-pid{} has no images'.format( campid, pid) meta_data.append((campid + 1, pid + 1, img_paths)) print('- done camera pair {} with {} identities'.format( campid + 1, num_pids)) return meta_data meta_detected = _extract_img('detected') meta_labeled = _extract_img('labeled') def _extract_classic_split(meta_data, test_split): train, test = [], [] num_train_pids, num_test_pids = 0, 0 num_train_imgs, num_test_imgs = 0, 0 for i, (campid, pid, img_paths) in enumerate(meta_data): if [campid, pid] in test_split: for img_path in img_paths: camid = int(osp.basename(img_path).split('_') [2]) - 1 # make it 0-based test.append((img_path, num_test_pids, camid)) num_test_pids += 1 num_test_imgs += len(img_paths) else: for img_path in img_paths: camid = int(osp.basename(img_path).split('_') [2]) - 1 # make it 0-based train.append((img_path, num_train_pids, camid)) num_train_pids += 1 num_train_imgs += len(img_paths) return train, num_train_pids, num_train_imgs, test, num_test_pids, num_test_imgs print('Creating classic splits (# = 20) ...') splits_classic_det, splits_classic_lab = [], [] for split_ref in mat['testsets'][0]: test_split = _deref(split_ref).tolist() # create split for detected images train, num_train_pids, num_train_imgs, test, num_test_pids, num_test_imgs = \ _extract_classic_split(meta_detected, test_split) splits_classic_det.append({ 'train': train, 'query': test, 'gallery': test, 'num_train_pids': num_train_pids, 'num_train_imgs': num_train_imgs, 'num_query_pids': num_test_pids, 'num_query_imgs': num_test_imgs, 'num_gallery_pids': num_test_pids, 'num_gallery_imgs': num_test_imgs }) # create split for labeled images train, num_train_pids, num_train_imgs, test, num_test_pids, num_test_imgs = \ _extract_classic_split(meta_labeled, test_split) splits_classic_lab.append({ 'train': train, 'query': test, 'gallery': test, 'num_train_pids': num_train_pids, 'num_train_imgs': num_train_imgs, 'num_query_pids': num_test_pids, 'num_query_imgs': num_test_imgs, 'num_gallery_pids': num_test_pids, 'num_gallery_imgs': num_test_imgs }) write_json(splits_classic_det, self.split_classic_det_json_path) write_json(splits_classic_lab, self.split_classic_lab_json_path) def _extract_set(filelist, pids, pid2label, idxs, img_dir, relabel): tmp_set = [] unique_pids = set() for idx in idxs: img_name = filelist[idx][0] camid = int(img_name.split('_')[2]) - 1 # make it 0-based pid = pids[idx] if relabel: pid = pid2label[pid] img_path = osp.join(img_dir, img_name) tmp_set.append((img_path, int(pid), camid)) unique_pids.add(pid) return tmp_set, len(unique_pids), len(idxs) def _extract_new_split(split_dict, img_dir): train_idxs = split_dict['train_idx'].flatten() - 1 # index-0 pids = split_dict['labels'].flatten() train_pids = set(pids[train_idxs]) pid2label = {pid: label for label, pid in enumerate(train_pids)} query_idxs = split_dict['query_idx'].flatten() - 1 gallery_idxs = split_dict['gallery_idx'].flatten() - 1 filelist = split_dict['filelist'].flatten() train_info = _extract_set(filelist, pids, pid2label, train_idxs, img_dir, relabel=True) query_info = _extract_set(filelist, pids, pid2label, query_idxs, img_dir, relabel=False) gallery_info = _extract_set(filelist, pids, pid2label, gallery_idxs, img_dir, relabel=False) return train_info, query_info, gallery_info print('Creating new split for detected images (767/700) ...') train_info, query_info, gallery_info = _extract_new_split( loadmat(self.split_new_det_mat_path), self.imgs_detected_dir) split = [{ 'train': train_info[0], 'query': query_info[0], 'gallery': gallery_info[0], 'num_train_pids': train_info[1], 'num_train_imgs': train_info[2], 'num_query_pids': query_info[1], 'num_query_imgs': query_info[2], 'num_gallery_pids': gallery_info[1], 'num_gallery_imgs': gallery_info[2] }] write_json(split, self.split_new_det_json_path) print('Creating new split for labeled images (767/700) ...') train_info, query_info, gallery_info = _extract_new_split( loadmat(self.split_new_lab_mat_path), self.imgs_labeled_dir) split = [{ 'train': train_info[0], 'query': query_info[0], 'gallery': gallery_info[0], 'num_train_pids': train_info[1], 'num_train_imgs': train_info[2], 'num_query_pids': query_info[1], 'num_query_imgs': query_info[2], 'num_gallery_pids': gallery_info[1], 'num_gallery_imgs': gallery_info[2] }] write_json(split, self.split_new_lab_json_path)