def __init__(self, root='data', split_id=0, verbose=True, **kwargs):
    """Load one CUHK01 split and register its train/query/gallery lists.

    Args:
        root: Passed to the parent constructor; all paths are then
            resolved against ``self.root``.
        split_id: Index of the split to pick from ``splits.json``.
        verbose: If true, print the dataset statistics after loading.
        **kwargs: Forwarded untouched to ``self.init_attributes``.

    Raises:
        ValueError: If ``split_id`` is >= the number of stored splits.
    """
    super(CUHK01, self).__init__(root)

    base_dir = osp.join(self.root, self.dataset_dir)
    self.dataset_dir = base_dir
    self.zip_path = osp.join(base_dir, 'CUHK01.zip')
    self.campus_dir = osp.join(base_dir, 'campus')
    self.split_path = osp.join(base_dir, 'splits.json')

    self.extract_file()
    self.check_before_run([self.dataset_dir, self.campus_dir])
    self.prepare_split()

    all_splits = read_json(self.split_path)
    num_splits = len(all_splits)
    if split_id >= num_splits:
        raise ValueError(
            'split_id exceeds range, received {}, but expected between 0 and {}'
            .format(split_id, num_splits - 1))
    chosen = all_splits[split_id]

    # Samples come back from JSON as lists; downstream code gets tuples.
    train, query, gallery = (
        [tuple(sample) for sample in chosen[subset]]
        for subset in ('train', 'query', 'gallery'))

    self.init_attributes(train, query, gallery, **kwargs)

    if verbose:
        self.print_dataset_statistics(self.train, self.query, self.gallery)
def __init__(self, root='data', split_id=0, min_seq_len=0, verbose=True, **kwargs):
    """Load one PRID2011 (multi-shot) split as video tracklets.

    Args:
        root: Passed to the parent constructor; paths are resolved
            against ``self.root``.
        split_id: Index of the split to pick from ``splits_prid2011.json``.
        min_seq_len: Accepted for interface compatibility; not read here.
        verbose: If true, print a banner and the dataset statistics.
        **kwargs: Ignored by this constructor.

    Raises:
        ValueError: If ``split_id`` is >= the number of stored splits.
    """
    super(PRID2011, self).__init__(root)

    self.dataset_dir = osp.join(self.root, self.dataset_dir)
    self.split_path = osp.join(self.dataset_dir, 'splits_prid2011.json')
    multi_shot_dir = osp.join(self.dataset_dir, 'prid_2011', 'multi_shot')
    self.cam_a_path = osp.join(multi_shot_dir, 'cam_a')
    self.cam_b_path = osp.join(multi_shot_dir, 'cam_b')

    self._check_before_run()

    all_splits = read_json(self.split_path)
    num_splits = len(all_splits)
    if split_id >= num_splits:
        raise ValueError("split_id exceeds range, received {}, but expected between 0 and {}".format(split_id, num_splits - 1))

    chosen = all_splits[split_id]
    train_dirs = chosen['train']
    test_dirs = chosen['test']
    print("# train identites: {}, # test identites {}".format(len(train_dirs), len(test_dirs)))

    # Train uses both cameras; query is camera 1 only, gallery camera 2 only.
    train = self._process_data(train_dirs, cam1=True, cam2=True)
    query = self._process_data(test_dirs, cam1=True, cam2=False)
    gallery = self._process_data(test_dirs, cam1=False, cam2=True)

    if verbose:
        print("=> PRID2011 loaded")
        self.print_dataset_statistics(train, query, gallery)

    self.train, self.query, self.gallery = train, query, gallery

    # The middle element returned by get_videodata_info is not stored.
    train_info = self.get_videodata_info(self.train)
    self.num_train_pids, _, self.num_train_cams = train_info
    query_info = self.get_videodata_info(self.query)
    self.num_query_pids, _, self.num_query_cams = query_info
    gallery_info = self.get_videodata_info(self.gallery)
    self.num_gallery_pids, _, self.num_gallery_cams = gallery_info
def __init__(self, root='data', split_id=0, verbose=True, **kwargs):
    """Load one i-LIDS (image) split and compute its summary counts.

    Args:
        root: Passed to the parent constructor; paths are resolved
            against ``self.root``.
        split_id: Index of the split to pick from ``splits.json``.
        verbose: If true, print the dataset statistics.
        **kwargs: Ignored by this constructor.

    Raises:
        ValueError: If ``split_id`` is >= the number of stored splits.
    """
    super(iLIDS, self).__init__(root)

    base_dir = osp.join(self.root, self.dataset_dir)
    self.dataset_dir = base_dir
    self.dataset_url = 'http://www.eecs.qmul.ac.uk/~jason/data/i-LIDS_Pedestrian.tgz'
    self.data_dir = osp.join(base_dir, 'i-LIDS_Pedestrian/Persons')
    self.split_path = osp.join(base_dir, 'splits.json')

    self.download_data()
    self.check_before_run([self.dataset_dir, self.data_dir])
    self.prepare_split()

    all_splits = read_json(self.split_path)
    num_splits = len(all_splits)
    if split_id >= num_splits:
        raise ValueError('split_id exceeds range, received {}, but expected between 0 and {}'.format(split_id, num_splits - 1))

    train, query, gallery = self.process_split(all_splits[split_id])

    if verbose:
        self.print_dataset_statistics(train, query, gallery)

    self.train, self.query, self.gallery = train, query, gallery

    train_info = self.get_imagedata_info(self.train)
    self.num_train_pids, self.num_train_imgs, self.num_train_cams = train_info
    query_info = self.get_imagedata_info(self.query)
    self.num_query_pids, self.num_query_imgs, self.num_query_cams = query_info
    gallery_info = self.get_imagedata_info(self.gallery)
    self.num_gallery_pids, self.num_gallery_imgs, self.num_gallery_cams = gallery_info
def __init__(self, root='data', split_id=0, verbose=True, **kwargs):
    """Load one CUHK01 split and compute its summary counts.

    Args:
        root: Directory that contains the dataset folder.
        split_id: Index of the split to pick from ``splits.json``.
        verbose: If true, print a banner and the dataset statistics.
        **kwargs: Ignored by this constructor.

    Raises:
        ValueError: If ``split_id`` is >= the number of stored splits.
    """
    super(CUHK01, self).__init__()

    base_dir = osp.join(root, self.dataset_dir)
    self.dataset_dir = base_dir
    self.zip_path = osp.join(base_dir, 'CUHK01.zip')
    self.campus_dir = osp.join(base_dir, 'campus')
    self.split_path = osp.join(base_dir, 'splits.json')

    self._extract_file()
    self._check_before_run()
    self._prepare_split()

    all_splits = read_json(self.split_path)
    num_splits = len(all_splits)
    if split_id >= num_splits:
        raise ValueError("split_id exceeds range, received {}, but expected between 0 and {}".format(split_id, num_splits - 1))
    chosen = all_splits[split_id]

    # Samples come back from JSON as lists; store them as tuples.
    train, query, gallery = (
        [tuple(sample) for sample in chosen[subset]]
        for subset in ('train', 'query', 'gallery'))

    if verbose:
        print("=> CUHK01 loaded")
        self.print_dataset_statistics(train, query, gallery)

    self.train, self.query, self.gallery = train, query, gallery

    train_info = self.get_imagedata_info(self.train)
    self.num_train_pids, self.num_train_imgs, self.num_train_cams = train_info
    query_info = self.get_imagedata_info(self.query)
    self.num_query_pids, self.num_query_imgs, self.num_query_cams = query_info
    gallery_info = self.get_imagedata_info(self.gallery)
    self.num_gallery_pids, self.num_gallery_imgs, self.num_gallery_cams = gallery_info
def __init__(self,
             root='data',
             split_id=0,
             cuhk03_labeled=False,
             cuhk03_classic_split=False,
             verbose=True,
             **kwargs):
    """Load one CUHK03 split (labeled/detected, classic/new protocol).

    Args:
        root: Passed to the parent constructor; paths are resolved
            against ``self.root``.
        split_id: Index of the split to pick from the selected JSON file.
        cuhk03_labeled: Use the "labeled" split files instead of the
            "detected" ones.
        cuhk03_classic_split: Use the "classic" split files instead of the
            "new" ones.
        verbose: If true, print the dataset statistics after loading.
        **kwargs: Forwarded untouched to ``self.init_attributes``.

    Raises:
        ValueError: If ``split_id`` is >= the number of stored splits.
    """
    super(CUHK03, self).__init__(root)

    self.dataset_dir = osp.join(self.root, self.dataset_dir)
    self.data_dir = osp.join(self.dataset_dir, 'cuhk03_release')
    self.raw_mat_path = osp.join(self.data_dir, 'cuhk-03.mat')

    self.imgs_detected_dir = osp.join(self.dataset_dir, 'images_detected')
    self.imgs_labeled_dir = osp.join(self.dataset_dir, 'images_labeled')

    self.split_classic_det_json_path = osp.join(
        self.dataset_dir, 'splits_classic_detected.json')
    self.split_classic_lab_json_path = osp.join(
        self.dataset_dir, 'splits_classic_labeled.json')

    self.split_new_det_json_path = osp.join(self.dataset_dir,
                                            'splits_new_detected.json')
    self.split_new_lab_json_path = osp.join(self.dataset_dir,
                                            'splits_new_labeled.json')

    self.split_new_det_mat_path = osp.join(
        self.dataset_dir, 'cuhk03_new_protocol_config_detected.mat')
    self.split_new_lab_mat_path = osp.join(
        self.dataset_dir, 'cuhk03_new_protocol_config_labeled.mat')

    required_files = [
        self.dataset_dir,
        self.data_dir,
        self.raw_mat_path,
        self.split_new_det_mat_path,
        self.split_new_lab_mat_path,
    ]
    self.check_before_run(required_files)

    self.preprocess_split()

    if cuhk03_labeled:
        split_path = self.split_classic_lab_json_path if cuhk03_classic_split else self.split_new_lab_json_path
    else:
        split_path = self.split_classic_det_json_path if cuhk03_classic_split else self.split_new_det_json_path

    splits = read_json(split_path)
    # Explicit check instead of ``assert``: assertions are stripped under
    # ``python -O``, which would turn a bad split_id into an IndexError.
    if split_id >= len(splits):
        raise ValueError(
            'Condition split_id ({}) < len(splits) ({}) is false'.format(
                split_id, len(splits)))
    split = splits[split_id]

    train = split['train']
    query = split['query']
    gallery = split['gallery']

    self.init_attributes(train, query, gallery, **kwargs)

    if verbose:
        self.print_dataset_statistics(self.train, self.query, self.gallery)
def process_dir(self, dir_path, json_path, relabel):
    """Build (or load from cache) the tracklet list for one directory.

    If ``json_path`` already exists, its ``'tracklets'`` entry is returned
    as-is. Otherwise ``dir_path`` is scanned as
    ``<dir_path>/<pid>/<tracklet>/*.jpg``, the result is written to
    ``json_path`` via ``write_json`` and returned.

    Args:
        dir_path: Directory whose entries are named by integer person id.
        json_path: Cache file read with ``read_json`` / written with
            ``write_json``.
        relabel: If true, person ids are replaced by indices obtained from
            enumerating the set of ids found under ``dir_path``.

    Returns:
        A list of ``(img_paths, pid, camid)`` entries, where ``img_paths``
        is a tuple of file paths (when freshly generated).

    Raises:
        ValueError: If a directory name under ``dir_path`` is not an
            integer.
    """
    if osp.exists(json_path):
        split = read_json(json_path)
        return split['tracklets']

    print('=> Generating split json file (** this might take a while **)')
    pdirs = glob.glob(osp.join(dir_path, '*'))  # avoid .DS_Store
    print('Processing "{}" with {} person identities'.format(
        dir_path, len(pdirs)))

    pid_container = set()
    for pdir in pdirs:
        pid_container.add(int(osp.basename(pdir)))
    pid2label = {pid: label for label, pid in enumerate(pid_container)}

    tracklets = []
    for pdir in pdirs:
        pid = int(osp.basename(pdir))
        if relabel:
            pid = pid2label[pid]

        for tdir in glob.glob(osp.join(pdir, '*')):
            raw_img_paths = glob.glob(osp.join(tdir, '*.jpg'))
            num_imgs = len(raw_img_paths)
            if num_imgs < self.min_seq_len:
                continue

            img_paths = []
            for img_idx in range(num_imgs):
                # some tracklet starts from 0002 instead of 0001
                img_idx_name = 'F' + str(img_idx + 1).zfill(4)
                res = glob.glob(
                    osp.join(tdir, '*' + img_idx_name + '*.jpg'))
                if len(res) == 0:
                    print(
                        'Warn: index name {} in {} is missing, jump to next'
                        .format(img_idx_name, tdir))
                    continue
                img_paths.append(res[0])

            # An empty tracklet directory (possible when min_seq_len is 0)
            # or one whose files match no frame index leaves nothing to
            # read the camera id from; skip it instead of crashing on
            # img_paths[0].
            if not img_paths:
                print('Warn: no indexed images found in {}, skip tracklet'
                      .format(tdir))
                continue

            img_name = osp.basename(img_paths[0])
            if img_name.find('_') == -1:
                # old naming format: 0001C6F0099X30823.jpg
                camid = int(img_name[5]) - 1
            else:
                # new naming format: 0001_C6_F0099_X30823.jpg
                camid = int(img_name[6]) - 1
            tracklets.append((tuple(img_paths), pid, camid))

    print('Saving split to {}'.format(json_path))
    split_dict = {
        'tracklets': tracklets,
    }
    write_json(split_dict, json_path)

    return tracklets
def __init__(self, root='data', split_id=0, verbose=True, **kwargs):
    """Load one VIPeR split and optionally print a summary table.

    Args:
        root: Directory that contains the dataset folder.
        split_id: Index of the split to pick from ``splits.json``.
        verbose: If true, print the statistics table.
        **kwargs: Ignored by this constructor.

    Raises:
        ValueError: If ``split_id`` is >= the number of stored splits.
    """
    super(VIPeR, self).__init__()

    base_dir = osp.join(root, self.dataset_dir)
    self.dataset_dir = base_dir
    self.dataset_url = 'http://users.soe.ucsc.edu/~manduchi/VIPeR.v1.0.zip'
    self.cam_a_path = osp.join(base_dir, 'VIPeR', 'cam_a')
    self.cam_b_path = osp.join(base_dir, 'VIPeR', 'cam_b')
    self.split_path = osp.join(base_dir, 'splits.json')

    self._download_data()
    self._check_before_run()
    self._prepare_split()

    all_splits = read_json(self.split_path)
    num_splits = len(all_splits)
    if split_id >= num_splits:
        raise ValueError("split_id exceeds range, received {}, but expected between 0 and {}".format(split_id, num_splits - 1))
    chosen = all_splits[split_id]

    # query and gallery share the same images
    train, query, gallery = (
        [tuple(sample) for sample in chosen[subset]]
        for subset in ('train', 'query', 'gallery'))

    # Identity counts are read from the split file, image counts are
    # derived from the loaded lists.
    num_train_pids = chosen['num_train_pids']
    num_query_pids = chosen['num_query_pids']
    num_gallery_pids = chosen['num_gallery_pids']
    num_train_imgs, num_query_imgs, num_gallery_imgs = len(train), len(query), len(gallery)

    # Totals add train and query only.
    num_total_pids = num_train_pids + num_query_pids
    num_total_imgs = num_train_imgs + num_query_imgs

    if verbose:
        rule = " ------------------------------"
        print("=> VIPeR loaded")
        print("Dataset statistics:")
        print(rule)
        print(" subset | # ids | # images")
        print(rule)
        print(" train | {:5d} | {:8d}".format(num_train_pids, num_train_imgs))
        print(" query | {:5d} | {:8d}".format(num_query_pids, num_query_imgs))
        print(" gallery | {:5d} | {:8d}".format(num_gallery_pids, num_gallery_imgs))
        print(rule)
        print(" total | {:5d} | {:8d}".format(num_total_pids, num_total_imgs))
        print(rule)

    self.train, self.query, self.gallery = train, query, gallery

    self.num_train_pids = num_train_pids
    self.num_query_pids = num_query_pids
    self.num_gallery_pids = num_gallery_pids
def __init__(self, root='data', split_id=0, verbose=True, **kwargs):
    """Load one GRID split and compute its summary counts.

    Args:
        root: Passed to the parent constructor; paths are resolved
            against ``self.root``.
        split_id: Index of the split to pick from ``splits.json``.
        verbose: If true, print the dataset statistics.
        **kwargs: Ignored by this constructor.

    Raises:
        ValueError: If ``split_id`` is >= the number of stored splits.
    """
    super(GRID, self).__init__(root)

    self.dataset_dir = osp.join(self.root, self.dataset_dir)
    self.dataset_url = 'http://personal.ie.cuhk.edu.hk/~ccloy/files/datasets/underground_reid.zip'
    grid_dir = osp.join(self.dataset_dir, 'underground_reid')
    self.probe_path = osp.join(grid_dir, 'probe')
    self.gallery_path = osp.join(grid_dir, 'gallery')
    self.split_mat_path = osp.join(grid_dir, 'features_and_partitions.mat')
    self.split_path = osp.join(self.dataset_dir, 'splits.json')

    self.download_data()
    self.check_before_run([
        self.dataset_dir,
        self.probe_path,
        self.gallery_path,
        self.split_mat_path,
    ])
    self.prepare_split()

    all_splits = read_json(self.split_path)
    num_splits = len(all_splits)
    if split_id >= num_splits:
        raise ValueError(
            'split_id exceeds range, received {}, but expected between 0 and {}'
            .format(split_id, num_splits - 1))
    chosen = all_splits[split_id]

    # Samples come back from JSON as lists; store them as tuples.
    train, query, gallery = (
        [tuple(sample) for sample in chosen[subset]]
        for subset in ('train', 'query', 'gallery'))

    if verbose:
        self.print_dataset_statistics(train, query, gallery)

    self.train, self.query, self.gallery = train, query, gallery

    train_info = self.get_imagedata_info(self.train)
    self.num_train_pids, self.num_train_imgs, self.num_train_cams = train_info
    query_info = self.get_imagedata_info(self.query)
    self.num_query_pids, self.num_query_imgs, self.num_query_cams = query_info
    gallery_info = self.get_imagedata_info(self.gallery)
    self.num_gallery_pids, self.num_gallery_imgs, self.num_gallery_cams = gallery_info
def __init__(self, root='data', split_id=0, min_seq_len=0, verbose=True, **kwargs):
    """Load one PRID450S split and compute its summary counts.

    Args:
        root: Passed to the parent constructor; paths are resolved
            against ``self.root``.
        split_id: Index of the split to pick from ``splits.json``.
        min_seq_len: Accepted for interface compatibility; not read here.
        verbose: If true, print the dataset statistics.
        **kwargs: Ignored by this constructor.

    Raises:
        ValueError: If ``split_id`` is >= the number of stored splits.
    """
    super(PRID450S, self).__init__(root)

    base_dir = osp.join(self.root, self.dataset_dir)
    self.dataset_dir = base_dir
    self.dataset_url = 'https://files.icg.tugraz.at/f/8c709245bb/?raw=1'
    self.split_path = osp.join(base_dir, 'splits.json')
    self.cam_a_dir = osp.join(base_dir, 'cam_a')
    self.cam_b_dir = osp.join(base_dir, 'cam_b')

    self.download_data()
    self.check_before_run([self.dataset_dir, self.cam_a_dir, self.cam_b_dir])
    self.prepare_split()

    all_splits = read_json(self.split_path)
    num_splits = len(all_splits)
    if split_id >= num_splits:
        raise ValueError(
            'split_id exceeds range, received {}, but expected between 0 and {}'
            .format(split_id, num_splits - 1))
    chosen = all_splits[split_id]

    # Samples come back from JSON as lists; store them as tuples.
    train, query, gallery = (
        [tuple(sample) for sample in chosen[subset]]
        for subset in ('train', 'query', 'gallery'))

    if verbose:
        self.print_dataset_statistics(train, query, gallery)

    self.train, self.query, self.gallery = train, query, gallery

    train_info = self.get_imagedata_info(self.train)
    self.num_train_pids, self.num_train_imgs, self.num_train_cams = train_info
    query_info = self.get_imagedata_info(self.query)
    self.num_query_pids, self.num_query_imgs, self.num_query_cams = query_info
    gallery_info = self.get_imagedata_info(self.gallery)
    self.num_gallery_pids, self.num_gallery_imgs, self.num_gallery_cams = gallery_info
def __init__(self,
             root='data',
             split_id=0,
             cuhk03_labeled=False,
             cuhk03_classic_split=False,
             verbose=True,
             **kwargs):
    """Load one CUHK03 split (labeled/detected, classic/new protocol).

    Args:
        root: Directory that contains the dataset folder.
        split_id: Index of the split to pick from the selected JSON file.
        cuhk03_labeled: Use the "labeled" split files instead of the
            "detected" ones.
        cuhk03_classic_split: Use the "classic" split files instead of the
            "new" ones.
        verbose: If true, print a banner and the dataset statistics.
        **kwargs: Ignored by this constructor.

    Raises:
        ValueError: If ``split_id`` is >= the number of stored splits.
    """
    super(CUHK03, self).__init__()

    self.dataset_dir = osp.join(root, self.dataset_dir)
    self.data_dir = osp.join(self.dataset_dir, 'cuhk03_release')
    self.raw_mat_path = osp.join(self.data_dir, 'cuhk-03.mat')

    self.imgs_detected_dir = osp.join(self.dataset_dir, 'images_detected')
    self.imgs_labeled_dir = osp.join(self.dataset_dir, 'images_labeled')

    self.split_classic_det_json_path = osp.join(self.dataset_dir, 'splits_classic_detected.json')
    self.split_classic_lab_json_path = osp.join(self.dataset_dir, 'splits_classic_labeled.json')

    self.split_new_det_json_path = osp.join(self.dataset_dir, 'splits_new_detected.json')
    self.split_new_lab_json_path = osp.join(self.dataset_dir, 'splits_new_labeled.json')

    self.split_new_det_mat_path = osp.join(self.dataset_dir, 'cuhk03_new_protocol_config_detected.mat')
    self.split_new_lab_mat_path = osp.join(self.dataset_dir, 'cuhk03_new_protocol_config_labeled.mat')

    self._check_before_run()
    self._preprocess()

    if cuhk03_labeled:
        image_type = 'labeled'
        split_path = self.split_classic_lab_json_path if cuhk03_classic_split else self.split_new_lab_json_path
    else:
        image_type = 'detected'
        split_path = self.split_classic_det_json_path if cuhk03_classic_split else self.split_new_det_json_path

    splits = read_json(split_path)
    # Explicit check instead of ``assert``: assertions are stripped under
    # ``python -O``, which would turn a bad split_id into an IndexError.
    if split_id >= len(splits):
        raise ValueError(
            "Condition split_id ({}) < len(splits) ({}) is false".format(split_id, len(splits)))
    split = splits[split_id]
    print("Split index = {}".format(split_id))

    train = split['train']
    query = split['query']
    gallery = split['gallery']

    if verbose:
        print("=> CUHK03 ({}) loaded".format(image_type))
        self.print_dataset_statistics(train, query, gallery)

    self.train = train
    self.query = query
    self.gallery = gallery

    self.num_train_pids, self.num_train_imgs, self.num_train_cams = self.get_imagedata_info(self.train)
    self.num_query_pids, self.num_query_imgs, self.num_query_cams = self.get_imagedata_info(self.query)
    self.num_gallery_pids, self.num_gallery_imgs, self.num_gallery_cams = self.get_imagedata_info(self.gallery)
def __init__(self, root='data', split_id=0, verbose=True, **kwargs):
    """Load one iLIDS-VID split as video tracklets.

    Args:
        root: Passed to the parent constructor; paths are resolved
            against ``self.root``.
        split_id: Index of the split to pick from ``splits.json``.
        verbose: If true, print a banner and the dataset statistics.
        **kwargs: Ignored by this constructor.

    Raises:
        ValueError: If ``split_id`` is >= the number of stored splits.
    """
    super(iLIDSVID, self).__init__(root)

    base_dir = osp.join(self.root, self.dataset_dir)
    self.dataset_dir = base_dir
    self.dataset_url = 'http://www.eecs.qmul.ac.uk/~xiatian/iLIDS-VID/iLIDS-VID.tar'
    self.data_dir = osp.join(base_dir, 'i-LIDS-VID')
    self.split_dir = osp.join(base_dir, 'train-test people splits')
    self.split_mat_path = osp.join(self.split_dir, 'train_test_splits_ilidsvid.mat')
    self.split_path = osp.join(base_dir, 'splits.json')
    self.cam_1_path = osp.join(base_dir, 'i-LIDS-VID/sequences/cam1')
    self.cam_2_path = osp.join(base_dir, 'i-LIDS-VID/sequences/cam2')

    self._download_data()
    self._check_before_run()
    self._prepare_split()

    all_splits = read_json(self.split_path)
    num_splits = len(all_splits)
    if split_id >= num_splits:
        raise ValueError(
            "split_id exceeds range, received {}, but expected between 0 and {}"
            .format(split_id, num_splits - 1))

    chosen = all_splits[split_id]
    train_dirs = chosen['train']
    test_dirs = chosen['test']
    print("# train identites: {}, # test identites {}".format(
        len(train_dirs), len(test_dirs)))

    # Train uses both cameras; query is camera 1 only, gallery camera 2 only.
    train = self._process_data(train_dirs, cam1=True, cam2=True)
    query = self._process_data(test_dirs, cam1=True, cam2=False)
    gallery = self._process_data(test_dirs, cam1=False, cam2=True)

    if verbose:
        print("=> iLIDS-VID loaded")
        self.print_dataset_statistics(train, query, gallery)

    self.train, self.query, self.gallery = train, query, gallery

    # The middle element returned by get_videodata_info is not stored.
    train_info = self.get_videodata_info(self.train)
    self.num_train_pids, _, self.num_train_cams = train_info
    query_info = self.get_videodata_info(self.query)
    self.num_query_pids, _, self.num_query_cams = query_info
    gallery_info = self.get_videodata_info(self.gallery)
    self.num_gallery_pids, _, self.num_gallery_cams = gallery_info
def __init__(self, root='data', split_id=0, verbose=True, **kwargs):
    """Load one VIPeR split and compute its summary counts.

    Args:
        root: Passed to the parent constructor; paths are resolved
            against ``self.root``.
        split_id: Index of the split to pick from ``splits.json``.
        verbose: If true, print the dataset statistics.
        **kwargs: Ignored by this constructor.

    Raises:
        ValueError: If ``split_id`` is >= the number of stored splits.
    """
    super(VIPeR, self).__init__(root)

    base_dir = osp.join(self.root, self.dataset_dir)
    self.dataset_dir = base_dir
    self.dataset_url = 'http://users.soe.ucsc.edu/~manduchi/VIPeR.v1.0.zip'
    self.cam_a_dir = osp.join(base_dir, 'VIPeR', 'cam_a')
    self.cam_b_dir = osp.join(base_dir, 'VIPeR', 'cam_b')
    self.split_path = osp.join(base_dir, 'splits.json')

    self.download_data()
    self.check_before_run([self.dataset_dir, self.cam_a_dir, self.cam_b_dir])
    self.prepare_split()

    all_splits = read_json(self.split_path)
    num_splits = len(all_splits)
    if split_id >= num_splits:
        raise ValueError(
            'split_id exceeds range, received {}, but expected between 0 and {}'
            .format(split_id, num_splits - 1))
    chosen = all_splits[split_id]

    # note: query and gallery share the same images
    train, query, gallery = (
        [tuple(sample) for sample in chosen[subset]]
        for subset in ('train', 'query', 'gallery'))

    if verbose:
        self.print_dataset_statistics(train, query, gallery)

    self.train, self.query, self.gallery = train, query, gallery

    train_info = self.get_imagedata_info(self.train)
    self.num_train_pids, self.num_train_imgs, self.num_train_cams = train_info
    query_info = self.get_imagedata_info(self.query)
    self.num_query_pids, self.num_query_imgs, self.num_query_cams = query_info
    gallery_info = self.get_imagedata_info(self.gallery)
    self.num_gallery_pids, self.num_gallery_imgs, self.num_gallery_cams = gallery_info
def __init__(self, root='data', split_id=0, verbose=True, **kwargs):
    """Load one iLIDS-VID split and register it via ``init_attributes``.

    Args:
        root: Passed to the parent constructor; paths are resolved
            against ``self.root``.
        split_id: Index of the split to pick from ``splits.json``.
        verbose: If true, print the dataset statistics after loading.
        **kwargs: Forwarded untouched to ``self.init_attributes``.

    Raises:
        ValueError: If ``split_id`` is >= the number of stored splits.
    """
    super(iLIDSVID, self).__init__(root)

    base_dir = osp.join(self.root, self.dataset_dir)
    self.dataset_dir = base_dir
    self.dataset_url = 'http://www.eecs.qmul.ac.uk/~xiatian/iLIDS-VID/iLIDS-VID.tar'
    self.data_dir = osp.join(base_dir, 'i-LIDS-VID')
    self.split_dir = osp.join(base_dir, 'train-test people splits')
    self.split_mat_path = osp.join(self.split_dir, 'train_test_splits_ilidsvid.mat')
    self.split_path = osp.join(base_dir, 'splits.json')
    self.cam_1_path = osp.join(base_dir, 'i-LIDS-VID/sequences/cam1')
    self.cam_2_path = osp.join(base_dir, 'i-LIDS-VID/sequences/cam2')

    self.download_data()
    self.check_before_run([self.dataset_dir, self.data_dir, self.split_dir])
    self.prepare_split()

    all_splits = read_json(self.split_path)
    num_splits = len(all_splits)
    if split_id >= num_splits:
        raise ValueError(
            'split_id exceeds range, received {}, but expected between 0 and {}'
            .format(split_id, num_splits - 1))

    chosen = all_splits[split_id]
    train_dirs = chosen['train']
    test_dirs = chosen['test']

    # Train uses both cameras; query is camera 1 only, gallery camera 2 only.
    train = self.process_data(train_dirs, cam1=True, cam2=True)
    query = self.process_data(test_dirs, cam1=True, cam2=False)
    gallery = self.process_data(test_dirs, cam1=False, cam2=True)

    self.init_attributes(train, query, gallery, **kwargs)

    if verbose:
        self.print_dataset_statistics(self.train, self.query, self.gallery)
def __init__(self, root='data', split_id=0, verbose=True, **kwargs):
    """Load one PRID (single-shot) split and compute its summary counts.

    Args:
        root: Passed to the parent constructor; paths are resolved
            against ``self.root``.
        split_id: Index of the split to pick from
            ``splits_single_shot.json``.
        verbose: If true, print a banner and the dataset statistics.
        **kwargs: Ignored by this constructor.

    Raises:
        ValueError: If ``split_id`` is >= the number of stored splits.
    """
    super(PRID, self).__init__(root)

    self.dataset_dir = osp.join(self.root, self.dataset_dir)
    single_shot_dir = osp.join(self.dataset_dir, 'prid_2011', 'single_shot')
    self.cam_a_dir = osp.join(single_shot_dir, 'cam_a')
    self.cam_b_dir = osp.join(single_shot_dir, 'cam_b')
    self.split_path = osp.join(self.dataset_dir, 'splits_single_shot.json')

    self.check_before_run()
    self.prepare_split()

    all_splits = read_json(self.split_path)
    num_splits = len(all_splits)
    if split_id >= num_splits:
        raise ValueError(
            'split_id exceeds range, received {}, but expected between 0 and {}'
            .format(split_id, num_splits - 1))

    train, query, gallery = self.process_split(all_splits[split_id])

    if verbose:
        print('=> PRID loaded')
        self.print_dataset_statistics(train, query, gallery)

    self.train, self.query, self.gallery = train, query, gallery

    train_info = self.get_imagedata_info(self.train)
    self.num_train_pids, self.num_train_imgs, self.num_train_cams = train_info
    query_info = self.get_imagedata_info(self.query)
    self.num_query_pids, self.num_query_imgs, self.num_query_cams = query_info
    gallery_info = self.get_imagedata_info(self.gallery)
    self.num_gallery_pids, self.num_gallery_imgs, self.num_gallery_cams = gallery_info
def __init__(self, root='data', split_id=0, verbose=True, **kwargs):
    """Load one iLIDS single-shot split and optionally print a summary.

    Args:
        root: Directory that contains the dataset folder.
        split_id: Index of the split to pick from ``splits.json``.
        verbose: If true, print the statistics table.
        **kwargs: Ignored by this constructor.

    Raises:
        ValueError: If ``split_id`` is >= the number of stored splits.
    """
    super(iLIDS, self).__init__()

    base_dir = osp.join(root, self.dataset_dir)
    self.dataset_dir = base_dir
    self.dataset_url = 'http://www.eecs.qmul.ac.uk/~xiatian/iLIDS-VID/iLIDS-VID.tar'
    self.data_dir = osp.join(base_dir, 'i-LIDS-VID')
    self.split_dir = osp.join(base_dir, 'train-test people splits')
    self.split_mat_path = osp.join(self.split_dir, 'train_test_splits_ilidsvid.mat')
    self.split_path = osp.join(base_dir, 'splits.json')
    # differ from video
    self.cam_1_path = osp.join(base_dir, 'i-LIDS-VID/images/cam1')
    self.cam_2_path = osp.join(base_dir, 'i-LIDS-VID/images/cam2')

    self._download_data()
    self._check_before_run()
    self._prepare_split()

    all_splits = read_json(self.split_path)
    num_splits = len(all_splits)
    if split_id >= num_splits:
        raise ValueError(
            "split_id exceeds range, received {}, but expected between 0 and {}"
            .format(split_id, num_splits - 1))

    chosen = all_splits[split_id]
    train_dirs = chosen['train']
    test_dirs = chosen['test']
    print("# train identites: {}, # test identites {}".format(
        len(train_dirs), len(test_dirs)))

    # Train uses both cameras; query is camera 1 only, gallery camera 2 only.
    train, num_train_imgs, num_train_pids = self._process_data(
        train_dirs, cam1=True, cam2=True)
    query, num_query_imgs, num_query_pids = self._process_data(
        test_dirs, cam1=True, cam2=False)
    gallery, num_gallery_imgs, num_gallery_pids = self._process_data(
        test_dirs, cam1=False, cam2=True)

    # Totals add train and query only.
    num_total_pids = num_train_pids + num_query_pids
    num_total_imgs = num_train_imgs + num_query_imgs

    if verbose:
        rule = " ------------------------------"
        print("=> iLIDS (single-shot) loaded")
        print("Dataset statistics:")
        print(rule)
        print(" subset | # ids | # images")
        print(rule)
        print(" train | {:5d} | {:8d}".format(num_train_pids, num_train_imgs))
        print(" query | {:5d} | {:8d}".format(num_query_pids, num_query_imgs))
        print(" gallery | {:5d} | {:8d}".format(num_gallery_pids, num_gallery_imgs))
        print(rule)
        print(" total | {:5d} | {:8d}".format(num_total_pids, num_total_imgs))
        print(rule)

    self.train, self.query, self.gallery = train, query, gallery

    self.num_train_pids = num_train_pids
    self.num_query_pids = num_query_pids
    self.num_gallery_pids = num_gallery_pids
def __init__(self, root='data', split_id=0, min_seq_len=0, verbose=True, **kwargs):
    """Load one PRID2011 (multi-shot) split and optionally print a summary.

    Args:
        root: Directory that contains the dataset folder.
        split_id: Index of the split to pick from ``splits_prid2011.json``.
        min_seq_len: Accepted for interface compatibility; not read here.
        verbose: If true, print the statistics table.
        **kwargs: Ignored by this constructor.

    Raises:
        ValueError: If ``split_id`` is >= the number of stored splits.
    """
    self.dataset_dir = osp.join(root, self.dataset_dir)
    self.split_path = osp.join(self.dataset_dir, 'splits_prid2011.json')
    multi_shot_dir = osp.join(self.dataset_dir, 'prid_2011', 'multi_shot')
    self.cam_a_path = osp.join(multi_shot_dir, 'cam_a')
    self.cam_b_path = osp.join(multi_shot_dir, 'cam_b')

    self._check_before_run()

    all_splits = read_json(self.split_path)
    num_splits = len(all_splits)
    if split_id >= num_splits:
        raise ValueError(
            "split_id exceeds range, received {}, but expected between 0 and {}"
            .format(split_id, num_splits - 1))

    chosen = all_splits[split_id]
    train_dirs = chosen['train']
    test_dirs = chosen['test']
    print("# train identites: {}, # test identites {}".format(
        len(train_dirs), len(test_dirs)))

    # Train uses both cameras; query is camera 1 only, gallery camera 2 only.
    train, num_train_tracklets, num_train_pids, num_imgs_train = self._process_data(
        train_dirs, cam1=True, cam2=True)
    query, num_query_tracklets, num_query_pids, num_imgs_query = self._process_data(
        test_dirs, cam1=True, cam2=False)
    gallery, num_gallery_tracklets, num_gallery_pids, num_imgs_gallery = self._process_data(
        test_dirs, cam1=False, cam2=True)

    # Per-tracklet image counts of all three subsets, combined with ``+``.
    num_imgs_per_tracklet = num_imgs_train + num_imgs_query + num_imgs_gallery
    min_num = np.min(num_imgs_per_tracklet)
    max_num = np.max(num_imgs_per_tracklet)
    avg_num = np.mean(num_imgs_per_tracklet)

    # Identity total adds train and query only; tracklet total adds all.
    num_total_pids = num_train_pids + num_query_pids
    num_total_tracklets = num_train_tracklets + num_query_tracklets + num_gallery_tracklets

    if verbose:
        rule = " ------------------------------"
        print("=> PRID2011 loaded")
        print("Dataset statistics:")
        print(rule)
        print(" subset | # ids | # tracklets")
        print(rule)
        print(" train | {:5d} | {:8d}".format(num_train_pids, num_train_tracklets))
        print(" query | {:5d} | {:8d}".format(num_query_pids, num_query_tracklets))
        print(" gallery | {:5d} | {:8d}".format(num_gallery_pids, num_gallery_tracklets))
        print(rule)
        print(" total | {:5d} | {:8d}".format(num_total_pids, num_total_tracklets))
        print(" number of images per tracklet: {} ~ {}, average {:.1f}".format(min_num, max_num, avg_num))
        print(rule)

    self.train, self.query, self.gallery = train, query, gallery

    self.num_train_pids = num_train_pids
    self.num_query_pids = num_query_pids
    self.num_gallery_pids = num_gallery_pids
def _process_dir(self, dir_path, json_path, relabel):
    """Build (or load from cache) tracklets and statistics for a directory.

    If ``json_path`` already exists, the cached values are returned.
    Otherwise ``dir_path`` is scanned as
    ``<dir_path>/<pid>/<tracklet>/*.jpg``, the result is written to
    ``json_path`` via ``write_json`` and returned.

    Args:
        dir_path: Directory whose entries are named by integer person id.
        json_path: Cache file read with ``read_json`` / written with
            ``write_json``.
        relabel: If true, person ids are replaced by indices obtained from
            enumerating the set of ids found under ``dir_path``.

    Returns:
        ``(tracklets, num_tracklets, num_pids, num_imgs_per_tracklet)``
        where ``tracklets`` is a list of ``(img_paths, pid, camid)`` and
        ``num_imgs_per_tracklet`` holds the ``*.jpg`` count of each stored
        tracklet.

    Raises:
        ValueError: If a directory name under ``dir_path`` is not an
            integer.
    """
    if osp.exists(json_path):
        print("=> {} generated before, awesome!".format(json_path))
        split = read_json(json_path)
        return split['tracklets'], split['num_tracklets'], split['num_pids'], split['num_imgs_per_tracklet']

    print("=> Automatically generating split (might take a while for the first time, have a coffe)")
    pdirs = glob.glob(osp.join(dir_path, '*'))  # avoid .DS_Store
    print("Processing {} with {} person identities".format(dir_path, len(pdirs)))

    pid_container = set()
    for pdir in pdirs:
        pid_container.add(int(osp.basename(pdir)))
    pid2label = {pid: label for label, pid in enumerate(pid_container)}

    tracklets = []
    num_imgs_per_tracklet = []
    for pdir in pdirs:
        pid = int(osp.basename(pdir))
        if relabel:
            pid = pid2label[pid]

        for tdir in glob.glob(osp.join(pdir, '*')):
            raw_img_paths = glob.glob(osp.join(tdir, '*.jpg'))
            num_imgs = len(raw_img_paths)
            if num_imgs < self.min_seq_len:
                continue

            img_paths = []
            for img_idx in range(num_imgs):
                # some tracklet starts from 0002 instead of 0001
                img_idx_name = 'F' + str(img_idx + 1).zfill(4)
                res = glob.glob(osp.join(tdir, '*' + img_idx_name + '*.jpg'))
                if len(res) == 0:
                    print("Warn: index name {} in {} is missing, jump to next".format(img_idx_name, tdir))
                    continue
                img_paths.append(res[0])

            # An empty tracklet directory (possible when min_seq_len is 0)
            # or one whose files match no frame index leaves nothing to
            # read the camera id from; skip it instead of crashing on
            # img_paths[0].
            if not img_paths:
                print("Warn: no indexed images found in {}, skip tracklet".format(tdir))
                continue

            # Recorded only for tracklets that are actually stored, so the
            # statistics line up with ``tracklets``.
            num_imgs_per_tracklet.append(num_imgs)

            img_name = osp.basename(img_paths[0])
            if img_name.find('_') == -1:
                # old naming format: 0001C6F0099X30823.jpg
                camid = int(img_name[5]) - 1
            else:
                # new naming format: 0001_C6_F0099_X30823.jpg
                camid = int(img_name[6]) - 1
            tracklets.append((tuple(img_paths), pid, camid))

    num_pids = len(pid_container)
    num_tracklets = len(tracklets)

    print("Saving split to {}".format(json_path))
    split_dict = {
        'tracklets': tracklets,
        'num_tracklets': num_tracklets,
        'num_pids': num_pids,
        'num_imgs_per_tracklet': num_imgs_per_tracklet,
    }
    write_json(split_dict, json_path)

    return tracklets, num_tracklets, num_pids, num_imgs_per_tracklet
def __init__(self, root='data', split_id=0, verbose=True, **kwargs):
    """Load one iLIDS-VID split and optionally print a summary table.

    Args:
        root: Directory that contains the dataset folder.
        split_id: Index of the split to pick from ``splits.json``.
        verbose: If true, print the statistics table.
        **kwargs: Ignored by this constructor.

    Raises:
        ValueError: If ``split_id`` is >= the number of stored splits.
    """
    base_dir = osp.join(root, self.dataset_dir)
    self.dataset_dir = base_dir
    self.dataset_url = 'http://www.eecs.qmul.ac.uk/~xiatian/iLIDS-VID/iLIDS-VID.tar'
    self.data_dir = osp.join(base_dir, 'i-LIDS-VID')
    self.split_dir = osp.join(base_dir, 'train-test people splits')
    self.split_mat_path = osp.join(self.split_dir, 'train_test_splits_ilidsvid.mat')
    self.split_path = osp.join(base_dir, 'splits.json')
    self.cam_1_path = osp.join(base_dir, 'i-LIDS-VID/sequences/cam1')
    self.cam_2_path = osp.join(base_dir, 'i-LIDS-VID/sequences/cam2')

    self._download_data()
    self._check_before_run()
    self._prepare_split()

    all_splits = read_json(self.split_path)
    num_splits = len(all_splits)
    if split_id >= num_splits:
        raise ValueError(
            "split_id exceeds range, received {}, but expected between 0 and {}"
            .format(split_id, num_splits - 1))

    chosen = all_splits[split_id]
    train_dirs = chosen['train']
    test_dirs = chosen['test']
    print("# train identites: {}, # test identites {}".format(
        len(train_dirs), len(test_dirs)))

    # Train uses both cameras; query is camera 1 only, gallery camera 2 only.
    train, num_train_tracklets, num_train_pids, num_imgs_train = self._process_data(
        train_dirs, cam1=True, cam2=True)
    query, num_query_tracklets, num_query_pids, num_imgs_query = self._process_data(
        test_dirs, cam1=True, cam2=False)
    gallery, num_gallery_tracklets, num_gallery_pids, num_imgs_gallery = self._process_data(
        test_dirs, cam1=False, cam2=True)

    # Per-tracklet image counts of all three subsets, combined with ``+``.
    num_imgs_per_tracklet = num_imgs_train + num_imgs_query + num_imgs_gallery
    min_num = np.min(num_imgs_per_tracklet)
    max_num = np.max(num_imgs_per_tracklet)
    avg_num = np.mean(num_imgs_per_tracklet)

    # Identity total adds train and query only; tracklet total adds all.
    num_total_pids = num_train_pids + num_query_pids
    num_total_tracklets = num_train_tracklets + num_query_tracklets + num_gallery_tracklets

    if verbose:
        rule = " ------------------------------"
        print("=> iLIDS-VID loaded")
        print("Dataset statistics:")
        print(rule)
        print(" subset | # ids | # tracklets")
        print(rule)
        print(" train | {:5d} | {:8d}".format(num_train_pids, num_train_tracklets))
        print(" query | {:5d} | {:8d}".format(num_query_pids, num_query_tracklets))
        print(" gallery | {:5d} | {:8d}".format(num_gallery_pids, num_gallery_tracklets))
        print(rule)
        print(" total | {:5d} | {:8d}".format(num_total_pids, num_total_tracklets))
        print(" number of images per tracklet: {} ~ {}, average {:.1f}".format(min_num, max_num, avg_num))
        print(rule)

    self.train, self.query, self.gallery = train, query, gallery

    self.num_train_pids = num_train_pids
    self.num_query_pids = num_query_pids
    self.num_gallery_pids = num_gallery_pids
def __init__(self, root='data', split_id=0, verbose=True, **kwargs):
    """Set up the iLIDS-VID paths, load pose data and one split.

    In addition to building the train/query/gallery tracklet lists, this
    reads ``pose.json`` and keeps the joints of a single body per entry in
    ``self.process_poses``.

    Args:
        root: Directory onto which the pre-existing ``self.dataset_dir``
            value is joined.
        split_id: Index into the list of splits read from ``splits.json``.
        verbose: When true, print the per-subset statistics table.
        **kwargs: Accepted for signature compatibility; not used here.

    Raises:
        ValueError: If ``split_id`` is negative or not smaller than the
            number of available splits.
        AssertionError: If a pose entry has no bodies.
    """
    self.dataset_dir = osp.join(root, self.dataset_dir)
    self.dataset_url = 'http://www.eecs.qmul.ac.uk/~xiatian/iLIDS-VID/iLIDS-VID.tar'
    self.data_dir = osp.join(self.dataset_dir, 'i-LIDS-VID')
    self.split_dir = osp.join(self.dataset_dir, 'train-test people splits')
    self.split_mat_path = osp.join(self.split_dir, 'train_test_splits_ilidsvid.mat')
    self.split_path = osp.join(self.dataset_dir, 'splits.json')
    self.cam_1_path = osp.join(self.dataset_dir, 'i-LIDS-VID/sequences/cam1')
    self.cam_2_path = osp.join(self.dataset_dir, 'i-LIDS-VID/sequences/cam2')
    self.pose_file = osp.join(self.dataset_dir, 'pose.json')

    self._download_data()
    self._check_before_run()

    with open(self.pose_file, 'r') as f:
        self.poses = json.load(f)

    # Process the pose information: keep only one body per entry, stored as
    # an array with three columns per joint.
    self.process_poses = dict()
    for key in self.poses:
        bodies = self.poses[key]['bodies']
        maxidx = -1
        maxarea = -1
        maxscore = -1
        assert len(bodies) >= 1, 'pose of {} is empty'.format(key)
        if len(bodies) == 1:
            self.process_poses[key] = np.array(bodies[0]['joints']).reshape((-1, 3))
        else:
            for idx, body in enumerate(bodies):
                tmp_kps = np.array(body['joints']).reshape((-1, 3))
                # Bounding-box area spanned by the first two joint columns.
                tmp_area = (max(tmp_kps[:, 0]) - min(tmp_kps[:, 0])) * \
                    (max(tmp_kps[:, 1]) - min(tmp_kps[:, 1]))
                tmp_score = body['score']
                # A later body replaces the current pick only when its score
                # exceeds 1.1 times the best score so far.
                # NOTE(review): maxarea is never updated, so the area test
                # below compares against -1 and does not filter anything in
                # practice. Left as-is to keep the selected bodies unchanged;
                # confirm whether area was meant to take part in the choice.
                if tmp_score > maxscore:
                    if tmp_area > maxarea and tmp_score > 1.1 * maxscore:
                        maxscore = tmp_score
                        maxidx = idx
            # If no body passed the test, maxidx is still -1 and the last
            # body is used.
            self.process_poses[key] = np.array(bodies[maxidx]['joints']).reshape((-1, 3))

    self._prepare_split()
    splits = read_json(self.split_path)
    # Reject negative indices too: they would otherwise wrap around and
    # silently select a split counted from the end of the list.
    if not 0 <= split_id < len(splits):
        raise ValueError("split_id exceeds range, received {}, but expected between 0 and {}".format(split_id, len(splits)-1))
    split = splits[split_id]
    train_dirs, test_dirs = split['train'], split['test']
    print("# train identites: {}, # test identites {}".format(len(train_dirs), len(test_dirs)))

    # Train uses both cameras; query is built from cam1 only and gallery
    # from cam2 only.
    train, num_train_tracklets, num_train_pids, num_imgs_train = \
        self._process_data(train_dirs, cam1=True, cam2=True)
    query, num_query_tracklets, num_query_pids, num_imgs_query = \
        self._process_data(test_dirs, cam1=True, cam2=False)
    gallery, num_gallery_tracklets, num_gallery_pids, num_imgs_gallery = \
        self._process_data(test_dirs, cam1=False, cam2=True)

    # NOTE(review): presumably each num_imgs_* is a list of per-tracklet
    # image counts, so "+" concatenates them - confirm in _process_data.
    num_imgs_per_tracklet = num_imgs_train + num_imgs_query + num_imgs_gallery
    min_num = np.min(num_imgs_per_tracklet)
    max_num = np.max(num_imgs_per_tracklet)
    avg_num = np.mean(num_imgs_per_tracklet)

    # Query and gallery are both built from test_dirs, so only one of them
    # is added to the identity total.
    num_total_pids = num_train_pids + num_query_pids
    num_total_tracklets = num_train_tracklets + num_query_tracklets + num_gallery_tracklets

    if verbose:
        print("=> iLIDS-VID loaded")
        print("Dataset statistics:")
        print(" ------------------------------")
        print(" subset | # ids | # tracklets")
        print(" ------------------------------")
        print(" train | {:5d} | {:8d}".format(num_train_pids, num_train_tracklets))
        print(" query | {:5d} | {:8d}".format(num_query_pids, num_query_tracklets))
        print(" gallery | {:5d} | {:8d}".format(num_gallery_pids, num_gallery_tracklets))
        print(" ------------------------------")
        print(" total | {:5d} | {:8d}".format(num_total_pids, num_total_tracklets))
        print(" number of images per tracklet: {} ~ {}, average {:.1f}".format(min_num, max_num, avg_num))
        print(" ------------------------------")

    self.train = train
    self.query = query
    self.gallery = gallery

    self.num_train_pids = num_train_pids
    self.num_query_pids = num_query_pids
    self.num_gallery_pids = num_gallery_pids
def __init__(self, root='data', split_id=0, verbose=True, **kwargs):
    """Set up the GRID paths and load one train/query/gallery split.

    Args:
        root: Directory onto which the pre-existing ``self.dataset_dir``
            value is joined.
        split_id: Index into the list of splits read from ``splits.json``.
        verbose: When true, print the per-subset statistics table.
        **kwargs: Accepted for signature compatibility; not used here.

    Raises:
        ValueError: If ``split_id`` is negative or not smaller than the
            number of available splits.
    """
    super(GRID, self).__init__()
    self.dataset_dir = osp.join(root, self.dataset_dir)
    self.dataset_url = 'http://personal.ie.cuhk.edu.hk/~ccloy/files/datasets/underground_reid.zip'
    self.probe_path = osp.join(self.dataset_dir, 'underground_reid', 'probe')
    self.gallery_path = osp.join(self.dataset_dir, 'underground_reid', 'gallery')
    self.split_mat_path = osp.join(self.dataset_dir, 'underground_reid',
                                   'features_and_partitions.mat')
    self.split_path = osp.join(self.dataset_dir, 'splits.json')

    # Helper methods defined elsewhere on the class; they must run in this
    # order before the split file is read.
    self._download_data()
    self._check_before_run()
    self._prepare_split()

    splits = read_json(self.split_path)
    # Reject negative indices too: they would otherwise wrap around and
    # silently select a split counted from the end of the list.
    if not 0 <= split_id < len(splits):
        raise ValueError(
            "split_id exceeds range, received {}, but expected between 0 and {}"
            .format(split_id, len(splits) - 1))
    split = splits[split_id]

    # JSON stores each sample as a list; convert back to tuples.
    train = [tuple(item) for item in split['train']]
    query = [tuple(item) for item in split['query']]
    gallery = [tuple(item) for item in split['gallery']]

    num_train_pids = split['num_train_pids']
    num_query_pids = split['num_query_pids']
    num_gallery_pids = split['num_gallery_pids']

    num_train_imgs = len(train)
    num_query_imgs = len(query)
    num_gallery_imgs = len(gallery)

    # The identity total is taken from train + gallery (not query).
    num_total_pids = num_train_pids + num_gallery_pids
    num_total_imgs = num_train_imgs + num_query_imgs + num_gallery_imgs

    if verbose:
        print("=> GRID loaded")
        print("Dataset statistics:")
        print(" ------------------------------")
        print(" subset | # ids | # images")
        print(" ------------------------------")
        print(" train | {:5d} | {:8d}".format(num_train_pids, num_train_imgs))
        print(" query | {:5d} | {:8d}".format(num_query_pids, num_query_imgs))
        print(" gallery | {:5d} | {:8d}".format(num_gallery_pids, num_gallery_imgs))
        print(" ------------------------------")
        print(" total | {:5d} | {:8d}".format(num_total_pids, num_total_imgs))
        print(" ------------------------------")

    self.train = train
    self.query = query
    self.gallery = gallery

    self.num_train_pids = num_train_pids
    self.num_query_pids = num_query_pids
    self.num_gallery_pids = num_gallery_pids
def __init__(self, root='data', split_id=0, verbose=True, **kwargs):
    """Set up the CUHK01 paths and load one train/query/gallery split.

    Args:
        root: Directory onto which the pre-existing ``self.dataset_dir``
            value is joined.
        split_id: Index into the list of splits read from ``splits.json``.
        verbose: When true, print the per-subset statistics table.
        **kwargs: Accepted for signature compatibility; not used here.

    Raises:
        ValueError: If ``split_id`` is negative or not smaller than the
            number of available splits.
    """
    super(CUHK01, self).__init__()
    self.dataset_dir = osp.join(root, self.dataset_dir)
    self.zip_path = osp.join(self.dataset_dir, 'CUHK01.zip')
    self.campus_dir = osp.join(self.dataset_dir, 'campus')
    self.split_path = osp.join(self.dataset_dir, 'splits.json')

    # Helper methods defined elsewhere on the class; they must run in this
    # order before the split file is read.
    self._extract_file()
    self._check_before_run()
    self._prepare_split()

    splits = read_json(self.split_path)
    # Reject negative indices too: they would otherwise wrap around and
    # silently select a split counted from the end of the list.
    if not 0 <= split_id < len(splits):
        raise ValueError(
            "split_id exceeds range, received {}, but expected between 0 and {}"
            .format(split_id, len(splits) - 1))
    split = splits[split_id]

    # JSON stores each sample as a list; convert back to tuples.
    train = [tuple(item) for item in split['train']]
    query = [tuple(item) for item in split['query']]
    gallery = [tuple(item) for item in split['gallery']]

    num_train_pids = split['num_train_pids']
    num_query_pids = split['num_query_pids']
    num_gallery_pids = split['num_gallery_pids']

    num_train_imgs = len(train)
    num_query_imgs = len(query)
    num_gallery_imgs = len(gallery)

    # NOTE(review): only train + query identities are summed, presumably
    # because gallery covers the same identities as query - confirm in
    # _prepare_split.
    num_total_pids = num_train_pids + num_query_pids
    # Count the images of all three subsets; the gallery images were
    # previously left out of the total.
    # NOTE(review): assumes query and gallery hold distinct images.
    num_total_imgs = num_train_imgs + num_query_imgs + num_gallery_imgs

    if verbose:
        print("=> CUHK01 loaded")
        print("Dataset statistics:")
        print(" ------------------------------")
        print(" subset | # ids | # images")
        print(" ------------------------------")
        print(" train | {:5d} | {:8d}".format(num_train_pids, num_train_imgs))
        print(" query | {:5d} | {:8d}".format(num_query_pids, num_query_imgs))
        print(" gallery | {:5d} | {:8d}".format(num_gallery_pids, num_gallery_imgs))
        print(" ------------------------------")
        print(" total | {:5d} | {:8d}".format(num_total_pids, num_total_imgs))
        print(" ------------------------------")

    self.train = train
    self.query = query
    self.gallery = gallery

    self.num_train_pids = num_train_pids
    self.num_query_pids = num_query_pids
    self.num_gallery_pids = num_gallery_pids
def __init__(self, root='data', split_id=0, min_seq_len=0, verbose=True, **kwargs):
    """Set up the PRID450S paths and load one train/query/gallery split.

    Args:
        root: Directory onto which the pre-existing ``self.dataset_dir``
            value is joined.
        split_id: Index into the list of splits read from ``splits.json``.
        min_seq_len: Accepted for signature compatibility; not used here.
        verbose: When true, print the per-subset statistics table.
        **kwargs: Accepted for signature compatibility; not used here.

    Raises:
        ValueError: If ``split_id`` is negative or not smaller than the
            number of available splits.
    """
    super(PRID450S, self).__init__()
    self.dataset_dir = osp.join(root, self.dataset_dir)
    self.dataset_url = 'https://files.icg.tugraz.at/f/8c709245bb/?raw=1'
    self.split_path = osp.join(self.dataset_dir, 'splits.json')
    self.cam_a_path = osp.join(self.dataset_dir, 'cam_a')
    self.cam_b_path = osp.join(self.dataset_dir, 'cam_b')

    # Helper methods defined elsewhere on the class; they must run in this
    # order before the split file is read.
    self._download_data()
    self._check_before_run()
    self._prepare_split()

    splits = read_json(self.split_path)
    # Reject negative indices too: they would otherwise wrap around and
    # silently select a split counted from the end of the list.
    if not 0 <= split_id < len(splits):
        raise ValueError(
            "split_id exceeds range, received {}, but expected between 0 and {}"
            .format(split_id, len(splits) - 1))
    split = splits[split_id]

    # JSON stores each sample as a list; convert back to tuples.
    train = [tuple(item) for item in split['train']]
    query = [tuple(item) for item in split['query']]
    gallery = [tuple(item) for item in split['gallery']]

    num_train_pids = split['num_train_pids']
    num_query_pids = split['num_query_pids']
    num_gallery_pids = split['num_gallery_pids']

    num_train_imgs = len(train)
    num_query_imgs = len(query)
    num_gallery_imgs = len(gallery)

    # NOTE(review): only train + query identities are summed, presumably
    # because gallery covers the same identities as query - confirm in
    # _prepare_split.
    num_total_pids = num_train_pids + num_query_pids
    # Count the images of all three subsets; the gallery images were
    # previously left out of the total.
    # NOTE(review): assumes query and gallery hold distinct images.
    num_total_imgs = num_train_imgs + num_query_imgs + num_gallery_imgs

    if verbose:
        print("=> PRID450S loaded")
        print("Dataset statistics:")
        print(" ------------------------------")
        print(" subset | # ids | # images")
        print(" ------------------------------")
        print(" train | {:5d} | {:8d}".format(num_train_pids, num_train_imgs))
        print(" query | {:5d} | {:8d}".format(num_query_pids, num_query_imgs))
        print(" gallery | {:5d} | {:8d}".format(num_gallery_pids, num_gallery_imgs))
        print(" ------------------------------")
        print(" total | {:5d} | {:8d}".format(num_total_pids, num_total_imgs))
        print(" ------------------------------")

    self.train = train
    self.query = query
    self.gallery = gallery

    self.num_train_pids = num_train_pids
    self.num_query_pids = num_query_pids
    self.num_gallery_pids = num_gallery_pids
def __init__(self, root='/data_1/zhaojianan/Project/Video_ReID/data/image_ReID',
             split_id=0, cuhk03_labeled=False, cuhk03_classic_split=False,
             verbose=True, **kwargs):
    """Set up the CUHK03 paths and load one train/query/gallery split.

    Args:
        root: Directory onto which the pre-existing ``self.dataset_dir``
            value is joined.
        split_id: Index into the list of splits read from the chosen
            split JSON file.
        cuhk03_labeled: Use the "labeled" split files when true, the
            "detected" ones otherwise.
        cuhk03_classic_split: Use the "classic" split files when true, the
            "new" ones otherwise.
        verbose: When true, print the per-subset statistics table.
        **kwargs: Accepted for signature compatibility; not used here.

    Raises:
        ValueError: If ``split_id`` is negative or not smaller than the
            number of available splits.
    """
    super(CUHK03, self).__init__()
    self.dataset_dir = osp.join(root, self.dataset_dir)
    self.data_dir = osp.join(self.dataset_dir, 'cuhk03_release')
    self.raw_mat_path = osp.join(self.data_dir, 'cuhk-03.mat')

    self.imgs_detected_dir = osp.join(self.dataset_dir, 'images_detected')
    self.imgs_labeled_dir = osp.join(self.dataset_dir, 'images_labeled')

    self.split_classic_det_json_path = osp.join(
        self.dataset_dir, 'splits_classic_detected.json')
    self.split_classic_lab_json_path = osp.join(
        self.dataset_dir, 'splits_classic_labeled.json')

    self.split_new_det_json_path = osp.join(self.dataset_dir, 'splits_new_detected.json')
    self.split_new_lab_json_path = osp.join(self.dataset_dir, 'splits_new_labeled.json')

    self.split_new_det_mat_path = osp.join(
        self.dataset_dir, 'cuhk03_new_protocol_config_detected.mat')
    self.split_new_lab_mat_path = osp.join(
        self.dataset_dir, 'cuhk03_new_protocol_config_labeled.mat')

    # Helper methods defined elsewhere on the class; run before any split
    # file is read.
    self._check_before_run()
    self._preprocess()

    # Pick one of the four split files from the two boolean switches.
    if cuhk03_labeled:
        image_type = 'labeled'
        split_path = self.split_classic_lab_json_path if cuhk03_classic_split else self.split_new_lab_json_path
    else:
        image_type = 'detected'
        split_path = self.split_classic_det_json_path if cuhk03_classic_split else self.split_new_det_json_path

    splits = read_json(split_path)
    # Explicit check instead of ``assert``: asserts are removed under
    # ``python -O``, and a negative index would silently wrap around.
    if not 0 <= split_id < len(splits):
        raise ValueError(
            "split_id exceeds range, received {}, but expected between 0 and {}"
            .format(split_id, len(splits) - 1))
    split = splits[split_id]
    print("Split index = {}".format(split_id))

    train = split['train']
    query = split['query']
    gallery = split['gallery']

    num_train_pids = split['num_train_pids']
    num_query_pids = split['num_query_pids']
    num_gallery_pids = split['num_gallery_pids']
    # NOTE(review): only train + query identities are summed, presumably
    # because gallery covers the same identities as query - confirm against
    # the split files.
    num_total_pids = num_train_pids + num_query_pids

    num_train_imgs = split['num_train_imgs']
    num_query_imgs = split['num_query_imgs']
    num_gallery_imgs = split['num_gallery_imgs']
    # Count the images of all three subsets; the gallery images were
    # previously left out of the total.
    # NOTE(review): assumes query and gallery hold distinct images.
    num_total_imgs = num_train_imgs + num_query_imgs + num_gallery_imgs

    if verbose:
        print("=> CUHK03 ({}) loaded".format(image_type))
        print("Dataset statistics:")
        print(" ------------------------------")
        print(" subset | # ids | # images")
        print(" ------------------------------")
        print(" train | {:5d} | {:8d}".format(num_train_pids, num_train_imgs))
        print(" query | {:5d} | {:8d}".format(num_query_pids, num_query_imgs))
        print(" gallery | {:5d} | {:8d}".format(num_gallery_pids, num_gallery_imgs))
        print(" ------------------------------")
        print(" total | {:5d} | {:8d}".format(num_total_pids, num_total_imgs))
        print(" ------------------------------")

    self.train = train
    self.query = query
    self.gallery = gallery

    self.num_train_pids = num_train_pids
    self.num_query_pids = num_query_pids
    self.num_gallery_pids = num_gallery_pids