def add_bbox(mode, overwrite):
    """Populate the ``bounding_boxes`` dataset of the ``mode`` file.

    The file returned by ``get_file(mode, 'a')`` is opened for appending and
    its ``filenames`` dataset is used to map each annotation in the tarred
    XML archive (``get_tarred_xml_file(mode)``) to a row index. Every parsed
    bounding box array is flattened and stored in a variable-length int32
    dataset with one row per filename.

    Args:
        mode: dataset split identifier, forwarded to ``get_file`` and
            ``get_tarred_xml_file``.
        overwrite: if true and ``bounding_boxes`` already exists, delete it
            and rebuild from scratch. If false and it exists, only rows that
            are currently empty are filled in, so an interrupted run can be
            resumed.

    Raises:
        ValueError: if any stored filename does not end with ``.JPEG``.
        KeyError: if an XML annotation has no matching entry in ``filenames``.
    """
    with get_file(mode, 'a') as src:
        if 'bounding_boxes' in src:
            if overwrite:
                logger.info('Deleting existing bounding_boxes')
                del src['bounding_boxes']
                bbs = None
            else:
                # An existing dataset is deliberately not an error: it is
                # reused and only its empty rows are filled in below.
                logger.info('Found existing bounding_boxes')
                bbs = src['bounding_boxes']
        else:
            bbs = None

        logger.info('Loading filenames...')
        filenames = np.array(src['filenames'])
        # Raise explicitly instead of asserting: under ``python -O`` an
        # assert is stripped and the slice below would silently mangle names.
        if not all(fn.endswith('.JPEG') for fn in filenames):
            raise ValueError('All filenames must end with ".JPEG"')
        filenames = [fn[:-5] for fn in filenames]

        logger.info('Indexing filenames...')
        filename_indices = {k: i for i, k in enumerate(filenames)}

        if bbs is None:
            logger.info('Creating bounding_boxes dataset')
            vlen_dtype = h5py.special_dtype(vlen=np.dtype(np.int32))
            bbs = src.create_dataset(
                'bounding_boxes', shape=(len(filenames), ), dtype=vlen_dtype)
            empty_filenames = None
        else:
            logger.info('Finding empty bounding box filenames...')
            empty_filenames = {
                fn for fn, bb in zip(filenames, np.array(bbs))
                if bb.shape[0] == 0
            }

        with get_tarred_xml_file(mode) as tar:
            logger.info('Getting members...')
            # Keep only ``.xml`` entries on both the fresh and the resume
            # path. Previously the fresh path processed every tar member, so
            # a directory entry produced a bogus key and failed the lookup.
            pending = []
            for member in tar.getmembers():
                if not member.name.endswith('.xml'):
                    continue
                # Basename without the ``.xml`` suffix.
                stem = member.name.split('/')[-1][:-4]
                if empty_filenames is None or stem in empty_filenames:
                    pending.append((stem, member))

            logger.info('Parsing remaining xml files...')
            bar = IncrementalBar(max=len(pending))
            for stem, member in pending:
                i = filename_indices[stem]
                fp = tar.extractfile(member)
                bbs[i] = parse_xml_bbox(fp).flatten()
                bar.next()
            bar.finish()
def convert_train(delete_tar=False, overwrite=False):
    """Write the ``train`` split to the file opened by ``get_file``.

    Args:
        delete_tar: forwarded to ``get_train_records``.
        overwrite: forwarded to ``check_overwrite`` for the ``train`` mode.
    """
    from imagenet.tarred import get_wordnet_ids

    split = 'train'
    check_overwrite(split, overwrite)
    ids = get_wordnet_ids()

    print('Counting examples...')
    total = get_train_length(ids)
    print('n examples: %d' % total)

    with get_file(split, 'w') as out:
        write_examples(
            out,
            total,
            include_targets=True,
            records=get_train_records(ids, delete_tar=delete_tar),
            shuffle=True)
def convert_other(mode, delete_tar, overwrite=False):
    """Write the ``val`` or ``test`` split to the file opened by ``get_file``.

    Examples are written unshuffled. Targets are included for ``val`` and
    omitted for ``test``.

    Args:
        mode: split identifier; normalised through ``get_mode`` and then
            required to be ``'val'`` or ``'test'``.
        delete_tar: forwarded to ``get_val_records`` / ``get_test_records``.
        overwrite: forwarded to ``check_overwrite``.

    Raises:
        ValueError: if the normalised mode is neither ``'val'`` nor ``'test'``.
    """
    mode = get_mode(mode)
    # Reject unsupported modes before anything is opened. The original only
    # raised after ``get_file(mode, 'w')`` had already opened the target
    # file for writing.
    if mode not in ('val', 'test'):
        raise ValueError('Invalid mode: "%s"' % mode)

    check_overwrite(mode, overwrite)
    n_examples = get_tar_length(get_tar_path(mode))
    with get_file(mode, 'w') as fp:
        if mode == 'val':
            include_targets = True
            records = get_val_records(delete_tar)
        else:
            include_targets = False
            records = get_test_records(delete_tar)
        write_examples(
            fp, n_examples, include_targets=include_targets,
            records=records, shuffle=False)
def main(_):
    """Visualise every example of the ``FLAGS.mode`` file in random order.

    Reads the ``encoded_images``, ``filenames`` and ``targets`` datasets, plus
    ``bounding_boxes`` when present, and passes each example to ``vis``.
    ``None`` is passed as the bounding box when the file has no
    ``bounding_boxes`` entry.

    Args:
        _: unused positional argument.
    """
    with get_file(get_mode(FLAGS.mode), 'r') as fp:
        images = fp['encoded_images']
        filenames = fp['filenames']
        targets = fp['targets']
        bbs = fp['bounding_boxes'] if 'bounding_boxes' in fp else None

        n = len(images)
        print('number of examples: %d' % n)

        order = list(range(n))
        print('shuffling...')
        random.shuffle(order)
        print('Done!')

        for idx in order:
            vis(
                images[idx],
                filenames[idx],
                targets[idx],
                None if bbs is None else bbs[idx])
def compute_bbox(mode, overwrite):
    """Parse the bounding boxes of every file listed in the ``mode`` file.

    For each entry of the ``filenames`` dataset, the matching member of the
    tarred XML archive (located via ``get_xml_subpath``) is parsed with
    ``parse_xml_bbox`` and flattened.

    The work is done sequentially. The original passed a nested closure to
    ``multiprocessing.Pool.imap_unordered``; local functions cannot be
    pickled for the worker processes, so that call failed before doing any
    work. The closure also captured a single open tar handle, which would
    not be safe to share between processes.

    Args:
        mode: dataset split identifier, forwarded to ``get_file`` and
            ``get_tarred_xml_file``.
        overwrite: accepted for interface compatibility; not used here.

    Returns:
        dict mapping each filename to its flattened bounding box array.
    """
    with get_file(mode, 'r') as src:
        filenames = list(src['filenames'])

    result = {}
    with get_tarred_xml_file(mode) as tar:
        bar = IncrementalBar(max=len(filenames))
        try:
            for filename in filenames:
                member = tar.getmember(get_xml_subpath(filename))
                fp = tar.extractfile(member)
                result[filename] = parse_xml_bbox(fp).flatten()
                bar.next()
        finally:
            # Always terminate the progress bar, even if parsing fails.
            bar.finish()
    return result