Пример #1
0
def add_bbox(mode, overwrite):
    """Fill the ``bounding_boxes`` dataset of the file opened for ``mode``.

    The file returned by ``get_file(mode, 'a')`` is updated in place: for
    every ``.xml`` member of the archive returned by
    ``get_tarred_xml_file(mode)``, the flattened result of
    ``parse_xml_bbox`` is written to the row whose index matches the
    member's base name (without extension) in ``src['filenames']``.

    Args:
        mode: forwarded to ``get_file`` and ``get_tarred_xml_file``.
        overwrite: if true, an existing ``bounding_boxes`` entry is deleted
            and rebuilt from scratch. Otherwise an existing dataset is kept
            and only rows that are currently empty are (re)parsed.

    Raises:
        ValueError: if a stored filename does not end with ``.JPEG``.
        KeyError: on a fresh build, if an ``.xml`` member has no matching
            entry in ``src['filenames']``.
    """

    def xml_stem(member_name):
        # 'some/dir/abc.xml' -> 'abc'
        return member_name.split('/')[-1][:-4]

    with get_file(mode, 'a') as src:
        bbs = None
        if 'bounding_boxes' in src:
            if overwrite:
                logger.info('Deleting existing bounding_boxes')
                del src['bounding_boxes']
            else:
                # An existing dataset is resumed rather than treated as an
                # error, so an interrupted run can be continued.
                logger.info('Found existing bounding_boxes')
                bbs = src['bounding_boxes']

        logger.info('Loading filenames...')
        filenames = np.array(src['filenames'])
        # NOTE(review): assumes the stored filenames are text strings; if
        # they come back as bytes the suffix check below needs adapting.
        # Explicit check instead of `assert`, which is stripped under -O.
        if not all(fn.endswith('.JPEG') for fn in filenames):
            raise ValueError('All filenames are expected to end with ".JPEG"')
        filenames = [fn[:-5] for fn in filenames]  # drop the '.JPEG' suffix

        logger.info('Indexing filenames...')
        filename_indices = {k: i for i, k in enumerate(filenames)}

        if bbs is None:
            logger.info('Creating bounding_boxes dataset')
            # One variable-length int32 vector per example.
            vlen_dtype = h5py.special_dtype(vlen=np.dtype(np.int32))
            bbs = src.create_dataset('bounding_boxes',
                                     shape=(len(filenames), ),
                                     dtype=vlen_dtype)
            empty_filenames = None
        else:
            logger.info('Finding empty bounding box filenames...')
            empty_filenames = {
                fn for fn, bb in zip(filenames, np.array(bbs))
                if bb.shape[0] == 0
            }

        with get_tarred_xml_file(mode) as tar:
            logger.info('Getting members...')
            # Always restrict to xml members: the archive listing may also
            # contain entries (e.g. directories) that are neither parseable
            # nor present in the filename index.
            members = [
                m for m in tar.getmembers() if m.name.endswith('.xml')
            ]
            if empty_filenames is not None:
                # Resuming: only parse files whose row is still empty.
                members = [
                    m for m in members
                    if xml_stem(m.name) in empty_filenames
                ]

            logger.info('Parsing remaining xml files...')
            bar = IncrementalBar(max=len(members))
            try:
                for member in members:
                    i = filename_indices[xml_stem(member.name)]
                    fp = tar.extractfile(member)
                    bbs[i] = parse_xml_bbox(fp).flatten()
                    bar.next()
            finally:
                # Finish the progress bar even if parsing fails midway.
                bar.finish()
Пример #2
0
def convert_train(delete_tar=False, overwrite=False):
    """Write the 'train' split through ``write_examples``.

    Runs ``check_overwrite`` for the split, counts the examples for all
    wordnet ids, then opens the output via ``get_file('train', 'w')`` and
    writes the records with targets included and shuffling enabled.

    Args:
        delete_tar: forwarded to ``get_train_records``.
        overwrite: forwarded to ``check_overwrite``.
    """
    from imagenet.tarred import get_wordnet_ids

    split = 'train'
    check_overwrite(split, overwrite)
    ids = get_wordnet_ids()

    print('Counting examples...')
    total = get_train_length(ids)
    print('n examples: %d' % total)

    with get_file(split, 'w') as out:
        write_examples(
            out,
            total,
            include_targets=True,
            records=get_train_records(ids, delete_tar=delete_tar),
            shuffle=True)
Пример #3
0
def convert_other(mode, delete_tar, overwrite=False):
    """Write the 'val' or 'test' split through ``write_examples``.

    The mode is normalised with ``get_mode``, ``check_overwrite`` is run,
    and the example count is taken from the tar at ``get_tar_path(mode)``.
    'val' is written with targets, 'test' without; neither is shuffled.

    Args:
        mode: split identifier; must resolve to 'val' or 'test'.
        delete_tar: forwarded to the per-split record getter.
        overwrite: forwarded to ``check_overwrite``.

    Raises:
        ValueError: if the resolved mode is neither 'val' nor 'test'.
    """
    mode = get_mode(mode)
    check_overwrite(mode, overwrite)
    n_examples = get_tar_length(get_tar_path(mode))

    # Resolve the per-mode settings *before* opening the output in 'w'
    # mode, so an unsupported mode fails without touching the output file.
    if mode == 'val':
        include_targets, get_records = True, get_val_records
    elif mode == 'test':
        include_targets, get_records = False, get_test_records
    else:
        raise ValueError('Invalid mode: "%s"' % mode)

    with get_file(mode, 'w') as fp:
        write_examples(
            fp, n_examples, include_targets=include_targets,
            records=get_records(delete_tar), shuffle=False)
Пример #4
0
def main(_):
    """Visualise every example of the split selected by ``FLAGS.mode``.

    Opens the file read-only, reports the number of examples, and calls
    ``vis`` once per example in random order. The bounding box argument is
    ``None`` when the file has no 'bounding_boxes' entry.

    Args:
        _: ignored.
    """
    with get_file(get_mode(FLAGS.mode), 'r') as fp:
        images, filenames, targets = (
            fp['encoded_images'], fp['filenames'], fp['targets'])
        # Bounding boxes are optional in the file.
        bbs = fp['bounding_boxes'] if 'bounding_boxes' in fp else None

        n = len(images)
        print('number of examples: %d' % n)

        order = list(range(n))
        print('shuffling...')
        random.shuffle(order)
        print('Done!')

        for idx in order:
            vis(images[idx], filenames[idx], targets[idx],
                bbs[idx] if bbs is not None else None)
0
def compute_bbox(mode, overwrite):
    """Parse the bounding boxes for every filename stored for ``mode``.

    Filenames are read from ``get_file(mode, 'r')``; for each one the
    member at ``get_xml_subpath(filename)`` is looked up in the archive
    returned by ``get_tarred_xml_file(mode)`` and parsed with
    ``parse_xml_bbox``.

    Args:
        mode: forwarded to ``get_file`` and ``get_tarred_xml_file``.
        overwrite: accepted for interface compatibility; not used here.

    Returns:
        dict mapping each filename to its flattened ``parse_xml_bbox``
        result.
    """
    with get_file(mode, 'r') as src:
        filenames = list(src['filenames'])

    result = {}
    with get_tarred_xml_file(mode) as tar:
        bar = IncrementalBar(max=len(filenames))
        try:
            # Processed sequentially on purpose: multiprocessing.Pool has to
            # pickle the worker callable, which is not possible for a
            # function nested inside this one, and the single open archive
            # handle is not something separate workers can safely share.
            for filename in filenames:
                member = tar.getmember(get_xml_subpath(filename))
                fp = tar.extractfile(member)
                result[filename] = parse_xml_bbox(fp).flatten()
                bar.next()
        finally:
            # Finish the progress bar even if a lookup or parse fails.
            bar.finish()
    return result