def main(dataset: str, split: int):
    """Split the train metadata of a dataset split into train and dev parts.

    The train and dev metadata paths are taken from ``php.get_meta_paths``.
    The two objects returned by ``_split_train_dev`` are written as
    index-oriented JSON below ``ct.WORK_ROOT``, at the train and dev paths
    respectively.
    """
    train, dev, _, _ = php.get_meta_paths(dataset, split)
    env.LOGGER.info(f'Splitting {train} into train and dev...')
    train_part, dev_part = _split_train_dev(train)
    # Persist each part at its own path, train first, then dev.
    for frame, target in ((train_part, train), (dev_part, dev)):
        frame.to_json(ct.WORK_ROOT / target, orient='index')
    env.LOGGER.info('...Done')
# Example #2
# 0
def main(dataset: str, split: int):
    """Run ``_extract_jpeg`` over every row of the train, dev and test metadata.

    The module-level ``DATA_ROOT_DIR`` is set from
    ``php.get_data_root_path(dataset)`` before any metadata is processed.
    """
    global DATA_ROOT_DIR
    DATA_ROOT_DIR = php.get_data_root_path(dataset)
    train, dev, _, test = php.get_meta_paths(dataset, split)

    for path in (train, dev, test):
        env.LOGGER.info(f'Extracting jpeg images from {path.as_posix()}...')
        rows = list(ghp.read_meta(path).iterrows())
        outcomes = ghp.parallel.execute(_extract_jpeg, rows, 1)
        # The yielded values are not used; the loop only exhausts the iterable.
        for _ in outcomes:
            pass
        env.LOGGER.info('...Done')
def main(dataset: str, split: int):
    """Augment the train, dev and test metadata and write each back as JSON.

    For every metadata path: the metadata is read, ``add_columns`` is applied
    to it, each ``(index, row)`` pair yielded by running ``_augment_meta``
    through ``ghp.parallel.execute`` is assigned back into the metadata, and
    the result is stored as index-oriented JSON below ``ct.WORK_ROOT``.

    The module-level ``DATA_ROOT_DIR`` is set from
    ``php.get_data_root_path(dataset)`` first.
    """
    global DATA_ROOT_DIR
    DATA_ROOT_DIR = php.get_data_root_path(dataset)
    train, dev, _, test = php.get_meta_paths(dataset, split)

    for path in (train, dev, test):
        env.LOGGER.info(f'Augmenting metadata at {path.as_posix()}...')
        meta = ghp.read_meta(path)
        add_columns(meta)
        # Materialise the rows up front, since ``meta`` is updated in the loop.
        rows = list(meta.iterrows())
        for label, augmented in ghp.parallel.execute(_augment_meta, rows, 1):
            meta.loc[label] = augmented
        meta.to_json(ct.WORK_ROOT / path, orient='index')
        env.LOGGER.info('...Done')
# Example #4
# 0
def main(dataset: str, split: int):
    """Compute min/mean/max of length, height and width and store them.

    The statistics are computed over the ``length``, ``height`` and ``width``
    columns of the merged metadata, each wrapped in a one-element list, and
    handed to ``_store_stats`` together with the stats path from
    ``php.get_stats_path``.
    """
    _, _, merged_path, _ = php.get_meta_paths(dataset, split)
    stats_path = php.get_stats_path(dataset, split)

    env.LOGGER.info(f'Gathering size statistics for the {dataset} dataset...')
    meta = ghp.read_meta(merged_path)
    lengths = meta['length']
    heights = meta['height']
    widths = meta['width']
    stats = {
        'min_length': [lengths.min()],
        'mean_length': [lengths.mean()],
        'max_length': [lengths.max()],
        'min_height': [heights.min()],
        'mean_height': [heights.mean()],
        'max_height': [heights.max()],
        'min_width': [widths.min()],
        'mean_width': [widths.mean()],
        'max_width': [widths.max()],
    }
    _store_stats(stats, stats_path)
    env.LOGGER.info('...Done.')
def main(dataset: str, split: int):
    """Merge the train and dev metadata of a dataset split.

    The train, dev and merged paths come from ``php.get_meta_paths`` and are
    passed on to ``merge_meta``.
    """
    env.LOGGER.info(f'Merging {dataset} train and dev DataFrames into one...')
    train_path, dev_path, merged_path, _ = php.get_meta_paths(dataset, split)
    merge_meta(train_path, dev_path, merged_path)
    env.LOGGER.info('...Done.')