def load_data(base_dir, min_samples, images_per_class=None):
    """Load shuffled train/dev/test file DataFrames rooted at *base_dir*.

    Parameters
    ----------
    base_dir : str
        Directory containing 'train', 'dev', and 'test' subdirectories.
    min_samples : int
        Classes with fewer than this many training samples are pruned.
    images_per_class : int, optional
        If given, the training set is resampled (with replacement) so
        every surviving class has exactly this many rows.

    Returns
    -------
    (train, dev, test) : tuple of pandas.DataFrame
        Dev and test are restricted to the classes present in train and
        returned without the temporary 'keep' column.

    Raises
    ------
    SystemExit
        If images_per_class exceeds the size of the majority class.
    """
    # BUG FIX: use the function parameters; the original read the
    # module-level `args.base_dir` / `args.min_samples`, which broke any
    # caller passing different values.
    train = build_files_dataframe(os.path.join(base_dir, 'train'))
    print(train.head())
    train = prune_file_list(train, label_col='label',
                            min_samples=min_samples)
    train = train.sample(frac=1).reset_index(drop=True)

    # Classes that survived pruning; dev/test are filtered to these.
    classes = np.unique(train['label'])

    # We either get None or an integer specifying the target number of
    # samples per class.
    if images_per_class:
        counts = {c: len(train[train['label'] == c]) for c in classes}
        if images_per_class > max(counts.values()):
            # Fatal: raising SystemExit preserves the original
            # print-and-exit() semantics but exits non-zero with the
            # message on stderr.
            raise SystemExit(
                "[FATAL] The number of images per class requested is larger than "
                " the number of samples in the majority class. This is a fatal error!"
            )
        # BUG FIX: resample each class subset separately. The original
        # resampled the entire `train` frame once per class, producing
        # random class proportions instead of a balanced set.
        train = pd.concat(
            resample(train[train['label'] == c],
                     replace=True,
                     n_samples=images_per_class)
            for c in classes
        )

    # Load the dev set, shuffle it, and keep only rows whose label
    # survived training-set pruning.
    dev = build_files_dataframe(os.path.join(base_dir, 'dev'))
    print(dev.head())
    dev = dev.sample(frac=1).reset_index(drop=True)
    return_cols = list(dev.columns)
    dev['keep'] = dev['label'].apply(lambda x: x in classes)
    dev = dev[dev['keep'] == True]

    # Same treatment for the test set.
    test = build_files_dataframe(os.path.join(base_dir, 'test'))
    print(test.head())
    test = test.sample(frac=1).reset_index(drop=True)
    return_cols = list(test.columns)
    test['keep'] = test['label'].apply(lambda x: x in classes)
    test = test[test['keep'] == True]

    # Selecting return_cols drops the temporary 'keep' column.
    return train, dev[return_cols], test[return_cols]
def load_dataframes(data_dir, min_samples):
    """Load shuffled train and dev file DataFrames from *data_dir*.

    Parameters
    ----------
    data_dir : str
        Directory containing 'train' and 'dev' subdirectories.
    min_samples : int
        Classes with fewer than this many training samples are pruned.

    Returns
    -------
    (train, dev) : tuple of pandas.DataFrame
        Dev is restricted to the classes present in train.
    """
    train = build_files_dataframe(os.path.join(data_dir, 'train'))
    train = prune_file_list(train, 'label', min_samples)

    dev = build_files_dataframe(os.path.join(data_dir, 'dev'))
    dev_cols = list(dev.columns)

    # Restrict dev to the label set that survived pruning.
    classes = np.unique(train['label'])
    dev['keep'] = dev['label'].apply(lambda x: x in classes)
    dev = dev[dev['keep'] == True]

    # Shuffle both splits.
    train = train.sample(frac=1).reset_index(drop=True)
    dev = dev.sample(frac=1).reset_index(drop=True)

    # BUG FIX: `dev_cols` was computed but never used, so the temporary
    # 'keep' column leaked into the returned frame. Select the original
    # columns on return (matching load_data's behavior).
    return train, dev[dev_cols]
    # Tail of the CLI argument parser: the enclosing `def` (presumably
    # get_args, with `ap = argparse.ArgumentParser()` and the --base_dir
    # option referenced below) begins before this chunk — not visible here.
    ap.add_argument('--backbone', required=True, type=str)
    ap.add_argument('--pooling', required=True, type=str)
    ap.add_argument('--output_dir', required=True, type=str)
    ap.add_argument('--min_samples', required=True, type=int)
    ap.add_argument('--cores', required=True, type=int)
    ap.add_argument('--save_features', action='store_true')
    return ap.parse_args()


if __name__ == "__main__":
    args = get_args()

    # Load images and remove the classes with
    # too few examples.
    train = build_files_dataframe(os.path.join(args.base_dir, 'train'))
    train = prune_file_list(data=train, label_col='label',
                            min_samples=args.min_samples)
    n_classes = train['label'].nunique()
    print("We have {} classes.".format(n_classes))

    # Setup output directory (created recursively if missing).
    create_directory(args.output_dir, recursive=True)

    # Build the model and import the correct pre-processing
    # function. Each model uses a different function.
    # Maybe they're the same under the hood because
    # they are all trained with imagenet (something to look
    # into).
    # NOTE(review): the script appears to continue past this chunk —
    # `model` and `preprocess_input` are presumably used below.
    model, preprocess_input = model_factory(args.backbone, args.pooling)