# NOTE: imports below are inferred from usage in these examples; the module
# paths are assumptions and may need adjusting to the real project layout.
# FNAME_END (a filename-suffix filter for the google images) is defined
# elsewhere in the project.
import config
import config as cfg  # assumed: `cfg` in the later examples is the same config module
import common
from imagedataset import ImageDataset, SplitOptions  # hypothetical module path


def main():
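    """Build seed+google train/valid splits for every configured crop size.

    The google images are split per class (ratio 0.33, judging by the
    SplitOptions call below); seed images appear twice in the resulting
    training set and once in the validation set.
    """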
    config.init()

    for crop_size in config.ALL_CROP_SIZE:
        crop = crop_size['crop']
        size = crop_size['size']

        dataset_path = common.dataset_path(config.DATASET, crop, size)
        out_validset_path = common.dataset_path(config.DATASET + '_sg_valid',
                                                crop, size)
        out_trainset_path = common.dataset_path(config.DATASET + '_sg_train',
                                                crop, size)

        print("")
        print("")
        print("Seed + Google train/valid set.")
        print("Original dataset: " + dataset_path)
        print("Out s+g trainset: " + out_trainset_path)
        print("Out s+g validset: " + out_validset_path)
        print("")

        trainingset = ImageDataset()
        print("loading hdf5 dataset set: {}".format(dataset_path))
        trainingset.load_hdf5(dataset_path)
        print("hdf5 file loaded.")

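        # Filename conventions (inferred from the calls below): seed images
        # start with "seed"; google images start with "google" and end with
        # the externally defined FNAME_END suffix.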
        print("Getting sub dataset (seed dataset)")
        seeds_dataset = trainingset.sub_dataset_from_filename(
            filename_start_with="seed")
        print("Getting sub dataset (google dataset)")
        google_dataset = trainingset.sub_dataset_from_filename_multi(
            filename_start_with=["google"], filename_end_with=FNAME_END)
        print("Splitting google dataset in train/valid")
        google_train, google_valid = google_dataset.validation_per_class_split(
            [SplitOptions("", 0.33)])

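        # Merging seeds_dataset with itself duplicates every seed image,
        # presumably to up-weight the seeds relative to the google images.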
        print("Creating double_seeds_dataset")
        double_seeds_dataset = ImageDataset.merge_datasets(
            seeds_dataset, seeds_dataset)
        print(
            "Creating train dataset (merge google_train with double_seeds_dataset)"
        )
        train = ImageDataset.merge_datasets(google_train, double_seeds_dataset)
        print("Creating valid dataset (merge google_valid with seeds_dataset)")
        valid = ImageDataset.merge_datasets(google_valid, seeds_dataset)

        print("Saving train on h5")
        train.save_hdf5(out_trainset_path)
        print("Saving valid on h5")
        valid.save_hdf5(out_validset_path)
        print("Done.")

    print("")
    print("All done.")


def exp_outlier_merge(sub_dataset=('_train', '_valid', '_test'),
                      outlier_dataset_name='outliers'):
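    """Merge the outlier feature set into each listed split of cfg.dataset.

    For every feature net in cfg.nets, the outlier features are appended to
    the split's features and the result is saved under a "_outl" suffix.
    """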

    datasets = [cfg.dataset + sd for sd in sub_dataset]

    for feat_net in cfg.nets:

        outliers_dataset = common.feat_dataset(outlier_dataset_name, feat_net)

        for dataset in datasets:
            feat_dataset = common.feat_dataset(dataset, feat_net)
            out_dataset_path = common.feat_path(dataset + "_outl", feat_net)
            print("")
            print("Features net:     {}".format(feat_net))
            print("Input dataset:    {}".format(
                common.feat_fname(dataset, feat_net)))
            print("Merging with:     {}".format(
                common.feat_fname(outlier_dataset_name, feat_net)))
            print("Out feat dataset: {}".format(
                common.feat_fname(dataset + "_outl", feat_net)))

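            # label_mode='new' comes from the original call; presumably it
            # gives the merged-in outliers a fresh class label rather than
            # reusing existing ones.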
            out = ImageDataset.merge_datasets(feat_dataset,
                                              outliers_dataset,
                                              label_mode='new')
            out.save_hdf5(out_dataset_path)
            print("Done")
        print("")

    print("")
    print("All done.")


def merge_features_train_valid(feat_net='resnet50'):
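    """Merge the train and valid feature sets of cfg.dataset into one file."""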
    dataset_name = cfg.dataset
    trainset_name = dataset_name + '_train'
    validset_name = dataset_name + '_valid'

    print("Merging training and validation data-features (feature net: " + feat_net + ")")

    train_feat_set = common.feat_dataset(trainset_name, feat_net)
    valid_feat_set = common.feat_dataset(validset_name, feat_net)

    merged_feat_set = ImageDataset.merge_datasets(train_feat_set, valid_feat_set)

    merged_dataset_path = common.feat_path(dataset_name, feat_net)
    merged_feat_set.save_hdf5(merged_dataset_path)
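

# Usage sketch, not part of the original examples: config must be initialized
# first (config.init() is assumed to handle this, as in main() above), and
# cfg.nets is assumed to list the feature-net names (e.g. 'resnet50').
if __name__ == '__main__':
    config.init()
    for net in cfg.nets:
        merge_features_train_valid(feat_net=net)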