# Assumed project-local imports (the original file's import block sits outside
# this snippet; the module names below are a guess and may need adjusting):
import config
import common
from imdataset import ImageDataset, SplitOptions  # assumed module path

# `cfg` (used by the functions further down) and `FNAME_END` are expected to be
# defined at module level; their definitions are not part of this snippet.


def main():
    config.init()
    # crop_size_stamp is kept for parity with the config lists (unused here).
    for crop_size, crop_size_stamp in zip(config.ALL_CROP_SIZE,
                                          config.ALL_CROP_SIZE_STAMP):
        crop = crop_size['crop']
        size = crop_size['size']
        dataset_path = common.dataset_path(config.DATASET, crop, size)
        out_validset_path = common.dataset_path(config.DATASET + '_sg_valid', crop, size)
        out_trainset_path = common.dataset_path(config.DATASET + '_sg_train', crop, size)

        print("")
        print("")
        print("Seed + Google train/valid set.")
        print("Original dataset: " + dataset_path)
        print("Out s+g trainset: " + out_trainset_path)
        print("Out s+g validset: " + out_validset_path)
        print("")

        trainingset = ImageDataset()
        print("Loading hdf5 dataset: {}".format(dataset_path))
        trainingset.load_hdf5(dataset_path)
        print("hdf5 file loaded.")

        # Seed images and google-crawled images are told apart by filename prefix.
        print("Getting sub dataset (seed dataset)")
        seeds_dataset = trainingset.sub_dataset_from_filename(
            filename_start_with="seed")

        print("Getting sub dataset (google dataset)")
        google_dataset = trainingset.sub_dataset_from_filename_multi(
            filename_start_with=["google"], filename_end_with=FNAME_END)

        # Hold out ~33% of the google images, per class, for validation.
        print("Splitting google dataset in train/valid")
        google_train, google_valid = google_dataset.validation_per_class_split(
            [SplitOptions("", 0.33)])

        # Duplicate the seed images so they weigh twice as much in training.
        print("Creating double_seeds_dataset")
        double_seeds_dataset = ImageDataset.merge_datasets(seeds_dataset,
                                                           seeds_dataset)

        print("Creating train dataset (merge google_train with double_seeds_dataset)")
        train = ImageDataset.merge_datasets(google_train, double_seeds_dataset)

        print("Creating valid dataset (merge google_valid with seeds_dataset)")
        valid = ImageDataset.merge_datasets(google_valid, seeds_dataset)

        print("Saving train on h5")
        train.save_hdf5(out_trainset_path)
        print("Saving valid on h5")
        valid.save_hdf5(out_validset_path)
        print("Done.")
        print("")

    print("All done.")
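
# A minimal, self-contained sketch of the per-class split that
# validation_per_class_split is assumed to perform above: for each class,
# roughly a `valid_ratio` fraction of its samples goes to the validation side
# and the rest to the training side. Hypothetical stand-in for illustration
# only; the real ImageDataset API may differ.
def _per_class_split_sketch(labels, valid_ratio=0.33):
    """Return (train_indices, valid_indices), split independently per class."""
    from collections import defaultdict
    by_class = defaultdict(list)
    for idx, label in enumerate(labels):
        by_class[label].append(idx)
    train_idx, valid_idx = [], []
    for indices in by_class.values():
        n_valid = int(round(len(indices) * valid_ratio))
        valid_idx.extend(indices[:n_valid])
        train_idx.extend(indices[n_valid:])
    return train_idx, valid_idx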
def exp_outlier_merge(sub_dataset=['_train', '_valid', '_test'],
                      outlier_dataset_name='outliers'):
    # For every feature-extractor net, append the outlier features to each
    # dataset split, relabelling the outliers as a brand-new class.
    datasets = [cfg.dataset + sd for sd in sub_dataset]
    for feat_net in cfg.nets:
        outliers_dataset = common.feat_dataset(outlier_dataset_name, feat_net)
        for dataset in datasets:
            feat_dataset = common.feat_dataset(dataset, feat_net)
            out_dataset_path = common.feat_path(dataset + "_outl", feat_net)
            print("")
            print("Features net: {}".format(feat_net))
            print("Input dataset: {}".format(
                common.feat_fname(dataset, feat_net)))
            print("Merging with: {}".format(
                common.feat_fname(outlier_dataset_name, feat_net)))
            print("Out feat dataset: {}".format(
                common.feat_fname(dataset + "_outl", feat_net)))
            out = ImageDataset.merge_datasets(feat_dataset, outliers_dataset,
                                              label_mode='new')
            out.save_hdf5(out_dataset_path)
            print("Done")
            print("")
    print("")
    print("All done.")
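
# Sketch of what merge_datasets(..., label_mode='new') is assumed to do above:
# the outlier samples are appended to the feature dataset under one fresh class
# id, so a downstream classifier can learn an explicit "outlier" class.
# Hypothetical illustration with plain numpy arrays, not the real API.
def _merge_with_new_label_sketch(feats, labels, outlier_feats):
    """Append outlier_feats to feats, labelling them with a fresh class id.

    feats: (N, D) array, labels: (N,) integer array, outlier_feats: (M, D) array.
    """
    import numpy as np
    new_label = labels.max() + 1  # first unused class id
    merged_feats = np.concatenate([feats, outlier_feats], axis=0)
    merged_labels = np.concatenate(
        [labels, np.full(len(outlier_feats), new_label, dtype=labels.dtype)])
    return merged_feats, merged_labels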
def merge_features_train_valid(feat_net='resnet50'):
    dataset_name = cfg.dataset
    trainset_name = dataset_name + '_train'
    validset_name = dataset_name + '_valid'
    print("Merging training and validation data-features (feature net: " +
          feat_net + ")")
    train_feat_set = common.feat_dataset(trainset_name, feat_net)
    valid_feat_set = common.feat_dataset(validset_name, feat_net)
    merged_feat_set = ImageDataset.merge_datasets(train_feat_set,
                                                  valid_feat_set)
    merged_dataset_path = common.feat_path(dataset_name, feat_net)
    merged_feat_set.save_hdf5(merged_dataset_path)
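
# Possible entry point (assumed; not part of the original snippet): build the
# seed+google splits first, then run the feature-level merges as needed.
if __name__ == '__main__':
    main()
    # exp_outlier_merge()
    # merge_features_train_valid(feat_net='resnet50')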