def load(self, dataset_path, params, batch_size=1):
    print_section('Creating aerial image dataset')
    self.std = params.dataset_std
    chunks = params.chunk_size
    #TODO: ensure that the dataset is as expected.
    creator = Creator(
        dataset_path,
        dim=(params.input_dim, params.output_dim),
        rotation=params.use_rotation,
        preproccessing=params.use_preprocessing,
        std=self.std,
        only_mixed=params.only_mixed_labels,
        reduce_testing=params.reduce_testing,
        reduce_training=params.reduce_training,
        reduce_validation=params.reduce_validation
    )
    train, valid, test = creator.dynamically_create(
        params.samples_per_image,
        enable_label_noise=params.use_label_noise,
        label_noise=params.label_noise,
        only_mixed=params.only_mixed_labels
    )

    #Testing dataset size requirements.
    AerialDataset.dataset_check('train', train, batch_size)
    AerialDataset.dataset_check('valid', valid, batch_size)
    AerialDataset.dataset_check('test', test, batch_size)

    AerialDataset.dataset_shared_stats(train[0].shape, train[1].shape, chunks)

    self.set_nr_examples(train, valid, test)
    nr_of_chunks = AerialDataset.dataset_sizes(train, valid, test, chunks)
    training_chunks = self._chunkify(train, nr_of_chunks, batch_size)

    AerialDataset.dataset_chunk_stats(
        len(training_chunks),
        len(training_chunks[0][0]),
        len(training_chunks[-1][0])
    )

    self.active = self.shared_dataset(training_chunks[0], cast_to_int=False)
    self.set['train'] = self.active[0], T.cast(self.active[1], 'int32')
    self.set['validation'] = self.shared_dataset(valid, cast_to_int=True)
    self.set['test'] = self.shared_dataset(test, cast_to_int=True)

    #Not stored on the GPU, unlike the shared variables defined above.
    self.all_training = training_chunks
    return True
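#A minimal usage sketch for load() above (not part of the original source).
#The attribute names are exactly the ones load() reads from its params
#argument; the values here are placeholders.
from types import SimpleNamespace

params = SimpleNamespace(
    dataset_std=0.18,          #placeholder value
    chunk_size=4096,           #placeholder value
    input_dim=64,
    output_dim=16,
    use_rotation=True,
    use_preprocessing=True,
    only_mixed_labels=False,
    reduce_training=1,
    reduce_testing=1,
    reduce_validation=1,
    samples_per_image=100,
    use_label_noise=False,
    label_noise=0.0,
)

dataset = AerialDataset()
dataset.load('/path/to/aerial/dataset', params, batch_size=16)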
def _create_dataset(self, set_name):
    dim = (self.dataset_config.input_dim, self.dataset_config.output_dim)
    path = self.dataset_path
    preprocessing = self.dataset_config.use_preprocessing
    print("---- Using preprocessing: {}".format(preprocessing))
    std = self.dataset_config.dataset_std
    samples_per_image = 200

    creator = Creator(path, dim=dim, preproccessing=preprocessing, std=std)
    creator.load_dataset()

    #Creating a shared variable of sampled validation or test data.
    raw_set = None
    if set_name == "valid":
        raw_set = creator.valid
    else:
        raw_set = creator.test

    aerial = AerialDataset()
    return aerial.shared_dataset(
        creator.sample_data(raw_set, samples_per_image),
        cast_to_int=True
    )
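#A minimal usage sketch (assumption, not in the original source). Since
#shared_dataset() returns a (data, labels) pair of shared variables, the
#sampled sets can be unpacked directly; `evaluator` stands in for whatever
#object _create_dataset() is a method of.
valid_x, valid_y = evaluator._create_dataset("valid")
test_x, test_y = evaluator._create_dataset("test")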
class CurriculumDataset(object):

    def __init__(self, teacher, dataset_path, store_path, dataset_config, best_trade_off):
        self.dataset_path = dataset_path
        self.store_path = store_path
        self.teacher = teacher
        self.dataset_config = dataset_config
        self.rotate = dataset_config.use_rotation
        self.trade_off = best_trade_off

        if os.path.exists(self.store_path):
            raise Exception("Store path already exists")
        else:
            os.makedirs(self.store_path)
            os.makedirs(os.path.join(self.store_path, "train"))
            os.makedirs(os.path.join(self.store_path, "valid"))
            os.makedirs(os.path.join(self.store_path, "test"))

        self.evaluate = util.create_simple_predictor(teacher['model'], teacher['params'])
        self.creator = Creator(
            self.dataset_path,
            dim=(self.dataset_config.input_dim, self.dataset_config.output_dim),
            preproccessing=self.dataset_config.use_preprocessing,
            std=self.dataset_config.dataset_std,
            reduce_training=self.dataset_config.reduce_training,
            reduce_testing=self.dataset_config.reduce_testing,
            reduce_validation=self.dataset_config.reduce_validation,
            only_mixed=self.dataset_config.only_mixed_labels,
            mix_ratio=self.dataset_config.mix_ratio
        )
        self.creator.load_dataset()

    def create_dataset(self, is_baseline, thresholds=None, base_sample=100, secondary_sample=100):
        print("---- Starting sampling. WARNING: this might take a while.")

        #Sampling at different curriculum thresholds.
        if thresholds is None:
            thresholds = np.arange(0.05, 1, 0.05)
        if is_baseline:
            thresholds = np.ones(thresholds.shape)

        print("---- Main dataset")
        self._generate_stage("stage0", thresholds[0], base_sample)

        for i in range(1, thresholds.shape[0]):
            print("---- Stage{} dataset".format(i))
            self._generate_stage("stage{}".format(i), thresholds[i], secondary_sample)

        self._generate_set("test", self.creator.test, base_sample)
        self._generate_set("valid", self.creator.valid, base_sample)

    def _generate_set(self, set_name, dataset, samples):
        '''
        Validation and test data are also pre-generated, which makes
        the resulting dataset self-contained.
        '''
        data, labels = self.creator.sample_data(dataset, samples)
        stage_path = os.path.join(self.store_path, set_name)
        os.makedirs(os.path.join(stage_path, "labels"))
        os.makedirs(os.path.join(stage_path, "data"))
        np.save(os.path.join(stage_path, "labels", "examples"), labels)
        np.save(os.path.join(stage_path, "data", "examples"), data)

    def _generate_stage(self, name, threshold, samples):
        '''
        The training set is a special case: the training folder contains
        several stages. These stages can be introduced into the active
        training data over time, slowly transforming the simple
        distribution into the real distribution of the dataset.
        '''
        print("SAMPLES ", samples)
        stage_path = os.path.join(self.store_path, "train", name)
        os.makedirs(stage_path)
        data, labels = self.creator.sample_data(
            self.creator.train,
            samples,
            mixed_labels=self.dataset_config.only_mixed_labels,
            curriculum=self.evaluate,
            curriculum_threshold=threshold,
            rotation=self.rotate,
            best_trade_off=self.trade_off
        )
        os.makedirs(os.path.join(stage_path, "labels"))
        os.makedirs(os.path.join(stage_path, "data"))
        np.save(os.path.join(stage_path, "labels", "examples"), labels)
        np.save(os.path.join(stage_path, "data", "examples"), data)
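#A minimal sketch (not from the original source) of generating a curriculum
#dataset with the class above. The paths, the dataset_config object, and the
#trade-off value are placeholders; per the class docstring, stage0 holds the
#simplest distribution and later stages move toward the real one.
store = ParamStorage()
teacher = store.load_params(path='/path/to/teacher.params')
curriculum = CurriculumDataset(
    teacher,
    '/path/to/aerial/dataset',    #source imagery
    '/path/to/curriculum_store',  #must not already exist
    dataset_config,               #config object read in __init__ (placeholder)
    best_trade_off=0.5            #placeholder trade-off value
)
curriculum.create_dataset(is_baseline=False, thresholds=np.arange(0.05, 1, 0.05))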
#Dataset path. Config used if not supplied.
is_alt_dataset, alt_dataset = get_command('-dataset')
if is_alt_dataset:
    dataset_path = alt_dataset
#==============================================================

store = ParamStorage()
teacher = store.load_params(path=teacher_location)
evaluate = util.create_simple_predictor(teacher['model'], teacher['params'])

if not verify:
    creator = Creator(
        pr_path,
        dim=(dataset_params.input_dim, dataset_params.output_dim),
        preproccessing=dataset_params.use_preprocessing,
        std=dataset_params.dataset_std,
        reduce_training=dataset_params.reduce_training,
        reduce_testing=dataset_params.reduce_testing,
        reduce_validation=dataset_params.reduce_validation
    )
    creator.load_dataset()
    data, labels = creator.sample_data(
        creator.train,
        samples,
        rotation=dataset_params.use_rotation
    )
else:
    aerial_data = AerialCurriculumDataset()
    data, labels = aerial_data.load_set(dataset_path, "train", stage=stage)

road_diff = []
non_road_diff = []
all_diff = []
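#Sketch (assumption, not in the original source): _generate_stage() above
#writes each stage as <store>/train/<stage>/data/examples.npy and
#.../labels/examples.npy, so a stored stage can also be read back with
#plain NumPy if AerialCurriculumDataset is unavailable.
stage_dir = os.path.join(dataset_path, "train", "stage0")
data = np.load(os.path.join(stage_dir, "data", "examples.npy"))
labels = np.load(os.path.join(stage_dir, "labels", "examples.npy"))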
'''
... width and height of dim_label.
'''
import random

import numpy as np

dataset_dir = "/home/olav/Pictures/Norwegian_roads_dataset_vbase"
x_grid = 5
y_grid = 4
dim_data = 64
dim_label = 16
padding = 8


def to_rgb(im, w, h):
    #Replicate a single-channel image into all three RGB channels.
    ret = np.empty((w, h, 3), dtype=np.uint8)
    ret[:, :, 2] = ret[:, :, 1] = ret[:, :, 0] = im
    return ret


l_pad = (dim_data - dim_label) // 2

c = Creator(dataset_dir, preproccessing=False, only_mixed=True)
c.load_dataset()
data, labels = c.sample_data(c.train, 10, mixed_labels=True)

shuffled_index = list(range(len(data)))
random.shuffle(shuffled_index)

width = x_grid * 2 * dim_data + (padding * x_grid)
height = y_grid * dim_data + (padding * y_grid)
patch_showcase = np.zeros((height, width, 3), dtype=np.uint8)
patch_showcase[:, :, :] = (255, 255, 255)

#Put the labels and images in a grid pattern, with padding in between.
for i in range(0, height, dim_data + padding):
    for j in range(0, width, (dim_data * 2) + padding):
        idx = shuffled_index.pop()
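#A possible way to inspect the finished grid (not in the original snippet):
#once the loop above has pasted the patches, the uint8 array can be written
#out with Pillow.
from PIL import Image

Image.fromarray(patch_showcase).save('patch_showcase.png')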