def _create_dataset(self, set_name):
    """Sample an evaluation split and hand it to ``AerialDataset.shared_dataset``.

    A ``Creator`` is built for ``self.dataset_path`` from the dimensions,
    preprocessing flag and std found on ``self.dataset_config``, and the
    dataset is loaded. The validation split is used when ``set_name`` is
    ``"valid"``; every other value selects the test split.

    :param set_name: name of the split to sample; only ``"valid"`` is
        treated specially.
    :return: whatever ``AerialDataset.shared_dataset`` returns for the
        sampled split (called with ``cast_to_int=True``).
    """
    config = self.dataset_config
    patch_dims = (config.input_dim, config.output_dim)
    use_preprocessing = config.use_preprocessing
    print("---- Using preprossing: {}".format(use_preprocessing))
    dataset_std = config.dataset_std
    samples_per_image = 200

    # NOTE: "preproccessing" (sic) is the keyword name Creator is called with.
    loader = Creator(
        self.dataset_path,
        dim=patch_dims,
        preproccessing=use_preprocessing,
        std=dataset_std,
    )
    loader.load_dataset()

    # Pick the split that will be sampled and wrapped as a shared variable.
    chosen_split = loader.valid if set_name == "valid" else loader.test

    wrapper = AerialDataset()
    sampled = loader.sample_data(chosen_split, samples_per_image)
    return wrapper.shared_dataset(sampled, cast_to_int=True)
def _create_dataset(self, set_name):
    """Sample an evaluation split and hand it to ``AerialDataset.shared_dataset``.

    A ``Creator`` is built for ``self.dataset_path`` from the dimensions,
    preprocessing flag and std found on ``self.dataset_config``, and the
    dataset is loaded. The validation split is used when ``set_name`` is
    ``"valid"``; every other value selects the test split.

    :param set_name: name of the split to sample; only ``"valid"`` is
        treated specially.
    :return: whatever ``AerialDataset.shared_dataset`` returns for the
        sampled split (called with ``cast_to_int=True``).
    """
    config = self.dataset_config
    patch_dims = (config.input_dim, config.output_dim)
    use_preprocessing = config.use_preprocessing
    print("---- Using preprossing: {}".format(use_preprocessing))
    dataset_std = config.dataset_std
    samples_per_image = 200

    # NOTE: "preproccessing" (sic) is the keyword name Creator is called with.
    loader = Creator(
        self.dataset_path,
        dim=patch_dims,
        preproccessing=use_preprocessing,
        std=dataset_std,
    )
    loader.load_dataset()

    # Pick the split that will be sampled and wrapped as a shared variable.
    chosen_split = loader.valid if set_name == "valid" else loader.test

    wrapper = AerialDataset()
    sampled = loader.sample_data(chosen_split, samples_per_image)
    return wrapper.shared_dataset(sampled, cast_to_int=True)
class CurriculumDataset(object):
    """Pre-generates a staged dataset on disk under ``store_path``.

    The constructor creates ``store_path`` with ``train``, ``valid`` and
    ``test`` sub-folders, builds a predictor from the teacher and loads the
    raw dataset through ``Creator``. ``create_dataset`` then samples and
    saves one training stage per threshold plus the test and validation sets.
    """

    def __init__(self, teacher, dataset_path, store_path, dataset_config, best_trade_off):
        """Prepare the output folders, the teacher predictor and the loader.

        :param teacher: mapping with ``'model'`` and ``'params'`` entries,
            passed to ``util.create_simple_predictor``.
        :param dataset_path: location handed to ``Creator``.
        :param store_path: output root; must not exist yet.
        :param dataset_config: configuration object providing the dimension,
            preprocessing, std, reduction, rotation and mixed-label settings
            read below.
        :param best_trade_off: forwarded to ``sample_data`` for every
            training stage.
        :raises Exception: if ``store_path`` already exists.
        """
        self.dataset_path = dataset_path
        self.store_path = store_path
        self.teacher = teacher
        self.dataset_config = dataset_config
        self.rotate = dataset_config.use_rotation
        self.trade_off = best_trade_off

        # Refuse to write into an existing folder so that a previously
        # generated dataset is never mixed with a new one.
        if os.path.exists(self.store_path):
            raise Exception("Store path already exists")
        else:
            os.makedirs(self.store_path)
            os.makedirs(os.path.join(self.store_path, "train"))
            os.makedirs(os.path.join(self.store_path, "valid"))
            os.makedirs(os.path.join(self.store_path, "test"))

        self.evaluate = util.create_simple_predictor(teacher['model'], teacher['params'])
        # NOTE: "preproccessing" (sic) is the keyword name Creator is called with.
        self.creator = Creator(
            self.dataset_path,
            dim=(self.dataset_config.input_dim, self.dataset_config.output_dim),
            preproccessing=self.dataset_config.use_preprocessing,
            std=self.dataset_config.dataset_std,
            reduce_training=self.dataset_config.reduce_training,
            reduce_testing=self.dataset_config.reduce_testing,
            reduce_validation=self.dataset_config.reduce_validation,
            only_mixed=self.dataset_config.only_mixed_labels,
            mix_ratio=self.dataset_config.mix_ratio,
        )
        self.creator.load_dataset()

    def create_dataset(self, is_baseline, thresholds=None, base_sample=100, secondary_sample=100):
        """Sample and store every training stage plus the test/valid sets.

        :param is_baseline: when true, every threshold is replaced by 1.0
            (the number of stages is kept).
        :param thresholds: one threshold per training stage; an ndarray, a
            list/tuple or a scalar. Defaults to ``np.arange(0.05, 1, 0.05)``.
        :param base_sample: sample count used for ``stage0`` and for the
            test and validation sets.
        :param secondary_sample: sample count used for every later stage.
        """
        print("---- Starting sampling. WARNING: this might take a while.")

        # Sampling at different thresholds.
        # `is None` is required here: `ndarray == None` compares elementwise
        # and the resulting array cannot be used as an `if` condition.
        if thresholds is None:
            thresholds = np.arange(0.05, 1, 0.05)
        else:
            # Accept plain sequences and scalars as well as arrays.
            thresholds = np.atleast_1d(thresholds)

        if is_baseline:
            thresholds = np.ones(thresholds.shape)

        print("---- Main dataset")
        self._generate_stage("stage0", thresholds[0], base_sample)
        for i in range(1, thresholds.shape[0]):
            print("---- Stage{} dataset".format(i))
            self._generate_stage("stage{}".format(i), thresholds[i], secondary_sample)

        self._generate_set("test", self.creator.test, base_sample)
        self._generate_set("valid", self.creator.valid, base_sample)

    def _store_examples(self, stage_path, data, labels):
        """Save ``data`` and ``labels`` under ``stage_path``.

        Creates the ``labels`` and ``data`` sub-folders and writes each array
        with ``np.save`` using the file stem ``examples``.
        """
        os.makedirs(os.path.join(stage_path, "labels"))
        os.makedirs(os.path.join(stage_path, "data"))
        np.save(os.path.join(stage_path, "labels", "examples"), labels)
        np.save(os.path.join(stage_path, "data", "examples"), data)

    def _generate_set(self, set_name, dataset, samples):
        '''
        Validation and test data is also pre-generated. This means the
        result is self contained.

        :param set_name: sub-folder of the store path to write into.
        :param dataset: split passed to ``Creator.sample_data``.
        :param samples: sample count passed to ``Creator.sample_data``.
        '''
        data, labels = self.creator.sample_data(dataset, samples)
        stage_path = os.path.join(self.store_path, set_name)
        self._store_examples(stage_path, data, labels)

    def _generate_stage(self, name, threshold, samples):
        '''
        Training set is a special case, which involves a training folder with
        several stages. These stages can be introduced in the active training
        data over time, slowly transforming the simple distribution to the
        real dataset distribution of data.

        :param name: stage folder name created below ``train``.
        :param threshold: passed to ``sample_data`` as ``curriculum_threshold``.
        :param samples: sample count passed to ``sample_data``.
        :return: None
        '''
        print("SAMPLES ", samples)
        stage_path = os.path.join(self.store_path, "train", name)
        os.makedirs(stage_path)

        data, labels = self.creator.sample_data(
            self.creator.train,
            samples,
            mixed_labels=self.dataset_config.only_mixed_labels,
            curriculum=self.evaluate,
            curriculum_threshold=threshold,
            rotation=self.rotate,
            best_trade_off=self.trade_off,
        )
        self._store_examples(stage_path, data, labels)
# Load the stored teacher and build a predictor from its model and parameters.
teacher = store.load_params(path=teacher_location)
evaluate = util.create_simple_predictor(teacher['model'], teacher['params'])

if not verify:
    # Sample fresh training examples from the dataset at pr_path.
    # NOTE: "preproccessing" (sic) is the keyword name Creator is called with.
    creator = Creator(pr_path,
                      dim=(dataset_params.input_dim, dataset_params.output_dim),
                      preproccessing=dataset_params.use_preprocessing,
                      std=dataset_params.dataset_std,
                      reduce_training=dataset_params.reduce_training,
                      reduce_testing=dataset_params.reduce_testing,
                      reduce_validation=dataset_params.reduce_validation)
    creator.load_dataset()
    data, labels = creator.sample_data(creator.train, samples, rotation=dataset_params.use_rotation)
else:
    # Otherwise read the "train" set of the requested stage from dataset_path.
    aerial_data = AerialCurriculumDataset()
    data, labels = aerial_data.load_set(dataset_path, "train", stage=stage)

# Accumulators and counters, all starting empty / at zero.
road_diff = []
non_road_diff = []
all_diff = []
pred_diff = []
nr_with_road = 0
nr_with_pred = 0
best_trade_off = tradeoff

# Number of examples, taken from the first axis of data.
nr_of_examples = data.shape[0]
# Visit every example by index.
for i in range(nr_of_examples):
# Obtain the examples either from an already stored "train" stage
# (verify truthy) or by sampling the dataset at pr_path (verify falsy).
if verify:
    aerial_data = AerialCurriculumDataset()
    data, labels = aerial_data.load_set(dataset_path, "train", stage=stage)
else:
    # NOTE: "preproccessing" (sic) is the keyword name Creator is called with.
    creator = Creator(
        pr_path,
        dim=(dataset_params.input_dim, dataset_params.output_dim),
        preproccessing=dataset_params.use_preprocessing,
        std=dataset_params.dataset_std,
        reduce_training=dataset_params.reduce_training,
        reduce_testing=dataset_params.reduce_testing,
        reduce_validation=dataset_params.reduce_validation,
    )
    creator.load_dataset()
    data, labels = creator.sample_data(
        creator.train, samples, rotation=dataset_params.use_rotation
    )

# Four independent, initially empty accumulators and two zeroed counters.
road_diff, non_road_diff, all_diff, pred_diff = [], [], [], []
nr_with_road = nr_with_pred = 0
best_trade_off = tradeoff
# Number of examples, taken from the first axis of data.
nr_of_examples = data.shape[0]
class CurriculumDataset(object):
    """Pre-generates a staged dataset on disk under ``store_path``.

    The constructor creates ``store_path`` with ``train``, ``valid`` and
    ``test`` sub-folders, builds a predictor from the teacher and loads the
    raw dataset through ``Creator``. ``create_dataset`` then samples and
    saves one training stage per threshold plus the test and validation sets.
    """

    def __init__(self, teacher, dataset_path, store_path, dataset_config, best_trade_off):
        """Prepare the output folders, the teacher predictor and the loader.

        :param teacher: mapping with ``'model'`` and ``'params'`` entries,
            passed to ``util.create_simple_predictor``.
        :param dataset_path: location handed to ``Creator``.
        :param store_path: output root; must not exist yet.
        :param dataset_config: configuration object providing the dimension,
            preprocessing, std, reduction, rotation and mixed-label settings
            read below.
        :param best_trade_off: forwarded to ``sample_data`` for every
            training stage.
        :raises Exception: if ``store_path`` already exists.
        """
        self.dataset_path = dataset_path
        self.store_path = store_path
        self.teacher = teacher
        self.dataset_config = dataset_config
        self.rotate = dataset_config.use_rotation
        self.trade_off = best_trade_off

        # Refuse to write into an existing folder so that a previously
        # generated dataset is never mixed with a new one.
        if os.path.exists(self.store_path):
            raise Exception("Store path already exists")
        else:
            os.makedirs(self.store_path)
            os.makedirs(os.path.join(self.store_path, "train"))
            os.makedirs(os.path.join(self.store_path, "valid"))
            os.makedirs(os.path.join(self.store_path, "test"))

        self.evaluate = util.create_simple_predictor(teacher['model'], teacher['params'])
        # NOTE: "preproccessing" (sic) is the keyword name Creator is called with.
        self.creator = Creator(
            self.dataset_path,
            dim=(self.dataset_config.input_dim, self.dataset_config.output_dim),
            preproccessing=self.dataset_config.use_preprocessing,
            std=self.dataset_config.dataset_std,
            reduce_training=self.dataset_config.reduce_training,
            reduce_testing=self.dataset_config.reduce_testing,
            reduce_validation=self.dataset_config.reduce_validation,
            only_mixed=self.dataset_config.only_mixed_labels,
            mix_ratio=self.dataset_config.mix_ratio,
        )
        self.creator.load_dataset()

    def create_dataset(self, is_baseline, thresholds=None, base_sample=100, secondary_sample=100):
        """Sample and store every training stage plus the test/valid sets.

        :param is_baseline: when true, every threshold is replaced by 1.0
            (the number of stages is kept).
        :param thresholds: one threshold per training stage; an ndarray, a
            list/tuple or a scalar. Defaults to ``np.arange(0.05, 1, 0.05)``.
        :param base_sample: sample count used for ``stage0`` and for the
            test and validation sets.
        :param secondary_sample: sample count used for every later stage.
        """
        print("---- Starting sampling. WARNING: this might take a while.")

        # Sampling at different thresholds.
        # `is None` is required here: `ndarray == None` compares elementwise
        # and the resulting array cannot be used as an `if` condition.
        if thresholds is None:
            thresholds = np.arange(0.05, 1, 0.05)
        else:
            # Accept plain sequences and scalars as well as arrays.
            thresholds = np.atleast_1d(thresholds)

        if is_baseline:
            thresholds = np.ones(thresholds.shape)

        print("---- Main dataset")
        self._generate_stage("stage0", thresholds[0], base_sample)
        for i in range(1, thresholds.shape[0]):
            print("---- Stage{} dataset".format(i))
            self._generate_stage("stage{}".format(i), thresholds[i], secondary_sample)

        self._generate_set("test", self.creator.test, base_sample)
        self._generate_set("valid", self.creator.valid, base_sample)

    def _store_examples(self, stage_path, data, labels):
        """Save ``data`` and ``labels`` under ``stage_path``.

        Creates the ``labels`` and ``data`` sub-folders and writes each array
        with ``np.save`` using the file stem ``examples``.
        """
        os.makedirs(os.path.join(stage_path, "labels"))
        os.makedirs(os.path.join(stage_path, "data"))
        np.save(os.path.join(stage_path, "labels", "examples"), labels)
        np.save(os.path.join(stage_path, "data", "examples"), data)

    def _generate_set(self, set_name, dataset, samples):
        '''
        Validation and test data is also pre-generated. This means the
        result is self contained.

        :param set_name: sub-folder of the store path to write into.
        :param dataset: split passed to ``Creator.sample_data``.
        :param samples: sample count passed to ``Creator.sample_data``.
        '''
        data, labels = self.creator.sample_data(dataset, samples)
        stage_path = os.path.join(self.store_path, set_name)
        self._store_examples(stage_path, data, labels)

    def _generate_stage(self, name, threshold, samples):
        '''
        Training set is a special case, which involves a training folder with
        several stages. These stages can be introduced in the active training
        data over time, slowly transforming the simple distribution to the
        real dataset distribution of data.

        :param name: stage folder name created below ``train``.
        :param threshold: passed to ``sample_data`` as ``curriculum_threshold``.
        :param samples: sample count passed to ``sample_data``.
        :return: None
        '''
        print("SAMPLES ", samples)
        stage_path = os.path.join(self.store_path, "train", name)
        os.makedirs(stage_path)

        data, labels = self.creator.sample_data(
            self.creator.train,
            samples,
            mixed_labels=self.dataset_config.only_mixed_labels,
            curriculum=self.evaluate,
            curriculum_threshold=threshold,
            rotation=self.rotate,
            best_trade_off=self.trade_off,
        )
        self._store_examples(stage_path, data, labels)
# Layout of the showcase image: x_grid columns by y_grid rows of cells,
# separated by `padding` pixels.
dataset_dir = "/home/olav/Pictures/Norwegian_roads_dataset_vbase"
x_grid = 5
y_grid = 4
dim_data = 64
dim_label = 16
padding = 8


def to_rgb(im, w, h):
    """Return a (w, h, 3) uint8 array with ``im`` copied into every channel.

    :param im: single-channel image; must be assignable to a (w, h) slice.
    :param w: size of the first axis of the result.
    :param h: size of the second axis of the result.
    :return: new ``np.uint8`` array of shape (w, h, 3).
    """
    ret = np.empty((w, h, 3), dtype=np.uint8)
    ret[:, :, 2] = ret[:, :, 1] = ret[:, :, 0] = im
    return ret


# Floor division keeps the offset an int under Python 3 / true division
# (same value as before on Python 2).
l_pad = (dim_data - dim_label) // 2

# NOTE: "preproccessing" (sic) is the keyword name Creator is called with.
c = Creator(dataset_dir, preproccessing=False, only_mixed=True)
c.load_dataset()
data, labels = c.sample_data(c.train, 10, mixed_labels=True)

# A real list is required: random.shuffle and .pop() need a mutable
# sequence, which a Python 3 range object is not.
shuffled_index = list(range(len(data)))
random.shuffle(shuffled_index)

# Each cell is two dim_data-wide tiles next to each other plus padding.
width = x_grid * 2 * dim_data + (padding * x_grid)
height = y_grid * dim_data + (padding * y_grid)
# White canvas.
patch_showcase = np.zeros((height, width, 3), dtype=np.uint8)
patch_showcase[:, :, :] = (255, 255, 255)

# Puts the labels and images in a grid pattern, with padding in between.
# NOTE(review): one index is popped per cell, so this presumably needs at
# least x_grid * y_grid sampled examples - confirm against sample_data.
for i in range(0, height, dim_data + padding):
    for j in range(0, width, (dim_data * 2) + padding):
        idx = shuffled_index.pop()
        d = data[idx]
        l = labels[idx] * 255
# Layout of the showcase image: x_grid columns by y_grid rows of cells,
# separated by `padding` pixels.
y_grid = 4
dim_data = 64
dim_label = 16
padding = 8


def to_rgb(im, w, h):
    """Return a (w, h, 3) uint8 array with ``im`` copied into every channel.

    :param im: single-channel image; must be assignable to a (w, h) slice.
    :param w: size of the first axis of the result.
    :param h: size of the second axis of the result.
    :return: new ``np.uint8`` array of shape (w, h, 3).
    """
    ret = np.empty((w, h, 3), dtype=np.uint8)
    ret[:, :, 2] = ret[:, :, 1] = ret[:, :, 0] = im
    return ret


# Floor division keeps the offset an int under Python 3 / true division
# (same value as before on Python 2).
l_pad = (dim_data - dim_label) // 2

# NOTE: "preproccessing" (sic) is the keyword name Creator is called with.
c = Creator(dataset_dir, preproccessing=False, only_mixed=True)
c.load_dataset()
data, labels = c.sample_data(c.train, 10, mixed_labels=True)

# A real list is required: random.shuffle and .pop() need a mutable
# sequence, which a Python 3 range object is not.
shuffled_index = list(range(len(data)))
random.shuffle(shuffled_index)

# Each cell is two dim_data-wide tiles next to each other plus padding.
width = x_grid * 2 * dim_data + (padding * x_grid)
height = y_grid * dim_data + (padding * y_grid)
# White canvas.
patch_showcase = np.zeros((height, width, 3), dtype=np.uint8)
patch_showcase[:, :, :] = (255, 255, 255)

# Puts the labels and images in a grid pattern, with padding in between.
# NOTE(review): one index is popped per cell, so this presumably needs at
# least x_grid * y_grid sampled examples - confirm against sample_data.
for i in range(0, height, dim_data + padding):
    for j in range(0, width, (dim_data * 2) + padding):
        idx = shuffled_index.pop()
        d = data[idx]
        l = labels[idx] * 255