def from_schema(dls, schema):
    """Build an ``Input`` from a schema mapping.

    Optional top-level entries (``csv_file_path``, ``train_csv_file_path``,
    ``validation_scv_file_path``, ``header``, ``delimiter``) are copied onto
    the new object only when present in ``schema``; ``delimiter`` is passed
    through ``str()`` first.  Every entry of ``schema['columns']`` is turned
    into a column object chosen by its ``'type'`` value.

    :param dls: unused by the body (NOTE(review): presumably the class, i.e.
        this is meant to be a classmethod -- confirm against the decorator).
    :param schema: mapping with a mandatory ``'columns'`` entry.
    :return: the populated ``Input`` instance.
    :raises TypeError: if a column's type matches none of the handled types.
    """
    result = Input()

    # Entries that are copied unchanged; the 'scv' spelling is the key and
    # attribute name the code actually uses, so it is kept as-is.
    for key in ('csv_file_path', 'train_csv_file_path',
                'validation_scv_file_path', 'header'):
        if key in schema:
            setattr(result, key, schema[key])
    if 'delimiter' in schema:
        result.delimiter = str(schema['delimiter'])

    from img2d import Img2DColumn

    parsed_columns = []
    for column_schema in schema['columns']:
        column_type = str(column_schema['type'])
        if column_type == Column.Type.NUMERIC:
            column = NumericColumn.from_schema(column_schema)
        elif column_type == Column.Type.VECTOR:
            column = VectorColumn.from_schema(column_schema)
        elif column_type == Column.Type.CATEGORICAL:
            column = CategoricalColumn.from_schema(column_schema)
        elif column_type == Column.Type.IMG_2D:
            column = Img2DColumn.from_schema(column_schema)
            # The image column is handed the directory of the main CSV when
            # the input has that attribute, otherwise of the training CSV.
            if hasattr(result, 'csv_file_path'):
                source_csv = result.csv_file_path
            else:
                source_csv = result.train_csv_file_path
            column.csv_file_path(os.path.dirname(source_csv))
        else:
            raise TypeError("Unsupported column type: %s" % column_type)
        parsed_columns.append(column)

    result.columns = parsed_columns
    return result
def test_img2d_ser_de_is_raw_img_false(self):
    """Serialize then deserialize a read image with ``is_raw_img=False``.

    The deserialized value must equal the first element of what the
    column's reader returned for the test image path.
    """
    column = Img2DColumn(columns_indexes=[0],
                         pre_transforms=[],
                         post_transforms=[],
                         is_raw_img=False)
    reader, codec = column.reader, column.ser_de
    loaded = reader.read([self.test_img_file_path])
    restored = codec.deserialize(codec.serialize(loaded))
    self.assertTrue(np.array_equal(loaded[0], restored))
def test_img2d_column_metadata(self):
    """Merge five metadata objects built from the same image.

    Each metadata object aggregates one read of the test image; after
    merging them into the column's metadata, its ``img`` must equal the
    image as read directly.
    """
    column = Img2DColumn(columns_indexes=[0],
                         pre_transforms=[],
                         post_transforms=[])

    partials = []
    for _ in range(5):
        image, _img_fmt = column.reader.read([self.test_img_file_path])
        partial = Img2DColumnMetadata()
        partial.aggregate(img=image)
        partials.append(partial)
    column.metadata.merge(partials)

    merged_img = column.metadata.img
    reference_img = column.reader.read([self.test_img_file_path])[0]
    # Every aggregated sample is the same image, so the merged result is
    # expected to match that image exactly.
    self.assertTrue(np.array_equal(merged_img, reference_img))
def create_test_dataset(test_dir, test_csv_file_path, dataset_name, header=False, is_related_path=False):
    """Build a dataset from a test CSV file.

    The schema is read from ``test_csv_file_path``, columns in the range
    ``(2, 4)`` are merged into ``'col_vector'``, and a categorical, a
    numeric, a vector and an ``Img2DColumn`` column are registered on the
    input before the dataset is built.

    :param test_dir: passed to ``Dataset.Builder`` as its third argument.
    :param test_csv_file_path: CSV file the schema is read from.
    :param dataset_name: passed to ``Dataset.Builder`` as its second argument.
    :param header: forwarded to ``Schema.from_csv``; when truthy the
        ``*_h`` column names are used instead of the plain ones.
    :param is_related_path: forwarded to ``Img2DColumn``.
    :return: the result of ``Dataset.Builder(...).build()``.
    """
    if header:
        categorical_name, numeric_name, image_name = 'col_0_h', 'col_1_h', 'col_5_h'
    else:
        categorical_name, numeric_name, image_name = 'col_0', 'col_1', 'col_5'

    schema = Schema.from_csv(csv_path=test_csv_file_path, header=header)
    schema.merge_columns_in_range('col_vector', (2, 4))

    dataset_input = Input(schema)
    dataset_input.add_categorical_column(categorical_name)
    dataset_input.add_numeric_column(numeric_name)
    dataset_input.add_vector_column('col_vector')
    image_column = Img2DColumn(is_related_path=is_related_path)
    dataset_input.add_column(image_name, image_column)

    builder = Dataset.Builder(dataset_input, dataset_name, test_dir, parallelism_level=2)
    return builder.build()
def test_write_read_record_raw_img_false(self):
    """Write one CSV row as an HDF5 record, read it back, compare the image.

    The image column is created with ``is_raw_img=False``; the value read
    back for ``'col_5'`` must equal the test image loaded with ``skimgio``.
    """
    schema = Schema.from_csv(csv_path=self.test_csv_file_path)
    schema.merge_columns_in_range('col_vector', (2, 4))

    record_input = Input(schema)
    record_input.add_categorical_column('col_0')
    for column in record_input.columns:
        if column.name != 'col_0':
            continue
        # `categories` is not assigned in this method; it is resolved from
        # an enclosing scope.
        col_metadata = CategoricalColumnMetadata()
        col_metadata._categories = categories
        column.metadata = col_metadata
    record_input.add_numeric_column('col_1')
    record_input.add_vector_column('col_vector')
    image_column = Img2DColumn(pre_transforms=[], post_transforms=[], is_raw_img=False)
    record_input.add_column("col_5", image_column)

    os.makedirs(os.path.join(self.test_dir, Dataset.DATA_DIR_NAME))
    record_writer = RecordWriter.factory('HDF5', self.test_dir, record_input.columns)

    # Only the first CSV row is used; every entry is whitespace-stripped.
    first_row = Schema.read_n_rows(csv_file_path=self.test_csv_file_path,
                                   delimiter=",",
                                   rows_number=1)[0]
    csv_row = [entry.strip() for entry in first_row]

    processed_row = {
        column.name: column.process_on_write(csv_row)
        for column in record_input.columns
    }
    record_writer.write(processed_row, 0)

    record_reader = RecordReader.factory('HDF5', self.test_dir)
    record = record_reader.read(0)
    restored = {
        column.name: column.process_on_read(record)
        for column in record_input.columns
    }

    img_deserialized = restored['col_5']
    img_original = skimgio.imread(self.test_img_file_path)
    self.assertTrue(np.array_equal(img_deserialized, img_original))
# Script fragment: load the first dataset matching 'test-*' under the
# configured datasets directory, or build one from the bundled test CSV when
# none exists, then print the shape of every entry of a training batch.
import os
import glob
#
# NOTE(review): the assignment below is treated as live code because the name
# is used on the very next statement -- confirm against the original layout.
datasets_base_path = app_flask.config['DATASETS_BASE_PATH']
lstDB = glob.glob('%s/test-*' % datasets_base_path)
numDB = len(lstDB)
if numDB < 1:
    # No existing test dataset: build one from the CSV, which is resolved
    # relative to the current working directory.
    path_csv = '../../../../data-test/dataset-image2d/simple4c_test/test-csv-v1.csv'
    if not os.path.isfile(path_csv):
        raise Exception('Cant find file [%s]' % path_csv)
    schema = Schema.from_csv(path_csv, header=True, delimiter=',')
    schema.merge_columns_in_range('col_vector', (2, 4))
    schema.print_data()
    # NOTE(review): presumably renames the 'path' column to 'image', which is
    # the name used for the image column below -- confirm.
    schema['path'] = 'image'
    schema.print_columns()
    input = Input(schema=schema)
    input.add_categorical_column("label")
    input.add_vector_column('col_vector')
    img2d = Img2DColumn(is_related_path=True)
    input.add_column("image", img2d)
    dataset = Dataset.Builder(input,
                              "test",
                              datasets_base_path,
                              parallelism_level=2).build()
else:
    # Reuse the first matching dataset directory.
    dataset = Dataset.load(lstDB[0])
dataShapes = dataset.shapes()
data = dataset.get_train_batch(5)
for k in data._data.keys():
    # Single-argument call form prints the same text on Python 2 and is
    # valid syntax on Python 3 (the bare print statement is not).
    print('%s : %s' % (k, data[k].shape))