Пример #1
0
    def from_schema(dls, schema):
        """Build an ``Input`` object from a dict-like schema description.

        The optional top-level keys ``csv_file_path``,
        ``train_csv_file_path``, ``validation_scv_file_path`` and ``header``
        are copied onto the new object only when present in ``schema``;
        ``delimiter`` is additionally coerced with ``str``.  Each entry of
        ``schema['columns']`` is converted into a column object selected by
        the string value of its ``type`` field.

        Args:
            dls: First positional parameter; it is not used in the body.
            schema: Mapping with the keys described above.  ``columns`` is
                always read.

        Returns:
            The populated ``Input`` instance.

        Raises:
            TypeError: If a column entry has an unsupported ``type``.
        """
        result = Input()

        # Copied as-is when present.  The 'scv' spelling is the real key and
        # attribute name used at runtime, so it is kept unchanged.
        for key in ('csv_file_path', 'train_csv_file_path',
                    'validation_scv_file_path', 'header'):
            if key in schema:
                setattr(result, key, schema[key])
        if 'delimiter' in schema:
            result.delimiter = str(schema['delimiter'])

        from img2d import Img2DColumn

        # Column types that are built directly from their schema entry.
        plain_builders = (
            (Column.Type.NUMERIC, NumericColumn),
            (Column.Type.VECTOR, VectorColumn),
            (Column.Type.CATEGORICAL, CategoricalColumn),
        )

        built = []
        for column_schema in schema['columns']:
            column_type = str(column_schema['type'])
            for kind, column_cls in plain_builders:
                if column_type == kind:
                    column = column_cls.from_schema(column_schema)
                    break
            else:
                if column_type == Column.Type.IMG_2D:
                    column = Img2DColumn.from_schema(column_schema)
                    # The image column is handed the directory of the CSV
                    # file: the single-file path when the attribute exists,
                    # the training file path otherwise.
                    if hasattr(result, 'csv_file_path'):
                        csv_path = result.csv_file_path
                    else:
                        csv_path = result.train_csv_file_path
                    column.csv_file_path(os.path.dirname(csv_path))
                else:
                    raise TypeError(
                        "Unsupported column type: %s" % column_type)
            built.append(column)

        result.columns = built
        return result
Пример #2
0
 def test_img2d_ser_de_is_raw_img_false(self):
     """Check the serialize/deserialize round trip with ``is_raw_img=False``.

     The value returned by the column's reader is serialized and
     deserialized again; the result must equal the first element of what
     the reader returned.
     """
     column = Img2DColumn(columns_indexes=[0],
                          pre_transforms=[],
                          post_transforms=[],
                          is_raw_img=False)
     reader, ser_de = column.reader, column.ser_de
     read_result = reader.read([self.test_img_file_path])
     restored = ser_de.deserialize(ser_de.serialize(read_result))
     self.assertTrue(np.array_equal(read_result[0], restored))
Пример #3
0
 def test_img2d_column_metadata(self):
     """Check that merging metadata built from one image gives that image.

     Five ``Img2DColumnMetadata`` objects are each fed the same test image
     and merged into the column's metadata; ``metadata.img`` must then
     equal the image itself.
     """
     column = Img2DColumn(columns_indexes=[0],
                          pre_transforms=[],
                          post_transforms=[])

     def aggregate_once():
         # The reader returns a pair; only its first item is aggregated.
         img, _fmt = column.reader.read([self.test_img_file_path])
         partial = Img2DColumnMetadata()
         partial.aggregate(img=img)
         return partial

     partials = [aggregate_once() for _ in range(0, 5)]
     column.metadata.merge(partials)
     merged_img = column.metadata.img
     expected_img = column.reader.read([self.test_img_file_path])[0]
     # Every aggregated sample is the same image, so the merged image is
     # expected to match it exactly.
     self.assertTrue(np.array_equal(merged_img, expected_img))
Пример #4
0
def create_test_dataset(test_dir,
                        test_csv_file_path,
                        dataset_name,
                        header=False,
                        is_related_path=False):
    """Build a dataset from a test CSV file.

    The schema is read from ``test_csv_file_path`` and the column range
    ``(2, 4)`` is merged under the name ``col_vector``.  Columns 0, 1 and 5
    are registered as categorical, numeric and 2D-image columns.  Their
    names are ``col_0``/``col_1``/``col_5``, with an ``_h`` suffix when
    ``header`` is truthy.

    Args:
        test_dir: Passed to ``Dataset.Builder`` as its third argument.
        test_csv_file_path: CSV file the schema is read from.
        dataset_name: Passed to ``Dataset.Builder`` as the dataset name.
        header: Forwarded to ``Schema.from_csv``; also selects the column
            name suffix.
        is_related_path: Forwarded to ``Img2DColumn``.

    Returns:
        Whatever ``Dataset.Builder(...).build()`` returns.
    """
    suffix = '_h' if header else ''
    categorical_name = 'col_0' + suffix
    numeric_name = 'col_1' + suffix
    image_name = 'col_5' + suffix

    csv_schema = Schema.from_csv(csv_path=test_csv_file_path, header=header)
    csv_schema.merge_columns_in_range('col_vector', (2, 4))

    dataset_input = Input(csv_schema)
    dataset_input.add_categorical_column(categorical_name)
    dataset_input.add_numeric_column(numeric_name)
    dataset_input.add_vector_column('col_vector')
    dataset_input.add_column(image_name,
                             Img2DColumn(is_related_path=is_related_path))

    builder = Dataset.Builder(dataset_input, dataset_name, test_dir,
                              parallelism_level=2)
    return builder.build()
Пример #5
0
 def test_write_read_record_raw_img_false(self):
     """Write one CSV row as an HDF5 record and read it back.

     The first row of the test CSV is processed by every input column,
     written at index 0 with an ``'HDF5'`` record writer and read back with
     the matching reader.  The value recovered for ``col_5`` must equal the
     test image loaded with ``skimgio.imread``.
     """
     csv_schema = Schema.from_csv(csv_path=self.test_csv_file_path)
     csv_schema.merge_columns_in_range('col_vector', (2, 4))
     record_input = Input(csv_schema)

     record_input.add_categorical_column('col_0')
     for col in record_input.columns:
         if col.name != 'col_0':
             continue
         col_metadata = CategoricalColumnMetadata()
         # NOTE(review): `categories` is not defined in this method; it is
         # presumably a module-level name - confirm.
         col_metadata._categories = categories
         col.metadata = col_metadata

     record_input.add_numeric_column('col_1')
     record_input.add_vector_column('col_vector')
     record_input.add_column("col_5", Img2DColumn(pre_transforms=[],
                                                  post_transforms=[],
                                                  is_raw_img=False))

     os.makedirs(os.path.join(self.test_dir, Dataset.DATA_DIR_NAME))
     writer = RecordWriter.factory('HDF5', self.test_dir,
                                   record_input.columns)

     first_row = Schema.read_n_rows(csv_file_path=self.test_csv_file_path,
                                    delimiter=",",
                                    rows_number=1)[0]
     csv_row = [cell.strip() for cell in first_row]

     processed_row = {
         col.name: col.process_on_write(csv_row)
         for col in record_input.columns
     }
     writer.write(processed_row, 0)

     stored = RecordReader.factory('HDF5', self.test_dir).read(0)
     restored = {
         col.name: col.process_on_read(stored)
         for col in record_input.columns
     }

     expected_img = skimgio.imread(self.test_img_file_path)
     self.assertTrue(np.array_equal(restored['col_5'], expected_img))
Пример #6
0
    import os
    import glob
    # Reuse an existing dataset when an entry matching 'test-*' exists under
    # DATASETS_BASE_PATH; otherwise build a new one from the CSV file below.
    datasets_base_path = app_flask.config['DATASETS_BASE_PATH']
    lstDB = glob.glob('%s/test-*' % datasets_base_path)
    numDB = len(lstDB)
    if numDB < 1:
        # Nothing to reuse: build a dataset named "test".
        # The path is relative, so it is resolved against the current
        # working directory.
        path_csv = '../../../../data-test/dataset-image2d/simple4c_test/test-csv-v1.csv'
        if not os.path.isfile(path_csv):
            raise Exception('Cant find file [%s]' % path_csv)
        schema = Schema.from_csv(path_csv, header=True, delimiter=',')
        schema.merge_columns_in_range('col_vector', (2, 4))
        schema.print_data()
        # NOTE(review): presumably renames the 'path' column to 'image' (an
        # "image" column is registered below) - confirm against Schema.
        schema['path'] = 'image'
        schema.print_columns()
        input = Input(schema=schema)
        input.add_categorical_column("label")
        input.add_vector_column('col_vector')
        img2d = Img2DColumn(is_related_path=True)
        input.add_column("image", img2d)
        dataset = Dataset.Builder(input,
                                  "test",
                                  datasets_base_path,
                                  parallelism_level=2).build()
    else:
        # Load the first match; glob does not guarantee a sorted order.
        dataset = Dataset.load(lstDB[0])
    # NOTE(review): dataShapes is not used in the visible code.
    dataShapes = dataset.shapes()
    data = dataset.get_train_batch(5)
    # Print the shape of every entry in the batch.  This is a Python 2 print
    # statement, and it reads the batch's private `_data` attribute for keys.
    for k in data._data.keys():
        print '%s : %s' % (k, data[k].shape)