示例#1
0
 def test_process_csv_file(self):
     """Check that CSV processing yields 10 rows and 4 ``col_0`` categories.

     Builds a schema from ``self.test_csv_file_path``, registers ``col_0``
     as a categorical column, runs ``Dataset.Builder._process_csv_files``
     and verifies the row count and the number of collected categories.
     """
     schema = Schema.from_csv(csv_path=self.test_csv_file_path)
     # Named ``model_input`` to avoid shadowing the ``input`` builtin.
     model_input = Input(schema)
     model_input.add_categorical_column('col_0')
     builder = Dataset.Builder(input=model_input,
                               name="test",
                               root_dir=self.test_dir,
                               parallelism_level=2)
     rows = builder._process_csv_files()
     self.assertEqual(len(rows), 10)
     for column in model_input.columns:
         if column.name == 'col_0':
             # assertTrue(x, 4) would use 4 as the failure message and pass
             # for any non-empty list; compare the count explicitly.
             self.assertEqual(len(column.metadata.categories), 4)
示例#2
0
def create_test_dataset(test_dir,
                        test_csv_file_path,
                        dataset_name,
                        header=False,
                        is_related_path=False):
    """Build a dataset from a CSV file for use in tests.

    The schema is read from ``test_csv_file_path``; one categorical, one
    numeric, one vector and one 2D-image column are registered on the input
    before the dataset is built.

    Args:
        test_dir: Passed to ``Dataset.Builder`` as its third positional
            argument.
        test_csv_file_path: Path of the CSV file the schema is read from.
        dataset_name: Passed to ``Dataset.Builder`` as its second positional
            argument.
        header: Forwarded to ``Schema.from_csv``. When truthy, the ``_h``
            variants of the column names are used.
        is_related_path: Forwarded to ``Img2DColumn``.

    Returns:
        Whatever ``Dataset.Builder(...).build()`` returns.
    """
    if header:
        categorical_name, numeric_name, image_name = (
            'col_0_h', 'col_1_h', 'col_5_h')
    else:
        categorical_name, numeric_name, image_name = (
            'col_0', 'col_1', 'col_5')

    csv_schema = Schema.from_csv(csv_path=test_csv_file_path, header=header)
    # Presumably folds the columns in the range (2, 4) into 'col_vector';
    # confirm against Schema.merge_columns_in_range.
    csv_schema.merge_columns_in_range('col_vector', (2, 4))

    dataset_input = Input(csv_schema)
    dataset_input.add_categorical_column(categorical_name)
    dataset_input.add_numeric_column(numeric_name)
    dataset_input.add_vector_column('col_vector')
    image_column = Img2DColumn(is_related_path=is_related_path)
    dataset_input.add_column(image_name, image_column)

    builder = Dataset.Builder(dataset_input, dataset_name, test_dir,
                              parallelism_level=2)
    return builder.build()