Пример #1
0
 def test_can_define_column_configuration(self):
     """Check that a column's ``config`` shows up on the schema's producer.

     Column 'A' is defined with type 'int' and config ``{'a': 10}``; the
     schema must list the column, and its first producer must carry the
     same name, type and config.
     """
     schema = Schema()
     schema.define_column('A', type='int', config={'a': 10})

     self.assertEqual(('A', ), schema.columns)

     # Inspect the first producer once instead of re-indexing per assertion.
     producer = schema.producers[0]
     self.assertEqual('A', producer.name)
     self.assertEqual('int', producer.type)
     self.assertEqual({'a': 10}, producer.config)
Пример #2
0
 def test_can_obtain_a_column_type(self):
     """Check the type recorded for a column defined without a config.

     After defining column 'A' with type 'int', the first producer must be
     named 'A', have type 'int' and carry an empty config dict.
     """
     schema = Schema()
     schema.define_column('A', type='int')

     self.assertEqual(('A', ), schema.columns)

     producer = schema.producers[0]
     self.assertEqual('A', producer.name)
     self.assertEqual('int', producer.type)
     self.assertEqual({}, producer.config)
Пример #3
0
    def test_can_build_a_generator_from_a_schema_with_config(self):
        """Build an engine from a one-column schema configured with ``min``.

        The engine must report exactly one column. Twenty generated rows
        and twenty reference draws from ``self.rand_copy`` are collected
        as sets.

        NOTE(review): the two sets are never compared in the visible code;
        confirm whether a final assertion is missing.
        """
        schema = Schema()
        schema.define_column('A', type='int', config={'min': 10})
        engine = Engine(schema, self.library)

        self.assertEqual(1, engine.number_of_columns)

        values = set(engine.generate_data(20))

        # Reference rows are 1-tuples drawn with the same lower bound (10).
        draw = self.rand_copy.randint
        expected_values = set()
        for _ in range(20):
            expected_values.add((draw(10, 1_000_000),))
Пример #4
0
 def test_can_mix_reference_and_auto_generated_producers(self):
     """Mix a column bound to a named producer with a column defined by type.

     Column 'A' references the explicitly added 'my_producer', while 'B'
     is defined by type only. The schema must then expose exactly two
     producers: 'B' and 'my_producer', both of type 'int' with empty config.
     """
     schema = Schema()
     schema.add_producer('my_producer', type='int')
     schema.define_column('A', producer='my_producer')
     schema.define_column('B', type='int')

     # Sort by name so the comparison does not depend on storage order.
     by_name = sorted(schema.producers, key=lambda producer: producer.name)
     self.assertEqual(2, len(by_name))
     from_type, from_reference = by_name

     expected_from_type = SimpleNamespace(name='B', type='int', config={})
     self.assertEqual(expected_from_type, from_type)

     expected_from_reference = SimpleNamespace(
         name='my_producer', type='int', config={})
     self.assertEqual(expected_from_reference, from_reference)
Пример #5
0
    def test_add_column_does_not_add_a_column_if_it_raises_error(self):
        """A failed duplicate ``add_column`` must leave the columns untouched."""
        schema = Schema()
        schema.add_column('A')

        # Registering the same name again is expected to be refused ...
        with self.assertRaises(SchemaError):
            schema.add_column('A')

        # ... and must not have appended a second 'A'.
        columns_after_failure = schema.columns
        self.assertEqual(('A', ), columns_after_failure)
Пример #6
0
    def test_must_specify_producer_or_type(self):
        """``define_column`` without ``producer`` or ``type`` raises TypeError.

        Two calls are checked on the same schema: one with no keyword
        arguments at all, and one that passes only ``config``.
        """
        schema = Schema()

        insufficient_arguments = (
            {},                      # neither producer, type nor config
            {'config': {'a': 1}},    # config alone is not enough
        )
        for keyword_arguments in insufficient_arguments:
            with self.assertRaises(TypeError):
                schema.define_column('A', **keyword_arguments)
Пример #7
0
    def test_raises_error_if_register_same_producer_multiple_times(self):
        """Defining column 'A' by type twice must raise ``SchemaError``.

        The exception text must be "Column 'A' is already defined.".
        """
        schema = Schema()
        schema.define_column('A', type='int')

        with self.assertRaises(SchemaError) as raised:
            schema.define_column('A', type='int')

        message = str(raised.exception)
        self.assertEqual("Column 'A' is already defined.", message)
Пример #8
0
    def test_add_column_raises_error_if_column_is_already_defined(self):
        """Adding column 'A' twice via ``add_column`` raises ``SchemaError``.

        The exception text must be "Column 'A' is already defined.".
        """
        schema = Schema()
        schema.add_column('A')

        with self.assertRaises(SchemaError) as raised:
            schema.add_column('A')

        message = str(raised.exception)
        self.assertEqual("Column 'A' is already defined.", message)
Пример #9
0
    def test_raises_error_if_register_same_transformer_multiple_times(self):
        """Registering 'my_transformer' twice must raise ``SchemaError``.

        The first registration succeeds; the identical second one must fail
        with "Transformer 'my_transformer' is already defined.".
        """
        schema = Schema()
        schema.define_column('A', type='int')
        ret_none = FunctionalTransformer(lambda x: None)

        def register_transformer():
            # Fresh list literals on every call, exactly like two
            # hand-written calls would pass.
            schema.add_transformer('my_transformer',
                                   inputs=['A'],
                                   outputs=['A'],
                                   transformer=ret_none)

        register_transformer()
        with self.assertRaises(SchemaError) as raised:
            register_transformer()

        message = str(raised.exception)
        self.assertEqual("Transformer 'my_transformer' is already defined.",
                         message)
Пример #10
0
    def test_can_generate_two_identical_columns_by_referencing_name_of_auto_created_producer(self):
        """Column 'B' reusing producer 'A' must yield the same values as 'A'.

        'A' is defined by type and 'B' then references the producer name
        'A'; ten generated rows are transposed into two columns which must
        compare equal.
        """
        schema = Schema()
        schema.define_column('A', type='int')
        schema.define_column('B', producer='A')
        engine = Engine(schema, self.library)

        rows = list(engine.generate_data(number_of_rows=10))

        # Transpose the rows into one tuple per column.
        column_a, column_b = zip(*rows)
        self.assertEqual(column_a, column_b)
Пример #11
0
 def test_can_generate_some_data_no_header(self):
     """Write one row for a three-column schema created with no header.

     The output is captured in a ``StringIO`` and three reference integers
     are drawn from ``self.rand_copy``.

     NOTE(review): ``lines`` and ``expected_values`` are never compared in
     the visible code; confirm whether a final assertion is missing.
     """
     schema = Schema(show_header=False)
     for column_name in ('A', 'B', 'C'):
         schema.define_column(column_name, type='int')

     saved_data = StringIO()
     generate_data(schema, self.library, saved_data, number_of_rows=1)
     lines = saved_data.getvalue()

     draw = self.rand_copy.randint
     expected_values = tuple(draw(0, 1_000_000) for _ in range(3))
Пример #12
0
 def test_can_generate_some_data_bytecount(self):
     """Write data for a three-column schema limited by ``byte_count=128``.

     The output is captured in a ``StringIO``; six reference rows of three
     comma-joined integers are drawn from ``self.rand_copy``.

     NOTE(review): ``lines`` and ``expected_values`` are never compared in
     the visible code; confirm whether a final assertion is missing.
     """
     schema = Schema()
     for column_name in ('A', 'B', 'C'):
         schema.define_column(column_name, type='int')

     saved_data = StringIO()
     generate_data(schema, self.library, saved_data, byte_count=128)
     lines = saved_data.getvalue()

     # Draw row by row, three values per row, so the random stream is
     # consumed in row-major order.
     draw = self.rand_copy.randint
     expected_values = []
     for _ in range(6):
         cells = [str(draw(0, 1_000_000)) for _ in range(3)]
         expected_values.append(','.join(cells))
Пример #13
0
    def test_can_generate_producer_data_with_number_of_rows(self):
        """Requesting ten rows from a three-column engine yields ten rows.

        NOTE(review): the lazy stream of 30 reference integers is created
        but never consumed in the visible code; confirm whether a value
        comparison is missing.
        """
        schema = Schema()
        for column_name in ('A', 'B', 'C'):
            schema.define_column(column_name, type='int')
        engine = Engine(schema, self.library)

        generated_values = list(engine.generate_data(number_of_rows=10))
        self.assertEqual(10, len(generated_values))

        # Deliberately a generator expression: no random numbers are drawn
        # from ``self.rand_copy`` until something iterates over it.
        draw = self.rand_copy.randint
        iterable = (draw(0, 1_000_000) for _ in range(30))
Пример #14
0
    def test_can_generate_stream_of_data(self):
        """Take 1000 rows from ``generate_data()`` called without a row count.

        NOTE(review): the lazy stream of 3000 reference integers is created
        but never consumed in the visible code; confirm whether a value
        comparison is missing.
        """
        schema = Schema()
        for column_name in ('A', 'B', 'C'):
            schema.define_column(column_name, type='int')
        engine = Engine(schema, self.library)

        # Only the first 1000 rows are pulled from the data stream.
        first_rows = it.islice(engine.generate_data(), 1000)
        generated_values = list(first_rows)
        self.assertEqual(1000, len(generated_values))

        # Deliberately a generator expression: no random numbers are drawn
        # from ``self.rand_copy`` until something iterates over it.
        draw = self.rand_copy.randint
        iterable = (draw(0, 1_000_000) for _ in range(3000))
Пример #15
0
 def test_can_create_columns_with_same_producer(self):
     """Two columns referencing one named producer share that producer.

     Columns 'A' and 'B' both point at 'my_producer'; the schema must list
     both columns but only a single producer, named 'my_producer', of type
     'int' with an empty config.
     """
     schema = Schema()
     schema.add_producer('my_producer', type='int')
     for column_name in ('A', 'B'):
         schema.define_column(column_name, producer='my_producer')

     self.assertEqual(('A', 'B'), schema.columns)
     self.assertEqual(1, len(schema.producers))

     shared_producer = schema.producers[0]
     self.assertEqual('my_producer', shared_producer.name)
     self.assertEqual('int', shared_producer.type)
     self.assertEqual({}, shared_producer.config)
Пример #16
0
 def test_can_generate_some_data_stream(self):
     """Stream data into a ``MaxSizeFileIO(256)`` and expect an ``IOError``.

     After the error, the sink's ``buffer`` is read and twelve reference
     rows of three comma-joined integers are drawn from ``self.rand_copy``.

     NOTE(review): presumably the error comes from the size-limited sink
     filling up -- confirm against ``MaxSizeFileIO``. ``lines`` and
     ``expected_values`` are never compared in the visible code; confirm
     whether a final assertion is missing.
     """
     schema = Schema()
     for column_name in ('A', 'B', 'C'):
         schema.define_column(column_name, type='int')

     saved_data = MaxSizeFileIO(256)
     with self.assertRaises(IOError):
         generate_data(schema, self.library, saved_data, stream_mode=True)
     lines = saved_data.buffer

     # Draw row by row, three values per row, so the random stream is
     # consumed in row-major order.
     draw = self.rand_copy.randint
     expected_values = []
     for _ in range(12):
         cells = [str(draw(0, 1_000_000)) for _ in range(3)]
         expected_values.append(','.join(cells))
Пример #17
0
    def test_can_build_a_generator_from_a_schema(self):
        """Build an engine from a three-column schema and generate one row.

        The engine must report three columns and ``generate_data(1)`` must
        yield exactly one row. Three reference integers are then drawn from
        ``self.rand_copy``.

        NOTE(review): ``expected_values`` is never compared with the
        generated row in the visible code; confirm whether a final
        assertion is missing.
        """
        schema = Schema()
        for column_name in ('A', 'B', 'C'):
            schema.define_column(column_name, type='int')
        engine = Engine(schema, self.library)

        self.assertEqual(3, engine.number_of_columns)

        values = list(engine.generate_data(1))
        self.assertEqual(1, len(values))

        draw = self.rand_copy.randint
        expected_values = tuple(draw(0, 1_000_000) for _ in range(3))
Пример #18
0
    def test_raises_an_error_if_inputs_do_not_exist(self):
        """A transformer whose input 'B' is not in the schema is rejected.

        Only column 'A' is defined; ``add_transformer`` must raise
        ``SchemaError`` with "Inputs: 'B' are not defined in the schema.".
        """
        schema = Schema()
        schema.define_column('A', type='int')
        ret_none = FunctionalTransformer(lambda x: None)

        with self.assertRaises(SchemaError) as raised:
            schema.add_transformer(
                'my_transformer',
                inputs=['B'],
                outputs=['A'],
                transformer=ret_none,
            )

        message = str(raised.exception)
        self.assertEqual("Inputs: 'B' are not defined in the schema.",
                         message)
Пример #19
0
    def test_raises_an_error_if_double_output_name(self):
        """Listing output 'A' twice for one transformer is rejected.

        The transformer is built with ``num_outputs=2`` and both outputs
        are named 'A'; ``add_transformer`` must raise ``SchemaError`` with
        "Outputs must be unique. Got multiple 'A' outputs.".
        """
        schema = Schema()
        for column_name in ('A', 'B'):
            schema.define_column(column_name, type='int')
        ret_none = FunctionalTransformer(lambda x: None, num_outputs=2)

        with self.assertRaises(SchemaError) as raised:
            schema.add_transformer(
                'my_transformer',
                inputs=['A'],
                outputs=['A', 'A'],
                transformer=ret_none,
            )

        message = str(raised.exception)
        self.assertEqual("Outputs must be unique. Got multiple 'A' outputs.",
                         message)
Пример #20
0
    def test_raises_an_error_if_num_outputs_do_not_match_arity(self):
        """Two outputs for a transformer built without ``num_outputs`` fail.

        ``add_transformer`` is given outputs 'A' and 'B' and must raise
        ``SchemaError`` reporting that the transformer's number of outputs
        is 1.
        """
        schema = Schema()
        for column_name in ('A', 'B'):
            schema.define_column(column_name, type='int')
        ret_none = FunctionalTransformer(lambda x: None)

        with self.assertRaises(SchemaError) as raised:
            schema.add_transformer(
                'my_transformer',
                inputs=['A'],
                outputs=['A', 'B'],
                transformer=ret_none,
            )

        message = str(raised.exception)
        self.assertEqual(
            "Got 2 outputs: 'A', 'B' but transformer's number of outputs is 1.",
            message)
Пример #21
0
 def test_can_add_a_transformer(self):
     """A registered transformer is exposed through ``schema.transformers``.

     After adding 'my_transformer' (input 'A', output 'A'), the schema
     must hold exactly one transformer entry equal to a namespace with the
     same name, inputs, outputs and transformer object.
     """
     schema = Schema()
     schema.define_column('A', type='int')
     add_one = FunctionalTransformer(lambda x: x + 1)

     schema.add_transformer(
         'my_transformer',
         inputs=['A'],
         outputs=['A'],
         transformer=add_one,
     )

     self.assertEqual(1, len(schema.transformers))
     expected_entry = SimpleNamespace(
         name='my_transformer',
         inputs=['A'],
         outputs=['A'],
         transformer=add_one,
     )
     self.assertEqual(expected_entry, schema.transformers[0])
Пример #22
0
 def test_str(self):
     """Match ``str(schema)`` against the expected textual layout.

     The verbose regex expects, in order: a ``columns`` list, the single
     'my_producer' entry, one or more transformer entries named 'A' or 'B'
     that hold a ``ProjectionTransformer``, and ``show_header=True``.
     """
     schema = Schema()
     schema.add_producer('my_producer', type='int')
     for column_name in ('A', 'B'):
         schema.define_column(column_name, producer='my_producer')

     # Compiled with re.VERBOSE, so the layout whitespace inside the
     # pattern is ignored by the regex engine.
     pattern_text = r'''
         Schema\(
         \s*columns=\[[^]]+\],
         \s*producers=\{my_producer:\s*\{'type':\s'int',\s'config':\s\{\}\}\},
         \s*transformers=(\{'name':\s'(A|B)',\s*'transformer':\s<feanor\.schema\.ProjectionTransformer\sobject\sat\s\w+>,\s*'inputs':\s\[[^]]+\],\s'outputs':\s\[[^]]+\]\},?\s*)+
         show_header=True\s*
         \)
     '''
     expected_layout = re.compile(pattern_text, re.VERBOSE)
     self.assertRegex(str(schema), expected_layout)
Пример #23
0
 def test_can_create_column_by_referencing_producer(self):
     """A column bound to a named producer adds a projection transformer.

     After defining 'A' from 'my_producer', the schema must hold one
     producer ('my_producer', type 'int', empty config) and one transformer
     named 'A' mapping input 'my_producer' to output 'A' through an object
     equal to ``ProjectionTransformer(1, 0)``.
     """
     schema = Schema()
     schema.add_producer('my_producer', type='int')
     schema.define_column('A', producer='my_producer')

     self.assertEqual(('A', ), schema.columns)

     # Producer side: only the explicitly added producer exists.
     self.assertEqual(1, len(schema.producers))
     producer = schema.producers[0]
     self.assertEqual('my_producer', producer.name)
     self.assertEqual('int', producer.type)
     self.assertEqual({}, producer.config)

     # Transformer side: a single entry named after the column.
     self.assertEqual(1, len(schema.transformers))
     projection = schema.transformers[0]
     self.assertEqual('A', projection.name)
     self.assertEqual(['my_producer'], projection.inputs)
     self.assertEqual(['A'], projection.outputs)
     self.assertEqual(ProjectionTransformer(1, 0), projection.transformer)
Пример #24
0
    def test_can_repeat_input_name_of_transformer(self):
        """The same column may appear more than once in ``inputs``.

        A two-argument transformer is registered with inputs 'A' and 'A';
        the schema must accept it and expose exactly that one entry.
        """
        schema = Schema()
        for column_name in ('A', 'B'):
            schema.define_column(column_name, type='int')
        add_inputs = FunctionalTransformer(lambda x, y: x + y)

        schema.add_transformer(
            'my_transformer',
            inputs=['A', 'A'],
            outputs=['A'],
            transformer=add_inputs,
        )

        self.assertEqual(len(schema.transformers), 1)
        expected_entry = SimpleNamespace(
            name='my_transformer',
            inputs=['A', 'A'],
            outputs=['A'],
            transformer=add_inputs,
        )
        self.assertEqual(expected_entry, schema.transformers[0])
Пример #25
0
 def test_creates_different_producers_when_multiple_columns(self):
     """Each column defined by type gets its own producer.

     Three 'int' columns 'A', 'B' and 'C' must result in three producers
     which, sorted by name, are named after the columns, have type 'int'
     and carry an empty config.
     """
     column_names = ('A', 'B', 'C')
     schema = Schema()
     for column_name in column_names:
         schema.define_column(column_name, type='int')

     self.assertEqual(('A', 'B', 'C'), schema.columns)
     self.assertEqual(3, len(schema.producers))

     # Sort by name so the checks do not depend on storage order.
     producers = sorted(schema.producers, key=lambda producer: producer.name)
     for expected_name, producer in zip(column_names, producers):
         self.assertEqual(expected_name, producer.name)
         self.assertEqual('int', producer.type)
         self.assertEqual({}, producer.config)
Пример #26
0
    def test_can_use_transformer_to_filter_value(self):
        """A transformer wrapping a None-returning function can be added.

        The wrapped function ignores its single argument and returns
        ``None``; the schema must expose exactly one matching entry.
        """
        schema = Schema()
        schema.define_column('A', type='int')

        # Kept as a named one-argument function: the wrapper may inspect
        # its name or signature.
        def test_transformer(unused):
            return None

        ret_none = FunctionalTransformer(test_transformer)
        schema.add_transformer(
            'my_transformer',
            inputs=['A'],
            outputs=['A'],
            transformer=ret_none,
        )

        self.assertEqual(1, len(schema.transformers))
        expected_entry = SimpleNamespace(
            name='my_transformer',
            inputs=['A'],
            outputs=['A'],
            transformer=ret_none,
        )
        self.assertEqual(expected_entry, schema.transformers[0])
Пример #27
0
 def test_generate_data_raises_if_missing_size_parameters(self):
     """``generate_data`` without any size argument raises ``TypeError``.

     Neither ``number_of_rows``, ``byte_count`` nor ``stream_mode`` is
     passed in this call.
     """
     with self.assertRaises(TypeError):
         empty_schema = Schema()
         output = mock.MagicMock()
         generate_data(empty_schema, self.library, output)
Пример #28
0
 def test_can_add_columns_to_a_schema(self):
     """Columns added with ``add_column`` are reported in insertion order."""
     schema = Schema()
     for column_name in ('A', 'B', 'C'):
         schema.add_column(column_name)

     self.assertEqual(('A', 'B', 'C'), schema.columns)
Пример #29
0
 def test_generate_data_raises_if_both_num_rows_and_num_bytes_are_specified(self):
     """Passing both ``number_of_rows`` and ``byte_count`` raises TypeError."""
     with self.assertRaises(TypeError):
         empty_schema = Schema()
         output = mock.MagicMock()
         generate_data(empty_schema, self.library, output,
                       number_of_rows=10, byte_count=100)
Пример #30
0
 def test_can_specify_header_visibility(self):
     """``Schema(show_header=False)`` reports a falsy ``show_header``."""
     headerless_schema = Schema(show_header=False)
     header_flag = headerless_schema.show_header
     self.assertFalse(header_flag)