示例#1
0
    def test_convert_object_with_simple_list_with_join_serialization(self):
        """Check the ``join_values`` serialization method and its separator.

        Only the ``list`` property gets a ``flatson_serialize`` option; the
        untouched ``list2`` property is expected to come out as compact JSON.
        """
        # given:
        contain_list = {
            'first': 'hello',
            'list': [1, 2, 3, 4],
            'list2': ['one', 'two'],
        }
        schema = skinfer.generate_schema(contain_list)
        serialize_options = dict(method='join_values')
        schema['properties']['list']['flatson_serialize'] = serialize_options

        # when:
        f = Flatson(schema=schema)

        # then: without an explicit separator the values are joined by ','
        self.assertEqual(['first', 'list', 'list2'], f.fieldnames)
        self.assertEqual(['hello', '1,2,3,4', '["one","two"]'],
                         f.flatten(contain_list))

        # and when: a custom separator is set on the same options dict
        schema['properties']['list']['flatson_serialize']['separator'] = '+'
        f = Flatson(schema=schema)

        # then:
        self.assertEqual(['hello', '1+2+3+4', '["one","two"]'],
                         f.flatten(contain_list))
示例#2
0
    def test_convert_object_with_simple_list_with_default_serialization(self):
        """Lists without serialization options are emitted as compact JSON."""
        contain_list = {
            'first': 'hello',
            'list': [1, 2, 3, 4],
            'list2': ['one', 'two'],
        }
        schema = skinfer.generate_schema(contain_list)

        f = Flatson(schema=schema)
        self.assertEqual(['first', 'list', 'list2'], f.fieldnames)
        self.assertEqual(['hello', '[1,2,3,4]', '["one","two"]'],
                         f.flatten(contain_list))
示例#3
0
    def test_lists_with_objects_with_default_serialization(self):
        """A list of objects is serialized as one compact JSON string.

        Uses ``SAMPLE_WITH_LIST_OF_OBJECTS``, which is defined outside this
        method, both to infer the schema and as the item to flatten.
        """
        # given:
        schema = skinfer.generate_schema(SAMPLE_WITH_LIST_OF_OBJECTS)
        f = Flatson(schema=schema)

        # when:
        result = f.flatten(SAMPLE_WITH_LIST_OF_OBJECTS)

        # then:
        expected = '[{"key1":"value1","key2":"value2"},{"key1":"value3","key2":"value4"}]'
        self.assertEqual(['first', 'list'], f.fieldnames)
        self.assertEqual(['hello', expected], result)
示例#4
0
    def test_convert_object_with_simple_list_with_default_serialization(self):
        """With no ``flatson_serialize`` option, list values become JSON text.

        Both the integer list and the string list are expected as compact
        JSON strings (no spaces after the commas).
        """
        contain_list = {
            'first': 'hello',
            'list': [1, 2, 3, 4],
            'list2': ['one', 'two'],
        }
        schema = skinfer.generate_schema(contain_list)

        f = Flatson(schema=schema)
        self.assertEqual(['first', 'list', 'list2'], f.fieldnames)
        self.assertEqual(['hello', '[1,2,3,4]', '["one","two"]'],
                         f.flatten(contain_list))
示例#5
0
 def test_convert_nested_objects(self):
     """Nested object keys are flattened into dotted field names."""
     contain_nested_object = {
         'first': 'hello',
         'second': {
             'one': 1,
             'two': 2,
         }
     }
     schema = skinfer.generate_schema(contain_nested_object)
     f = Flatson(schema=schema)
     self.assertEqual(['first', 'second.one', 'second.two'], f.fieldnames)
     self.assertEqual(['hello', 1, 2], f.flatten(contain_nested_object))
示例#6
0
 def test_convert_nested_objects(self):
     """Flatten an item holding a nested object.

     The inner keys are expected as ``second.one`` / ``second.two`` field
     names, and the scalar values are expected unchanged.
     """
     contain_nested_object = {
         'first': 'hello',
         'second': {
             'one': 1,
             'two': 2,
         }
     }
     schema = skinfer.generate_schema(contain_nested_object)
     f = Flatson(schema=schema)
     self.assertEqual(['first', 'second.one', 'second.two'], f.fieldnames)
     self.assertEqual(['hello', 1, 2], f.flatten(contain_nested_object))
示例#7
0
    def test_lists_with_objects_with_default_serialization(self):
        """Default serialization turns a list of objects into compact JSON.

        ``SAMPLE_WITH_LIST_OF_OBJECTS`` comes from outside this method.
        """
        # given:
        schema = skinfer.generate_schema(SAMPLE_WITH_LIST_OF_OBJECTS)
        f = Flatson(schema=schema)

        # when:
        result = f.flatten(SAMPLE_WITH_LIST_OF_OBJECTS)

        # then:
        expected = '[{"key1":"value1","key2":"value2"},{"key1":"value3","key2":"value4"}]'
        self.assertEqual(['first', 'list'], f.fieldnames)
        self.assertEqual(['hello', expected], result)
示例#8
0
    def test_convert_object_with_nested_simple_list_with_default_serialization(self):
        """A list nested inside an object gets a dotted name and JSON value."""
        contain_list = {
            'first': 'hello',
            'second': {
                'list1': [1, 2, 3, 4],
                'word': 'world',
            },
        }
        schema = skinfer.generate_schema(contain_list)
        f = Flatson(schema=schema)
        self.assertEqual(['first', 'second.list1', 'second.word'], f.fieldnames)
        self.assertEqual(['hello', '[1,2,3,4]', 'world'], f.flatten(contain_list))
示例#9
0
    def test_array_serialization_with_extract_first(self):
        """``extract_first`` yields the first list item, or None when empty."""
        # given:
        sample = {'first': 'hello', 'list': ['one', 'two']}
        schema = skinfer.generate_schema(sample)
        serialize_options = dict(method='extract_first')
        schema['properties']['list']['flatson_serialize'] = serialize_options

        # when:
        f = Flatson(schema=schema)
        result = f.flatten(sample)

        # then:
        self.assertEqual(['first', 'list'], f.fieldnames)
        self.assertEqual(['hello', 'one'], result)

        # and when: the same Flatson instance flattens an item with an empty list
        sample2 = {'first': 'hello', 'list': []}
        result = f.flatten(sample2)

        # then:
        self.assertEqual(['first', 'list'], f.fieldnames)
        self.assertEqual(['hello', None], result)
示例#10
0
    def test_array_serialization_with_extract_first(self):
        """Check the ``extract_first`` array serialization method.

        A populated list is expected to flatten to its first element, and an
        empty list to ``None``; the field names stay the same in both cases.
        """
        # given:
        sample = {'first': 'hello', 'list': ['one', 'two']}
        schema = skinfer.generate_schema(sample)
        serialize_options = dict(method='extract_first')
        schema['properties']['list']['flatson_serialize'] = serialize_options

        # when:
        f = Flatson(schema=schema)
        result = f.flatten(sample)

        # then:
        self.assertEqual(['first', 'list'], f.fieldnames)
        self.assertEqual(['hello', 'one'], result)

        # and when:
        sample2 = {'first': 'hello', 'list': []}
        result = f.flatten(sample2)

        # then:
        self.assertEqual(['first', 'list'], f.fieldnames)
        self.assertEqual(['hello', None], result)
示例#11
0
    def test_array_serialization_with_extract_key_values(self):
        """``extract_key_values`` renders a list of objects as key:value text.

        The expected string separates pairs with ',' and objects with ';'.
        ``SAMPLE_WITH_LIST_OF_OBJECTS`` is defined outside this method.
        """
        # given:
        schema = skinfer.generate_schema(SAMPLE_WITH_LIST_OF_OBJECTS)
        serialize_options = dict(method='extract_key_values')

        # when:
        schema['properties']['list']['flatson_serialize'] = serialize_options
        f = Flatson(schema=schema)
        result = f.flatten(SAMPLE_WITH_LIST_OF_OBJECTS)

        # then:
        expected = 'key1:value1,key2:value2;key1:value3,key2:value4'
        self.assertEqual(['first', 'list'], f.fieldnames)
        self.assertEqual(['hello', expected], result)
示例#12
0
    def test_array_serialization_with_extract_key_values(self):
        """Check the ``extract_key_values`` array serialization method.

        Each object in the list is expected as ``key:value`` pairs joined by
        ',', with the objects themselves joined by ';'.
        """
        # given:
        schema = skinfer.generate_schema(SAMPLE_WITH_LIST_OF_OBJECTS)
        serialize_options = dict(method='extract_key_values')

        # when:
        schema['properties']['list']['flatson_serialize'] = serialize_options
        f = Flatson(schema=schema)
        result = f.flatten(SAMPLE_WITH_LIST_OF_OBJECTS)

        # then:
        expected = 'key1:value1,key2:value2;key1:value3,key2:value4'
        self.assertEqual(['first', 'list'], f.fieldnames)
        self.assertEqual(['hello', expected], result)
示例#13
0
    def test_register_custom_serialization_method(self):
        """A method registered on the instance can be named in the schema.

        ``always_one`` is registered after the Flatson object is built and
        before ``flatten`` is called.
        """
        # given:
        sample = {'first': 'hello', 'list': ['one', 'two']}
        schema = skinfer.generate_schema(sample)
        serialize_options = dict(method='always_one')
        schema['properties']['list']['flatson_serialize'] = serialize_options

        # when:
        f = Flatson(schema=schema)
        # The custom serializer ignores its value and any keyword options.
        f.register_serialization_method('always_one', lambda _v, **kw: '1')
        result = f.flatten(sample)

        # then:
        self.assertEqual(['first', 'list'], f.fieldnames)
        self.assertEqual(['hello', '1'], result)
示例#14
0
    def test_convert_object_with_simple_list_with_join_serialization(self):
        """``join_values`` joins list items, by ',' or a configured separator.

        ``list2`` carries no serialization option and is expected as compact
        JSON in both halves of the test.
        """
        # given:
        contain_list = {
            'first': 'hello',
            'list': [1, 2, 3, 4],
            'list2': ['one', 'two'],
        }
        schema = skinfer.generate_schema(contain_list)
        serialize_options = dict(method='join_values')
        schema['properties']['list']['flatson_serialize'] = serialize_options

        # when:
        f = Flatson(schema=schema)

        # then:
        self.assertEqual(['first', 'list', 'list2'], f.fieldnames)
        self.assertEqual(['hello', '1,2,3,4', '["one","two"]'], f.flatten(contain_list))

        # and when: the separator is overridden and a new Flatson is built
        schema['properties']['list']['flatson_serialize']['separator'] = '+'
        f = Flatson(schema=schema)

        # then:
        self.assertEqual(['hello', '1+2+3+4', '["one","two"]'], f.flatten(contain_list))
示例#15
0
 def test_convert_object_with_nested_simple_list_with_default_serialization(
         self):
     """Flatten an object whose nested object contains a simple list.

     The nested list is expected under the dotted name ``second.list1`` and
     serialized as a compact JSON string.
     """
     contain_list = {
         'first': 'hello',
         'second': {
             'list1': [1, 2, 3, 4],
             'word': 'world',
         },
     }
     schema = skinfer.generate_schema(contain_list)
     f = Flatson(schema=schema)
     self.assertEqual(['first', 'second.list1', 'second.word'],
                      f.fieldnames)
     self.assertEqual(['hello', '[1,2,3,4]', 'world'],
                      f.flatten(contain_list))
示例#16
0
    def test_register_custom_serialization_method(self):
        """Register a custom serialization method and use it via the schema."""
        # given:
        sample = {'first': 'hello', 'list': ['one', 'two']}
        schema = skinfer.generate_schema(sample)
        serialize_options = dict(method='always_one')
        schema['properties']['list']['flatson_serialize'] = serialize_options

        # when: 'always_one' is registered only after construction
        f = Flatson(schema=schema)
        f.register_serialization_method('always_one', lambda _v, **kw: '1')
        result = f.flatten(sample)

        # then: the list field holds whatever the custom method returned
        self.assertEqual(['first', 'list'], f.fieldnames)
        self.assertEqual(['hello', '1'], result)
示例#17
0
class ODOWriter(BaseWriter):
    """
    Writes items to an odo destination. https://odo.readthedocs.org/en/latest/

    Items are flattened with Flatson using the supplied schema, loaded into a
    pandas DataFrame and handed to odo.

    Needed parameters:

        - schema (object)
            schema object.

        - odo_uri (str)
            ODO valid destination uri.
    """

    requirements = {
        'schema': {
            'type': object,
            'required': True
        },
        'odo_uri': {
            'type': six.string_types,
            'required': True
        }
    }

    def __init__(self, options):
        """Read the ``schema`` and ``odo_uri`` options and build the flattener."""
        super(ODOWriter, self).__init__(options)
        # Imported here so flatson is only needed when this writer is used.
        from flatson import Flatson
        schema = self.read_option('schema', None)
        self.odo_uri = self.read_option('odo_uri', None)
        self.flatson = Flatson(schema)
        self.logger.info('ODOWriter has been initiated. Writing to: {}'.format(
            self.odo_uri))

    @retry_long
    def write(self, dump_path, group_key=''):
        """Load a gzipped JSON-lines dump and send it to ``self.odo_uri``.

        :param dump_path: path of a gzip file holding one JSON document per
            line.
        :param group_key: accepted for interface compatibility; not used here.
        """
        from odo import odo, resource, discover
        import pandas as pd
        with gzip.open(dump_path) as f:
            # gzip.open defaults to binary mode, so on Python 3 each line is
            # bytes and str-based replace() would raise TypeError. Decoding
            # first works on both Python 2 and 3; json.loads ignores the
            # trailing newline.
            lines = [json.loads(line.decode('utf-8')) for line in f]
        flattened_lines = (self.flatson.flatten(line) for line in lines)
        pf = pd.DataFrame(flattened_lines, columns=self.flatson.fieldnames)
        dshape = discover(pf)
        odo(pf, resource(self.odo_uri), dshape=dshape)
示例#18
0
class ODOWriter(BaseWriter):
    """
    Writer that sends flattened items to an odo destination.
    See https://odo.readthedocs.org/en/latest/

    Needed parameters:

        - schema (object)
            schema object, passed to Flatson.

        - odo_uri (str)
            ODO valid destination uri.
    """

    # NOTE(review): ``basestring`` only exists on Python 2.
    requirements = dict(
        schema=dict(type=object, required=True),
        odo_uri=dict(type=basestring, required=True),
    )

    def __init__(self, options):
        """Read the writer options and prepare the Flatson flattener."""
        super(ODOWriter, self).__init__(options)
        from flatson import Flatson
        item_schema = self.read_option('schema', None)
        self.odo_uri = self.read_option('odo_uri', None)
        self.flatson = Flatson(item_schema)
        message = 'ODOWriter has been initiated. Writing to: {}'.format(self.odo_uri)
        self.logger.info(message)

    @retry_long
    def write(self, dump_path, group_key=''):
        """Push the gzipped JSON-lines dump at ``dump_path`` to the odo uri.

        ``group_key`` is part of the signature but is not used by this method.
        """
        from odo import odo, resource, discover
        import pandas as pd
        items = []
        with gzip.open(dump_path) as dump_file:
            for raw_line in dump_file.readlines():
                # Drop the line terminator before parsing the JSON document.
                items.append(json.loads(raw_line.replace('\n', '')))
        rows = (self.flatson.flatten(item) for item in items)
        frame = pd.DataFrame(rows, columns=self.flatson.fieldnames)
        frame_shape = discover(frame)
        odo(frame, resource(self.odo_uri), dshape=frame_shape)
示例#19
0
 def test_when_no_declared_properties_flatten_empty_list(self):
     """Flattening with ``EMPTY_SCHEMA`` returns an empty list.

     The item passed in does carry a property; the result is still expected
     to be empty. ``EMPTY_SCHEMA`` is defined outside this method.
     """
     f = Flatson(schema=EMPTY_SCHEMA)
     result = f.flatten({'a_prop': 'a_value'})
     self.assertEqual([], result)
示例#20
0
 def test_convert_simple_objects(self):
     """Flatten flat objects using ``SIMPLE_SCHEMA`` (defined elsewhere).

     A present property is expected as-is, and a missing one as ``None``.
     """
     f = Flatson(schema=SIMPLE_SCHEMA)
     self.assertEqual(['a_prop'], f.fieldnames)
     self.assertEqual(['a_value'], f.flatten({'a_prop': 'a_value'}))
     self.assertEqual([None], f.flatten({}))
示例#21
0
 def test_when_no_declared_properties_flatten_empty_list(self):
     """An item flattened against ``EMPTY_SCHEMA`` produces no values."""
     f = Flatson(schema=EMPTY_SCHEMA)
     result = f.flatten({'a_prop': 'a_value'})
     self.assertEqual([], result)
示例#22
0
 def test_convert_simple_objects(self):
     """``SIMPLE_SCHEMA`` exposes ``a_prop``; absent values flatten to None."""
     f = Flatson(schema=SIMPLE_SCHEMA)
     self.assertEqual(['a_prop'], f.fieldnames)
     self.assertEqual(['a_value'], f.flatten({'a_prop': 'a_value'}))
     # An empty item still yields one slot per declared field.
     self.assertEqual([None], f.flatten({}))