Exemplo n.º 1
0
    def test_transform_map_type(self) -> None:
        """parse_hive_type builds nested map metadata for a map of maps."""
        outer_type = 'map<string,map<string,int>>'
        nested_type = 'map<string,int>'

        column = ColumnMetadata('col1', None, outer_type, 0)
        column.set_column_key(self.column_key)

        # Outer map: string keys, map values.
        expected = MapTypeMetadata(name='col1',
                                   parent=column,
                                   type_str=outer_type)
        outer_key = ScalarTypeMetadata(name='_map_key',
                                       parent=expected,
                                       type_str='string')
        nested_map = MapTypeMetadata(name='_map_value',
                                     parent=expected,
                                     type_str=nested_type)

        # Nested map: string keys, int values.
        nested_key = ScalarTypeMetadata(name='_map_key',
                                        parent=nested_map,
                                        type_str='string')
        nested_value = ScalarTypeMetadata(name='_map_value',
                                          parent=nested_map,
                                          type_str='int')

        expected.map_key_type = outer_key
        expected.map_value_type = nested_map
        nested_map.map_key_type = nested_key
        nested_map.map_value_type = nested_value

        self.assertEqual(
            parse_hive_type(column.type, column.name, column), expected)
Exemplo n.º 2
0
    def transform(self, record: Any) -> TableMetadata:
        """
        Attach parsed type metadata to every column of a table record.

        For each column the column key is set and the configured parsing
        function is applied to the column's type string. If any of that
        raises, the column falls back to a ScalarTypeMetadata built from its
        raw type string, ``failure_count`` is incremented and a warning is
        logged. Otherwise ``success_count`` is incremented.

        :param record: the record to process; must be a TableMetadata
        :return: the same record object, with type metadata set on its columns
        :raises Exception: if ``record`` is not a TableMetadata instance
        """
        is_table = isinstance(record, TableMetadata)
        if not is_table:
            raise Exception(
                f"ComplexTypeTransformer expects record of type TableMetadata, received {type(record)}"
            )

        for column in record.columns:
            try:
                column_key = record._get_col_key(column)
                column.set_column_key(column_key)
                parsed_type = self._parsing_function(column.type, column.name,
                                                     column)
                column.set_type_metadata(parsed_type)
            except Exception as e:
                # Default to scalar type if the type string cannot be parsed
                fallback_type = ScalarTypeMetadata(name=column.name,
                                                   parent=column,
                                                   type_str=column.type)
                column.set_type_metadata(fallback_type)
                self.failure_count += 1
                LOGGER.warning(
                    f"Could not parse type for column {column.name} in table {record.name}: {e}"
                )
                continue

            # Reached only when parsing and assignment both succeeded.
            self.success_count += 1

        return record
Exemplo n.º 3
0
    def test_transform_struct_map_array_nested_type(self) -> None:
        """parse_hive_type handles a struct holding a map of arrays and an array."""
        full_type = 'struct<nest1:map<string,array<int>>,nest2:array<string>>'

        column = ColumnMetadata('col1', None, full_type, 0)
        column.set_column_key(self.column_key)

        expected = StructTypeMetadata(name='col1',
                                      parent=column,
                                      type_str=full_type)

        # nest1: map<string,array<int>>
        nest1_map = MapTypeMetadata(name='nest1',
                                    parent=expected,
                                    type_str='map<string,array<int>>')
        nest1_key = ScalarTypeMetadata(name='_map_key',
                                       parent=nest1_map,
                                       type_str='string')
        nest1_value = ArrayTypeMetadata(name='_map_value',
                                        parent=nest1_map,
                                        type_str='array<int>')

        # nest2: array<string>
        nest2_array = ArrayTypeMetadata(name='nest2',
                                        parent=expected,
                                        type_str='array<string>')

        # No array_inner_type is assigned on either expected array.
        expected.struct_items = {'nest1': nest1_map, 'nest2': nest2_array}
        nest1_map.map_key_type = nest1_key
        nest1_map.map_value_type = nest1_value

        # Struct members are numbered in declaration order.
        for position, member in enumerate((nest1_map, nest2_array)):
            member.sort_order = position

        self.assertEqual(
            parse_hive_type(column.type, column.name, column), expected)
Exemplo n.º 4
0
    def test_hive_parser_with_failures(self) -> None:
        """A raising parsing function makes the transformer fall back to scalar."""
        raw_type = 'array<array<int>>'
        parser_path = 'databuilder.utils.hive_complex_type_parser.parse_hive_type'

        transformer = ComplexTypeTransformer()
        transformer.init(
            conf=ConfigFactory.from_dict({PARSING_FUNCTION: parser_path}))

        column = ColumnMetadata('col1', 'array type', raw_type, 0)
        table_metadata = TableMetadata('hive', 'gold', 'test_schema',
                                       'test_table', 'test_table', [column])

        # What the column should carry once parsing has failed.
        expected_fallback = ScalarTypeMetadata(name='col1',
                                               parent=column,
                                               type_str=raw_type)

        failing_parser = MagicMock(side_effect=Exception('Could not parse'))
        with patch.object(transformer, '_parsing_function') as parser_mock:
            parser_mock.side_effect = failing_parser
            result = transformer.transform(table_metadata)

        self.assertEqual(transformer.success_count, 0)
        self.assertEqual(transformer.failure_count, 1)
        for transformed_column in result.columns:
            self.assertEqual(transformed_column.get_type_metadata(),
                             expected_fallback)
Exemplo n.º 5
0
    def test_transform_map_struct_nested_type(self) -> None:
        """parse_hive_type handles a map whose values are structs."""
        struct_type_str = 'struct<nest1:int,nest2:int>'
        map_type_str = 'map<string,struct<nest1:int,nest2:int>>'

        column = ColumnMetadata('col1', None, map_type_str, 0)
        column.set_column_key(self.column_key)

        expected = MapTypeMetadata(name='col1',
                                   parent=column,
                                   type_str=map_type_str)
        key_type = ScalarTypeMetadata(name='_map_key',
                                      parent=expected,
                                      type_str='string')
        value_struct = StructTypeMetadata(name='_map_value',
                                          parent=expected,
                                          type_str=struct_type_str)

        # Members of the struct stored as the map value.
        nest1 = ScalarTypeMetadata(name='nest1',
                                   parent=value_struct,
                                   type_str='int')
        nest2 = ScalarTypeMetadata(name='nest2',
                                   parent=value_struct,
                                   type_str='int')

        expected.map_key_type = key_type
        expected.map_value_type = value_struct
        value_struct.struct_items = {'nest1': nest1, 'nest2': nest2}
        for position, member in enumerate((nest1, nest2)):
            member.sort_order = position

        self.assertEqual(
            parse_hive_type(column.type, column.name, column), expected)
Exemplo n.º 6
0
    def test_transform_no_complex_type(self) -> None:
        """A plain 'int' column parses into a single ScalarTypeMetadata."""
        plain_type = 'int'

        column = ColumnMetadata('col1', None, plain_type, 0)
        column.set_column_key(self.column_key)

        expected = ScalarTypeMetadata(name='col1',
                                      parent=column,
                                      type_str=plain_type)

        self.assertEqual(
            parse_hive_type(column.type, column.name, column), expected)
Exemplo n.º 7
0
    def test_transform_array_struct_nested_type(self) -> None:
        """parse_hive_type handles an array whose elements are structs."""
        element_type_str = 'struct<nest1:int,nest2:int>'
        array_type_str = 'array<struct<nest1:int,nest2:int>>'

        column = ColumnMetadata('col1', None, array_type_str, 0)
        column.set_column_key(self.column_key)

        expected = ArrayTypeMetadata(name='col1',
                                     parent=column,
                                     type_str=array_type_str)
        element_struct = StructTypeMetadata(name='_inner_',
                                            parent=expected,
                                            type_str=element_type_str)

        # Members of the struct used as the array element type.
        nest1 = ScalarTypeMetadata(name='nest1',
                                   parent=element_struct,
                                   type_str='int')
        nest2 = ScalarTypeMetadata(name='nest2',
                                   parent=element_struct,
                                   type_str='int')

        expected.array_inner_type = element_struct
        element_struct.struct_items = {'nest1': nest1, 'nest2': nest2}
        for position, member in enumerate((nest1, nest2)):
            member.sort_order = position

        self.assertEqual(
            parse_hive_type(column.type, column.name, column), expected)
Exemplo n.º 8
0
    def test_transform_union_as_nested_type(self) -> None:
        """Union types nested in a struct are expected to come back as scalars."""
        nest1_type_str = 'uniontype<string,struct<c1:int,c2:string>>'
        nest2_type_str = 'uniontype<string,int>'
        full_type = ('struct<nest1:uniontype<string,struct<c1:int,c2:string>>,'
                     'nest2:uniontype<string,int>>')

        column = ColumnMetadata('col1', None, full_type, 0)
        column.set_column_key(self.column_key)

        expected = StructTypeMetadata(name='col1',
                                      parent=column,
                                      type_str=full_type)

        # Each union member keeps its full type string on a scalar node.
        nest1 = ScalarTypeMetadata(name='nest1',
                                   parent=expected,
                                   type_str=nest1_type_str)
        nest2 = ScalarTypeMetadata(name='nest2',
                                   parent=expected,
                                   type_str=nest2_type_str)

        expected.struct_items = {'nest1': nest1, 'nest2': nest2}
        for position, member in enumerate((nest1, nest2)):
            member.sort_order = position

        self.assertEqual(
            parse_hive_type(column.type, column.name, column), expected)
Exemplo n.º 9
0
    def test_transform_union_as_scalar_type(self) -> None:
        """A top-level union type is expected to parse as one scalar node."""
        union_type = 'uniontype<string,struct<c1:int,c2:string>>'

        column = ColumnMetadata('col1', None, union_type, 0)
        column.set_column_key(self.column_key)

        expected_scalar = ScalarTypeMetadata(name='col1',
                                             parent=column,
                                             type_str=union_type)

        self.assertEqual(
            parse_hive_type(column.type, column.name, column),
            expected_scalar)
Exemplo n.º 10
0
    def test_transform_array_map_nested_type(self) -> None:
        """parse_hive_type handles an array whose elements are maps."""
        element_type_str = 'map<string,int>'
        array_type_str = 'array<map<string,int>>'

        column = ColumnMetadata('col1', None, array_type_str, 0)
        column.set_column_key(self.column_key)

        expected = ArrayTypeMetadata(name='col1',
                                     parent=column,
                                     type_str=array_type_str)
        element_map = MapTypeMetadata(name='_inner_',
                                      parent=expected,
                                      type_str=element_type_str)

        # Key and value types of the map used as the array element type.
        element_key = ScalarTypeMetadata(name='_map_key',
                                         parent=element_map,
                                         type_str='string')
        element_value = ScalarTypeMetadata(name='_map_value',
                                           parent=element_map,
                                           type_str='int')

        expected.array_inner_type = element_map
        element_map.map_key_type = element_key
        element_map.map_value_type = element_value

        self.assertEqual(
            parse_hive_type(column.type, column.name, column), expected)
Exemplo n.º 11
0
def parse_hive_type(type_str: str, name: str, parent: Union[ColumnMetadata, TypeMetadata]) -> TypeMetadata:
    """
    Recursively convert a Hive type string into a TypeMetadata tree.

    The type string is lowercased and matched in full against the
    ``complex_type`` grammar. Depending on which alternative matched, the
    result is a scalar, array, map or struct metadata object, with nested
    types parsed by recursive calls that use the new object as parent.

    :param type_str: the Hive type string to parse
    :param name: name given to the metadata object created for this level
    :param parent: the column or type metadata that owns the created object
    :return: the metadata object describing ``type_str``
    :raises Exception: if the parse result is none of the four known kinds
    """
    type_str = type_str.lower()
    parsed = complex_type.parseString(type_str, parseAll=True)

    if parsed.scalar_type:
        return ScalarTypeMetadata(name=name, parent=parent, type_str=type_str)

    body = parsed[0]

    if parsed.array_type:
        array_metadata = ArrayTypeMetadata(name=name, parent=parent, type_str=type_str)
        element_type = parse_hive_type(body.type, '_inner_', array_metadata)
        # A scalar element type is not attached to the array.
        if not isinstance(element_type, ScalarTypeMetadata):
            array_metadata.array_inner_type = element_type
        return array_metadata

    if parsed.map_type:
        map_metadata = MapTypeMetadata(name=name, parent=parent, type_str=type_str)
        map_metadata.map_key_type = parse_hive_type(body.key, '_map_key', map_metadata)
        map_metadata.map_value_type = parse_hive_type(body.type, '_map_value', map_metadata)
        return map_metadata

    if parsed.struct_type:
        struct_metadata = StructTypeMetadata(name=name, parent=parent, type_str=type_str)
        members = {}
        for position, field in enumerate(body):
            member = parse_hive_type(field.type, field.name, struct_metadata)
            members[field.name] = member
            # Record the position of the field within the struct.
            member.sort_order = position
        struct_metadata.struct_items = members
        return struct_metadata

    raise Exception(f"Unrecognized type: {type_str}")
Exemplo n.º 12
0
    def test_transform_non_alpha_only_types(self) -> None:
        """Type names with digits, parentheses, spaces or underscores parse."""
        full_type = ('struct<nest1:decimal(10,2),nest2:double precision,'
                     'nest3:varchar(32),nest4:map<varchar(32),decimal(10,2)>,'
                     'nest5:interval_day_time>')

        column = ColumnMetadata('col1', None, full_type, 0)
        column.set_column_key(self.column_key)

        expected = StructTypeMetadata(name='col1',
                                      parent=column,
                                      type_str=full_type)

        nest1 = ScalarTypeMetadata(name='nest1',
                                   parent=expected,
                                   type_str='decimal(10,2)')
        nest2 = ScalarTypeMetadata(name='nest2',
                                   parent=expected,
                                   type_str='double precision')
        nest3 = ScalarTypeMetadata(name='nest3',
                                   parent=expected,
                                   type_str='varchar(32)')

        # nest4 is a map with parameterised key and value types.
        nest4 = MapTypeMetadata(name='nest4',
                                parent=expected,
                                type_str='map<varchar(32),decimal(10,2)>')
        nest4_key = ScalarTypeMetadata(name='_map_key',
                                       parent=nest4,
                                       type_str='varchar(32)')
        nest4_value = ScalarTypeMetadata(name='_map_value',
                                         parent=nest4,
                                         type_str='decimal(10,2)')

        nest5 = ScalarTypeMetadata(name='nest5',
                                   parent=expected,
                                   type_str='interval_day_time')

        expected.struct_items = {
            'nest1': nest1,
            'nest2': nest2,
            'nest3': nest3,
            'nest4': nest4,
            'nest5': nest5,
        }
        nest4.map_key_type = nest4_key
        nest4.map_value_type = nest4_value

        # Struct members are numbered in declaration order.
        ordered_members = (nest1, nest2, nest3, nest4, nest5)
        for position, member in enumerate(ordered_members):
            member.sort_order = position

        self.assertEqual(
            parse_hive_type(column.type, column.name, column), expected)
Exemplo n.º 13
0
    def test_serialize_struct_type_metadata(self) -> None:
        """Nested struct metadata serializes to the expected nodes and relations."""
        full_type = 'struct<c1:struct<c2:struct<c3:string,c4:string>>,c5:string>'
        c1_type = 'struct<c2:struct<c3:string,c4:string>>'
        c2_type = 'struct<c3:string,c4:string>'

        column = ColumnMetadata('col1', None, full_type, 0)
        column.set_column_key(self.column_key)

        # Build the metadata tree: col1 -> (c1 -> c2 -> (c3, c4), c5).
        struct_type_metadata = StructTypeMetadata(name='col1',
                                                  parent=column,
                                                  type_str=full_type)
        c1_struct = StructTypeMetadata(name='c1',
                                       parent=struct_type_metadata,
                                       type_str=c1_type)
        c2_struct = StructTypeMetadata(name='c2',
                                       parent=c1_struct,
                                       type_str=c2_type)
        c3_scalar = ScalarTypeMetadata(name='c3',
                                       parent=c2_struct,
                                       type_str='string',
                                       description='description of c3')
        c4_scalar = ScalarTypeMetadata(name='c4',
                                       parent=c2_struct,
                                       type_str='string')
        c5_scalar = ScalarTypeMetadata(name='c5',
                                       parent=struct_type_metadata,
                                       type_str='string',
                                       description='description of c5')

        struct_type_metadata.struct_items = {'c1': c1_struct, 'c5': c5_scalar}
        c1_struct.struct_items = {'c2': c2_struct}
        c2_struct.struct_items = {'c3': c3_scalar, 'c4': c4_scalar}

        c1_struct.sort_order = 0
        c5_scalar.sort_order = 1
        c2_struct.sort_order = 0
        c3_scalar.sort_order = 0
        c4_scalar.sort_order = 1

        # Expected keys, composed from their shared prefixes.
        column_node_key = 'hive://gold.test_schema1/test_table1/col1'
        col1_key = f'{column_node_key}/type/col1'
        c1_key = f'{col1_key}/c1'
        c2_key = f'{c1_key}/c2'
        c3_key = f'{c2_key}/c3'
        c4_key = f'{c2_key}/c4'
        c5_key = f'{col1_key}/c5'
        c3_description_key = f'{c3_key}/_description'
        c5_description_key = f'{c5_key}/_description'

        def subtype_rel(start_key: str, end_key: str) -> dict:
            # Relation between a type metadata node and one of its subtypes.
            return {
                'END_KEY': end_key,
                'START_KEY': start_key,
                'END_LABEL': 'Type_Metadata',
                'START_LABEL': 'Type_Metadata',
                'TYPE': 'SUBTYPE',
                'REVERSE_TYPE': 'SUBTYPE_OF',
            }

        def description_rel(start_key: str, end_key: str) -> dict:
            # Relation between a type metadata node and its description.
            return {
                'END_KEY': end_key,
                'START_LABEL': 'Type_Metadata',
                'END_LABEL': 'Description',
                'START_KEY': start_key,
                'TYPE': 'DESCRIPTION',
                'REVERSE_TYPE': 'DESCRIPTION_OF',
            }

        expected_nodes = [
            {'kind': 'struct', 'name': 'col1', 'data_type': full_type,
             'LABEL': 'Type_Metadata', 'KEY': col1_key},
            {'kind': 'struct', 'name': 'c1', 'data_type': c1_type,
             'LABEL': 'Type_Metadata', 'sort_order:UNQUOTED': 0,
             'KEY': c1_key},
            {'kind': 'struct', 'name': 'c2', 'data_type': c2_type,
             'LABEL': 'Type_Metadata', 'sort_order:UNQUOTED': 0,
             'KEY': c2_key},
            {'kind': 'scalar', 'name': 'c3', 'data_type': 'string',
             'LABEL': 'Type_Metadata', 'sort_order:UNQUOTED': 0,
             'KEY': c3_key},
            {'description': 'description of c3', 'KEY': c3_description_key,
             'LABEL': 'Description', 'description_source': 'description'},
            {'kind': 'scalar', 'name': 'c4', 'data_type': 'string',
             'LABEL': 'Type_Metadata', 'sort_order:UNQUOTED': 1,
             'KEY': c4_key},
            {'kind': 'scalar', 'name': 'c5', 'data_type': 'string',
             'LABEL': 'Type_Metadata', 'sort_order:UNQUOTED': 1,
             'KEY': c5_key},
            {'description': 'description of c5', 'KEY': c5_description_key,
             'LABEL': 'Description', 'description_source': 'description'},
        ]
        expected_rels = [
            {
                'END_KEY': col1_key,
                'START_KEY': column_node_key,
                'END_LABEL': 'Type_Metadata',
                'START_LABEL': 'Column',
                'TYPE': 'TYPE_METADATA',
                'REVERSE_TYPE': 'TYPE_METADATA_OF',
            },
            subtype_rel(col1_key, c1_key),
            subtype_rel(c1_key, c2_key),
            subtype_rel(c2_key, c3_key),
            description_rel(c3_key, c3_description_key),
            subtype_rel(c2_key, c4_key),
            subtype_rel(col1_key, c5_key),
            description_rel(c5_key, c5_description_key),
        ]

        # Drain the node iterator, then compare position by position.
        actual_nodes = []
        node_row = struct_type_metadata.next_node()
        while node_row:
            actual_nodes.append(neo4_serializer.serialize_node(node_row))
            node_row = struct_type_metadata.next_node()
        for index, expected_node in enumerate(expected_nodes):
            self.assertEqual(actual_nodes[index], expected_node)

        # Same again for the relations.
        actual_rels = []
        relation_row = struct_type_metadata.next_relation()
        while relation_row:
            actual_rels.append(
                neo4_serializer.serialize_relationship(relation_row))
            relation_row = struct_type_metadata.next_relation()
        for index, expected_rel in enumerate(expected_rels):
            self.assertEqual(actual_rels[index], expected_rel)
Exemplo n.º 14
0
    def test_serialize_map_struct_type_metadata(self) -> None:
        """Map-of-struct metadata serializes to the expected nodes and relations."""
        full_type = 'map<string,struct<c1:map<string,string>,c2:string>>'
        value_type = 'struct<c1:map<string,string>,c2:string>'
        c1_type = 'map<string,string>'

        column = ColumnMetadata('col1', None, full_type, 0)
        column.set_column_key(self.column_key)

        # Build the tree: col1 map -> (key, value struct -> (c1 map, c2)).
        map_type_metadata = MapTypeMetadata(name='col1',
                                            parent=column,
                                            type_str=full_type)
        outer_key = ScalarTypeMetadata(name='_map_key',
                                       parent=map_type_metadata,
                                       type_str='string')
        value_struct = StructTypeMetadata(name='_map_value',
                                          parent=map_type_metadata,
                                          type_str=value_type)
        c1_map = MapTypeMetadata(name='c1',
                                 parent=value_struct,
                                 type_str=c1_type)
        c1_map_key = ScalarTypeMetadata(name='_map_key',
                                        parent=c1_map,
                                        type_str='string')
        c1_map_value = ScalarTypeMetadata(name='_map_value',
                                          parent=c1_map,
                                          type_str='string')
        c2_scalar = ScalarTypeMetadata(name='c2',
                                       parent=value_struct,
                                       type_str='string')

        map_type_metadata.map_key_type = outer_key
        map_type_metadata.map_value_type = value_struct
        value_struct.struct_items = {'c1': c1_map, 'c2': c2_scalar}
        c1_map.map_key_type = c1_map_key
        c1_map.map_value_type = c1_map_value
        c1_map.sort_order = 0
        c2_scalar.sort_order = 1

        # Expected keys, composed from their shared prefixes.
        column_node_key = 'hive://gold.test_schema1/test_table1/col1'
        col1_key = f'{column_node_key}/type/col1'
        outer_key_key = f'{col1_key}/_map_key'
        value_key = f'{col1_key}/_map_value'
        c1_key = f'{value_key}/c1'
        c1_key_key = f'{c1_key}/_map_key'
        c1_value_key = f'{c1_key}/_map_value'
        c2_key = f'{value_key}/c2'

        def subtype_rel(start_key: str, end_key: str) -> dict:
            # Relation between a type metadata node and one of its subtypes.
            return {
                'END_KEY': end_key,
                'START_KEY': start_key,
                'END_LABEL': 'Type_Metadata',
                'START_LABEL': 'Type_Metadata',
                'TYPE': 'SUBTYPE',
                'REVERSE_TYPE': 'SUBTYPE_OF',
            }

        expected_nodes = [
            {'kind': 'map', 'name': 'col1', 'data_type': full_type,
             'LABEL': 'Type_Metadata', 'KEY': col1_key},
            {'kind': 'scalar', 'name': '_map_key', 'data_type': 'string',
             'LABEL': 'Type_Metadata', 'KEY': outer_key_key},
            {'kind': 'struct', 'name': '_map_value', 'data_type': value_type,
             'LABEL': 'Type_Metadata', 'KEY': value_key},
            {'kind': 'map', 'name': 'c1', 'data_type': c1_type,
             'sort_order:UNQUOTED': 0, 'LABEL': 'Type_Metadata',
             'KEY': c1_key},
            {'kind': 'scalar', 'name': '_map_key', 'data_type': 'string',
             'LABEL': 'Type_Metadata', 'KEY': c1_key_key},
            {'kind': 'scalar', 'name': '_map_value', 'data_type': 'string',
             'LABEL': 'Type_Metadata', 'KEY': c1_value_key},
            {'kind': 'scalar', 'name': 'c2', 'data_type': 'string',
             'LABEL': 'Type_Metadata', 'sort_order:UNQUOTED': 1,
             'KEY': c2_key},
        ]
        expected_rels = [
            {
                'END_KEY': col1_key,
                'START_KEY': column_node_key,
                'END_LABEL': 'Type_Metadata',
                'START_LABEL': 'Column',
                'TYPE': 'TYPE_METADATA',
                'REVERSE_TYPE': 'TYPE_METADATA_OF',
            },
            subtype_rel(col1_key, outer_key_key),
            subtype_rel(col1_key, value_key),
            subtype_rel(value_key, c1_key),
            subtype_rel(c1_key, c1_key_key),
            subtype_rel(c1_key, c1_value_key),
            subtype_rel(value_key, c2_key),
        ]

        # Drain the node iterator, then compare position by position.
        actual_nodes = []
        node_row = map_type_metadata.next_node()
        while node_row:
            actual_nodes.append(neo4_serializer.serialize_node(node_row))
            node_row = map_type_metadata.next_node()
        for index, expected_node in enumerate(expected_nodes):
            self.assertEqual(actual_nodes[index], expected_node)

        # Same again for the relations.
        actual_rels = []
        relation_row = map_type_metadata.next_relation()
        while relation_row:
            actual_rels.append(
                neo4_serializer.serialize_relationship(relation_row))
            relation_row = map_type_metadata.next_relation()
        for index, expected_rel in enumerate(expected_rels):
            self.assertEqual(actual_rels[index], expected_rel)
Exemplo n.º 15
0
    def test_serialize_array_type_metadata(self) -> None:
        """
        Serialize a triply nested array type and check the emitted rows.

        The ``array<array<array<string>>>`` chain is assembled by hand, then
        the node and relation iterators of the outermost type are drained
        through ``neo4_serializer`` and compared, in order, against the
        expected rows.
        """
        col = ColumnMetadata('col1', None, 'array<array<array<string>>>', 0)
        col.set_column_key(self.column_key)

        # Assemble the chain from the outside in; every level points at the
        # level above it through ``parent``.
        outer_array = ArrayTypeMetadata(name='col1',
                                        parent=col,
                                        type_str='array<array<array<string>>>')
        middle_array = ArrayTypeMetadata(name='_inner_',
                                         parent=outer_array,
                                         type_str='array<array<string>>')
        innermost_array = ArrayTypeMetadata(name='_inner_',
                                            parent=middle_array,
                                            type_str='array<string>')
        leaf_scalar = ScalarTypeMetadata(name='_inner_',
                                         parent=innermost_array,
                                         type_str='string')

        # Link each array to the type it contains.
        outer_array.array_inner_type = middle_array
        middle_array.array_inner_type = innermost_array
        innermost_array.array_inner_type = leaf_scalar

        # Keys shared by the expected node rows and relation rows below.
        column_key = 'hive://gold.test_schema1/test_table1/col1'
        outer_key = 'hive://gold.test_schema1/test_table1/col1/type/col1'
        middle_key = 'hive://gold.test_schema1/test_table1/col1/type/col1/_inner_'
        innermost_key = 'hive://gold.test_schema1/test_table1/col1/type/col1/_inner_/_inner_'

        expected_nodes = [
            {
                'KEY': outer_key,
                'LABEL': 'Type_Metadata',
                'kind': 'array',
                'name': 'col1',
                'data_type': 'array<array<array<string>>>',
            },
            {
                'KEY': middle_key,
                'LABEL': 'Type_Metadata',
                'kind': 'array',
                'name': '_inner_',
                'data_type': 'array<array<string>>',
            },
            {
                'KEY': innermost_key,
                'LABEL': 'Type_Metadata',
                'kind': 'array',
                'name': '_inner_',
                'data_type': 'array<string>',
            },
        ]
        expected_rels = [
            {
                'START_KEY': column_key,
                'START_LABEL': 'Column',
                'END_KEY': outer_key,
                'END_LABEL': 'Type_Metadata',
                'TYPE': 'TYPE_METADATA',
                'REVERSE_TYPE': 'TYPE_METADATA_OF',
            },
            {
                'START_KEY': outer_key,
                'START_LABEL': 'Type_Metadata',
                'END_KEY': middle_key,
                'END_LABEL': 'Type_Metadata',
                'TYPE': 'SUBTYPE',
                'REVERSE_TYPE': 'SUBTYPE_OF',
            },
            {
                'START_KEY': middle_key,
                'START_LABEL': 'Type_Metadata',
                'END_KEY': innermost_key,
                'END_LABEL': 'Type_Metadata',
                'TYPE': 'SUBTYPE',
                'REVERSE_TYPE': 'SUBTYPE_OF',
            },
        ]

        # Drain the node iterator until it yields a falsy row.
        serialized_nodes = []
        while True:
            node = outer_array.next_node()
            if not node:
                break
            serialized_nodes.append(neo4_serializer.serialize_node(node))

        # NOTE(review): only the first len(expected_nodes) rows are compared;
        # additional rows emitted after them would go unnoticed.
        for position, expected_node in enumerate(expected_nodes):
            self.assertEqual(serialized_nodes[position], expected_node)

        # Same procedure for the relation iterator.
        serialized_rels = []
        while True:
            relation = outer_array.next_relation()
            if not relation:
                break
            serialized_rels.append(
                neo4_serializer.serialize_relationship(relation))

        # NOTE(review): as above, this checks a prefix of the emitted rows.
        for position, expected_rel in enumerate(expected_rels):
            self.assertEqual(serialized_rels[position], expected_rel)