Пример #1
0
    def test_convert_columns_to_str(self):
        # Verify that after convert_columns_to_str() the only value type
        # reported for any column is 'str'.
        #
        # The rows deliberately mix ints, strings, a list, a dict and None so
        # that every column starts out holding more than one kind of value.
        mixed_raw = [
            {'col1': 1, 'col2': 2, 'col3': 3},
            {'col1': 'one', 'col2': 2, 'col3': [3, 'three', 3.0]},
            {'col1': {'one': 1, 'two': 2.0}, 'col2': None, 'col3': 'three'},
        ]
        tbl = Table(mixed_raw)
        tbl.convert_columns_to_str()

        # Flatten the per-column 'type' entries into one set of type names.
        type_set = {
            type_name
            for col_stats in tbl.get_columns_type_stats()
            for type_name in col_stats['type']
        }

        # Comparing against the exact expected set is equivalent to
        # "'str' is present and it is the only member", but on failure the
        # report shows which unexpected types were found.
        self.assertEqual(type_set, {'str'})
Пример #2
0
    def process_json(self, json_blob, obj_type, tidy=False):
        """Convert a JSON response into a list of named tables.

        Internal method for converting most types of json responses into a
        list of Parsons tables.

        Args:
            json_blob: Data handed directly to ``Table(...)``.
            obj_type: Name given to the main table; also used as the prefix
                (``{obj_type}_{column}``) for tables split out of nested
                columns.
            tidy: When ``False`` (the default), plain list columns and dict
                columns are unpacked in place via ``unpack_list`` /
                ``unpack_dict``. Any other value switches to
                ``unpack_nested_columns_as_rows`` and is forwarded to it as
                ``expand_original``.

        Returns:
            A list of ``{'name': ..., 'tbl': ...}`` dicts. Tables split out of
            nested columns come first; the main table is appended last, and
            only when it has more than one column.
        """
        # Output goes here
        table_list = []

        # Column type stats are taken once, before any unpacking below
        # modifies the table.
        obj_table = Table(json_blob)
        cols = obj_table.get_columns_type_stats()
        list_cols = [c['name'] for c in cols if 'list' in c['type']]
        dict_cols = [c['name'] for c in cols if 'dict' in c['type']]

        # Unpack all list columns
        for list_col in list_cols:
            # A list column is "nested" when at least one row holds a list
            # that contains a dict.
            nested_rows = obj_table.select_rows(lambda row: isinstance(
                row[list_col], list) and any(
                    isinstance(item, dict) for item in row[list_col]))

            if nested_rows.num_rows > 0:
                logger.debug('%s is a nested column', list_col)
                # Add a separate long table (keyed on the 'id' column) for
                # the nested column; skip it if the name is missing from, or
                # duplicated in, the original column stats.
                if sum(1 for c in cols if c['name'] == list_col) == 1:
                    table_list.append({
                        'name': f'{obj_type}_{list_col}',
                        'tbl': obj_table.long_table(['id'], list_col),
                    })
            elif tidy is False:
                logger.debug('%s is a normal list column', list_col)
                obj_table.unpack_list(list_col)

        if tidy is False:
            # Unpack all dict columns
            for dict_col in dict_cols:
                logger.debug('%s is a dict column', dict_col)
                obj_table.unpack_dict(dict_col)
        else:
            for packed_col in list_cols + dict_cols:
                # Columns already removed by the steps above need no work.
                if packed_col not in obj_table.columns:
                    continue
                logger.debug('%s needs to be unpacked into rows', packed_col)

                # Determine whether or not to expand based on tidy
                unpacked_tidy = obj_table.unpack_nested_columns_as_rows(
                    packed_col, expand_original=tidy)
                # Check if column was removed as sign it was unpacked into
                # a separate table
                if packed_col not in obj_table.columns:
                    table_list.append({
                        'name': f'{obj_type}_{packed_col}',
                        'tbl': unpacked_tidy,
                    })
                else:
                    # Column still present: the call returned the updated
                    # main table, so carry on with that.
                    obj_table = unpacked_tidy

        # Original table will have had all nested columns removed; only
        # include it when more than one column is left.
        if len(obj_table.columns) > 1:
            table_list.append({'name': obj_type, 'tbl': obj_table})

        return table_list