Python Table.get_columns_type_stats примеры использования

Язык программирования: Python

Пространство имен/Пакет: parsons

Класс/Тип: Table

Метод/Функция: get_columns_type_stats

Примеров на hotexamples.com: 2

Python Table.get_columns_type_stats — найдено 2 примера. Это лучшие примеры кода на Python для parsons.Table.get_columns_type_stats, взятые из проектов с открытым исходным кодом. Вы можете оценить каждый пример, чтобы помочь нам улучшить качество подборки.

Основные методы

Показать Скрыть

Table(30)

from_csv(9)

concat(6)

from_csv_string(4)

long_table(4)

from_json(4)

rename_column(3)

move_column(2)

map_columns(2)

remove_column(2)

get_columns_type_stats(2)

add_column(2)

reduce_rows(1)

materialize_to_file(1)

from_redshift(1)

match_columns(1)

get_column_max_width(1)

remove_null_rows(1)

materialize(1)

from_dataframe(1)

from_postgres(1)

from_columns(1)

fillna_column(1)

fill_column(1)

empty_column(1)

cut(1)

convert_table(1)

convert_columns_to_str(1)

convert_column(1)

column_data(1)

coalesce_columns(1)

chunk(1)

append_csv(1)

row_data(1)

Пример #1

Показать файл

Файл: test_etl.py Проект: bxjw/parsons

    def test_convert_columns_to_str(self):
        """Check that convert_columns_to_str leaves only 'str' typed values.

        The fixture mixes ints, strings, a list, a dict and None across
        three columns; after conversion the type stats reported for every
        column must name 'str' and nothing else.
        """
        # Row of plain integers
        int_row = {'col1': 1, 'col2': 2, 'col3': 3}
        # Row mixing a string, an int and a heterogeneous list
        list_row = {'col1': 'one', 'col2': 2, 'col3': [3, 'three', 3.0]}
        # Row mixing a dict, a None and a string
        dict_row = {'col1': {'one': 1, 'two': 2.0}, 'col2': None, 'col3': 'three'}

        table = Table([int_row, list_row, dict_row])
        table.convert_columns_to_str()

        # Gather every type name reported across all columns
        seen_types = set()
        for column_stats in table.get_columns_type_stats():
            seen_types.update(column_stats['type'])

        self.assertTrue(len(seen_types) == 1 and 'str' in seen_types)

Пример #2

Показать файл

    def process_json(self, json_blob, obj_type, tidy=False):
        """Convert a JSON response into a list of named Parsons tables.

        Internal helper. Builds a ``Table`` from ``json_blob``, inspects the
        column type stats for columns holding lists or dicts, and unpacks
        them according to ``tidy``.

        Args:
            json_blob: Data passed straight to ``Table(...)``.
            obj_type: Name for the main table; also used as the prefix
                (``f'{obj_type}_{column}'``) for tables split off from it.
            tidy: When exactly ``False`` (the default), list and dict columns
                are unpacked in place with ``unpack_list`` / ``unpack_dict``.
                Any other value is forwarded as ``expand_original`` to
                ``unpack_nested_columns_as_rows``.

        Returns:
            A list of ``{'name': str, 'tbl': Table}`` dicts. The main table
            is appended last, and only if it still has more than one column.
        """
        # Output goes here
        table_list = []

        # Original table & columns
        obj_table = Table(json_blob)
        cols = obj_table.get_columns_type_stats()
        list_cols = [x['name'] for x in cols if 'list' in x['type']]
        dict_cols = [x['name'] for x in cols if 'dict' in x['type']]

        # Unpack all list columns
        for col in list_cols:
            # Check for nested data: rows whose list value contains a dict
            nested_rows = obj_table.select_rows(lambda row: isinstance(
                row[col], list) and any(isinstance(x, dict) for x in row[col]))

            if nested_rows.num_rows > 0:
                # Logging takes a %-style format string first; passing the
                # column name as the message with a stray argument makes the
                # logging module report a formatting error and drop the text.
                logger.debug('%s is a nested column', col)
                # Add a separate long table for each column with nested
                # data; ignore the column if its name is not unique in the
                # type stats (missing or duplicated).
                if sum(1 for x in cols if x['name'] == col) == 1:
                    table_list.append({
                        'name': f'{obj_type}_{col}',
                        'tbl': obj_table.long_table(['id'], col)
                    })
            elif tidy is False:
                logger.debug('%s is a normal list column', col)
                obj_table.unpack_list(col)

        if tidy is False:
            # Unpack all dict columns
            for col in dict_cols:
                logger.debug('%s is a dict column', col)
                obj_table.unpack_dict(col)
        else:
            for col in list_cols + dict_cols:
                if col not in obj_table.columns:
                    continue
                logger.debug('%s needs to be unpacked into rows', col)

                # Determine whether or not to expand based on tidy
                unpacked_tidy = obj_table.unpack_nested_columns_as_rows(
                    col, expand_original=tidy)
                # Check if column was removed as sign it was unpacked into
                # a separate table
                if col not in obj_table.columns:
                    table_list.append({
                        'name': f'{obj_type}_{col}',
                        'tbl': unpacked_tidy
                    })
                else:
                    obj_table = unpacked_tidy

        # Original table will have had all nested columns removed
        if len(obj_table.columns) > 1:
            table_list.append({'name': obj_type, 'tbl': obj_table})

        return table_list