Пример #1
0
def build_row_desc(data, preserve_index=False):
    """Describe every column of a DataFrame as a ``TColumnType``.

    Parameters
    ----------
    data : pandas.DataFrame
        The frame whose columns are described. Any other type is rejected.
    preserve_index : bool, default False
        When true, ``data.reset_index()`` is applied first, so the index
        is described together with the regular columns.

    Returns
    -------
    list of TColumnType
        One entry per column, in the order of ``data.columns``.

    Raises
    ------
    TypeError
        If ``data`` is not a pandas DataFrame.
    """
    if not isinstance(data, pd.DataFrame):
        # Once https://issues.apache.org/jira/browse/ARROW-1576 is complete
        # pa.Table can be supported here too.
        message = ("Create table is not supported for type {}. "
                   "Use a pandas DataFrame, or perform the create "
                   "separately").format(type(data))
        raise TypeError(message)

    frame = data.reset_index() if preserve_index else data

    # First pass: resolve the type name of every column.
    resolved = []
    for column in frame.columns:
        resolved.append((column, get_mapd_dtype(frame[column])))

    # Second pass: wrap each (name, type name) pair in the Thrift structs.
    # row_desc :: List<TColumnType>
    descriptors = []
    for column, type_name in resolved:
        type_info = TTypeInfo(getattr(TDatumType, type_name))
        descriptors.append(TColumnType(column, type_info))
    return descriptors
Пример #2
0
    def test_extract_row_details(self):
        """Check ``_extract_column_details`` against six column descriptors.

        Three columns use type code 6 with encoding 4 and comp_param 32 and
        are expected to come back as 'STR'; one uses type code 1 ('INT')
        and two use type code 3 ('FLOAT'), all with encoding 0 and
        comp_param 0.
        """
        # (column name, type code, encoding, comp_param, expected type name)
        specs = [
            ('date_', 6, 4, 32, 'STR'),
            ('trans', 6, 4, 32, 'STR'),
            ('symbol', 6, 4, 32, 'STR'),
            ('qty', 1, 0, 0, 'INT'),
            ('price', 3, 0, 0, 'FLOAT'),
            ('vol', 3, 0, 0, 'FLOAT'),
        ]

        data = []
        for col_name, type_code, encoding, comp_param, _ in specs:
            type_info = TTypeInfo(type=type_code, encoding=encoding,
                                  nullable=True, is_array=False, precision=0,
                                  scale=0, comp_param=comp_param)
            data.append(TColumnType(col_name=col_name, col_type=type_info,
                                    is_reserved_keyword=False, src_name=''))

        result = _extract_column_details(data)

        expected = [
            ColumnDetails(name=col_name, type=type_name, nullable=True,
                          precision=0, scale=0, comp_param=comp_param)
            for col_name, _, _, comp_param, type_name in specs
        ]
        assert result == expected
Пример #3
0
    def test_nulls_handled(self):
        """Null-flagged values must come back as ``None`` in both layouts.

        An 11-column result set (type codes 0 through 10, all nullable) is
        built once in columnar form and once in row-wise form. Every value
        is flagged as null, so ``make_row_results_set`` is expected to yield
        a single row of eleven ``None`` values each time.
        """
        from mapd.ttypes import (TRowSet, TColumnType, TTypeInfo, TColumn,
                                 TColumnData, TQueryResult, TDatum, TRow,
                                 TDatumVal)

        # Payload slot used for each column, by position: columns 0-2 and
        # 7-10 use the int slot, 3-5 the real slot and 6 the str slot.
        slots = ['int'] * 3 + ['real'] * 3 + ['str'] + ['int'] * 4

        def describe_columns():
            # Fresh descriptor list per result set: columns 'a'..'k' with
            # type codes 0..10.
            return [
                TColumnType(col_name=letter,
                            col_type=TTypeInfo(type=code, nullable=True))
                for code, letter in enumerate('abcdefghijk')
            ]

        # column-wise
        columnar_cols = [
            TColumn(data=TColumnData(**{slot + '_col': [-2147483648]}),
                    nulls=[True])
            for slot in slots
        ]
        columnar = TQueryResult(
            TRowSet(row_desc=describe_columns(),
                    rows=[],
                    columns=columnar_cols,
                    is_columnar=True))

        result = list(make_row_results_set(columnar))
        assert result == [(None, ) * 11]

        # row-wise
        null_datums = [
            TDatum(val=TDatumVal(**{slot + '_val': -1}), is_null=True)
            for slot in slots
        ]
        row_wise = TQueryResult(
            TRowSet(row_desc=describe_columns(),
                    rows=[TRow(cols=null_datums)],
                    is_columnar=False))

        result = list(make_row_results_set(row_wise))
        assert result == [(None, ) * 11]
Пример #4
0
    def test_build_row_desc(self):
        """Check ``build_row_desc`` for eleven column kinds, with and
        without the index.

        The frame holds one column per kind (boolean, three integer widths,
        two float widths, two string columns, time, timestamp and date).
        The test is skipped when pandas is not installed.
        """
        pd = pytest.importorskip("pandas")
        import numpy as np
        from mapd.ttypes import TTypeInfo, TColumnType

        # Ordered (column name, values) pairs; the order fixes the frame's
        # column order.
        contents = [
            ("boolean_", [True, False]),
            ("smallint_", np.array([0, 1], dtype=np.int16)),
            ("int_", np.array([0, 1], dtype=np.int32)),
            ("bigint_", np.array([0, 1], dtype=np.int64)),
            ("float_", np.array([0, 1], dtype=np.float32)),
            ("double_", np.array([0, 1], dtype=np.float64)),
            ("varchar_", ["a", "b"]),
            ("text_", ['a', 'b']),
            ("time_", [datetime.time(0, 11, 59), datetime.time(13)]),
            ("timestamp_", [pd.Timestamp("2016"), pd.Timestamp("2017")]),
            ("date_", [datetime.date(2016, 1, 1), datetime.date(2017, 1, 1)]),
        ]
        data = pd.DataFrame(dict(contents),
                            columns=[label for label, _ in contents])

        # The first three expected entries pass is_reserved_keyword=None
        # explicitly; the remaining ones leave it unset.
        expected = [
            TColumnType(col_name=label,
                        col_type=TTypeInfo(type=code),
                        is_reserved_keyword=None)
            for label, code in [('boolean_', 10), ('smallint_', 0),
                                ('int_', 1)]
        ]
        expected.extend(
            TColumnType(col_name=label, col_type=TTypeInfo(type=code))
            for label, code in [('bigint_', 2), ('float_', 3),
                                ('double_', 5), ('varchar_', 6),
                                ('text_', 6), ('time_', 7),
                                ('timestamp_', 8), ('date_', 9)])

        result = _pandas_loaders.build_row_desc(data)
        assert result == expected

        # With preserve_index=True the named index is expected to show up
        # as an extra leading column with type code 2.
        data.index.name = 'idx'
        index_column = TColumnType(col_name='idx', col_type=TTypeInfo(type=2))
        expected = [index_column] + expected

        result = _pandas_loaders.build_row_desc(data, preserve_index=True)
        assert result == expected