def build_row_desc(data, preserve_index=False):
    """Build the list of ``TColumnType`` entries describing a DataFrame.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame whose columns are described.
    preserve_index : bool, default False
        When true, ``data.reset_index()`` is applied first so the index
        is described as a column as well.

    Returns
    -------
    list
        One ``TColumnType`` per column, in column order.

    Raises
    ------
    TypeError
        If ``data`` is not a pandas DataFrame.
    """
    if not isinstance(data, pd.DataFrame):
        # Once https://issues.apache.org/jira/browse/ARROW-1576 is complete
        # we can support pa.Table here too
        template = ("Create table is not supported for type {}. "
                    "Use a pandas DataFrame, or perform the create "
                    "separately")
        raise TypeError(template.format(type(data)))

    frame = data.reset_index() if preserve_index else data

    # Resolve every column's type name before building any descriptor.
    column_names = list(frame.columns)
    mapd_types = [get_mapd_dtype(frame[name]) for name in column_names]

    # row_desc :: List<TColumnType>
    row_desc = []
    for name, mapd_type in zip(column_names, mapd_types):
        type_info = TTypeInfo(getattr(TDatumType, mapd_type))
        row_desc.append(TColumnType(name, type_info))
    return row_desc
def test_extract_row_details(self):
    """Check ``_extract_column_details`` against six column descriptors.

    Three columns use type code 6 (expected back as ``'STR'``), one uses
    type code 1 (``'INT'``) and two use type code 3 (``'FLOAT'``). All are
    nullable, non-array, with zero precision and scale.
    """
    # (column name, type code, encoding, comp_param, expected type name)
    specs = [
        ('date_', 6, 4, 32, 'STR'),
        ('trans', 6, 4, 32, 'STR'),
        ('symbol', 6, 4, 32, 'STR'),
        ('qty', 1, 0, 0, 'INT'),
        ('price', 3, 0, 0, 'FLOAT'),
        ('vol', 3, 0, 0, 'FLOAT'),
    ]

    data = [
        TColumnType(
            col_name=name,
            col_type=TTypeInfo(type=type_code, encoding=encoding,
                               nullable=True, is_array=False,
                               precision=0, scale=0,
                               comp_param=comp_param),
            is_reserved_keyword=False,
            src_name='')
        for name, type_code, encoding, comp_param, _ in specs
    ]

    result = _extract_column_details(data)

    expected = [
        ColumnDetails(name=name, type=type_name, nullable=True,
                      precision=0, scale=0, comp_param=comp_param)
        for name, _, _, comp_param, type_name in specs
    ]
    assert result == expected
def test_nulls_handled(self):
    """Null markers come back as ``None`` in both result-set layouts.

    One all-null row is built over eleven nullable columns (type codes 0
    through 10), first as a columnar ``TRowSet`` and then as a row-wise
    one. In both cases ``make_row_results_set`` must yield a single tuple
    of eleven ``None`` values.
    """
    from mapd.ttypes import (TRowSet, TColumnType, TTypeInfo, TColumn,
                             TColumnData, TQueryResult, TDatum, TRow,
                             TDatumVal)

    column_names = 'abcdefghijk'
    # Payload slot filled for each type code 0..10, in column order.
    slots = ['int'] * 3 + ['real'] * 3 + ['str'] + ['int'] * 4

    def describe_columns():
        # Build a fresh description list for each result set.
        return [
            TColumnType(col_name=name,
                        col_type=TTypeInfo(type=type_code, nullable=True))
            for type_code, name in enumerate(column_names)
        ]

    # column-wise
    row_desc = describe_columns()
    columns = [
        TColumn(data=TColumnData(**{slot + '_col': [-2147483648]}),
                nulls=[True])
        for slot in slots
    ]
    rs = TQueryResult(
        TRowSet(row_desc=row_desc, rows=[], columns=columns,
                is_columnar=True))
    result = list(make_row_results_set(rs))
    assert result == [(None, ) * 11]

    # row-wise
    row_desc = describe_columns()
    cells = [
        TDatum(val=TDatumVal(**{slot + '_val': -1}), is_null=True)
        for slot in slots
    ]
    rs = TQueryResult(
        TRowSet(row_desc=row_desc, rows=[TRow(cols=cells)],
                is_columnar=False))
    result = list(make_row_results_set(rs))
    assert result == [(None, ) * 11]
def test_build_row_desc(self):
    """Check ``build_row_desc`` on a frame with eleven differently typed columns.

    The first call uses the default ``preserve_index``; the second names
    the index ``'idx'`` and passes ``preserve_index=True``, which is
    expected to add a leading column with type code 2.
    Skipped when pandas is not installed.
    """
    pd = pytest.importorskip("pandas")
    import numpy as np
    from mapd.ttypes import TTypeInfo, TColumnType

    # (column name, column values), in the order the columns appear.
    column_values = [
        ("boolean_", [True, False]),
        ("smallint_", np.array([0, 1], dtype=np.int16)),
        ("int_", np.array([0, 1], dtype=np.int32)),
        ("bigint_", np.array([0, 1], dtype=np.int64)),
        ("float_", np.array([0, 1], dtype=np.float32)),
        ("double_", np.array([0, 1], dtype=np.float64)),
        ("varchar_", ["a", "b"]),
        ("text_", ['a', 'b']),
        ("time_", [datetime.time(0, 11, 59), datetime.time(13)]),
        ("timestamp_", [pd.Timestamp("2016"), pd.Timestamp("2017")]),
        ("date_", [datetime.date(2016, 1, 1), datetime.date(2017, 1, 1)]),
    ]
    data = pd.DataFrame(
        dict(column_values),
        columns=[name for name, _ in column_values])

    result = _pandas_loaders.build_row_desc(data)

    # These three pass is_reserved_keyword=None explicitly; the remaining
    # entries rely on the constructor default.
    expected = [
        TColumnType(col_name=name, col_type=TTypeInfo(type=type_code),
                    is_reserved_keyword=None)
        for name, type_code in (
            ('boolean_', 10),
            ('smallint_', 0),
            ('int_', 1),
        )
    ]
    expected += [
        TColumnType(col_name=name, col_type=TTypeInfo(type=type_code))
        for name, type_code in (
            ('bigint_', 2),
            ('float_', 3),
            ('double_', 5),
            ('varchar_', 6),
            ('text_', 6),
            ('time_', 7),
            ('timestamp_', 8),
            ('date_', 9),
        )
    ]
    assert result == expected

    data.index.name = 'idx'
    result = _pandas_loaders.build_row_desc(data, preserve_index=True)
    index_column = TColumnType(col_name='idx', col_type=TTypeInfo(type=2))
    expected.insert(0, index_column)
    assert result == expected