def build_row_desc(data, preserve_index=False):
    """Build the list of ``TColumnType`` column descriptors for ``data``.

    Parameters
    ----------
    data : pandas.DataFrame or geopandas.GeoDataFrame
        Frame whose columns are described.
    preserve_index : bool, default False
        When true, ``data.reset_index()`` is applied first so the index is
        described as regular column(s) as well.

    Returns
    -------
    list of TColumnType
        One descriptor per column, in column order.

    Raises
    ------
    TypeError
        If ``data`` is neither a DataFrame nor a GeoDataFrame.
    """
    supported_types = (pd.DataFrame, gpd.GeoDataFrame)
    if not isinstance(data, supported_types):
        # Once https://issues.apache.org/jira/browse/ARROW-1576 is complete
        # we can support pa.Table here too
        message = (
            "Create table is not supported for type {}. "
            "Use a pandas DataFrame, or perform the create "
            "separately"
        )
        raise TypeError(message.format(type(data)))

    frame = data.reset_index() if preserve_index else data

    # First pass: resolve every column's type name.  get_mapd_dtype may
    # return a name prefixed with 'ARRAY/'; the prefix is stripped and
    # remembered as a per-column flag (True for arrays, None otherwise).
    array_flags = {}
    column_types = []
    for column in frame.columns:
        raw_dtype = get_mapd_dtype(frame[column])
        array_flags[column] = raw_dtype.startswith('ARRAY') or None
        column_types.append((column, raw_dtype.replace('ARRAY/', '')))

    # row_desc :: List<TColumnType>
    row_desc = []
    for name, type_name in column_types:
        type_info = TTypeInfo(
            getattr(TDatumType, type_name),
            is_array=array_flags[name],
        )
        row_desc.append(TColumnType(name, type_info))

    # force text encoding dict for all string columns
    # default is TEXT ENCODING DICT(32) when only col_type.encoding = 4 is set
    # https://github.com/omnisci/pymapd/issues/140#issuecomment-477353420
    for column_type in row_desc:
        type_info = column_type.col_type
        if type_info.type == 6:
            type_info.encoding = 4
        elif type_info.type in GEO_TYPE_ID:
            # NOTE(review): 23 is a hard-coded precision applied to geo
            # types; its meaning is not visible here -- confirm.
            type_info.precision = 23

    return row_desc
def build_row_desc(data, preserve_index=False):
    """Build the list of ``TColumnType`` column descriptors for ``data``.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame whose columns are described.
    preserve_index : bool, default False
        When true, ``data.reset_index()`` is applied first so the index is
        described as regular column(s) as well.

    Returns
    -------
    list of TColumnType
        One descriptor per column, in column order.

    Raises
    ------
    TypeError
        If ``data`` is not a pandas DataFrame.
    """
    if not isinstance(data, pd.DataFrame):
        # Once https://issues.apache.org/jira/browse/ARROW-1576 is complete
        # we can support pa.Table here too
        message = (
            "Create table is not supported for type {}. "
            "Use a pandas DataFrame, or perform the create "
            "separately"
        )
        raise TypeError(message.format(type(data)))

    frame = data.reset_index() if preserve_index else data

    # Resolve every column's type name before building any descriptor.
    column_types = []
    for column in frame.columns:
        column_types.append((column, get_mapd_dtype(frame[column])))

    # row_desc :: List<TColumnType>
    row_desc = []
    for name, type_name in column_types:
        type_info = TTypeInfo(getattr(TDatumType, type_name))
        row_desc.append(TColumnType(name, type_info))

    # force text encoding dict for all string columns
    # default is TEXT ENCODING DICT(32) when only col_type.encoding = 4 is set
    # https://github.com/omnisci/pymapd/issues/140#issuecomment-477353420
    for column_type in row_desc:
        type_info = column_type.col_type
        if type_info.type == 6:
            type_info.encoding = 4

    return row_desc
def test_build_row_desc(self):
    """Check ``build_row_desc`` against a frame covering many dtypes.

    The descriptor list is compared for the plain frame first, and then
    again with ``preserve_index=True`` after naming the index ``'idx'``,
    which is expected to add a leading type-2 column.
    """
    values = {
        'boolean_': [True, False],
        'smallint_': np.array([0, 1], dtype=np.int16),
        'int_': np.array([0, 1], dtype=np.int32),
        'bigint_': np.array([0, 1], dtype=np.int64),
        'float_': np.array([0, 1], dtype=np.float32),
        'double_': np.array([0, 1], dtype=np.float64),
        'varchar_': ['a', 'b'],
        'text_': ['a', 'b'],
        'time_': [datetime.time(0, 11, 59), datetime.time(13)],
        'timestamp1_': [pd.Timestamp('2016'), pd.Timestamp('2017')],
        'timestamp2_': [
            np.datetime64('2016-01-01 01:01:01.001001001'),
            np.datetime64('2017-01-01 01:01:01.001001001'),
        ],
        'date_': [
            datetime.date(2016, 1, 1),
            datetime.date(2017, 1, 1),
        ],
    }
    column_order = [
        'boolean_',
        'smallint_',
        'int_',
        'bigint_',
        'float_',
        'double_',
        'varchar_',
        'text_',
        'time_',
        'timestamp1_',
        'timestamp2_',
        'date_',
    ]
    data = pd.DataFrame(values, columns=column_order)

    def column(name, type_kwargs, **column_kwargs):
        # Small factory so each expected entry fits on one line.
        return TColumnType(
            col_name=name,
            col_type=TTypeInfo(**type_kwargs),
            **column_kwargs
        )

    result = _pandas_loaders.build_row_desc(data)
    expected = [
        column('boolean_', {'type': 10}, is_reserved_keyword=None),
        column('smallint_', {'type': 0}, is_reserved_keyword=None),
        column('int_', {'type': 1}, is_reserved_keyword=None),
        column('bigint_', {'type': 2}),
        column('float_', {'type': 3}),
        column('double_', {'type': 5}),
        # String columns are expected to carry encoding=4.
        column('varchar_', {'type': 6, 'encoding': 4}),
        column('text_', {'type': 6, 'encoding': 4}),
        column('time_', {'type': 7}),
        column('timestamp1_', {'type': 8}),
        column('timestamp2_', {'type': 8, 'precision': 9}),
        column('date_', {'type': 9}),
    ]
    assert result == expected

    # With the index preserved, it is described first.
    data.index.name = 'idx'
    result = _pandas_loaders.build_row_desc(data, preserve_index=True)
    expected = [column('idx', {'type': 2})] + expected
    assert result == expected
def test_build_row_desc(self):
    """Check ``build_row_desc`` against a frame covering the basic dtypes.

    The descriptor list is compared for the plain frame first, and then
    again with ``preserve_index=True`` after naming the index ``'idx'``,
    which is expected to add a leading type-2 column.
    """
    values = {
        "boolean_": [True, False],
        "smallint_": np.array([0, 1], dtype=np.int16),
        "int_": np.array([0, 1], dtype=np.int32),
        "bigint_": np.array([0, 1], dtype=np.int64),
        "float_": np.array([0, 1], dtype=np.float32),
        "double_": np.array([0, 1], dtype=np.float64),
        "varchar_": ["a", "b"],
        "text_": ['a', 'b'],
        "time_": [datetime.time(0, 11, 59), datetime.time(13)],
        "timestamp_": [pd.Timestamp("2016"), pd.Timestamp("2017")],
        "date_": [datetime.date(2016, 1, 1), datetime.date(2017, 1, 1)],
    }
    column_order = [
        'boolean_', 'smallint_', 'int_', 'bigint_', 'float_', 'double_',
        'varchar_', 'text_', 'time_', 'timestamp_', 'date_',
    ]
    data = pd.DataFrame(values, columns=column_order)

    def described(name, type_info, **column_kwargs):
        # Wrap a TTypeInfo in the TColumnType carrying the column name.
        return TColumnType(
            col_name=name, col_type=type_info, **column_kwargs
        )

    result = _pandas_loaders.build_row_desc(data)
    expected = [
        described('boolean_', TTypeInfo(type=10), is_reserved_keyword=None),
        described('smallint_', TTypeInfo(type=0), is_reserved_keyword=None),
        described('int_', TTypeInfo(type=1), is_reserved_keyword=None),
        described('bigint_', TTypeInfo(type=2)),
        described('float_', TTypeInfo(type=3)),
        described('double_', TTypeInfo(type=5)),
        # String columns are expected to carry encoding=4.
        described('varchar_', TTypeInfo(type=6, encoding=4)),
        described('text_', TTypeInfo(type=6, encoding=4)),
        described('time_', TTypeInfo(type=7)),
        described('timestamp_', TTypeInfo(type=8)),
        described('date_', TTypeInfo(type=9)),
    ]
    assert result == expected

    # With the index preserved, it is described first.
    data.index.name = 'idx'
    result = _pandas_loaders.build_row_desc(data, preserve_index=True)
    expected = [described('idx', TTypeInfo(type=2))] + expected
    assert result == expected
def test_nulls_handled(self):
    """Check that a columnar result whose values are all flagged null
    produces a single row of eleven ``None`` values.
    """
    # Columns 'a'..'k' carry type ids 0..10, all nullable.
    row_desc = [
        TColumnType(
            col_name=name,
            col_type=TTypeInfo(type=type_id, nullable=True),
        )
        for type_id, name in enumerate('abcdefghijk')
    ]

    # TColumnData field used by each column, in column order.
    data_fields = (
        ['int_col'] * 3
        + ['real_col'] * 3
        + ['str_col']
        + ['int_col'] * 4
    )
    # Every column holds one placeholder value that is marked as null.
    columns = [
        TColumn(data=TColumnData(**{field: [-2147483648]}), nulls=[True])
        for field in data_fields
    ]

    rs = TQueryResult(
        TRowSet(
            row_desc=row_desc,
            rows=[],
            columns=columns,
            is_columnar=True,
        )
    )

    result = list(make_row_results_set(rs))
    assert result == [(None,) * 11]
def test_sql_validate(self, con):
    """Check ``sql_validate`` column metadata for a freshly created table.

    Recreates the ``stocks`` table through ``con``, validates a
    ``select *`` against it, and compares the reported column types (in
    returned order) and the sorted column names with the expected values.
    """
    from omnisci.common.ttypes import TTypeInfo

    cursor = con.cursor()
    cursor.execute('drop table if exists stocks;')
    create = ('create table stocks (date_ text, trans text, symbol text, '
              'qty int, price float, vol float);')
    cursor.execute(create)

    query = "select * from stocks"
    results = con._client.sql_validate(con._session, query)
    col_names = sorted(r.col_name for r in results)
    col_types = [r.col_type for r in results]

    def type_info(type_id, encoding, comp_param):
        # All expected columns share every field except these three.
        return TTypeInfo(
            type=type_id,
            encoding=encoding,
            nullable=True,
            is_array=False,
            precision=0,
            scale=0,
            comp_param=comp_param,
            size=-1,
        )

    expected_col_names = ['date_', 'price', 'qty', 'symbol', 'trans', 'vol']
    expected_types = (
        [type_info(6, 4, 32) for _ in range(3)]   # three text columns
        + [type_info(1, 0, 0)]                    # qty int
        + [type_info(3, 0, 0) for _ in range(2)]  # price, vol float
    )

    assert col_types == expected_types
    assert col_names == expected_col_names
def test_extract_row_details(self):
    """Check that ``_extract_column_details`` converts ``TColumnType``
    descriptors into the matching ``ColumnDetails`` records.
    """
    # (name, type id, encoding id, comp_param, type label, encoding label)
    specs = [
        ('date_', 6, 4, 32, 'STR', 'DICT'),
        ('trans', 6, 4, 32, 'STR', 'DICT'),
        ('symbol', 6, 4, 32, 'STR', 'DICT'),
        ('qty', 1, 0, 0, 'INT', 'NONE'),
        ('price', 3, 0, 0, 'FLOAT', 'NONE'),
        ('vol', 3, 0, 0, 'FLOAT', 'NONE'),
    ]

    data = []
    for name, type_id, encoding_id, comp_param, _, _ in specs:
        type_info = TTypeInfo(
            type=type_id,
            encoding=encoding_id,
            nullable=True,
            is_array=False,
            precision=0,
            scale=0,
            comp_param=comp_param,
        )
        data.append(
            TColumnType(
                col_name=name,
                col_type=type_info,
                is_reserved_keyword=False,
                src_name='',
            )
        )

    result = _extract_column_details(data)

    expected = []
    for name, _, _, comp_param, type_label, encoding_label in specs:
        expected.append(
            ColumnDetails(
                name=name,
                type=type_label,
                nullable=True,
                precision=0,
                scale=0,
                comp_param=comp_param,
                encoding=encoding_label,
                is_array=False,
            )
        )

    assert result == expected