def test_converts_variable_type_after_init(self):
    """Check that ``convert_variable_type`` works on an existing entity.

    The 'ints' column starts out as strings and is converted in turn to
    Numeric, Categorical, Ordinal and Boolean.  After every conversion the
    entity must report the new variable class; for Numeric and Boolean the
    dtype of the dataframe column is checked as well.
    """
    raw = pd.DataFrame({
        'id': [0, 1, 2],
        'category': ['a', 'b', 'a'],
        'ints': ['1', '2', '1'],
    })
    raw["category"] = raw["category"].astype("category")

    es = EntitySet(id='test')
    es.entity_from_dataframe(entity_id='test_entity',
                             index='id',
                             dataframe=raw)
    entity = es['test_entity']
    # Fetched once, before any conversion; the dtype assertions below read
    # this same object.
    entity_df = es.get_dataframe('test_entity')

    entity.convert_variable_type('ints', variable_types.Numeric)
    assert isinstance(entity['ints'], variable_types.Numeric)
    numeric_dtypes = variable_types.PandasTypes._pandas_numerics
    assert entity_df['ints'].dtype.name in numeric_dtypes

    # These two conversions only check the variable class, not the dtype.
    for vtype in (variable_types.Categorical, variable_types.Ordinal):
        entity.convert_variable_type('ints', vtype)
        assert isinstance(entity['ints'], vtype)

    entity.convert_variable_type('ints', variable_types.Boolean,
                                 true_val=1, false_val=2)
    assert isinstance(entity['ints'], variable_types.Boolean)
    assert entity_df['ints'].dtype.name == 'bool'
def test_converts_variable_type_after_init(self):
    """Convert the 'ints' variable of an already-built entity step by step.

    Sequence exercised: Numeric -> Categorical -> Ordinal -> Boolean.  Each
    step asserts the class of ``entity['ints']``; the Numeric and Boolean
    steps additionally assert the dtype name of the dataframe column.
    """
    # NOTE(review): a test with this exact name is also defined immediately
    # before this one, so this later definition replaces it in the same
    # scope -- confirm which copy should remain.
    columns = {
        'id': [0, 1, 2],
        'category': ['a', 'b', 'a'],
        'ints': ['1', '2', '1'],
    }
    source_df = pd.DataFrame(columns)
    source_df["category"] = source_df["category"].astype("category")

    entity_set = EntitySet(id='test')
    entity_set.entity_from_dataframe(
        entity_id='test_entity',
        index='id',
        dataframe=source_df,
    )
    test_entity = entity_set['test_entity']
    # Obtained before the conversions; reused for both dtype checks.
    stored_df = entity_set.get_dataframe('test_entity')

    # Strings -> numeric: variable class and column dtype must both change.
    test_entity.convert_variable_type('ints', variable_types.Numeric)
    assert isinstance(test_entity['ints'], variable_types.Numeric)
    ints_dtype = stored_df['ints'].dtype.name
    assert ints_dtype in variable_types.PandasTypes._pandas_numerics

    test_entity.convert_variable_type('ints', variable_types.Categorical)
    assert isinstance(test_entity['ints'], variable_types.Categorical)

    test_entity.convert_variable_type('ints', variable_types.Ordinal)
    assert isinstance(test_entity['ints'], variable_types.Ordinal)

    # Boolean conversion is given explicit true/false values.
    test_entity.convert_variable_type(
        'ints',
        variable_types.Boolean,
        true_val=1,
        false_val=2,
    )
    assert isinstance(test_entity['ints'], variable_types.Boolean)
    ints_dtype = stored_df['ints'].dtype.name
    assert ints_dtype == 'bool'
def test_converts_variable_types_on_init(self):
    """Check that variable types passed at entity creation are applied.

    'ints' and 'floats' hold strings but are declared Numeric, so the
    entity's dataframe must end up with numeric dtypes for both.
    'category_int' gets no explicit variable type and is expected to be
    a Categorical variable.
    """
    raw = pd.DataFrame({
        'id': [0, 1, 2],
        'category': ['a', 'b', 'a'],
        'category_int': [1, 2, 3],
        'ints': ['1', '2', '3'],
        'floats': ['1', '2', '3.0'],
    })
    raw["category_int"] = raw["category_int"].astype("category")

    declared_types = {
        'id': variable_types.Categorical,
        'ints': variable_types.Numeric,
        'floats': variable_types.Numeric,
    }

    es = EntitySet(id='test')
    es.entity_from_dataframe(entity_id='test_entity',
                             index='id',
                             variable_types=declared_types,
                             dataframe=raw)

    entity_df = es.get_dataframe('test_entity')
    for column in ('ints', 'floats'):
        dtype_name = entity_df[column].dtype.name
        assert dtype_name in variable_types.PandasTypes._pandas_numerics

    # This one is inferred from the pandas dtype rather than declared.
    entity = es["test_entity"]
    assert isinstance(entity['category_int'], variable_types.Categorical)
def test_converts_variable_types_on_init(self):
    """Build an entity with explicit variable types and verify the result.

    The string columns 'ints' and 'floats' are declared Numeric and must
    come back from the entity set with numeric dtypes.  The 'category_int'
    column is cast to the pandas category dtype beforehand, is left out of
    the declared types, and must be exposed as a Categorical variable.
    """
    # NOTE(review): a test with this exact name is also defined immediately
    # before this one, so this later definition replaces it in the same
    # scope -- confirm which copy should remain.
    columns = {
        'id': [0, 1, 2],
        'category': ['a', 'b', 'a'],
        'category_int': [1, 2, 3],
        'ints': ['1', '2', '3'],
        'floats': ['1', '2', '3.0'],
    }
    source_df = pd.DataFrame(columns)
    source_df["category_int"] = source_df["category_int"].astype("category")

    numeric = variable_types.Numeric
    vtype_map = {
        'id': variable_types.Categorical,
        'ints': numeric,
        'floats': numeric,
    }

    entity_set = EntitySet(id='test')
    entity_set.entity_from_dataframe(
        entity_id='test_entity',
        index='id',
        variable_types=vtype_map,
        dataframe=source_df,
    )

    stored_df = entity_set.get_dataframe('test_entity')
    ints_dtype = stored_df['ints'].dtype.name
    assert ints_dtype in variable_types.PandasTypes._pandas_numerics
    floats_dtype = stored_df['floats'].dtype.name
    assert floats_dtype in variable_types.PandasTypes._pandas_numerics

    # Not listed in vtype_map: this type is inferred from the pandas dtype.
    test_entity = entity_set["test_entity"]
    category_int_var = test_entity['category_int']
    assert isinstance(category_int_var, variable_types.Categorical)