def test_empty_weight_mapping(self):
    """Encoding must fail when an ordinal column has no mapping.

    Sets ``ord_dict["Size"]`` to ``None`` and expects ``ValueError`` from
    constructing or running the encoder.
    """
    train_csv = pd.read_csv("datasets/encoding/testnew.csv")
    # Work on a private copy: assigning into the module-level ``params``
    # would leave ``Size: None`` behind for every test that runs afterwards,
    # making their outcome depend on execution order.
    local_params = {
        **params,
        "ord_dict": {**params["ord_dict"], "Size": None},
    }
    # Only the calls expected to raise live inside the context manager, so a
    # failure in the setup above cannot be mistaken for the expected error.
    with pytest.raises(ValueError):
        encoder = EncodeData(
            train_df=train_csv, target_label="Price", params=local_params
        )
        encoder.encode()
def test_mapping(self):
    """Check the ordinal codes written to ``ProfessionEncoded``.

    The first element of the encoder's result must hold a
    ``ProfessionEncoded`` column with three distinct values, the value 3 at
    index 2, and exactly the codes listed in the ``Profession`` mapping.
    """
    source_frame = pd.read_csv("datasets/encoding/testnew.csv")
    result = EncodeData(
        train_df=source_frame, target_label="Price", params=params
    ).encode()

    profession_codes = result[0]["ProfessionEncoded"]
    assert profession_codes.nunique() == 3
    assert profession_codes[2] == 3

    expected_codes = Counter(params["ord_dict"]["Profession"].values())
    observed_codes = Counter(profession_codes.unique())
    assert expected_codes == observed_codes
def test_ignore_cat_col():
    """A column listed in both ``cat_cols`` and ``ord_dict`` gets no dummy.

    ``Profession`` is requested as a categorical column while also having an
    ordinal mapping; after encoding, no ``Profession_HOD`` column may exist.
    """
    config = {
        "train_df": pd.read_csv("datasets/encoding/testnew.csv"),
        "target_label": "Price",
        "cat_cols": ["Profession"],
        "ord_dict": ord_dict,
        "one_hot": True,
    }
    EncodeData().encode(params=config)

    # Read the frame back from the config after encoding, as the original
    # test does, rather than reusing the frame that was passed in.
    encoded_columns = config["train_df"].columns
    assert "Profession_HOD" not in encoded_columns
def test_one_hot_encoding():
    """One-hot encoding of ``Test`` and ``Labels`` adds a ``Test_Tata`` column.

    The new column must be present after encoding and hold 1 at index 1.
    """
    config = {
        "train_df": pd.read_csv("datasets/encoding/testnew.csv"),
        "target_label": "Price",
        "cat_cols": ["Test", "Labels"],
        "ord_dict": ord_dict,
        "one_hot": True,
    }
    EncodeData().encode(params=config)

    # Look the frame up again after encoding, exactly as the original does.
    encoded_frame = config["train_df"]
    assert "Test_Tata" in encoded_frame.columns
    assert encoded_frame["Test_Tata"][1] == 1
def test_empty_weight_mapping():
    """Encoding must raise ``ValueError`` when ``Size`` maps to ``None``.

    The ``Price`` column is dropped from the frame first, and the shared
    ``ord_dict`` is copied so the ``None`` entry stays local to this test.
    """
    frame = pd.read_csv("datasets/encoding/testnew.csv")
    frame = frame.drop(columns=["Price"])

    # Shallow copy plus one extra top-level key; the module-level mapping
    # itself is left untouched.
    mapping_without_size = {**ord_dict, "Size": None}

    config = {
        "train_df": frame,
        "target_label": "Price",
        "ord_dict": mapping_without_size,
    }
    with pytest.raises(ValueError):
        EncodeData().encode(params=config)
def test_mapping():
    """Check the ordinal codes written to ``ProfessionEncoded``.

    With ``Price`` dropped from the frame, encoding must leave a
    ``ProfessionEncoded`` column in ``params["train_df"]`` that has three
    distinct values, the value 3 at index 2, and exactly the codes from the
    ``Profession`` mapping.
    """
    frame = pd.read_csv("datasets/encoding/testnew.csv")
    frame = frame.drop(columns=["Price"])

    config = {
        "train_df": frame,
        "target_label": "Price",
        "ord_dict": ord_dict,
    }
    EncodeData().encode(params=config)

    # Both lookups go through the config after encoding, as in the original.
    profession_codes = config["train_df"]["ProfessionEncoded"]
    assert profession_codes.nunique() == 3
    assert profession_codes[2] == 3

    expected_codes = Counter(config["ord_dict"]["Profession"].values())
    observed_codes = Counter(profession_codes.unique())
    assert expected_codes == observed_codes
def test_warning():
    """Encoding without a ``target_label`` must emit a ``UserWarning``."""
    config = {
        "train_df": pd.read_csv("datasets/encoding/testnew.csv"),
        "ord_dict": ord_dict,
    }
    with pytest.warns(UserWarning):
        EncodeData().encode(params=config)
def test_empty_df():
    """Encoding without a ``train_df`` entry must raise ``ValueError``."""
    config = {"target_label": "Price", "ord_dict": ord_dict}
    with pytest.raises(ValueError):
        EncodeData().encode(params=config)
# Scratch script: runs EncodeData over the sample dataset with an ordinal
# mapping for "Profession" and prints whatever encode() returns.
from preprocessy.encoding import EncodeData
import pandas as pd

# Ordinal mapping: column name -> {category label: integer code}.
ord_dict = {"Profession": {"Student": 1, "Teacher": 2, "HOD": 3}}
# Parameter dict handed to the encoder; only the ordinal mapping is set.
params = {"ord_dict": ord_dict}

# Path is relative, so the script must run from a directory containing
# "datasets/encoding/".
train_csv = pd.read_csv("datasets/encoding/testnew.csv")
# NOTE(review): the two commented-out lines below look like debugging
# leftovers (dtype inspection and dropping unnamed CSV columns) — confirm
# whether they can be deleted.
# print(train_csv.dtypes)
# train_csv = train_csv.drop(['Unnamed: 5','Unnamed: 6'],axis=1)
k = EncodeData(train_df=train_csv, params=params)
train = k.encode()
print(train)
def test_warning(self):
    """Encoding without a ``target_label`` must emit a ``UserWarning``."""
    source_frame = pd.read_csv("datasets/encoding/testnew.csv")
    with pytest.warns(UserWarning):
        EncodeData(train_df=source_frame, params=params).encode()
def test_empty_df(self):
    """Encoding without a training frame must raise ``ValueError``."""
    with pytest.raises(ValueError):
        EncodeData(target_label="Price", params=params).encode()