def test_merge_dataframe(self):
    """Check that merge_dataframe returns a pandas DataFrame.

    Both inputs are loaded with csv_to_dataframe from filepath1 and
    filepath2, verified to be DataFrames, and then passed to
    merge_dataframe together with 'user_id'.
    """
    from build import csv_to_dataframe, merge_dataframe

    frame1 = csv_to_dataframe(filepath1)
    self.assertIsInstance(frame1, pd.DataFrame)
    frame2 = csv_to_dataframe(filepath2)
    self.assertIsInstance(frame2, pd.DataFrame)

    merged = merge_dataframe(frame1, frame2, 'user_id')
    self.assertIsInstance(merged, pd.DataFrame)
def test_remove_inf_values(self):
    """Check that remove_inf_values returns a pandas DataFrame.

    The input is the result of merge_dataframe on the two CSV-loaded
    frames; remove_inf_values is called on it with "age_loglog".
    """
    from build import remove_inf_values, csv_to_dataframe, merge_dataframe

    frame1 = csv_to_dataframe(filepath1)
    self.assertIsInstance(frame1, pd.DataFrame)
    frame2 = csv_to_dataframe(filepath2)
    self.assertIsInstance(frame2, pd.DataFrame)

    merged = merge_dataframe(frame1, frame2, 'user_id')
    # NOTE(review): "age_loglog" is not created anywhere in this test;
    # presumably it is already present in the loaded data or
    # remove_inf_values tolerates its absence -- confirm.
    cleaned = remove_inf_values(merged, "age_loglog")
    self.assertIsInstance(cleaned, pd.DataFrame)
def test_loglog(self):
    """Check that loglog returns a pandas DataFrame.

    loglog is called on the merged CSV frames with the column list
    ["age"].
    """
    from build import loglog, csv_to_dataframe, merge_dataframe

    frame1 = csv_to_dataframe(filepath1)
    self.assertIsInstance(frame1, pd.DataFrame)
    frame2 = csv_to_dataframe(filepath2)
    self.assertIsInstance(frame2, pd.DataFrame)

    merged = merge_dataframe(frame1, frame2, 'user_id')
    transformed = loglog(merged, ["age"])
    self.assertIsInstance(transformed, pd.DataFrame)
def test_correlation_list(self):
    """Check that correlation_list returns a list.

    correlation_list is called with the merged CSV frames as its only
    argument.
    """
    from build import correlation_list, csv_to_dataframe, merge_dataframe

    frame1 = csv_to_dataframe(filepath1)
    self.assertIsInstance(frame1, pd.DataFrame)
    frame2 = csv_to_dataframe(filepath2)
    self.assertIsInstance(frame2, pd.DataFrame)

    merged = merge_dataframe(frame1, frame2, 'user_id')
    correlations = correlation_list(merged)
    self.assertIsInstance(correlations, list)
def test_sqrt_transform(self):
    """Check that sqrt_transform returns a list.

    sqrt_transform is called on the merged CSV frames with the column
    list ["age"].
    """
    from build import sqrt_transform, csv_to_dataframe, merge_dataframe

    frame1 = csv_to_dataframe(filepath1)
    self.assertIsInstance(frame1, pd.DataFrame)
    frame2 = csv_to_dataframe(filepath2)
    self.assertIsInstance(frame2, pd.DataFrame)

    merged = merge_dataframe(frame1, frame2, 'user_id')
    transformed = sqrt_transform(merged, ["age"])
    # NOTE(review): other transform tests in this file (e.g. test_loglog)
    # expect a DataFrame; confirm sqrt_transform really returns a list.
    self.assertIsInstance(transformed, list)
def test_one_hot_encoder(self):
    """Check that one_hot_encoder returns a pandas DataFrame.

    one_hot_encoder is called on the merged CSV frames with the column
    list ['device', 'browser_language'].
    """
    from build import one_hot_encoder, csv_to_dataframe, merge_dataframe

    frame1 = csv_to_dataframe(filepath1)
    self.assertIsInstance(frame1, pd.DataFrame)
    frame2 = csv_to_dataframe(filepath2)
    self.assertIsInstance(frame2, pd.DataFrame)

    merged = merge_dataframe(frame1, frame2, 'user_id')
    encoded = one_hot_encoder(merged, ['device', 'browser_language'])
    self.assertIsInstance(encoded, pd.DataFrame)
def test_label_encoder(self):
    """Check that label_encoder returns a pandas DataFrame.

    label_encoder is called on the merged CSV frames with the column
    list ["sex", "country", "source", "ads_channel", "browser"].
    """
    from build import label_encoder, csv_to_dataframe, merge_dataframe

    frame1 = csv_to_dataframe(filepath1)
    self.assertIsInstance(frame1, pd.DataFrame)
    frame2 = csv_to_dataframe(filepath2)
    self.assertIsInstance(frame2, pd.DataFrame)

    merged = merge_dataframe(frame1, frame2, 'user_id')
    columns = ["sex", "country", "source", "ads_channel", "browser"]
    encoded = label_encoder(merged, columns)
    self.assertIsInstance(encoded, pd.DataFrame)
def test_centre_and_scale(self):
    """Check that centre_and_scale returns a pandas DataFrame.

    centre_and_scale is called on the merged CSV frames with the column
    list ["age"].
    """
    from build import centre_and_scale, csv_to_dataframe, merge_dataframe

    frame1 = csv_to_dataframe(filepath1)
    self.assertIsInstance(frame1, pd.DataFrame)
    frame2 = csv_to_dataframe(filepath2)
    self.assertIsInstance(frame2, pd.DataFrame)

    merged = merge_dataframe(frame1, frame2, 'user_id')
    scaled = centre_and_scale(merged, ["age"])
    self.assertIsInstance(scaled, pd.DataFrame)
def test_dtype_category(self):
    """Check that dtype_category returns a pandas DataFrame.

    dtype_category is called on the merged CSV frames with a column
    list that includes "user_id" and "date" alongside the other string
    column names listed below.
    """
    from build import dtype_category, csv_to_dataframe, merge_dataframe

    frame1 = csv_to_dataframe(filepath1)
    self.assertIsInstance(frame1, pd.DataFrame)
    frame2 = csv_to_dataframe(filepath2)
    self.assertIsInstance(frame2, pd.DataFrame)

    merged = merge_dataframe(frame1, frame2, 'user_id')
    columns = [
        "user_id",
        "sex",
        "country",
        "date",
        "source",
        "device",
        "browser_language",
        "ads_channel",
        "browser",
    ]
    converted = dtype_category(merged, columns)
    self.assertIsInstance(converted, pd.DataFrame)
def test_multi_power(self):
    """Check that multi_power returns a pandas DataFrame.

    multi_power is called on the merged CSV frames with the column list
    ["age"] and the powers [0.5, 2, 3].
    """
    from build import multi_power, csv_to_dataframe, merge_dataframe

    frame1 = csv_to_dataframe(filepath1)
    self.assertIsInstance(frame1, pd.DataFrame)
    frame2 = csv_to_dataframe(filepath2)
    self.assertIsInstance(frame2, pd.DataFrame)

    merged = merge_dataframe(frame1, frame2, 'user_id')
    column_list = ["age"]
    list_of_powers = [0.5, 2, 3]
    powered = multi_power(merged, column_list, list_of_powers)
    self.assertIsInstance(powered, pd.DataFrame)
def test_best_k_features(self):
    """Check that best_k_features returns a list.

    best_k_features is called on the merged CSV frames with six
    predictor column names, the target 'test', and 3 as its last
    argument.
    """
    from build import best_k_features, csv_to_dataframe, merge_dataframe

    frame1 = csv_to_dataframe(filepath1)
    self.assertIsInstance(frame1, pd.DataFrame)
    frame2 = csv_to_dataframe(filepath2)
    self.assertIsInstance(frame2, pd.DataFrame)

    merged = merge_dataframe(frame1, frame2, 'user_id')
    # NOTE(review): the derived names ("age^0.5", "age_log", ...) are not
    # created in this test; presumably they already exist in the loaded
    # data or best_k_features copes with missing columns -- confirm.
    predictors = [
        "age",
        "age^0.5",
        "age^2",
        "age^3",
        "age_log",
        "age_loglog",
    ]
    target = 'test'
    selected = best_k_features(merged, predictors, target, 3)
    self.assertIsInstance(selected, list)
def test_var_check(self):
    """Check that var_check(merged, 10) returns an empty list.

    The input is the result of merge_dataframe on the two CSV-loaded
    frames; var_check receives only that frame and the value 10.
    """
    from build import csv_to_dataframe, merge_dataframe, var_check

    frame1 = csv_to_dataframe(filepath1)
    self.assertIsInstance(frame1, pd.DataFrame)
    frame2 = csv_to_dataframe(filepath2)
    self.assertIsInstance(frame2, pd.DataFrame)

    merged = merge_dataframe(frame1, frame2, 'user_id')
    flagged = var_check(merged, 10)
    self.assertEqual(flagged, [])
def test_dtype_category_with_conversion_and_test(self):
    """Check that dtype_category returns a pandas DataFrame.

    dtype_category is called on the merged CSV frames with a column
    list that includes "conversion" and "test".

    This method has its own name because another test_dtype_category is
    defined in this file; two methods sharing one name in a class body
    leave only the later definition, so one of the tests would never
    run.
    """
    from build import csv_to_dataframe, merge_dataframe, dtype_category

    frame1 = csv_to_dataframe(filepath1)
    self.assertIsInstance(frame1, pd.DataFrame)
    frame2 = csv_to_dataframe(filepath2)
    self.assertIsInstance(frame2, pd.DataFrame)

    merged = merge_dataframe(frame1, frame2, 'user_id')
    column_list = [
        "sex",
        "country",
        "source",
        "device",
        "browser_language",
        "ads_channel",
        "browser",
        "conversion",
        "test",
    ]
    converted = dtype_category(merged, column_list)
    self.assertIsInstance(converted, pd.DataFrame)