def test_merge_dataframe(self):
    """Check that merge_dataframe returns a DataFrame for the two CSV fixtures.

    Each loaded input is asserted to be a DataFrame first, so a loader
    failure is reported separately from a merge failure.
    """
    from build import csv_to_dataframe, merge_dataframe

    frame1 = csv_to_dataframe(filepath1)
    self.assertIsInstance(frame1, pd.DataFrame)
    frame2 = csv_to_dataframe(filepath2)
    self.assertIsInstance(frame2, pd.DataFrame)

    merged = merge_dataframe(frame1, frame2, 'user_id')
    # assertIsInstance reports the actual type on failure, unlike
    # assertTrue(isinstance(...)) which only says "False is not true".
    self.assertIsInstance(merged, pd.DataFrame)
def test_correlation_list(self):
    """Check that correlation_list returns a list for the merged CSV fixtures."""
    from build import correlation_list, csv_to_dataframe, merge_dataframe

    frame1 = csv_to_dataframe(filepath1)
    self.assertIsInstance(frame1, pd.DataFrame)
    frame2 = csv_to_dataframe(filepath2)
    self.assertIsInstance(frame2, pd.DataFrame)

    merged = merge_dataframe(frame1, frame2, 'user_id')
    correlations = correlation_list(merged)
    # assertIsInstance names the offending type in its failure message.
    self.assertIsInstance(correlations, list)
def test_loglog(self):
    """Check that loglog on the merged fixtures, for column "age", returns a DataFrame."""
    from build import loglog, csv_to_dataframe, merge_dataframe

    frame1 = csv_to_dataframe(filepath1)
    self.assertIsInstance(frame1, pd.DataFrame)
    frame2 = csv_to_dataframe(filepath2)
    self.assertIsInstance(frame2, pd.DataFrame)

    merged = merge_dataframe(frame1, frame2, 'user_id')
    transformed = loglog(merged, ["age"])
    # assertIsInstance names the offending type in its failure message.
    self.assertIsInstance(transformed, pd.DataFrame)
def test_remove_inf_values(self):
    """Check that remove_inf_values on the merged fixtures returns a DataFrame.

    The helper is called with the column name "age_loglog".
    """
    from build import remove_inf_values, csv_to_dataframe, merge_dataframe

    frame1 = csv_to_dataframe(filepath1)
    self.assertIsInstance(frame1, pd.DataFrame)
    frame2 = csv_to_dataframe(filepath2)
    self.assertIsInstance(frame2, pd.DataFrame)

    merged = merge_dataframe(frame1, frame2, 'user_id')
    cleaned = remove_inf_values(merged, "age_loglog")
    # assertIsInstance names the offending type in its failure message.
    self.assertIsInstance(cleaned, pd.DataFrame)
def test_one_hot_encoder(self):
    """Check that one_hot_encoder on the merged fixtures returns a DataFrame.

    The columns passed are 'device' and 'browser_language'.
    """
    from build import one_hot_encoder, csv_to_dataframe, merge_dataframe

    frame1 = csv_to_dataframe(filepath1)
    self.assertIsInstance(frame1, pd.DataFrame)
    frame2 = csv_to_dataframe(filepath2)
    self.assertIsInstance(frame2, pd.DataFrame)

    merged = merge_dataframe(frame1, frame2, 'user_id')
    encoded = one_hot_encoder(merged, ['device', 'browser_language'])
    # assertIsInstance names the offending type in its failure message.
    self.assertIsInstance(encoded, pd.DataFrame)
def test_label_encoder(self):
    """Check that label_encoder on the merged fixtures returns a DataFrame."""
    from build import label_encoder, csv_to_dataframe, merge_dataframe

    frame1 = csv_to_dataframe(filepath1)
    self.assertIsInstance(frame1, pd.DataFrame)
    frame2 = csv_to_dataframe(filepath2)
    self.assertIsInstance(frame2, pd.DataFrame)

    merged = merge_dataframe(frame1, frame2, 'user_id')
    columns = ["sex", "country", "source", "ads_channel", "browser"]
    encoded = label_encoder(merged, columns)
    # assertIsInstance names the offending type in its failure message.
    self.assertIsInstance(encoded, pd.DataFrame)
def test_centre_and_scale(self):
    """Check that centre_and_scale on the merged fixtures, for "age", returns a DataFrame."""
    from build import centre_and_scale, csv_to_dataframe, merge_dataframe

    frame1 = csv_to_dataframe(filepath1)
    self.assertIsInstance(frame1, pd.DataFrame)
    frame2 = csv_to_dataframe(filepath2)
    self.assertIsInstance(frame2, pd.DataFrame)

    merged = merge_dataframe(frame1, frame2, 'user_id')
    scaled = centre_and_scale(merged, ["age"])
    # assertIsInstance names the offending type in its failure message.
    self.assertIsInstance(scaled, pd.DataFrame)
def test_dtype_category(self):
    """Check that dtype_category on the merged fixtures returns a DataFrame.

    The column list passed here includes "user_id" and "date" alongside
    the other listed columns.
    """
    # NOTE(review): another test_dtype_category using the same two fixtures
    # exists in this source; if both live in the same TestCase, the later
    # definition silently replaces this one and this test never runs —
    # confirm and rename one of them if so.
    from build import dtype_category, csv_to_dataframe, merge_dataframe

    frame1 = csv_to_dataframe(filepath1)
    self.assertIsInstance(frame1, pd.DataFrame)
    frame2 = csv_to_dataframe(filepath2)
    self.assertIsInstance(frame2, pd.DataFrame)

    merged = merge_dataframe(frame1, frame2, 'user_id')
    columns = [
        "user_id", "sex", "country", "date", "source", "device",
        "browser_language", "ads_channel", "browser",
    ]
    converted = dtype_category(merged, columns)
    # assertIsInstance names the offending type in its failure message.
    self.assertIsInstance(converted, pd.DataFrame)
def test_sqrt_transform(self):
    """Check that sqrt_transform on the merged fixtures, for "age", returns a list."""
    from build import sqrt_transform, csv_to_dataframe, merge_dataframe

    frame1 = csv_to_dataframe(filepath1)
    self.assertIsInstance(frame1, pd.DataFrame)
    frame2 = csv_to_dataframe(filepath2)
    self.assertIsInstance(frame2, pd.DataFrame)

    merged = merge_dataframe(frame1, frame2, 'user_id')
    outcome = sqrt_transform(merged, ["age"])
    # assertIsInstance names the offending type in its failure message.
    self.assertIsInstance(outcome, list)
def test_multi_power(self):
    """Check that multi_power on the merged fixtures returns a DataFrame.

    Called with column "age" and the powers 0.5, 2 and 3.
    """
    from build import multi_power, csv_to_dataframe, merge_dataframe

    frame1 = csv_to_dataframe(filepath1)
    self.assertIsInstance(frame1, pd.DataFrame)
    frame2 = csv_to_dataframe(filepath2)
    self.assertIsInstance(frame2, pd.DataFrame)

    merged = merge_dataframe(frame1, frame2, 'user_id')
    column_list = ["age"]
    list_of_powers = [0.5, 2, 3]
    powered = multi_power(merged, column_list, list_of_powers)
    # assertIsInstance names the offending type in its failure message.
    self.assertIsInstance(powered, pd.DataFrame)
def test_best_k_features(self):
    """Check that best_k_features on the merged fixtures returns a list.

    Called with six age-derived predictor names, target 'test' and 3 as
    the final argument.
    """
    from build import best_k_features, csv_to_dataframe, merge_dataframe

    frame1 = csv_to_dataframe(filepath1)
    self.assertIsInstance(frame1, pd.DataFrame)
    frame2 = csv_to_dataframe(filepath2)
    self.assertIsInstance(frame2, pd.DataFrame)

    merged = merge_dataframe(frame1, frame2, 'user_id')
    predictors = [
        "age", "age^0.5", "age^2", "age^3", "age_log", "age_loglog",
    ]
    target = 'test'
    selected = best_k_features(merged, predictors, target, 3)
    # assertIsInstance names the offending type in its failure message.
    self.assertIsInstance(selected, list)
def test_dtype_category(self):
    """Check that dtype_category on the merged fixtures returns a DataFrame.

    The column list passed here includes "conversion" and "test"
    alongside the other listed columns.
    """
    # NOTE(review): another test_dtype_category using the same two fixtures
    # exists in this source; if both live in the same TestCase, only the
    # later definition is collected — confirm and rename one of them if so.
    from build import csv_to_dataframe, merge_dataframe, dtype_category

    frame1 = csv_to_dataframe(filepath1)
    self.assertIsInstance(frame1, pd.DataFrame)
    frame2 = csv_to_dataframe(filepath2)
    self.assertIsInstance(frame2, pd.DataFrame)

    merged = merge_dataframe(frame1, frame2, 'user_id')
    column_list = [
        "sex", "country", "source", "device", "browser_language",
        "ads_channel", "browser", "conversion", "test",
    ]
    converted = dtype_category(merged, column_list)
    # assertIsInstance names the offending type in its failure message.
    self.assertIsInstance(converted, pd.DataFrame)
def test_var_check(self):
    """Check that var_check(merged, 10) returns an empty list for the merged fixtures."""
    from build import csv_to_dataframe, merge_dataframe, var_check

    frame1 = csv_to_dataframe(filepath1)
    self.assertIsInstance(frame1, pd.DataFrame)
    frame2 = csv_to_dataframe(filepath2)
    self.assertIsInstance(frame2, pd.DataFrame)

    merged = merge_dataframe(frame1, frame2, 'user_id')
    # The original also built a `column_list` here that was never passed to
    # var_check; the dead local has been removed.
    flagged = var_check(merged, 10)
    self.assertEqual(flagged, [])
def test_dtype_category(self):
    """Check that dtype_category returns a DataFrame for the employee-style columns."""
    from build import dtype_category, csv_to_dataframe

    frame = csv_to_dataframe(filepath)
    columns = ["employee_id", "company_id", "dept", "join_date", "quit_date"]
    converted = dtype_category(frame, columns)
    # assertIsInstance names the offending type in its failure message.
    self.assertIsInstance(converted, pd.DataFrame)
def test_multi_power(self):
    """Check that multi_power returns a DataFrame.

    Called with columns "age" and "total_pages_visited" and the powers
    0.5, 2 and 3.
    """
    from build import multi_power, csv_to_dataframe

    frame = csv_to_dataframe(filepath)
    column_list = ["age", "total_pages_visited"]
    list_of_powers = [0.5, 2, 3]
    powered = multi_power(frame, column_list, list_of_powers)
    # assertIsInstance names the offending type in its failure message.
    self.assertIsInstance(powered, pd.DataFrame)
def test_correlation_list(self):
    """Check the first entry returned by correlation_list for the fixture.

    The result must be a list; the first element of its first entry must
    be approximately 0.559 (3 decimal places), and that entry must contain
    both "seniority" and "salary".
    """
    from build import correlation_list, csv_to_dataframe

    frame = csv_to_dataframe(filepath)
    correlations = correlation_list(frame)
    self.assertIsInstance(correlations, list)

    first_entry = correlations[0]
    self.assertAlmostEqual(first_entry[0], 0.5594652047653258, places=3)
    # assertIn shows the container contents when the member is missing.
    self.assertIn("seniority", first_entry)
    self.assertIn("salary", first_entry)
def test_correlation_list(self):
    """Check the first entry returned by correlation_list for the fixture.

    The result must be a list; the first element of its first entry must
    be approximately -0.0459 (3 decimal places), and that entry must
    contain both "age" and "total_pages_visited".
    """
    from build import correlation_list, csv_to_dataframe

    frame = csv_to_dataframe(filepath)
    correlations = correlation_list(frame)
    self.assertIsInstance(correlations, list)

    first_entry = correlations[0]
    self.assertAlmostEqual(first_entry[0], -0.045922219138141401, places=3)
    # assertIn shows the container contents when the member is missing.
    self.assertIn("age", first_entry)
    self.assertIn("total_pages_visited", first_entry)
def test_random_forest_model(self):
    """Compare random_forest_model's result for "salary" with a recorded value.

    The value returned for the listed predictors must match
    95.984738152970976 to 3 decimal places.
    """
    # NOTE(review): an exact-value check presumably relies on
    # random_forest_model using a fixed random seed — confirm, otherwise
    # this test can be flaky.
    from build import csv_to_dataframe, random_forest_model

    target = "salary"
    features = [
        'Constant Term',
        'seniority',
        'seniority^2',
        'dept_customer_service',
        'dept_data_science',
        'dept_design',
        'dept_engineer',
        'dept_marketing',
        'dept_sales',
        'company_id',
    ]
    frame = csv_to_dataframe(filepath)
    score = random_forest_model(frame, target, features)
    self.assertAlmostEqual(score, 95.984738152970976, places=3)
def test_linear_regression_model(self):
    """Compare linear_regression_model's result for "salary" with a recorded value.

    The value returned for the listed predictors must match
    3.6960569994332442 to 3 decimal places.
    """
    from build import csv_to_dataframe, linear_regression_model

    target = "salary"
    features = [
        'Constant Term',
        'seniority',
        'seniority^2',
        'dept_customer_service',
        'dept_data_science',
        'dept_design',
        'dept_engineer',
        'dept_marketing',
        'dept_sales',
        'company_id',
    ]
    frame = csv_to_dataframe(filepath)
    score = linear_regression_model(frame, target, features)
    self.assertAlmostEqual(score, 3.6960569994332442, places=3)
def test_logistic_regression_model(self):
    """Compare logistic_regression_model's result for "converted" with a recorded value.

    The value returned for the listed predictors must match
    98.282099936748892 to 3 decimal places.
    """
    from build import csv_to_dataframe, logistic_regression_model

    frame = csv_to_dataframe(filepath)
    target = "converted"
    features = [
        'country_China',
        'country_Germany',
        'country_UK',
        'country_US',
        'source_Ads',
        'source_Direct',
        'source_Seo',
        'age',
        'new_user',
        'total_pages_visited',
        'Constant Term',
        'age^2',
        'age x total_pages_visited',
        'total_pages_visited^2',
        'age^3',
        'age^2 x total_pages_visited',
        'age x total_pages_visited^2',
        'total_pages_visited^3',
    ]
    score = logistic_regression_model(frame, target, features)
    self.assertAlmostEqual(score, 98.282099936748892, places=3)
def test_best_k_features(self):
    """Check which predictors best_k_features returns for target 'converted'.

    The result must be a list containing "total_pages_visited^3",
    "total_pages_visited^2" and "total_pages_visited".
    """
    from build import best_k_features, csv_to_dataframe

    frame = csv_to_dataframe(filepath)
    predictors = [
        'age', 'total_pages_visited',
        'age^0.5', 'total_pages_visited^0.5',
        'age^2', 'total_pages_visited^2',
        'age^3', 'total_pages_visited^3',
        'age_loglog', 'total_pages_visited_loglog',
        'age_log', 'total_pages_visited_log',
    ]
    target = 'converted'
    selected = best_k_features(frame, predictors, target, 3)

    self.assertIsInstance(selected, list)
    # assertIn shows the returned list when an expected feature is missing.
    self.assertIn("total_pages_visited^3", selected)
    self.assertIn("total_pages_visited^2", selected)
    self.assertIn("total_pages_visited", selected)
def test_rf_rfe(self):
    """Check which predictors rf_rfe returns for target 'salary'.

    The result must be a list containing "seniority^3", "seniority^2" and
    "seniority_loglog".
    """
    from build import rf_rfe, csv_to_dataframe

    frame = csv_to_dataframe(filepath)
    predictors = [
        "seniority", "seniority^0.5", "seniority^2", "seniority^3",
        "seniority_log", "seniority_loglog",
    ]
    target = 'salary'
    selected = rf_rfe(frame, predictors, target)

    self.assertIsInstance(selected, list)
    # assertIn shows the returned list when an expected feature is missing.
    self.assertIn("seniority^3", selected)
    self.assertIn("seniority^2", selected)
    self.assertIn("seniority_loglog", selected)
def test_one_hot_encoder(self):
    """Check that one_hot_encoder returns a DataFrame for column "dept"."""
    from build import one_hot_encoder, csv_to_dataframe

    frame = csv_to_dataframe(filepath)
    encoded = one_hot_encoder(frame, ["dept"])
    # assertIsInstance names the offending type in its failure message.
    self.assertIsInstance(encoded, pd.DataFrame)
def test_skewness(self):
    """Check that skewness returns a list for "seniority" and "salary"."""
    from build import skewness, csv_to_dataframe

    frame = csv_to_dataframe(filepath)
    outcome = skewness(frame, ["seniority", "salary"])
    # assertIsInstance names the offending type in its failure message.
    self.assertIsInstance(outcome, list)
def test_sqrt_transform(self):
    """Check that sqrt_transform returns a list for "seniority" and "salary"."""
    from build import sqrt_transform, csv_to_dataframe

    frame = csv_to_dataframe(filepath)
    outcome = sqrt_transform(frame, ["seniority", "salary"])
    # assertIsInstance names the offending type in its failure message.
    self.assertIsInstance(outcome, list)
def test_log_log(self):
    """Check that loglog returns a DataFrame for column "seniority"."""
    from build import loglog, csv_to_dataframe

    frame = csv_to_dataframe(filepath)
    transformed = loglog(frame, ["seniority"])
    # assertIsInstance names the offending type in its failure message.
    self.assertIsInstance(transformed, pd.DataFrame)
def test_csv_to_dataframe(self):
    """Check that csv_to_dataframe returns a DataFrame for both fixture paths."""
    from build import csv_to_dataframe

    # Load and check one path at a time, in the original order, so a
    # failure on the first path is reported before the second is read.
    first = csv_to_dataframe(filepath1)
    self.assertIsInstance(first, pd.DataFrame)
    second = csv_to_dataframe(filepath2)
    self.assertIsInstance(second, pd.DataFrame)
def test_dtype_category(self):
    """Check that dtype_category returns a DataFrame for the four listed columns."""
    from build import dtype_category, csv_to_dataframe

    frame = csv_to_dataframe(filepath)
    columns = ["country", "new_user", "source", "converted"]
    converted = dtype_category(frame, columns)
    # assertIsInstance names the offending type in its failure message.
    self.assertIsInstance(converted, pd.DataFrame)
def test_log_log(self):
    """Check that loglog returns a DataFrame for "age" and "total_pages_visited"."""
    from build import loglog, csv_to_dataframe

    frame = csv_to_dataframe(filepath)
    transformed = loglog(frame, ["age", "total_pages_visited"])
    # assertIsInstance names the offending type in its failure message.
    self.assertIsInstance(transformed, pd.DataFrame)
def test_remove_inf_values(self):
    """Check that remove_inf_values returns a DataFrame.

    The helper is called with the column name "total_pages_visited_loglog".
    """
    from build import remove_inf_values, csv_to_dataframe

    frame = csv_to_dataframe(filepath)
    cleaned = remove_inf_values(frame, "total_pages_visited_loglog")
    # assertIsInstance names the offending type in its failure message.
    self.assertIsInstance(cleaned, pd.DataFrame)
def test_dtype_category(self):
    """Check that dtype_category returns a DataFrame for the employee-style columns."""
    from build import dtype_category, csv_to_dataframe

    frame = csv_to_dataframe(filepath)
    columns = ["employee_id", "company_id", "dept", "join_date", "quit_date"]
    converted = dtype_category(frame, columns)
    # assertIsInstance names the offending type in its failure message.
    self.assertIsInstance(converted, pd.DataFrame)
def test_centre_and_scale(self):
    """Check that centre_and_scale returns a DataFrame for "seniority" and "salary"."""
    from build import centre_and_scale, csv_to_dataframe

    frame = csv_to_dataframe(filepath)
    scaled = centre_and_scale(frame, ["seniority", "salary"])
    # assertIsInstance names the offending type in its failure message.
    self.assertIsInstance(scaled, pd.DataFrame)
def test_label_encoder(self):
    """Check that label_encoder returns a DataFrame for "company_id" and "dept"."""
    from build import label_encoder, csv_to_dataframe

    frame = csv_to_dataframe(filepath)
    encoded = label_encoder(frame, ["company_id", "dept"])
    # assertIsInstance names the offending type in its failure message.
    self.assertIsInstance(encoded, pd.DataFrame)