def test_merge_dataframe(self):
     """Check that merge_dataframe on 'user_id' returns a pandas DataFrame.

     Both inputs come from csv_to_dataframe (filepath1, filepath2) and are
     themselves checked to be DataFrames before merging.
     """
     from build import csv_to_dataframe, merge_dataframe
     res1 = csv_to_dataframe(filepath1)
     # assertIsInstance reports the actual type on failure, unlike
     # assertTrue(isinstance(...)), which only says "False is not true".
     self.assertIsInstance(res1, pd.DataFrame)
     res2 = csv_to_dataframe(filepath2)
     self.assertIsInstance(res2, pd.DataFrame)
     res = merge_dataframe(res1, res2, 'user_id')
     self.assertIsInstance(res, pd.DataFrame)
 def test_merge_dataframe(self):
     """Check that merge_dataframe on 'user_id' returns a pandas DataFrame.

     Both inputs come from csv_to_dataframe (filepath1, filepath2) and are
     themselves checked to be DataFrames before merging.
     """
     from build import csv_to_dataframe, merge_dataframe
     res1 = csv_to_dataframe(filepath1)
     # assertIsInstance gives a type-aware failure message.
     self.assertIsInstance(res1, pd.DataFrame)
     res2 = csv_to_dataframe(filepath2)
     self.assertIsInstance(res2, pd.DataFrame)
     res = merge_dataframe(res1, res2, 'user_id')
     self.assertIsInstance(res, pd.DataFrame)
 def test_correlation_list(self):
     """Check that correlation_list returns a list for the merged frame.

     The frame is built by merging the csv_to_dataframe results for
     filepath1 and filepath2 on 'user_id'.
     """
     from build import correlation_list, csv_to_dataframe, merge_dataframe
     res1 = csv_to_dataframe(filepath1)
     self.assertIsInstance(res1, pd.DataFrame)
     res2 = csv_to_dataframe(filepath2)
     self.assertIsInstance(res2, pd.DataFrame)
     res = merge_dataframe(res1, res2, 'user_id')
     new_res = correlation_list(res)
     # Type-aware assertion: failure output names the type actually returned.
     self.assertIsInstance(new_res, list)
 def test_loglog(self):
     """Check that loglog, given the merged frame and ["age"], returns a DataFrame.

     The frame is built by merging the csv_to_dataframe results for
     filepath1 and filepath2 on 'user_id'.
     """
     from build import loglog, csv_to_dataframe, merge_dataframe
     res1 = csv_to_dataframe(filepath1)
     self.assertIsInstance(res1, pd.DataFrame)
     res2 = csv_to_dataframe(filepath2)
     self.assertIsInstance(res2, pd.DataFrame)
     res = merge_dataframe(res1, res2, 'user_id')
     new_res = loglog(res, ["age"])
     self.assertIsInstance(new_res, pd.DataFrame)
 def test_remove_inf_values(self):
     """Check that remove_inf_values returns a DataFrame for "age_loglog" input.

     The frame passed in is the 'user_id' merge of the csv_to_dataframe
     results for filepath1 and filepath2.
     """
     from build import remove_inf_values, csv_to_dataframe, merge_dataframe
     res1 = csv_to_dataframe(filepath1)
     self.assertIsInstance(res1, pd.DataFrame)
     res2 = csv_to_dataframe(filepath2)
     self.assertIsInstance(res2, pd.DataFrame)
     res = merge_dataframe(res1, res2, 'user_id')
     new_res = remove_inf_values(res, "age_loglog")
     self.assertIsInstance(new_res, pd.DataFrame)
Exemplo n.º 6
0
    def test_one_hot_encoder(self):
        """Check that one_hot_encoder returns a DataFrame for the merged frame.

        The encoder is called with the column names 'device' and
        'browser_language' on the 'user_id' merge of both loaded inputs.
        """
        from build import one_hot_encoder, csv_to_dataframe, merge_dataframe
        res1 = csv_to_dataframe(filepath1)
        self.assertIsInstance(res1, pd.DataFrame)
        res2 = csv_to_dataframe(filepath2)
        self.assertIsInstance(res2, pd.DataFrame)
        res = merge_dataframe(res1, res2, 'user_id')

        new_res = one_hot_encoder(res, ['device', 'browser_language'])
        self.assertIsInstance(new_res, pd.DataFrame)
Exemplo n.º 7
0
    def test_label_encoder(self):
        """Check that label_encoder returns a DataFrame for the merged frame.

        The encoder receives five column names (sex, country, source,
        ads_channel, browser) and the 'user_id' merge of both loaded inputs.
        """
        from build import label_encoder, csv_to_dataframe, merge_dataframe
        res1 = csv_to_dataframe(filepath1)
        self.assertIsInstance(res1, pd.DataFrame)
        res2 = csv_to_dataframe(filepath2)
        self.assertIsInstance(res2, pd.DataFrame)
        res = merge_dataframe(res1, res2, 'user_id')

        new_res = label_encoder(
            res, ["sex", "country", "source", "ads_channel", "browser"])
        self.assertIsInstance(new_res, pd.DataFrame)
Exemplo n.º 8
0
    def test_centre_and_scale(self):
        """Check that centre_and_scale, given the merged frame and ["age"], returns a DataFrame.

        The frame is the 'user_id' merge of the csv_to_dataframe results
        for filepath1 and filepath2.
        """
        from build import centre_and_scale, csv_to_dataframe, merge_dataframe
        res1 = csv_to_dataframe(filepath1)
        self.assertIsInstance(res1, pd.DataFrame)
        res2 = csv_to_dataframe(filepath2)
        self.assertIsInstance(res2, pd.DataFrame)
        res = merge_dataframe(res1, res2, 'user_id')

        new_res = centre_and_scale(res, ["age"])
        self.assertIsInstance(new_res, pd.DataFrame)
Exemplo n.º 9
0
    def test_dtype_category(self):
        """Check that dtype_category returns a DataFrame for the merged frame.

        dtype_category is called with nine column names on the 'user_id'
        merge of the csv_to_dataframe results for filepath1 and filepath2.
        """
        from build import dtype_category, csv_to_dataframe, merge_dataframe
        res1 = csv_to_dataframe(filepath1)
        self.assertIsInstance(res1, pd.DataFrame)
        res2 = csv_to_dataframe(filepath2)
        self.assertIsInstance(res2, pd.DataFrame)
        res = merge_dataframe(res1, res2, 'user_id')

        column_list = [
            "user_id", "sex", "country", "date", "source", "device",
            "browser_language", "ads_channel", "browser",
        ]
        new_res = dtype_category(res, column_list)
        self.assertIsInstance(new_res, pd.DataFrame)
Exemplo n.º 10
0
    def test_sqrt_transform(self):
        """Check that sqrt_transform, given the merged frame and ["age"], returns a list.

        The frame is the 'user_id' merge of the csv_to_dataframe results
        for filepath1 and filepath2.
        """
        from build import sqrt_transform, csv_to_dataframe, merge_dataframe
        res1 = csv_to_dataframe(filepath1)
        self.assertIsInstance(res1, pd.DataFrame)
        res2 = csv_to_dataframe(filepath2)
        self.assertIsInstance(res2, pd.DataFrame)
        res = merge_dataframe(res1, res2, 'user_id')

        new_res = sqrt_transform(res, ["age"])
        self.assertIsInstance(new_res, list)
 def test_multi_power(self):
     """Check that multi_power returns a DataFrame for the merged frame.

     multi_power is called with the column list ["age"] and the powers
     [0.5, 2, 3] on the 'user_id' merge of both loaded inputs.
     """
     from build import multi_power, csv_to_dataframe, merge_dataframe
     res1 = csv_to_dataframe(filepath1)
     self.assertIsInstance(res1, pd.DataFrame)
     res2 = csv_to_dataframe(filepath2)
     self.assertIsInstance(res2, pd.DataFrame)
     res = merge_dataframe(res1, res2, 'user_id')
     column_list = ["age"]
     list_of_powers = [0.5, 2, 3]
     new_res = multi_power(res, column_list, list_of_powers)
     self.assertIsInstance(new_res, pd.DataFrame)
 def test_best_k_features(self):
     """Check that best_k_features returns a list for the merged frame.

     best_k_features is called with six age-derived predictor names, the
     target 'test', and 3 as its last argument.
     """
     from build import best_k_features, csv_to_dataframe, merge_dataframe
     res1 = csv_to_dataframe(filepath1)
     self.assertIsInstance(res1, pd.DataFrame)
     res2 = csv_to_dataframe(filepath2)
     self.assertIsInstance(res2, pd.DataFrame)
     res = merge_dataframe(res1, res2, 'user_id')
     predictors = [
         "age", "age^0.5", "age^2", "age^3", "age_log", "age_loglog"
     ]
     target = 'test'
     new_res = best_k_features(res, predictors, target, 3)
     self.assertIsInstance(new_res, list)
Exemplo n.º 13
0
    def test_dtype_category(self):
        """Check that dtype_category returns a DataFrame for the merged frame.

        dtype_category is called with nine column names on the 'user_id'
        merge of the csv_to_dataframe results for filepath1 and filepath2.
        """
        from build import csv_to_dataframe, merge_dataframe, dtype_category
        res1 = csv_to_dataframe(filepath1)
        self.assertIsInstance(res1, pd.DataFrame)
        res2 = csv_to_dataframe(filepath2)
        self.assertIsInstance(res2, pd.DataFrame)
        res = merge_dataframe(res1, res2, 'user_id')

        column_list = [
            "sex", "country", "source", "device", "browser_language",
            "ads_channel", "browser", "conversion", "test"
        ]
        res_new = dtype_category(res, column_list)
        self.assertIsInstance(res_new, pd.DataFrame)
Exemplo n.º 14
0
    def test_var_check(self):
        """Check that var_check(res, 10) returns an empty list for the merged frame.

        The frame is the 'user_id' merge of the csv_to_dataframe results
        for filepath1 and filepath2.
        """
        from build import csv_to_dataframe, merge_dataframe, var_check
        res1 = csv_to_dataframe(filepath1)
        self.assertIsInstance(res1, pd.DataFrame)
        res2 = csv_to_dataframe(filepath2)
        self.assertIsInstance(res2, pd.DataFrame)
        res = merge_dataframe(res1, res2, 'user_id')

        # The original built a column_list here that was never passed to
        # var_check; it has been dropped as dead code.
        res_new = var_check(res, 10)
        self.assertEqual(res_new, [])
Exemplo n.º 15
0
 def test_dtype_category(self):
     """Check that dtype_category returns a DataFrame.

     The input frame comes from csv_to_dataframe(filepath); five column
     names are passed as the second argument.
     """
     from build import dtype_category, csv_to_dataframe
     res = csv_to_dataframe(filepath)
     new_res = dtype_category(
         res,
         ["employee_id", "company_id", "dept", "join_date", "quit_date"])
     # assertIsInstance reports the offending type when the check fails.
     self.assertIsInstance(new_res, pd.DataFrame)
Exemplo n.º 16
0
 def test_multi_power(self):
     """Check that multi_power returns a DataFrame.

     multi_power is called on csv_to_dataframe(filepath) with two column
     names and the powers [0.5, 2, 3].
     """
     from build import multi_power, csv_to_dataframe
     res = csv_to_dataframe(filepath)
     column_list = ["age", "total_pages_visited"]
     list_of_powers = [0.5, 2, 3]
     new_res = multi_power(res, column_list, list_of_powers)
     self.assertIsInstance(new_res, pd.DataFrame)
Exemplo n.º 17
0
 def test_correlation_list(self):
     """Check the first entry of correlation_list for csv_to_dataframe(filepath).

     The result must be a list whose first entry starts with a value close
     to 0.5595 (3 decimal places) and contains both "seniority" and "salary".
     """
     from build import correlation_list, csv_to_dataframe
     res = csv_to_dataframe(filepath)
     new_res = correlation_list(res)
     self.assertIsInstance(new_res, list)
     self.assertAlmostEqual(new_res[0][0], 0.5594652047653258, places=3)
     # assertIn prints the container on failure, unlike assertTrue(x in y).
     self.assertIn("seniority", new_res[0])
     self.assertIn("salary", new_res[0])
Exemplo n.º 18
0
 def test_correlation_list(self):
     """Check the first entry of correlation_list for csv_to_dataframe(filepath).

     The result must be a list whose first entry starts with a value close
     to -0.0459 (3 decimal places) and contains both "age" and
     "total_pages_visited".
     """
     from build import correlation_list, csv_to_dataframe
     res = csv_to_dataframe(filepath)
     new_res = correlation_list(res)
     self.assertIsInstance(new_res, list)
     self.assertAlmostEqual(new_res[0][0], -0.045922219138141401, places=3)
     # assertIn prints the container on failure, unlike assertTrue(x in y).
     self.assertIn("age", new_res[0])
     self.assertIn("total_pages_visited", new_res[0])
Exemplo n.º 19
0
 def test_random_forest_model(self):
     """Compare random_forest_model's return value with 95.9847 (3 places).

     The model function is called on csv_to_dataframe(filepath) with
     "salary" as the dependent variable and ten independent variable names.
     """
     from build import csv_to_dataframe, random_forest_model
     feature_names = [
         'Constant Term',
         'seniority',
         'seniority^2',
         'dept_customer_service',
         'dept_data_science',
         'dept_design',
         'dept_engineer',
         'dept_marketing',
         'dept_sales',
         'company_id',
     ]
     frame = csv_to_dataframe(filepath)
     outcome = random_forest_model(frame, "salary", feature_names)
     self.assertAlmostEqual(outcome, 95.984738152970976, places=3)
Exemplo n.º 20
0
 def test_linear_regression_model(self):
     """Compare linear_regression_model's return value with 3.6961 (3 places).

     The model function is called on csv_to_dataframe(filepath) with
     "salary" as the dependent variable and ten independent variable names.
     """
     from build import csv_to_dataframe, linear_regression_model
     feature_names = [
         'Constant Term',
         'seniority',
         'seniority^2',
         'dept_customer_service',
         'dept_data_science',
         'dept_design',
         'dept_engineer',
         'dept_marketing',
         'dept_sales',
         'company_id',
     ]
     frame = csv_to_dataframe(filepath)
     outcome = linear_regression_model(frame, "salary", feature_names)
     self.assertAlmostEqual(outcome, 3.6960569994332442, places=3)
Exemplo n.º 21
0
 def test_logistic_regression_model(self):
     """Compare logistic_regression_model's return value with 98.2821 (3 places).

     The model function is called on csv_to_dataframe(filepath) with
     "converted" as its second argument and eighteen column names as its third.
     """
     from build import csv_to_dataframe, logistic_regression_model
     feature_names = [
         'country_China',
         'country_Germany',
         'country_UK',
         'country_US',
         'source_Ads',
         'source_Direct',
         'source_Seo',
         'age',
         'new_user',
         'total_pages_visited',
         'Constant Term',
         'age^2',
         'age x total_pages_visited',
         'total_pages_visited^2',
         'age^3',
         'age^2 x total_pages_visited',
         'age x total_pages_visited^2',
         'total_pages_visited^3',
     ]
     frame = csv_to_dataframe(filepath)
     outcome = logistic_regression_model(frame, "converted", feature_names)
     self.assertAlmostEqual(outcome, 98.282099936748892, places=3)
Exemplo n.º 22
0
 def test_best_k_features(self):
     """Check which names best_k_features returns for target 'converted'.

     With twelve predictor names and 3 as the last argument, the result
     must be a list containing "total_pages_visited" and its ^2 and ^3
     variants.
     """
     from build import best_k_features, csv_to_dataframe
     res = csv_to_dataframe(filepath)
     predictors = ['age', 'total_pages_visited', 'age^0.5', 'total_pages_visited^0.5', 'age^2',
                   'total_pages_visited^2', 'age^3', 'total_pages_visited^3', 'age_loglog',
                   'total_pages_visited_loglog', 'age_log', 'total_pages_visited_log']
     target = 'converted'
     new_res = best_k_features(res, predictors, target, 3)
     self.assertIsInstance(new_res, list)
     # assertIn shows the returned list on failure, unlike assertTrue(x in y).
     self.assertIn("total_pages_visited^3", new_res)
     self.assertIn("total_pages_visited^2", new_res)
     self.assertIn("total_pages_visited", new_res)
Exemplo n.º 23
0
 def test_rf_rfe(self):
     """Check which names rf_rfe returns for target 'salary'.

     With six seniority-derived predictor names, the result must be a list
     containing "seniority^3", "seniority^2" and "seniority_loglog".
     """
     from build import rf_rfe, csv_to_dataframe
     res = csv_to_dataframe(filepath)
     predictors = [
         "seniority", "seniority^0.5", "seniority^2", "seniority^3",
         "seniority_log", "seniority_loglog"
     ]
     target = 'salary'
     new_res = rf_rfe(res, predictors, target)
     self.assertIsInstance(new_res, list)
     # assertIn shows the returned list on failure, unlike assertTrue(x in y).
     self.assertIn("seniority^3", new_res)
     self.assertIn("seniority^2", new_res)
     self.assertIn("seniority_loglog", new_res)
Exemplo n.º 24
0
 def test_one_hot_encoder(self):
     """Check that one_hot_encoder, called with ["dept"], returns a DataFrame."""
     from build import one_hot_encoder, csv_to_dataframe
     res = csv_to_dataframe(filepath)
     new_res = one_hot_encoder(res, ["dept"])
     # assertIsInstance reports the offending type when the check fails.
     self.assertIsInstance(new_res, pd.DataFrame)
Exemplo n.º 25
0
 def test_skewness(self):
     """Check that skewness, called with two column names, returns a list."""
     from build import skewness, csv_to_dataframe
     res = csv_to_dataframe(filepath)
     new_res = skewness(res, ["seniority", "salary"])
     # assertIsInstance reports the offending type when the check fails.
     self.assertIsInstance(new_res, list)
Exemplo n.º 26
0
 def test_sqrt_transform(self):
     """Check that sqrt_transform, called with two column names, returns a list."""
     from build import sqrt_transform, csv_to_dataframe
     res = csv_to_dataframe(filepath)
     new_res = sqrt_transform(res, ["seniority", "salary"])
     # assertIsInstance reports the offending type when the check fails.
     self.assertIsInstance(new_res, list)
Exemplo n.º 27
0
 def test_log_log(self):
     """Check that loglog, called with ["seniority"], returns a DataFrame."""
     from build import loglog, csv_to_dataframe
     res = csv_to_dataframe(filepath)
     new_res = loglog(res, ["seniority"])
     # assertIsInstance reports the offending type when the check fails.
     self.assertIsInstance(new_res, pd.DataFrame)
 def test_csv_to_dataframe(self):
     """Check that csv_to_dataframe returns a DataFrame for filepath1 and filepath2."""
     from build import csv_to_dataframe
     res = csv_to_dataframe(filepath1)
     # assertIsInstance reports the offending type when the check fails.
     self.assertIsInstance(res, pd.DataFrame)
     res = csv_to_dataframe(filepath2)
     self.assertIsInstance(res, pd.DataFrame)
 def test_one_hot_encoder(self):
     """Check that one_hot_encoder, called with ["dept"], returns a DataFrame."""
     from build import one_hot_encoder, csv_to_dataframe
     res = csv_to_dataframe(filepath)
     new_res = one_hot_encoder(res, ["dept"])
     # Type-aware assertion gives a clearer failure message.
     self.assertIsInstance(new_res, pd.DataFrame)
Exemplo n.º 30
0
 def test_dtype_category(self):
     """Check that dtype_category, called with four column names, returns a DataFrame."""
     from build import dtype_category, csv_to_dataframe
     res = csv_to_dataframe(filepath)
     new_res = dtype_category(res, ["country", "new_user", "source", "converted"])
     # Type-aware assertion gives a clearer failure message.
     self.assertIsInstance(new_res, pd.DataFrame)
Exemplo n.º 31
0
 def test_log_log(self):
     """Check that loglog, called with two column names, returns a DataFrame."""
     from build import loglog, csv_to_dataframe
     res = csv_to_dataframe(filepath)
     new_res = loglog(res, ["age", "total_pages_visited"])
     # Type-aware assertion gives a clearer failure message.
     self.assertIsInstance(new_res, pd.DataFrame)
Exemplo n.º 32
0
 def test_remove_inf_values(self):
     """Check that remove_inf_values returns a DataFrame for "total_pages_visited_loglog" input."""
     from build import remove_inf_values, csv_to_dataframe
     res = csv_to_dataframe(filepath)
     new_res = remove_inf_values(res, "total_pages_visited_loglog")
     # Type-aware assertion gives a clearer failure message.
     self.assertIsInstance(new_res, pd.DataFrame)
 def test_dtype_category(self):
     """Check that dtype_category, called with five column names, returns a DataFrame."""
     from build import dtype_category, csv_to_dataframe
     res = csv_to_dataframe(filepath)
     new_res = dtype_category(
         res, ["employee_id", "company_id", "dept", "join_date", "quit_date"])
     # Failure output from assertIsInstance includes the actual type.
     self.assertIsInstance(new_res, pd.DataFrame)
 def test_sqrt_transform(self):
     """Check that sqrt_transform, called with two column names, returns a list."""
     from build import sqrt_transform, csv_to_dataframe
     res = csv_to_dataframe(filepath)
     new_res = sqrt_transform(res, ["seniority", "salary"])
     # Failure output from assertIsInstance includes the actual type.
     self.assertIsInstance(new_res, list)
 def test_skewness(self):
     """Check that skewness, called with two column names, returns a list."""
     from build import skewness, csv_to_dataframe
     res = csv_to_dataframe(filepath)
     new_res = skewness(res, ["seniority", "salary"])
     # Failure output from assertIsInstance includes the actual type.
     self.assertIsInstance(new_res, list)
 def test_csv_to_dataframe(self):
     """Check that csv_to_dataframe returns a DataFrame for filepath1 and filepath2."""
     from build import csv_to_dataframe
     res = csv_to_dataframe(filepath1)
     # Failure output from assertIsInstance includes the actual type.
     self.assertIsInstance(res, pd.DataFrame)
     res = csv_to_dataframe(filepath2)
     self.assertIsInstance(res, pd.DataFrame)
 def test_centre_and_scale(self):
     """Check that centre_and_scale, called with two column names, returns a DataFrame."""
     from build import centre_and_scale, csv_to_dataframe
     res = csv_to_dataframe(filepath)
     new_res = centre_and_scale(res, ["seniority", "salary"])
     # Failure output from assertIsInstance includes the actual type.
     self.assertIsInstance(new_res, pd.DataFrame)
 def test_label_encoder(self):
     """Check that label_encoder, called with two column names, returns a DataFrame."""
     from build import label_encoder, csv_to_dataframe
     res = csv_to_dataframe(filepath)
     new_res = label_encoder(res, ["company_id", "dept"])
     # Failure output from assertIsInstance includes the actual type.
     self.assertIsInstance(new_res, pd.DataFrame)