def test_proflogit_patsy_demo_data(self):
    """Fit ProfLogit on the patsy demo data; check formula, intercept and score.

    No formula is passed to ProfLogit, so the test asserts the formula the
    fitted model reports. The model is fitted on the training file and
    scored on the separate test file.
    """
    X_train, y_train = load_data("../data/patsy_demo_train.dat", "y")
    X_test, y_test = load_data("../data/patsy_demo_test.dat", 0)
    pfl = ProfLogit(rga_kws={"niter": 5, "random_state": 42})
    pfl.fit(X_train, y_train)
    self.assertEqual(pfl.formula, "c + standardize(x)")
    # assertIn prints the missing name and the actual columns on failure,
    # whereas assertTrue(a in b) only reports "False is not true".
    self.assertIn("Intercept", pfl.design_info.column_names)
    self.assertTrue(pfl.intercept)
    empc = pfl.score(X_test, y_test)
    self.assertAlmostEqual(empc, 25.5035418741)
def test_proflogit_nfev_correct_large(self):
    """Check that the fitted RGA result reports exactly the requested ``nfev``.

    Also asserts the formula the fitted model reports and that an intercept
    column is present in the design info.
    """
    X, y = load_data("../data/patsy_demo_train.dat", "y")
    n_fev = 43
    pfl = ProfLogit(
        rga_kws={"nfev": n_fev, "disp": False, "random_state": 42},
    )
    pfl.fit(X, y)
    self.assertEqual(pfl.rga.res.nfev, n_fev)
    self.assertEqual(pfl.formula, "c + standardize(x)")
    # assertIn prints the missing name and the actual columns on failure,
    # whereas assertTrue(a in b) only reports "False is not true".
    self.assertIn("Intercept", pfl.design_info.column_names)
    self.assertTrue(pfl.intercept)
def test_default_bounds(self):
    """Fit with two different ``default_bounds`` pairs and compare the score.

    Each case is ``(lower, upper, expected_score)``; the model is fitted and
    scored on the same class-level data.
    """
    data = self.data
    target = "Y"
    features = {name: col for name, col in data.items() if name != target}
    labels = data[target]
    cases = [(-3, 3, 28.0), (-5, 5, 28.0)]
    for lower, upper, expected in cases:
        rga_options = {"nfev": 500, "random_state": 2017}
        model = ProfLogit(
            rga_kws=rga_options,
            default_bounds=(lower, upper),
        )
        model.fit(features, labels)
        score = model.score(features, labels)
        self.assertAlmostEqual(score, expected)
def test_proflogit_on_class_data(self):
    """Fit ProfLogit on the class-level data; check formula, intercept and score.

    The expected formula is one ``standardize(<name>)`` term per predictor
    key, joined with `` + `` in the iteration order of ``X``.
    """
    data = self.data
    ynm = "Y"
    X = {k: v for k, v in data.items() if k != ynm}
    y = data[ynm]
    # X already excludes the target key, so no extra filtering is needed.
    form = " + ".join("standardize({})".format(k) for k in X)
    pfl = ProfLogit(
        rga_kws={"nfev": 500, "disp": False, "random_state": 2017},
    )
    pfl.fit(X, y)
    self.assertEqual(pfl.formula, form)
    # assertIn prints the missing name and the actual columns on failure,
    # whereas assertTrue(a in b) only reports "False is not true".
    self.assertIn("Intercept", pfl.design_info.column_names)
    self.assertTrue(pfl.intercept)
    empc = pfl.score(X, y)
    self.assertAlmostEqual(empc, 28.0)
def test_proflogit_no_intercept_through_formula(self):
    """Check that a ``- 1`` term in ``formula_like`` yields a model without intercept.

    The model is fitted on the patsy demo training file and scored on the
    test file; the formula is expected to be reported back unchanged.
    """
    ynm = "y"
    X_train, y_train = load_data("../data/patsy_demo_train.dat", ynm)
    X_test, y_test = load_data("../data/patsy_demo_test.dat", 0)
    pfl = ProfLogit(
        formula_like="c + standardize(x) - 1",
        rga_kws={"niter": 5, "disp": False, "random_state": 42},
    )
    pfl.fit(X_train, y_train)
    self.assertEqual(pfl.formula, "c + standardize(x) - 1")
    # assertNotIn prints the offending name and the actual columns on
    # failure, whereas assertFalse(a in b) only reports "True is not false".
    self.assertNotIn("Intercept", pfl.design_info.column_names)
    self.assertFalse(pfl.intercept)
    empc = pfl.score(X_test, y_test)
    self.assertAlmostEqual(empc, 25.5035418741)
def test_proflogit_reg_kws_change_only_lambda(self):
    """Fit with only ``lambda`` set in ``reg_kws``; check formula, intercept, score.

    The expected formula is one ``standardize(<name>)`` term per predictor
    key, joined with `` + `` in the iteration order of ``X``.
    """
    data = self.data
    ynm = "Y"
    X = {k: v for k, v in data.items() if k != ynm}
    y = data[ynm]
    # X already excludes the target key, so no extra filtering is needed.
    form = " + ".join("standardize({})".format(k) for k in X)
    pfl = ProfLogit(
        reg_kws={"lambda": 0.01},  # Arbitrary value, needs to be tuned
        rga_kws={"nfev": 500, "random_state": 2017},
    )
    pfl.fit(X, y)
    self.assertEqual(pfl.formula, form)
    # assertIn prints the missing name and the actual columns on failure,
    # whereas assertTrue(a in b) only reports "False is not true".
    self.assertIn("Intercept", pfl.design_info.column_names)
    self.assertTrue(pfl.intercept)
    empc = pfl.score(X, y)
    self.assertAlmostEqual(empc, 28.0)
def test_proflogit_patsy(self):
    """Check that ``score`` agrees with EMPChurn computed from ``predict_proba``.

    The expected formula uses ``standardize(f<i>)`` for every index in
    ``range(len(X) + 1)`` except the target index ``yix``.
    """
    yix = 0
    X, y = load_data("../data/two-predictors.dat", yix, False)
    form = " + ".join(
        "standardize(f{})".format(cix)
        for cix in range(len(X) + 1)
        if cix != yix
    )
    pfl = ProfLogit(
        rga_kws={"niter": 5, "disp": False, "random_state": 42},
    )
    pfl.fit(X, y)
    self.assertEqual(pfl.formula, form)
    # assertIn prints the missing name and the actual columns on failure,
    # whereas assertTrue(a in b) only reports "False is not true".
    self.assertIn("Intercept", pfl.design_info.column_names)
    self.assertTrue(pfl.intercept)
    y_score = pfl.predict_proba(X)
    empc1 = EMPChurn(y, y_score).empc()
    empc2 = pfl.score(X, y)
    self.assertAlmostEqual(empc1, empc2)
def test_proflogit_empc_kws(self):
    """Fit with explicit ``empc_kws`` and compare the score to a fixed value.

    The model is fitted and scored on the same class-level data.
    """
    data = self.data
    target = "Y"
    features = {name: col for name, col in data.items() if name != target}
    labels = data[target]
    empc_options = {
        "alpha": 6,  # Alpha parameter of unimodal beta (alpha > 1)
        "beta": 14,  # Beta parameter of unimodal beta (beta > 1)
        "clv": 200,  # Constant CLV per retained customer (clv > d)
        "d": 10,  # Constant cost of retention offer (d > 0)
        "f": 2,  # Constant cost of contact (f > 0)
    }
    rga_options = {"nfev": 500, "random_state": 2017}
    model = ProfLogit(empc_kws=empc_options, rga_kws=rga_options)
    model.fit(features, labels)
    score = model.score(features, labels)
    self.assertAlmostEqual(score, 27.500000004757432)
def test_bounds(self):
    """Fit with explicit per-parameter ``bounds`` and compare the score.

    The bounds are passed through ``rga_kws``; the model is fitted and
    scored on the same class-level data.
    """
    data = self.data
    target = "Y"
    features = {name: col for name, col in data.items() if name != target}
    labels = data[target]
    # n_params = intercept + #n_num_variables +
    #            #(n_levels - 1) per cat_variable
    # For data, n_params = 4
    param_bounds = [
        (-3, 3),  # Intercept
        (-1, 1),  # X1
        (-3, 4),  # X2
        (-9, 0),  # X3
    ]
    rga_options = {
        "bounds": param_bounds,
        "nfev": 500,
        "random_state": 2017,
    }
    model = ProfLogit(rga_kws=rga_options)
    model.fit(features, labels)
    score = model.score(features, labels)
    self.assertAlmostEqual(score, 25.8003164898)
def test_proflogit_reg_kws(self):
    """Fit with a full ``reg_kws`` dict; check formula, intercept and score.

    The expected formula is one ``standardize(<name>)`` term per predictor
    key, joined with `` + `` in the iteration order of ``X``.
    """
    data = self.data
    ynm = "Y"
    X = {k: v for k, v in data.items() if k != ynm}
    y = data[ynm]
    # X already excludes the target key, so no extra filtering is needed.
    form = " + ".join("standardize({})".format(k) for k in X)
    pfl = ProfLogit(
        reg_kws={
            "lambda": 0.01,  # Arbitrary value, needs to be tuned
            "alpha": 1.0,  # By default, applying lasso penalty
            "soft-thd": True,  # Apply soft-thresholding
        },
        rga_kws={"nfev": 500, "random_state": 2017},
    )
    pfl.fit(X, y)
    self.assertEqual(pfl.formula, form)
    # assertIn prints the missing name and the actual columns on failure,
    # whereas assertTrue(a in b) only reports "False is not true".
    self.assertIn("Intercept", pfl.design_info.column_names)
    self.assertTrue(pfl.intercept)
    empc = pfl.score(X, y)
    self.assertAlmostEqual(empc, 28.0)