def test_add_predictors_target_not_in_sync_raise_ValueError(self):
    """Expect ValueError when prediction files 1 and 2 are added together.

    Per the test name, the targets held by these two files are not in sync.
    """
    stacker = LinearPredictorStacker(metric=metric_rmse)
    # Resolve the paths first so that only the loader call itself is
    # covered by the exception check.
    mismatched_files = [
        get_path(name)
        for name in ('noid_OOF_predictions_1.csv',
                     'noid_OOF_predictions_2.csv')
    ]
    with self.assertRaises(ValueError):
        stacker.add_predictors_by_filename(files=mismatched_files)
def test_standard_fit_regression_stacker_no_bagging_free_weights(self):
    """Fit the STANDARD algorithm with MAE, one bag, normed_weights=False.

    Pins the ``score`` and ``mean_score`` obtained with ``seed=0``.
    """
    stacker = LinearPredictorStacker(
        metric=metric_mae,
        algorithm=LinearPredictorStacker.STANDARD,
        normed_weights=False,
        max_iter=200,
        n_bags=1,
        colsample=1.0,
        subsample=1.0,
        verbose=2,
        eps=1e-3,
        seed=0,
    )
    oof_files = [
        get_path(name)
        for name in ('noid_OOF_predictions_2.csv',
                     'noid_OOF_predictions_3.csv',
                     'noid_OOF_predictions_4.csv')
    ]
    stacker.add_predictors_by_filename(files=oof_files)

    # The loaded data is expected to hold 1000 rows and 22 predictors.
    self.assertEqual(len(stacker.target), 1000)
    self.assertEqual(len(stacker.predictors), 1000)
    self.assertEqual(stacker.predictors.shape[1], 22)

    stacker.fit()
    fitted_weights = stacker.get_weights()
    print(fitted_weights)

    self.assertAlmostEqual(stacker.score, 1185.0745095110819, places=4)
    self.assertAlmostEqual(stacker.mean_score, 1188.5725272161117,
                           places=4)
def test_fit_swapping_regression_stacker_no_bagging_normed_weights(self):
    """Fit the SWAPPING algorithm with MAE, one bag, normed_weights=True.

    Pins the ``score`` and ``mean_score`` obtained with ``seed=11``.
    """
    stacker = LinearPredictorStacker(
        metric=metric_mae,
        algorithm=LinearPredictorStacker.SWAPPING,
        normed_weights=True,
        max_iter=2000,
        n_bags=1,
        colsample=1.0,
        subsample=1.0,
        verbose=2,
        eps=1e-3,
        seed=11,
    )
    oof_files = [
        get_path(name)
        for name in ('noid_OOF_predictions_2.csv',
                     'noid_OOF_predictions_3.csv',
                     'noid_OOF_predictions_4.csv')
    ]
    stacker.add_predictors_by_filename(files=oof_files)

    # The loaded data is expected to hold 1000 rows and 22 predictors.
    self.assertEqual(len(stacker.target), 1000)
    self.assertEqual(len(stacker.predictors), 1000)
    self.assertEqual(stacker.predictors.shape[1], 22)

    stacker.fit()

    self.assertAlmostEqual(stacker.score, 1156.3066, places=4)
    self.assertAlmostEqual(stacker.mean_score, 1188.5725272161117,
                           places=4)
def test_init_stacker(self):
    """Check LinearPredictorStacker's attributes initialization.

    Only ``metric`` is passed to the constructor, so every other
    attribute checked here is expected to hold its default value.
    """
    stacker = LinearPredictorStacker(metric=metric_rmse)

    self.assertEqual(stacker.metric, metric_rmse)

    # Attributes that must be unset before any data is loaded or fitted.
    # assertIsNone checks identity, which reports failures more clearly
    # and does not depend on how ``==`` behaves for the attribute's type.
    self.assertIsNone(stacker.predictors)
    self.assertIsNone(stacker.target)
    self.assertIsNone(stacker.weights)
    self.assertIsNone(stacker.score)
    self.assertIsNone(stacker.seed)

    # Default optimisation settings.
    self.assertEqual(stacker.maximize, False)
    self.assertEqual(stacker.algo, LinearPredictorStacker.STANDARD)
    self.assertEqual(stacker.max_predictors, 1.0)
    self.assertEqual(stacker.max_samples, 1.0)
    self.assertEqual(stacker.n_bags, 1)
    self.assertEqual(stacker.max_iter, 10)
    # NOTE(review): the check below was already disabled in the original
    # test; whether ``step`` still exists cannot be told from this file.
    # self.assertEqual(stacker.step, 1)
    self.assertEqual(stacker.verbose, 0)
    self.assertEqual(stacker.verb_round, 1)
    self.assertEqual(stacker.normed_weights, True)
    self.assertEqual(stacker.eps, 1e-5)
def test_fit_with_predictors_and_target_exception_tests(self):
    """Check that fit() raises ValueError for each kind of invalid input.

    Covered cases: length mismatch between predictors and target, null
    values in the predictors, null values in the target, and a target
    with more than one column.
    """
    features = pd.read_csv(get_path('noid_OOF_predictions_2.csv'))
    loss = features.loss
    features.drop(['loss'], axis=1, inplace=True)

    stacker = LinearPredictorStacker(
        metric=metric_rmse,
        algorithm=LinearPredictorStacker.SWAPPING,
        max_iter=20,
        n_bags=1,
        colsample=1.0,
        subsample=1.0,
        verbose=0,
    )

    # Predictors and target do not have the same length.
    truncated_loss = loss.head(100)
    with self.assertRaises(ValueError):
        stacker.fit(predictors=features, target=truncated_loss)

    # Predictors contain null values.
    features_with_nulls = features.copy()
    features_with_nulls[features_with_nulls < 1000] = np.nan
    with self.assertRaises(ValueError):
        stacker.fit(predictors=features_with_nulls, target=loss)

    # Target contains null values.
    loss_with_nulls = loss.copy()
    loss_with_nulls[loss_with_nulls < 1000] = np.nan
    with self.assertRaises(ValueError):
        stacker.fit(predictors=features, target=loss_with_nulls)

    # Target contains more than one column.
    with self.assertRaises(ValueError):
        stacker.fit(predictors=features, target=features)
def test_fit_regression_stacker_mae_ten_bags(self):
    """Fit with MAE, 20 bags and 80% column/row sampling.

    Despite the test name, ``n_bags`` is 20 here. Pins the ``score`` and
    ``mean_score`` obtained with ``seed=24698537``.
    """
    stacker = LinearPredictorStacker(
        metric=metric_mae,
        max_iter=10,
        n_bags=20,
        colsample=0.8,
        subsample=0.8,
        seed=24698537,
    )
    oof_files = [
        get_path(name)
        for name in ('noid_OOF_predictions_2.csv',
                     'noid_OOF_predictions_3.csv',
                     'noid_OOF_predictions_4.csv')
    ]
    stacker.add_predictors_by_filename(files=oof_files)

    # The loaded data is expected to hold 1000 rows and 22 predictors.
    self.assertEqual(len(stacker.target), 1000)
    self.assertEqual(len(stacker.predictors), 1000)
    self.assertEqual(stacker.predictors.shape[1], 22)

    stacker.fit()

    # An older version of this test expected a score of
    # 1187.6537373418842.
    self.assertAlmostEqual(stacker.score, 1179.2406088734808, places=4)
    self.assertAlmostEqual(stacker.mean_score, 1188.5725272161117,
                           places=4)
def test_fit_regression_stacker_rmse_no_bagging_step_decrease(self):
    """Fit with RMSE, 250 iterations and a single bag.

    No algorithm is passed, so the constructor default applies. Pins the
    resulting ``score`` and ``mean_score``.
    """
    stacker = LinearPredictorStacker(
        metric=metric_rmse,
        max_iter=250,
        n_bags=1,
        colsample=1.0,
        subsample=1.0,
        verbose=0,
    )
    oof_files = [
        get_path(name)
        for name in ('noid_OOF_predictions_2.csv',
                     'noid_OOF_predictions_3.csv',
                     'noid_OOF_predictions_4.csv')
    ]
    stacker.add_predictors_by_filename(files=oof_files)

    # The loaded data is expected to hold 1000 rows and 22 predictors.
    self.assertEqual(len(stacker.target), 1000)
    self.assertEqual(len(stacker.predictors), 1000)
    self.assertEqual(stacker.predictors.shape[1], 22)

    stacker.fit()

    # An older version of this test expected a score of
    # 2030.5021340510675.
    self.assertAlmostEqual(stacker.score, 1972.574584232116, places=4)
    self.assertAlmostEqual(stacker.mean_score, 2032.2110846499691,
                           places=4)
def test_fit_regression_stacker_mae_no_bagging(self):
    """Fit with MAE, 20 iterations and a single bag.

    No algorithm is passed, so the constructor default applies. Pins the
    resulting ``score`` and ``mean_score``.
    """
    stacker = LinearPredictorStacker(
        metric=metric_mae,
        max_iter=20,
        n_bags=1,
        colsample=1.0,
        subsample=1.0,
        verbose=0,
    )
    oof_files = [
        get_path(name)
        for name in ('noid_OOF_predictions_2.csv',
                     'noid_OOF_predictions_3.csv',
                     'noid_OOF_predictions_4.csv')
    ]
    stacker.add_predictors_by_filename(files=oof_files)

    # The loaded data is expected to hold 1000 rows and 22 predictors.
    self.assertEqual(len(stacker.target), 1000)
    self.assertEqual(len(stacker.predictors), 1000)
    self.assertEqual(stacker.predictors.shape[1], 22)

    stacker.fit()

    # An older version of this test expected a score of
    # 1187.1916616561432.
    self.assertAlmostEqual(stacker.score, 1176.295406, places=4)
    self.assertAlmostEqual(stacker.mean_score, 1188.5725272161117,
                           places=4)
def test_swapping_fit_regression_stacker_rmse_no_bagging(self):
    """Fit the SWAPPING algorithm with RMSE, 20 iterations, one bag.

    Pins the resulting ``score`` and ``mean_score``.
    """
    stacker = LinearPredictorStacker(
        metric=metric_rmse,
        algorithm=LinearPredictorStacker.SWAPPING,
        max_iter=20,
        n_bags=1,
        colsample=1.0,
        subsample=1.0,
        verbose=0,
    )
    oof_files = [
        get_path(name)
        for name in ('noid_OOF_predictions_2.csv',
                     'noid_OOF_predictions_3.csv',
                     'noid_OOF_predictions_4.csv')
    ]
    stacker.add_predictors_by_filename(files=oof_files)

    # The loaded data is expected to hold 1000 rows and 22 predictors.
    self.assertEqual(len(stacker.target), 1000)
    self.assertEqual(len(stacker.predictors), 1000)
    self.assertEqual(stacker.predictors.shape[1], 22)

    stacker.fit()

    self.assertAlmostEqual(stacker.score, 2008.0897782026507, places=4)
    self.assertAlmostEqual(stacker.mean_score, 2032.2110846499691,
                           places=4)
def test_regression_r2_score_maximization(self):
    """Fit the STANDARD algorithm with ``r2_score`` and ``maximize=True``.

    Uses one bag and normed_weights=False; pins the ``score`` and
    ``mean_score`` obtained with ``seed=0``.
    """
    stacker = LinearPredictorStacker(
        metric=r2_score,
        maximize=True,
        algorithm=LinearPredictorStacker.STANDARD,
        normed_weights=False,
        max_iter=200,
        n_bags=1,
        colsample=1.0,
        subsample=1.0,
        verbose=2,
        eps=1e-4,
        seed=0,
    )
    oof_files = [
        get_path(name)
        for name in ('noid_OOF_predictions_2.csv',
                     'noid_OOF_predictions_3.csv',
                     'noid_OOF_predictions_4.csv')
    ]
    stacker.add_predictors_by_filename(files=oof_files)

    # The loaded data is expected to hold 1000 rows and 22 predictors.
    self.assertEqual(len(stacker.target), 1000)
    self.assertEqual(len(stacker.predictors), 1000)
    self.assertEqual(stacker.predictors.shape[1], 22)

    stacker.fit()

    self.assertAlmostEqual(stacker.score, 0.5478882, places=4)
    self.assertAlmostEqual(stacker.mean_score, 0.537271112149033,
                           places=4)
def test_add_predictors_not_a_string_raise_TypeError(self):
    """Expect TypeError when the file list holds integers, not strings."""
    stacker = LinearPredictorStacker(metric=metric_rmse)
    not_filenames = [1, 2]
    with self.assertRaises(TypeError):
        stacker.add_predictors_by_filename(files=not_filenames)
def test_add_predictors_file_error_raise_ValueError(self):
    """Expect ValueError when 'does_not_exist.csv' is given to the loader."""
    stacker = LinearPredictorStacker(metric=metric_rmse)
    # Resolve the path first so that only the loader call itself is
    # covered by the exception check.
    missing_file = get_path('does_not_exist.csv')
    with self.assertRaises(ValueError):
        stacker.add_predictors_by_filename(files=[missing_file])
def test_fit_without_predictors_raise_ValueError(self):
    """Expect ValueError when fit() is called before any data is added."""
    stacker = LinearPredictorStacker(metric=metric_rmse)
    with self.assertRaises(ValueError):
        stacker.fit()