Exemplo n.º 1
0
    def test_cife(self):
        """Fit with the 'cife' criterion under a budget and check early stop.

        The selector is fitted with ``budget=5`` and ``stop_budget=True``;
        the test then asserts that strictly fewer variables were selected
        than there are entries in ``costs``.
        """
        # Given
        n_cols = 10
        n_rows = 100

        # When
        mg = MatrixGenerator()
        X, y, costs = mg.generate(n_rows=n_rows,
                                  n_basic_cols=n_cols,
                                  noise_sigmas=[2, 3],
                                  seed=2)
        lamb = 1
        beta = 0.5

        dvs = DiffVariableSelector()
        dvs.fit(data=X,
                target_variable=y,
                costs=costs,
                lamb=lamb,
                j_criterion_func='cife',
                budget=5,
                stop_budget=True,
                beta=beta)

        # Then
        # With the budget stop enabled, not every variable should be picked.
        self.assertGreater(len(costs), len(dvs.variables_selected_order))
Exemplo n.º 2
0
    def test_cife(self):
        """Fit with the 'cife' criterion and no budget, then score and plot.

        Asserts that the selection order is a list with one entry per cost,
        and that the selected costs sum (almost) to the total of ``costs``.
        """
        # Given
        row_count, col_count = 1000, 20
        model = LogisticRegression()

        # When
        generator = MatrixGenerator()
        X, y, costs = generator.generate(n_rows=row_count,
                                         n_cols=col_count,
                                         seed=2)

        fit_kwargs = {
            'data': X,
            'target_variable': y,
            'costs': costs,
            'lamb': 1,
            'j_criterion_func': 'cife',
            'beta': 0.5,
        }
        dvs = DiffVariableSelector()
        dvs.fit(**fit_kwargs)
        dvs.scoreCV(model=model, cv=5)
        dvs.plot_scores(compare_no_cost_method=True, model=model)

        # Then
        self.assertIsInstance(dvs.variables_selected_order, list)
        self.assertEqual(len(dvs.variables_selected_order), len(costs))
        total_cost = sum(costs)
        selected_cost = sum(dvs.cost_variables_selected_order)
        self.assertAlmostEqual(total_cost, selected_cost)
Exemplo n.º 3
0
    def test_theoretical_output(self):
        """Check the first variable picked by 'mim' on a tiny fixed dataset.

        With unit costs for all three columns, the test expects column
        index 2 to be selected first.
        """
        rows = [
            [0, 1, 0],
            [0, 1, 0],
            [0, 1, 2],
            [0, 1, 3],
            [1, 1, 5],
        ]
        features = np.array(rows)
        labels = np.array([0, 0, 0, 0, 1])
        unit_costs = [1, 1, 1]

        selector = DiffVariableSelector()
        selector.fit(data=features,
                     target_variable=labels,
                     costs=unit_costs,
                     lamb=1,
                     j_criterion_func='mim')

        first_selected = selector.variables_selected_order[0]
        self.assertEqual(first_selected, 2)
Exemplo n.º 4
0
    def test_pandas_input(self):
        """Fit on a pandas DataFrame/Series with costs keyed by column name.

        Asserts that the selection order is a list with one entry per cost.
        """
        column_names = ['AA', 'BB', 'CC']
        # The feature matrix is drawn before the target, as in the original,
        # so the sequence of random draws is unchanged.
        frame = pd.DataFrame(np.random.randint(0, 10, (100, 3)),
                             columns=column_names)
        target = pd.Series(np.random.randint(0, 2, 100))
        costs = {'AA': 10, 'BB': 1, 'CC': 1.5}

        selector = DiffVariableSelector()
        selector.fit(data=frame,
                     target_variable=target,
                     costs=costs,
                     lamb=1,
                     j_criterion_func='mim')

        self.assertIsInstance(selector.variables_selected_order, list)
        self.assertEqual(len(selector.variables_selected_order), len(costs))
Exemplo n.º 5
0
    def test_stop_budget(self):
        """Fit with ``budget=2`` and ``stop_budget=True`` on pandas input.

        Asserts that the summed cost of the selected variables is strictly
        below 2 and that at most 2 variables were selected.

        NOTE(review): the cost check is strict (``2 > sum``); whether the
        budget is meant to be inclusive is not visible here -- confirm.
        """
        budget = 2
        frame = pd.DataFrame(np.random.randint(0, 10, (100, 3)),
                             columns=['AA', 'BB', 'CC'])
        target = pd.Series(np.random.randint(0, 2, 100))
        costs = {'AA': 2, 'BB': 1.1, 'CC': 1.5}

        selector = DiffVariableSelector()
        selector.fit(data=frame,
                     target_variable=target,
                     costs=costs,
                     lamb=1,
                     j_criterion_func='mim',
                     budget=budget,
                     stop_budget=True)

        spent = sum(selector.cost_variables_selected_order)
        self.assertGreater(2, spent)
        self.assertGreaterEqual(2, len(selector.variables_selected_order))
Exemplo n.º 6
0
    def test_numpy_input(self):
        """Fit on plain NumPy arrays with a list of per-column costs.

        Asserts that the selection order is a list with one entry per cost,
        and that the selected costs sum (almost) to the total of ``costs``.
        """
        features = np.random.randint(0, 10, (100, 10))
        target = np.random.randint(0, 10, 100)
        costs = [1.76, 0.19, 0.36, 0.96, 0.41, 0.17, 0.36, 0.75, 0.79, 1.38]

        selector = DiffVariableSelector()
        selector.fit(data=features,
                     target_variable=target,
                     costs=costs,
                     lamb=1,
                     j_criterion_func='mim')

        self.assertIsInstance(selector.variables_selected_order, list)
        self.assertEqual(len(selector.variables_selected_order), len(costs))
        expected_total = sum(costs)
        selected_total = sum(selector.cost_variables_selected_order)
        self.assertAlmostEqual(expected_total, selected_total)
Exemplo n.º 7
0
    def test_plot_comparision(self):
        """Smoke-test scoring and plotting with the no-cost comparison on.

        There are no assertions; the test passes if fit, score and
        plot_scores complete without raising.
        """
        generator = MatrixGenerator()
        X, y, costs = generator.generate(n_basic_cols=10,
                                         noise_sigmas=[0.1, 1])

        selector = DiffVariableSelector()
        selector.fit(data=X,
                     target_variable=y,
                     costs=costs,
                     lamb=1,
                     j_criterion_func='mim')

        classifier = LogisticRegression()
        selector.score(classifier, scoring_function=roc_auc_score)
        selector.plot_scores(compare_no_cost_method=True,
                             budget=1,
                             model=classifier)
Exemplo n.º 8
0
    def test_plot_without_comparision(self):
        """Smoke-test scoring and plotting without the no-cost comparison.

        There are no assertions; the test passes if fit, score and
        plot_scores complete without raising.
        """
        features = np.random.randint(0, 10, (100, 10))
        target = np.random.randint(0, 2, 100)
        costs = [1.76, 0.19, 0.36, 0.96, 0.41, 0.17, 0.36, 0.75, 0.79, 1.38]

        selector = DiffVariableSelector()
        selector.fit(data=features,
                     target_variable=target,
                     costs=costs,
                     lamb=1,
                     j_criterion_func='mim')

        classifier = LogisticRegression()
        selector.score(classifier, scoring_function=roc_auc_score)
        selector.plot_scores(budget=1)
Exemplo n.º 9
0
    def test_score(self):
        """Score a fitted selector and check one score per cost entry.

        The cost list deliberately includes negative values, as in the
        original fixture.
        """
        features = np.random.randint(0, 10, (100, 10))
        target = np.random.randint(0, 2, 100)
        costs = [1.76, 0.19, -0.36, 0.96, 0.41, 0.17, -0.36, 0.75, 0.79, -1.38]

        selector = DiffVariableSelector()
        selector.fit(data=features,
                     target_variable=target,
                     costs=costs,
                     lamb=1,
                     j_criterion_func='mim')

        classifier = LogisticRegression()
        selector.score(classifier, scoring_function=roc_auc_score)

        score_count = len(selector.total_scores)
        self.assertEqual(score_count, len(costs))
Exemplo n.º 10
0
 def test_run_score_before_fit(self):
     """Calling score() on an unfitted selector must raise AssertionError."""
     classifier = LogisticRegression()
     unfitted = DiffVariableSelector()
     # Only the score() call sits inside the context manager, so an error
     # raised while constructing the objects is not mistaken for a pass.
     with self.assertRaises(AssertionError):
         unfitted.score(classifier, scoring_function=roc_auc_score)