Пример #1
0
    def test_with_numpy_array(self):
        """Check that a DataFrame and its raw ndarray are imputed identically.

        Every column contains only non-finite values, so each ``fit`` call is
        expected to emit exactly one warning naming the affected columns
        (labels for the DataFrame, positional indices for the ndarray).
        """
        imputer = PerColumnImputer()

        X = pd.DataFrame(index=list(range(100)))

        # np.PINF / np.NINF were removed in NumPy 2.0; np.inf is available in
        # every NumPy version and yields the same values.
        X["NaNs"] = np.nan * np.ones(100)
        X["PINF"] = np.inf * np.ones(100)
        X["NINF"] = -np.inf * np.ones(100)

        # Independent copy so the ndarray run does not share memory with X.
        X_numpy = X.values.copy()

        with warnings.catch_warnings(record=True) as caught:
            imputer.fit(X)
            self.assertEqual(len(caught), 1)
            self.assertEqual(
                "The columns ['NaNs' 'PINF' 'NINF'] did not have any finite values. Filling with zeros.",
                str(caught[0].message))

        selected_X = imputer.transform(X)

        # re-initialize for new dicts
        imputer = PerColumnImputer()
        with warnings.catch_warnings(record=True) as caught:
            imputer.fit(X_numpy)
            self.assertEqual(len(caught), 1)
            self.assertEqual(
                "The columns [0 1 2] did not have any finite values. Filling with zeros.",
                str(caught[0].message))

        selected_X_numpy = imputer.transform(X_numpy)

        npt.assert_array_equal(selected_X.values, selected_X_numpy.values)

        # assertTrue(a, b) uses b as the failure message and only checks that
        # a is truthy, so it could never fail; compare the shapes for real.
        self.assertEqual(selected_X_numpy.shape, X.shape)
Пример #2
0
    def test_standard_replacement_behavior(self):
        """Check the default replacements learned from a single column.

        With no presets, -inf, +inf and NaN are expected to become -100, 100
        and 1, which here are the minimum, maximum and median of the
        column's finite entries.
        """
        imputer = PerColumnImputer()

        # np.NINF / np.PINF were removed in NumPy 2.0; use -np.inf / np.inf.
        data = [-np.inf, np.inf, np.nan, 100.0, -100.0, 1.0, 1.0]
        truth = [-100.0, 100.0, 1.0, 100.0, -100.0, 1.0, 1.0]
        X = pd.DataFrame({"a": data})
        true_X = pd.DataFrame({"a": truth})

        imputer.fit(X)
        selected_X = imputer.transform(X)

        pdt.assert_frame_equal(selected_X, true_X)
    def test_standard_replacement_behavior(self):
        """Verify fit/transform on one column without any preset replacements.

        The expected frame maps -inf -> -100, +inf -> 100 and NaN -> 1; for
        this data those are the min, max and median of the finite values.
        """
        imputer = PerColumnImputer()

        # -np.inf / np.inf replace np.NINF / np.PINF, which NumPy 2.0 removed.
        data = [-np.inf, np.inf, np.nan, 100.0, -100.0, 1.0, 1.0]
        truth = [-100.0, 100.0, 1.0, 100.0, -100.0, 1.0, 1.0]
        X = pd.DataFrame({"a": data})
        true_X = pd.DataFrame({"a": truth})

        imputer.fit(X)
        selected_X = imputer.transform(X)

        pdt.assert_frame_equal(selected_X, true_X)
Пример #4
0
    def test_partial_preset_col_to_PINF_given(self):
        """Check imputation when only the +inf replacement is preset.

        Column "a" gets a preset +inf replacement of 100; the -inf and NaN
        replacements are not preset and are expected to come out as -100
        and 1 after fitting.
        """
        # np.NINF / np.PINF were removed in NumPy 2.0; use -np.inf / np.inf.
        data = [-np.inf, np.inf, np.nan, 100.0, -100.0, 1.0, 1.0]
        truth = [-100.0, 100.0, 1.0, 100.0, -100.0, 1.0, 1.0]
        X = pd.DataFrame({"a": data})
        true_X = pd.DataFrame({"a": truth})

        col_to_max = {"a": 100}
        imputer = PerColumnImputer(col_to_PINF_repl_preset=col_to_max)

        imputer.fit(X)
        selected_X = imputer.transform(X)

        pdt.assert_frame_equal(selected_X, true_X)
Пример #5
0
    def test_with_numpy_array(self):
        """Check that ndarray input is imputed exactly like DataFrame input.

        The same all-non-finite data is pushed through two fresh imputers,
        once as a DataFrame and once as its underlying ndarray, and the
        transformed values are compared.
        """
        imputer = PerColumnImputer()

        X = pd.DataFrame(index=list(range(100)))

        # np.PINF / np.NINF were removed in NumPy 2.0; np.inf works on every
        # NumPy version and produces identical columns.
        X["NaNs"] = np.nan * np.ones(100)
        X["PINF"] = np.inf * np.ones(100)
        X["NINF"] = -np.inf * np.ones(100)

        X_numpy = X.values

        imputer.fit(X)
        selected_X = imputer.transform(X)

        # re-initialize for new dicts
        imputer = PerColumnImputer()
        imputer.fit(X_numpy)
        selected_X_numpy = imputer.transform(X_numpy)

        npt.assert_array_equal(selected_X.values, selected_X_numpy.values)

        # The former assertTrue(shape, (1, 100)) took the tuple as the failure
        # message and could never fail; assert the shape is actually preserved.
        self.assertEqual(selected_X_numpy.shape, X.shape)
Пример #6
0
    def test_only_nans_and_infs(self):
        """Check that columns with no finite values are imputed to all zeros."""
        imputer = PerColumnImputer()

        X = pd.DataFrame(index=list(range(100)))

        # np.PINF / np.NINF were removed in NumPy 2.0; use np.inf / -np.inf.
        X["NaNs"] = np.nan * np.ones(100)
        X["PINF"] = np.inf * np.ones(100)
        X["NINF"] = -np.inf * np.ones(100)

        imputer.fit(X)
        selected_X = imputer.transform(X)

        self.assertTrue((selected_X.values == 0).all())
    def test_partial_preset_col_to_PINF_given(self):
        """Verify that presetting only the +inf replacement still imputes fully.

        Only ``col_to_PINF_repl_preset`` is supplied (100 for column "a");
        the expected result replaces -inf with -100 and NaN with 1.
        """
        # -np.inf / np.inf replace np.NINF / np.PINF, which NumPy 2.0 removed.
        data = [-np.inf, np.inf, np.nan, 100.0, -100.0, 1.0, 1.0]
        truth = [-100.0, 100.0, 1.0, 100.0, -100.0, 1.0, 1.0]
        X = pd.DataFrame({"a": data})
        true_X = pd.DataFrame({"a": truth})

        col_to_max = {"a": 100}
        imputer = PerColumnImputer(col_to_PINF_repl_preset=col_to_max)

        imputer.fit(X)
        selected_X = imputer.transform(X)

        pdt.assert_frame_equal(selected_X, true_X)
    def test_with_numpy_array(self):
        """Compare imputation of a DataFrame against its plain ndarray values.

        Both inputs hold the same three all-non-finite columns; each is
        fitted and transformed by its own imputer instance and the results
        must match element-wise.
        """
        imputer = PerColumnImputer()

        X = pd.DataFrame(index=list(range(100)))

        # np.PINF / np.NINF no longer exist in NumPy >= 2.0; np.inf is the
        # portable spelling of the same constants.
        X["NaNs"] = np.nan * np.ones(100)
        X["PINF"] = np.inf * np.ones(100)
        X["NINF"] = -np.inf * np.ones(100)

        X_numpy = X.values

        imputer.fit(X)
        selected_X = imputer.transform(X)

        # re-initialize for new dicts
        imputer = PerColumnImputer()
        imputer.fit(X_numpy)
        selected_X_numpy = imputer.transform(X_numpy)

        npt.assert_array_equal(selected_X.values, selected_X_numpy.values)

        # assertTrue's second argument is only a message, so the old check
        # was vacuous; use assertEqual to really verify the output shape.
        self.assertEqual(selected_X_numpy.shape, X.shape)
    def test_only_nans_and_infs(self):
        """Verify that NaN-only, +inf-only and -inf-only columns all become 0."""
        imputer = PerColumnImputer()

        X = pd.DataFrame(index=list(range(100)))

        # np.inf / -np.inf replace np.PINF / np.NINF, which NumPy 2.0 removed.
        X["NaNs"] = np.nan * np.ones(100)
        X["PINF"] = np.inf * np.ones(100)
        X["NINF"] = -np.inf * np.ones(100)

        imputer.fit(X)
        selected_X = imputer.transform(X)

        self.assertTrue((selected_X.values == 0).all())
Пример #10
0
    def test_only_subset_of_columns_given(self):
        """Check a NaN preset that covers only one of two columns.

        Column "a" has a preset NaN replacement of 0, column "b" has none.
        The expected output uses 0 for the NaN in "a" and 1 for the NaN
        in "b"; infinities are replaced identically in both columns.
        """
        # np.NINF / np.PINF were removed in NumPy 2.0; use -np.inf / np.inf.
        data = [-np.inf, np.inf, np.nan, 100.0, -100.0, 1.0, 1.0]
        truth_a = [-100.0, 100.0, 0.0, 100.0, -100.0, 1.0, 1.0]
        truth_b = [-100.0, 100.0, 1.0, 100.0, -100.0, 1.0, 1.0]
        X = pd.DataFrame({"a": data, "b": data})
        true_X = pd.DataFrame({"a": truth_a, "b": truth_b})

        col_to_median = {"a": 0}
        imputer = PerColumnImputer(col_to_NAN_repl_preset=col_to_median)

        imputer.fit(X)
        selected_X = imputer.transform(X)

        pdt.assert_frame_equal(selected_X, true_X)
Пример #11
0
    def test_preset_has_higher_priority_than_fit(self):
        """Check that a preset NaN replacement survives a subsequent ``fit``.

        Column "a" is preset to replace NaN with 0; after fitting on the same
        column the NaN is still expected to become 0.
        """
        # np.NINF / np.PINF were removed in NumPy 2.0; use -np.inf / np.inf.
        data = [-np.inf, np.inf, np.nan, 100.0, -100.0, 1.0, 1.0]
        truth = [-100.0, 100.0, 0.0, 100.0, -100.0, 1.0, 1.0]

        X = pd.DataFrame({"a": data})
        true_X = pd.DataFrame({"a": truth})

        col_to_median = {"a": 0}
        imputer = PerColumnImputer(col_to_NAN_repl_preset=col_to_median)
        imputer.fit(X)

        selected_X = imputer.transform(X)

        pdt.assert_frame_equal(selected_X, true_X)
Пример #12
0
    def test_only_subset_of_columns_given(self):
        """Verify per-column presets apply only to the columns they name.

        A NaN replacement of 0 is preset for "a" only. The expected frame has
        0 in place of the NaN in "a" and 1 in place of the NaN in "b".
        """
        # -np.inf / np.inf replace np.NINF / np.PINF, which NumPy 2.0 removed.
        data = [-np.inf, np.inf, np.nan, 100.0, -100.0, 1.0, 1.0]
        truth_a = [-100.0, 100.0, 0.0, 100.0, -100.0, 1.0, 1.0]
        truth_b = [-100.0, 100.0, 1.0, 100.0, -100.0, 1.0, 1.0]
        X = pd.DataFrame({"a": data, "b": data})
        true_X = pd.DataFrame({"a": truth_a, "b": truth_b})

        col_to_median = {"a": 0}
        imputer = PerColumnImputer(col_to_NAN_repl_preset=col_to_median)

        imputer.fit(X)
        selected_X = imputer.transform(X)

        pdt.assert_frame_equal(selected_X, true_X)
Пример #13
0
    def test_preset_has_higher_priority_than_fit(self):
        """Verify that fitting does not override a preset NaN replacement.

        With ``col_to_NAN_repl_preset={"a": 0}`` the NaN in column "a" is
        expected to be replaced by 0 even after ``fit`` has seen the data.
        """
        # -np.inf / np.inf replace np.NINF / np.PINF, which NumPy 2.0 removed.
        data = [-np.inf, np.inf, np.nan, 100.0, -100.0, 1.0, 1.0]
        truth = [-100.0, 100.0, 0.0, 100.0, -100.0, 1.0, 1.0]

        X = pd.DataFrame({"a": data})
        true_X = pd.DataFrame({"a": truth})

        col_to_median = {"a": 0}
        imputer = PerColumnImputer(col_to_NAN_repl_preset=col_to_median)
        imputer.fit(X)

        selected_X = imputer.transform(X)

        pdt.assert_frame_equal(selected_X, true_X)
Пример #14
0
    def test_only_parameters_of_last_fit_count(self):
        """Check that a second ``fit`` replaces what the first one learned.

        The imputer is fitted on ``X`` (finite range -100..100) and then on
        ``X_2`` (finite range -10..10). Transforming ``X_2`` is expected to
        use -10, 10 and 3, i.e. values consistent with ``X_2`` only.
        """
        # np.NINF / np.PINF were removed in NumPy 2.0; use -np.inf / np.inf.
        data = [-np.inf, np.inf, np.nan, 100.0, -100.0, 1.0, 1.0]
        data_2 = [-np.inf, np.inf, np.nan, 10.0, -10.0, 3.0, 3.0]
        truth_a = [-10.0, 10.0, 3.0, 10.0, -10.0, 3.0, 3.0]
        truth_b = [-10.0, 10.0, 3.0, 10.0, -10.0, 3.0, 3.0]

        X = pd.DataFrame({"a": data, "b": data})
        X_2 = pd.DataFrame({"a": data_2, "b": data_2})
        true_X = pd.DataFrame({"a": truth_a, "b": truth_b})

        imputer = PerColumnImputer()

        imputer.fit(X)
        imputer.fit(X_2)

        selected_X = imputer.transform(X_2)

        pdt.assert_frame_equal(selected_X, true_X)
Пример #15
0
    def test_only_parameters_of_last_fit_count(self):
        """Verify that refitting discards the parameters of an earlier fit.

        After fitting on ``X`` and then on ``X_2``, transforming ``X_2`` must
        yield -10 / 10 / 3 for -inf / +inf / NaN, with no trace of the
        -100..100 range seen in the first fit.
        """
        # -np.inf / np.inf replace np.NINF / np.PINF, which NumPy 2.0 removed.
        data = [-np.inf, np.inf, np.nan, 100.0, -100.0, 1.0, 1.0]
        data_2 = [-np.inf, np.inf, np.nan, 10.0, -10.0, 3.0, 3.0]
        truth_a = [-10.0, 10.0, 3.0, 10.0, -10.0, 3.0, 3.0]
        truth_b = [-10.0, 10.0, 3.0, 10.0, -10.0, 3.0, 3.0]

        X = pd.DataFrame({"a": data, "b": data})
        X_2 = pd.DataFrame({"a": data_2, "b": data_2})
        true_X = pd.DataFrame({"a": truth_a, "b": truth_b})

        imputer = PerColumnImputer()

        imputer.fit(X)
        imputer.fit(X_2)

        selected_X = imputer.transform(X_2)

        pdt.assert_frame_equal(selected_X, true_X)
Пример #16
0
    def test_only_nans_and_infs(self):
        """Check the warning and the zero fill for columns with no finite values.

        ``fit`` is expected to emit exactly one warning listing all three
        columns, and ``transform`` is expected to return only zeros.
        """
        imputer = PerColumnImputer()

        X = pd.DataFrame(index=list(range(100)))

        # np.PINF / np.NINF were removed in NumPy 2.0; use np.inf / -np.inf.
        X["NaNs"] = np.nan * np.ones(100)
        X["PINF"] = np.inf * np.ones(100)
        X["NINF"] = -np.inf * np.ones(100)

        with warnings.catch_warnings(record=True) as caught:
            imputer.fit(X)
            self.assertEqual(len(caught), 1)
            self.assertEqual(
                "The columns ['NaNs' 'PINF' 'NINF'] did not have any finite values. Filling with zeros.",
                str(caught[0].message))

        selected_X = imputer.transform(X)

        self.assertTrue((selected_X.values == 0).all())