示例#1
0
    def test_rank1d_shapiro(self):
        """
        Check Rank1D with the shapiro algorithm: the transform returns its
        input, the ranks match the reference values, and the image is similar
        """
        dataset = load_energy(return_dataset=True)
        X, _ = dataset.to_numpy()

        visualizer = Rank1D(algorithm="shapiro")
        transformed = visualizer.fit_transform(X)

        # fit_transform must hand the input back unchanged
        npt.assert_array_equal(transformed, X)

        # Reference rank for each column of X
        expected = np.array(
            [
                0.93340671, 0.94967198, 0.92689574, 0.7459445,
                0.63657606, 0.85603625, 0.84349269, 0.91551381,
            ]
        )

        assert hasattr(visualizer, "ranks_")
        n_features = X.shape[1]
        assert visualizer.ranks_.shape == (n_features,)
        npt.assert_array_almost_equal(visualizer.ranks_, expected)

        # Image similarity check on the finalized visualizer
        visualizer.finalize()
        self.assert_images_similar(visualizer)
示例#2
0
    def test_kendalltau(self):
        """
        Test that kendalltau returns the expected correlation matrix for
        the energy dataset
        """
        X, _ = load_energy(return_dataset=True).to_numpy()

        expected = np.array([
            [1.0, -1.0, -0.2724275, -0.7361443, 0.7385489, 0.0, 0.0, 0.0],
            [-1.0, 1.0, 0.2724275, 0.7361443, -0.7385489, 0.0, 0.0, 0.0],
            [
                -0.2724275, 0.2724275, 1.0, -0.15192004, 0.19528337, 0.0, 0.0,
                0.0
            ],
            [
                -0.73614431, 0.73614431, -0.15192004, 1.0, -0.87518995, 0.0,
                0.0, 0.0
            ],
            [
                0.73854895, -0.73854895, 0.19528337, -0.87518995, 1.0, 0.0,
                0.0, 0.0
            ],
            [0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0],
            [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.15430335],
            [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.15430335, 1.0],
        ])
        actual = kendalltau(X)

        # numpy's signature is (actual, desired); passing the arrays in that
        # order keeps the labels in a failure report correct.
        npt.assert_almost_equal(actual, expected)
示例#3
0
    def test_rank2d_kendalltau(self):
        """
        Check Rank2D with the kendalltau algorithm: the transform returns its
        input, the ranks match the reference matrix, and the image is similar
        """
        dataset = load_energy(return_dataset=True)
        X, _ = dataset.to_numpy()

        visualizer = Rank2D(algorithm="kendalltau")
        transformed = visualizer.fit_transform(X)

        # fit_transform must hand the input back unchanged
        npt.assert_array_equal(transformed, X)

        # Reference pairwise ranks, one row and one column per feature of X
        expected = np.array(
            [
                [1.0, -1.0, -0.2724275, -0.73614431, 0.73854895, 0.0, 0.0, 0.0],
                [-1.0, 1.0, 0.2724275, 0.73614431, -0.73854895, 0.0, 0.0, 0.0],
                [-0.2724275, 0.2724275, 1.0, -0.15192004, 0.19528337, 0.0, 0.0, 0.0],
                [-0.73614431, 0.73614431, -0.15192004, 1.0, -0.87518995, 0.0, 0.0, 0.0],
                [0.73854895, -0.73854895, 0.19528337, -0.87518995, 1.0, 0.0, 0.0, 0.0],
                [0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0],
                [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.15430335],
                [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.15430335, 1.0],
            ]
        )

        assert hasattr(visualizer, "ranks_")
        n_features = X.shape[1]
        assert visualizer.ranks_.shape == (n_features, n_features)
        npt.assert_array_almost_equal(visualizer.ranks_, expected)

        # Image similarity comparison
        visualizer.finalize()
        self.assert_images_similar(visualizer, tol=0.1)
示例#4
0
    def test_rank2d_spearman(self):
        """
        Check Rank2D with the spearman algorithm: the transform returns its
        input, the ranks match the reference matrix, and the image is similar
        """
        dataset = load_energy(return_dataset=True)
        X, _ = dataset.to_numpy()

        visualizer = Rank2D(algorithm="spearman")
        transformed = visualizer.fit_transform(X)

        # fit_transform must hand the input back unchanged
        npt.assert_array_equal(transformed, X)

        # Reference pairwise ranks, one row and one column per feature of X
        expected = np.array(
            [
                [1.0, -1.0, -0.25580533, -0.8708862, 0.86904819, 0.0, 0.0, 0.0],
                [-1.0, 1.0, 0.25580533, 0.8708862, -0.86904819, 0.0, 0.0, 0.0],
                [-0.25580533, 0.25580533, 1.0, -0.19345677, 0.22076336, 0.0, 0.0, 0.0],
                [-0.8708862, 0.8708862, -0.19345677, 1.0, -0.93704257, 0.0, 0.0, 0.0],
                [0.86904819, -0.86904819, 0.22076336, -0.93704257, 1.0, 0.0, 0.0, 0.0],
                [0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0],
                [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.18759162],
                [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.18759162, 1.0],
            ]
        )

        assert hasattr(visualizer, "ranks_")
        n_features = X.shape[1]
        assert visualizer.ranks_.shape == (n_features, n_features)
        npt.assert_array_almost_equal(visualizer.ranks_, expected)

        # Image similarity comparison
        visualizer.finalize()
        self.assert_images_similar(visualizer, tol=0.1)
示例#5
0
 def test_rank2d_unknown_algorithm(self):
     """
     Rank2D must raise YellowbrickValueError for an unrecognized algorithm
     """
     X, _ = load_energy()

     # The algorithm is passed capitalized; the expected message names it
     # in lower case.
     expected_message = "'oscar' is unrecognized ranking method"

     with pytest.raises(YellowbrickValueError, match=expected_message):
         visualizer = Rank2D(algorithm="Oscar")
         visualizer.transform(X)
示例#6
0
 def test_rankdbase_unknown_algorithm(self):
     """
     Assert that RankDBase raises YellowbrickValueError, with the expected
     message, when given an unknown algorithm
     """
     X, _ = load_energy(return_dataset=True).to_numpy()
     with pytest.raises(YellowbrickValueError,
                        match=".* is unrecognized ranking method") as e:
         oz = RankDBase(algorithm="unknown")
         oz.fit_transform(X)

     # Checked after the context exits: a statement placed after the raising
     # call inside the block is never reached, so the assertion would
     # otherwise never run.
     assert str(e.value) == "'unknown' is unrecognized ranking method"
示例#7
0
    def test_kendalltau_shape(self):
        """
        kendalltau should return a square matrix whose (i, j) and (j, i)
        entries agree
        """
        dataset = load_energy(return_dataset=True)
        X, _ = dataset.to_numpy()
        corr = kendalltau(X)

        n_rows = corr.shape[0]
        n_cols = corr.shape[1]
        assert n_rows == n_cols

        # Every entry must be approximately equal to its mirrored entry
        for index, value in np.ndenumerate(corr):
            i, j = index
            assert corr[j][i] == pytest.approx(value)
示例#8
0
    def test_rank1d_horizontal(self):
        """
        Check Rank1D with orient="h": the transform returns its input and
        the image is similar
        """
        dataset = load_energy(return_dataset=True)
        X, _ = dataset.to_numpy()

        visualizer = Rank1D(orient="h")
        transformed = visualizer.fit_transform(X)

        # fit_transform must hand the input back unchanged
        npt.assert_array_equal(transformed, X)

        # Image similarity check on the finalized visualizer
        visualizer.finalize()
        self.assert_images_similar(visualizer)
示例#9
0
def validation():
    """
    Fit a ValidationCurve for a DecisionTreeRegressor over max_depth values
    1 through 10 on the energy dataset, then pass it to savefig under the
    name "validation_curve".
    """
    X, y = load_energy()

    model = DecisionTreeRegressor()
    depths = np.arange(1, 11)
    ax = newfig()

    oz = ValidationCurve(
        model, param_name="max_depth", param_range=depths, cv=10, scoring="r2", ax=ax
    )
    oz.fit(X, y)
    savefig(oz, "validation_curve")
    def test_quick_method(self):
        """
        The alphas quick method should return an AlphaSelection whose image
        is similar
        """
        dataset = load_energy(return_dataset=True)
        X, y = dataset.to_numpy()

        model = LassoCV(random_state=0)
        result = alphas(model, X, y, is_fitted=False, show=False)

        assert isinstance(result, AlphaSelection)
        self.assert_images_similar(result)
示例#11
0
    def test_quick_method_manual(self):
        """
        The manual_alphas quick method should return a ManualAlphaSelection
        whose image is similar
        """
        dataset = load_energy(return_dataset=True)
        X, y = dataset.to_numpy()

        model = ElasticNet(random_state=0)
        result = manual_alphas(model, X, y, cv=3, is_fitted=False, show=False)

        assert isinstance(result, ManualAlphaSelection)

        # Loose tolerance: images on the Python 3.6 Travis build were not
        # similar (RMS 0.024)
        self.assert_images_similar(result, tol=0.5)
    def test_residuals_plot_numpy(self):
        """
        Image similarity check for ResidualsPlot with Lasso on the energy
        dataset loaded via to_numpy
        """
        _, ax = plt.subplots()

        # Load the energy dataset
        X, y = load_energy(return_dataset=True).to_numpy()

        # Create train/test splits (test_size=0.2, fixed random_state)
        X_train, X_test, y_train, y_test = tts(
            X, y, test_size=0.2, random_state=231
        )

        estimator = Lasso(random_state=44)
        oz = ResidualsPlot(estimator, ax=ax)
        oz.fit(X_train, y_train)
        oz.score(X_test, y_test)
        oz.finalize()

        self.assert_images_similar(oz, tol=1.5)
    def test_prediction_error_numpy(self):
        """
        Image similarity check for PredictionError with Ridge on the energy
        dataset loaded via to_numpy
        """
        _, ax = plt.subplots()

        # Load the energy dataset
        X, y = load_energy(return_dataset=True).to_numpy()

        # Create train/test splits (test_size=0.2, fixed random_state)
        X_train, X_test, y_train, y_test = tts(
            X, y, test_size=0.2, random_state=8873
        )

        estimator = Ridge(random_state=22)
        oz = PredictionError(estimator, ax=ax)
        oz.fit(X_train, y_train)
        oz.score(X_test, y_test)
        oz.finalize()

        self.assert_images_similar(oz, tol=1, remove_legend=True)
    def test_residuals_with_fitted(self):
        """
        ResidualsPlot should only call fit on an already-fitted model when
        is_fitted=False is passed explicitly
        """
        X, y = load_energy(return_dataset=True).to_numpy()

        model = Ridge().fit(X, y)

        # (constructor keyword arguments, whether model.fit should be called)
        scenarios = (
            ({}, False),
            ({"is_fitted": True}, False),
            ({"is_fitted": False}, True),
        )

        for kwargs, expect_fit in scenarios:
            with mock.patch.object(model, "fit") as mockfit:
                oz = ResidualsPlot(model, **kwargs)
                oz.fit(X, y)
                if expect_fit:
                    mockfit.assert_called_once_with(X, y)
                else:
                    mockfit.assert_not_called()
示例#15
0
from sklearn.linear_model import Lasso
from sklearn.model_selection import train_test_split
import pandas as pd

from yellowbrick.datasets import load_concrete
from yellowbrick.regressor import prediction_error

from sklearn.linear_model import LassoCV
from yellowbrick.regressor.alphas import alphas

from yellowbrick.datasets import load_energy

# Load the energy dataset
X, y = load_energy() # TODO: replace this dataset with ours, read in as X and y values
# Draft of loading our own data instead (kept commented out).
# NOTE(review): iterating a DataFrame directly yields column labels, not rows;
# confirm the intended row access before enabling this.
# X = []                         # makes a list
# y = []                         # makes a list
# data = pd.read_csv(r'C:\Users\Jujin\Desktop\cs-4641-group-44\cleaned_encoded_COVID_Data_Copy.csv')
# for row in data:
#     X.append(row[1])     # takes the element at index 1
#     y.append(row[2])     # takes the element at index 2

# Use the quick method and immediately show the figure
alphas(LassoCV(random_state=0), X, y)

# Load a regression dataset
X, y = load_concrete()  # TODO: likewise replace with our own data

# Draft of reading our own training/test CSV files (kept commented out)
#X = pd.read_csv(r'C:\Users\Jujin\Desktop\cs-4641-group-44\training_data.csv')
#y = pd.read_csv(r'C:\Users\Jujin\Desktop\cs-4641-group-44\test_data.csv')

# X_train = []                         # makes a list
示例#16
0
def cvscores():
    """
    Fit a CVScores visualizer (Ridge, r2 scoring, cv=10) on the energy
    dataset, then pass it to savefig under the name "cv_scores".
    """
    X, y = load_energy()

    estimator = Ridge()
    ax = newfig()

    oz = CVScores(estimator, scoring="r2", cv=10, ax=ax)
    oz.fit(X, y)
    savefig(oz, "cv_scores")
示例#17
0
def learning():
    """
    Fit a LearningCurve visualizer (RidgeCV, r2 scoring) on the energy
    dataset using ten train sizes evenly spaced from 0.3 to 1.0, then pass
    it to savefig under the name "learning_curve".
    """
    X, y = load_energy()

    train_sizes = np.linspace(0.3, 1.0, 10)
    estimator = RidgeCV()
    ax = newfig()

    oz = LearningCurve(estimator, train_sizes=train_sizes, scoring="r2", ax=ax)
    oz.fit(X, y)
    savefig(oz, "learning_curve")
示例#18
0
    def test_rank2d_pearson(self):
        """
        Check Rank2D with the pearson algorithm: the transform returns its
        input, the ranks match the reference matrix, and the image is similar
        """
        dataset = load_energy(return_dataset=True)
        X, _ = dataset.to_numpy()

        visualizer = Rank2D(algorithm="pearson")
        transformed = visualizer.fit_transform(X)

        # fit_transform must hand the input back unchanged
        npt.assert_array_equal(transformed, X)

        # Reference pairwise ranks; each row of the matrix is written as two
        # source lines of four values
        expected = np.array(
            [
                [
                    1.00000000e00, -9.91901462e-01, -2.03781680e-01, -8.68823408e-01,
                    8.27747317e-01, 0.00000000e00, 1.11706815e-16, -1.12935670e-16,
                ],
                [
                    -9.91901462e-01, 1.00000000e00, 1.95501633e-01, 8.80719517e-01,
                    -8.58147673e-01, 0.00000000e00, -2.26567708e-16, -3.55861251e-16,
                ],
                [
                    -2.03781680e-01, 1.95501633e-01, 1.00000000e00, -2.92316466e-01,
                    2.80975743e-01, 0.00000000e00, 7.87010445e-18, 0.00000000e00,
                ],
                [
                    -8.68823408e-01, 8.80719517e-01, -2.92316466e-01, 1.00000000e00,
                    -9.72512237e-01, 0.00000000e00, -3.27553310e-16, 2.20057668e-16,
                ],
                [
                    8.27747317e-01, -8.58147673e-01, 2.80975743e-01, -9.72512237e-01,
                    1.00000000e00, 0.00000000e00, -1.24094525e-18, 0.00000000e00,
                ],
                [
                    0.00000000e00, 0.00000000e00, 0.00000000e00, 0.00000000e00,
                    0.00000000e00, 1.00000000e00, -2.42798319e-19, 0.00000000e00,
                ],
                [
                    1.11706815e-16, -2.26567708e-16, 7.87010445e-18, -3.27553310e-16,
                    -1.24094525e-18, -2.42798319e-19, 1.00000000e00, 2.12964221e-01,
                ],
                [
                    -1.12935670e-16, -3.55861251e-16, 0.00000000e00, 2.20057668e-16,
                    0.00000000e00, 0.00000000e00, 2.12964221e-01, 1.00000000e00,
                ],
            ]
        )

        assert hasattr(visualizer, "ranks_")
        n_features = X.shape[1]
        assert visualizer.ranks_.shape == (n_features, n_features)
        npt.assert_array_almost_equal(visualizer.ranks_, expected)

        # Image similarity comparison
        visualizer.finalize()
        # Loose tolerance: Travis Python 3.6 images were not close (RMS 0.112)
        self.assert_images_similar(visualizer, tol=0.5)
示例#19
0
    def test_rank2d_covariance(self):
        """
        Check Rank2D with the covariance algorithm: the transform returns its
        input, the ranks match the reference matrix, and the image is similar
        """
        dataset = load_energy(return_dataset=True)
        X, _ = dataset.to_numpy()

        visualizer = Rank2D(algorithm="covariance")
        transformed = visualizer.fit_transform(X)

        # fit_transform must hand the input back unchanged
        npt.assert_array_equal(transformed, X)

        # Reference pairwise ranks; each row of the matrix is written as two
        # source lines of four values
        expected = np.array(
            [
                [
                    1.11888744e-02, -9.24206867e00, -9.40391134e-01, -4.15083877e00,
                    1.53324641e-01, 0.00000000e00, 1.57414282e-18, -1.85278419e-17,
                ],
                [
                    -9.24206867e00, 7.75916384e03, 7.51290743e02, 3.50393655e03,
                    -1.32370274e02, 0.00000000e00, -2.65874531e-15, -4.86170571e-14,
                ],
                [
                    -9.40391134e-01, 7.51290743e02, 1.90326988e03, -5.75989570e02,
                    2.14654498e01, 0.00000000e00, 4.57406096e-17, 0.00000000e00,
                ],
                [
                    -4.15083877e00, 3.50393655e03, -5.75989570e02, 2.03996306e03,
                    -7.69178618e01, 0.00000000e00, -1.97089918e-15, 1.54151644e-14,
                ],
                [
                    1.53324641e-01, -1.32370274e02, 2.14654498e01, -7.69178618e01,
                    3.06649283e00, 0.00000000e00, -2.89497529e-19, 0.00000000e00,
                ],
                [
                    0.00000000e00, 0.00000000e00, 0.00000000e00, 0.00000000e00,
                    0.00000000e00, 1.25162973e00, -3.61871912e-20, 0.00000000e00,
                ],
                [
                    1.57414282e-18, -2.65874531e-15, 4.57406096e-17, -1.97089918e-15,
                    -2.89497529e-19, -3.61871912e-20, 1.77477184e-02, 4.40026076e-02,
                ],
                [
                    -1.85278419e-17, -4.86170571e-14, 0.00000000e00, 1.54151644e-14,
                    0.00000000e00, 0.00000000e00, 4.40026076e-02, 2.40547588e00,
                ],
            ]
        )

        assert hasattr(visualizer, "ranks_")
        n_features = X.shape[1]
        assert visualizer.ranks_.shape == (n_features, n_features)
        # Compared to five decimal places rather than the default
        npt.assert_array_almost_equal(visualizer.ranks_, expected, decimal=5)

        # Image similarity comparison
        visualizer.finalize()
        self.assert_images_similar(visualizer, tol=0.1)