def test_clean_data_X_only():
    """Rows of X holding any nan are dropped when no y is supplied."""
    features = np.array([
        [1, 2, np.nan],
        [4, 5, 6],
        [np.nan, np.nan, np.nan],
    ])

    cleaned = filter_missing(features)

    # Only the middle row is completely free of nans.
    survivors = np.array([[4, 5, 6]])
    np.testing.assert_array_equal(survivors, cleaned)
def test_clean_data_X_only_no_nans():
    """A nan-free X passes through filter_missing unchanged."""
    rows = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]
    X = np.array(rows)

    # Nothing is missing, so the result must equal the input.
    np.testing.assert_array_equal(X, filter_missing(X))
def test_clean_data_X_only_no_nans():
    """Check that filtering leaves a fully populated array as it was."""
    complete = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])

    result = filter_missing(complete)

    np.testing.assert_array_equal(complete, result)
def test_clean_data_clean_X_dirty_y():
    """Rows are dropped from both X and y wherever y holds a nan."""
    features = np.array([
        [1, 2, 3],
        [4, 5, 6],
        [7, 8, 9],
        [10, 11, 12],
    ])
    targets = np.array([np.nan, 44, np.nan, 66])

    kept_X, kept_y = filter_missing(features, targets)

    # Positions 0 and 2 of y are nan, so only rows 1 and 3 survive.
    np.testing.assert_array_equal(np.array([[4, 5, 6], [10, 11, 12]]), kept_X)
    np.testing.assert_array_equal(np.array([44, 66]), kept_y)
def test_clean_data_dirty_X_clean_y():
    """Rows are dropped from both X and y wherever X holds a nan."""
    features = np.array([
        [1, 2, 3],
        [4, 5, 6],
        [7, 8, np.nan],
        [np.nan, np.nan, np.nan],
    ])
    targets = np.array([33, 44, 55, 66])

    kept_X, kept_y = filter_missing(features, targets)

    # The last two rows of X contain nans, so only the first two remain.
    np.testing.assert_array_equal(np.array([[1, 2, 3], [4, 5, 6]]), kept_X)
    np.testing.assert_array_equal(np.array([33, 44]), kept_y)
def test_clean_data_X_only():
    """Without y, filter_missing keeps only the nan-free rows of X."""
    nan = np.nan
    data = np.array([[1, 2, nan], [4, 5, 6], [nan, nan, nan]])

    wanted = np.array([[4, 5, 6]])
    got = filter_missing(data)

    np.testing.assert_array_equal(wanted, got)
def test_clean_data_dirty_X_dirty_y():
    """Rows are dropped from X and y when either one holds a nan there."""
    nan = np.nan
    features = np.array([[1, 2, 3], [4, 5, 6], [7, 8, nan], [nan, nan, nan]])
    targets = np.array([33, nan, 44, nan])

    kept_X, kept_y = filter_missing(features, targets)

    # Row 0 is the only position that is clean in both X and y.
    np.testing.assert_array_equal(np.array([[1, 2, 3]]), kept_X)
    np.testing.assert_array_equal(np.array([33]), kept_y)
def test_clean_data_clean_X_dirty_y():
    """A nan in y removes the matching row from both X and y."""
    data = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12]])
    labels = np.array([np.nan, 44, np.nan, 66])

    wanted_X = np.array([[4, 5, 6], [10, 11, 12]])
    wanted_y = np.array([44, 66])

    got_X, got_y = filter_missing(data, labels)

    np.testing.assert_array_equal(wanted_X, got_X)
    np.testing.assert_array_equal(wanted_y, got_y)
示例#9
0
    def draw(self, X, y, **kwargs):
        """
        Render the RadViz scatter plot for ``X`` and ``y`` on ``self.ax``.

        One anchor per feature column is placed at equal angles around the
        unit circle. Every row of the normalized data becomes a point located
        at the average of those anchors weighted by the row's values, and the
        points are grouped into one scatter series per entry of
        ``self.classes_``.

        Parameters
        ----------
        X : two-dimensional array or dataframe
            Instances to plot; a dataframe is reduced to ``X.values``.

        y : indexable
            Target of each row of ``X``; ``y[i]`` is looked up in
            ``self._label_encoder`` to obtain the row's class label.

        kwargs : dict
            Extra keyword arguments forwarded to every ``ax.scatter`` call.

        Returns
        -------
        ax : the axes stored on ``self.ax``
        """
        # Work on the underlying array when handed a dataframe
        if is_dataframe(X):
            X = X.values

        # Warn about missing values, then filter them out of X and y
        nan_warnings.warn_if_nans_exist(X)
        X, y = nan_warnings.filter_missing(X, y)

        # Only the column count is needed; the two-value unpack still
        # requires X to be two-dimensional
        _, ncols = X.shape

        # Fix both axes to the square that bounds the unit circle
        self.ax.set_xlim([-1, 1])
        self.ax.set_ylim([-1, 1])

        # One pair of coordinate lists [xs, ys] per class label
        to_plot = {}
        for label in self.classes_:
            to_plot[label] = [[], []]

        # Place one anchor per feature, evenly spaced around the circle
        # TODO: make this an independent function for override
        angles = [2.0 * np.pi * (i / float(ncols)) for i in range(ncols)]
        anchors = np.array([(np.cos(t), np.sin(t)) for t in angles])

        # Each normalized row pulls its point toward the anchors in
        # proportion to the row's feature values
        for idx, values in enumerate(self.normalize(X)):
            weights = np.repeat(np.expand_dims(values, axis=1), 2, axis=1)
            point = (anchors * weights).sum(axis=0) / values.sum()
            xs, ys = to_plot[self._label_encoder[y[idx]]]

            xs.append(point[0])
            ys.append(point[1])

        # Draw one scatter series per class
        # TODO: store these plots to add more instances to later
        # TODO: make this a separate function
        for label in self.classes_:
            xs, ys = to_plot[label]
            self.ax.scatter(
                xs,
                ys,
                color=self.get_colors([label])[0],
                label=label,
                alpha=self.alpha,
                **kwargs
            )

        # Outline the unit circle that the anchors sit on
        # TODO: Make this a separate function (along with labeling)
        boundary = patches.Circle(
            (0.0, 0.0),
            radius=1.0,
            facecolor="none",
            edgecolor="grey",
            linewidth=0.5,
        )
        self.ax.add_patch(boundary)

        # Mark every anchor and write its feature name next to it
        for xy, name in zip(anchors, self.features_):
            self.ax.add_patch(
                patches.Circle(xy, radius=0.025, facecolor="#777777"))

            # Push the text away from the circle: horizontally by the sign
            # of x and vertically by the sign of y
            left = xy[0] < 0.0
            below = xy[1] < 0.0
            self.ax.text(
                xy[0] - 0.025 if left else xy[0] + 0.025,
                xy[1] - 0.025 if below else xy[1] + 0.025,
                name,
                ha="right" if left else "left",
                va="top" if below else "bottom",
                size="small",
            )

        self.ax.axis("equal")
        return self.ax
示例#10
0
    def draw(self, X, y, **kwargs):
        """
        Draw the RadViz plot for ``X`` and ``y`` on ``self.ax``.

        Feature anchors are spread evenly around the unit circle. Each row of
        the normalized data is plotted at the average of the anchors weighted
        by the row's values, colored by its class. The class colors are
        resolved here and stored on ``self._colors``.

        Parameters
        ----------
        X : two-dimensional array or dataframe
            Instances to plot; a dataframe is reduced to ``X.values``.

        y : indexable
            Target of each row of ``X``; ``y[i]`` is used as an index into
            ``self.classes_`` to find the row's class.

        kwargs : dict
            Extra keyword arguments forwarded to every ``ax.scatter`` call.
        """
        # Use the raw values if a dataframe was passed in
        if is_dataframe(X):
            X = X.values

        # Warn about missing values, then filter them out of X and y
        nan_warnings.warn_if_nans_exist(X)
        X, y = nan_warnings.filter_missing(X, y)

        # The two-value unpack requires a two-dimensional X; only the
        # number of columns is used afterwards
        _, ncols = X.shape

        # Both axes span the square enclosing the unit circle
        self.ax.set_xlim([-1, 1])
        self.ax.set_ylim([-1, 1])

        # Resolve one color per class and remember the mapping
        # TODO: Allow both colormap, listed colors, and palette definition
        # TODO: Make this an independent function or property for override!
        color_values = resolve_colors(
            n_colors=len(self.classes_),
            colormap=self.colormap,
            colors=self.color,
        )
        self._colors = dict(zip(self.classes_, color_values))

        # Per-class buckets holding the x and y coordinates to scatter
        to_plot = {kls: [[], []] for kls in self.classes_}

        # Anchor positions: one per feature, equally spaced on the circle
        # TODO: make this an independent function for override
        thetas = [2.0 * np.pi * (i / float(ncols)) for i in range(ncols)]
        anchors = np.array([(np.cos(t), np.sin(t)) for t in thetas])

        # Position every normalized row as the weighted mean of the anchors
        for idx, values in enumerate(self.normalize(X)):
            weights = np.repeat(np.expand_dims(values, axis=1), 2, axis=1)
            point = (anchors * weights).sum(axis=0) / values.sum()
            bucket = to_plot[self.classes_[y[idx]]]

            bucket[0].append(point[0])
            bucket[1].append(point[1])

        # One scatter series per class, in the order of self.classes_
        # TODO: store these plots to add more instances to later
        # TODO: make this a separate function
        for kls in self.classes_:
            xs, ys = to_plot[kls]
            self.ax.scatter(
                xs,
                ys,
                color=self._colors[kls],
                label=str(kls),
                alpha=self.alpha,
                **kwargs
            )

        # Outline of the unit circle
        # TODO: Make this a separate function (along with labeling)
        outline = patches.Circle(
            (0.0, 0.0),
            radius=1.0,
            facecolor='none',
            edgecolor='grey',
            linewidth=.5,
        )
        self.ax.add_patch(outline)

        # Mark each anchor and label it with the feature name
        for xy, name in zip(anchors, self.features_):
            marker = patches.Circle(xy, radius=0.025, facecolor='#777777')
            self.ax.add_patch(marker)

            # Horizontal placement depends only on the sign of x
            if xy[0] < 0.0:
                text_x, halign = xy[0] - 0.025, 'right'
            else:
                text_x, halign = xy[0] + 0.025, 'left'

            # Vertical placement depends only on the sign of y
            if xy[1] < 0.0:
                text_y, valign = xy[1] - 0.025, 'top'
            else:
                text_y, valign = xy[1] + 0.025, 'bottom'

            self.ax.text(
                text_x, text_y, name, ha=halign, va=valign, size='small'
            )

        self.ax.axis('equal')
示例#11
0
    def draw(self, X, y, **kwargs):
        """
        Plot ``X`` and ``y`` as a RadViz chart on ``self.ax``.

        Every feature gets an anchor on the unit circle, at equal angular
        spacing. A row of the normalized data is drawn at the average of the
        anchors weighted by that row's values and is colored according to its
        class. The resolved class colors are kept on ``self._colors``.

        Parameters
        ----------
        X : two-dimensional array or dataframe
            Instances to plot; a dataframe is reduced to ``X.values``.

        y : indexable
            Target of each row of ``X``; ``y[i]`` is used as an index into
            ``self.classes_`` to find the row's class.

        kwargs : dict
            Extra keyword arguments forwarded to every ``ax.scatter`` call.
        """
        # Unwrap dataframes into their underlying values
        if is_dataframe(X):
            X = X.values

        # Warn about missing values, then filter them out of X and y
        nan_warnings.warn_if_nans_exist(X)
        X, y = nan_warnings.filter_missing(X, y)

        # Unpack the shape (requires X to be 2-D); only ncols is used below
        _, ncols = X.shape

        # Limit both axes to the square around the unit circle
        for set_limit in (self.ax.set_xlim, self.ax.set_ylim):
            set_limit([-1, 1])

        # Build and store the class -> color mapping
        # TODO: Allow both colormap, listed colors, and palette definition
        # TODO: Make this an independent function or property for override!
        color_values = resolve_colors(
            n_colors=len(self.classes_),
            colormap=self.colormap,
            colors=self.color,
        )
        self._colors = dict(zip(self.classes_, color_values))

        # Coordinates to scatter, grouped by class as [xs, ys]
        to_plot = dict((kls, [[], []]) for kls in self.classes_)

        # One anchor per feature, equally spaced around the circle
        # TODO: make this an independent function for override
        step = [i / float(ncols) for i in range(ncols)]
        anchors = np.array([
            (np.cos(2.0 * np.pi * frac), np.sin(2.0 * np.pi * frac))
            for frac in step
        ])

        # Locate each normalized row at the weighted mean of the anchors
        for idx, values in enumerate(self.normalize(X)):
            stacked = np.repeat(np.expand_dims(values, axis=1), 2, axis=1)
            px, py = (anchors * stacked).sum(axis=0) / values.sum()
            kls = self.classes_[y[idx]]

            to_plot[kls][0].append(px)
            to_plot[kls][1].append(py)

        # Scatter the points one class at a time
        # TODO: store these plots to add more instances to later
        # TODO: make this a separate function
        for kls in self.classes_:
            xs, ys = to_plot[kls]
            self.ax.scatter(
                xs, ys,
                color=self._colors[kls],
                label=str(kls),
                alpha=self.alpha,
                **kwargs
            )

        # Outline of the unit circle
        # TODO: Make this a separate function (along with labeling)
        self.ax.add_patch(
            patches.Circle(
                (0.0, 0.0),
                radius=1.0,
                facecolor='none',
                edgecolor='grey',
                linewidth=.5,
            )
        )

        # Mark the anchors and label them with the feature names
        for xy, name in zip(anchors, self.features_):
            self.ax.add_patch(
                patches.Circle(xy, radius=0.025, facecolor='#777777')
            )

            # Offset the text outward: left/right by the sign of x,
            # down/up by the sign of y
            on_left = xy[0] < 0.0
            below = xy[1] < 0.0
            text_x = xy[0] - 0.025 if on_left else xy[0] + 0.025
            text_y = xy[1] - 0.025 if below else xy[1] + 0.025
            self.ax.text(
                text_x,
                text_y,
                name,
                ha='right' if on_left else 'left',
                va='top' if below else 'bottom',
                size='small',
            )

        self.ax.axis('equal')