Example #1
    def score(self, X, y, sample_weight=None):
        """
        Returns the mean accuracy on the given test data and labels.

        Parameters
        ----------
        X : array, shape (n_samples, n_features)
            Test samples.
        y : array, shape (n_samples,)
            True labels for X.
        sample_weight : array, shape (n_samples,), optional
            Sample weights.

        Returns
        -------
        score : float
            Mean accuracy of self.predict(X) w.r.t. y.
        """
        # Check input arrays
        X = check_array(X, ndim=2, dtype='numeric', force_all_finite=True)
        y = check_array(np.asarray(y).astype(int), ndim=1, dtype='numeric',
                        force_all_finite=True)

        # Check input arrays are same length
        if X.shape[0] != y.size:
            raise ValueError("Number of samples in X and y must be the same: "
                             "{} vs {}".format(X.shape[0], y.size))

        return accuracy_score(y, self.predict(X), sample_weight=sample_weight)
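
A minimal usage sketch for score(); the class name PLDA and its constructor
signature are illustrative assumptions, not confirmed by the source:

    import numpy as np

    clf = PLDA(n_components=2)                  # hypothetical class / signature
    X_train = np.random.randn(100, 5)
    y_train = np.random.randint(0, 2, size=100)
    clf.fit(X_train, y_train)

    # Mean accuracy of clf.predict(X_train) w.r.t. y_train.
    print(clf.score(X_train, y_train))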
Example #2
    def transform(self, X, Y=None):
        """
        Apply the dimension reduction learned on the train data.

        Parameters
        ----------
        X : array, shape (n_samples, n_features)
            Input X data.
        Y : array, shape (n_samples, n_targets) or None (default=None)
            Input Y data. If Y=None, then only the transformed X data are
            returned.

        Returns
        -------
        X_new : array, shape (n_samples, n_components)
            Transformed X data.
        Y_new : array, shape (n_samples, n_components)
            Transformed Y data. If Y=None, only X_new is returned.
        """
        self._check_is_fitted()

        X = check_array(X, ndim=2, dtype='numeric', force_all_finite=True)

        if Y is None:
            return self.cca.transform(X, Y=None, copy=True)
        else:
            Y = check_array(Y, ndim=2, dtype='numeric', force_all_finite=True)
            X_new, Y_new = self.cca.transform(X, Y=Y, copy=True)

            # If n_components=1, reshape Y_new so it is 2D
            if self.n_components_ == 1:
                n_samples = Y_new.shape[0]
                Y_new = Y_new.reshape((n_samples, 1))
            return X_new, Y_new
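
Both call patterns, sketched under the assumption that `model` is an
already-fitted instance of the enclosing (unnamed) wrapper class:

    # With Y: both data blocks are projected onto the canonical components.
    X_new, Y_new = model.transform(X, Y)    # each (n_samples, n_components)

    # Without Y: only the transformed X block is returned.
    X_new = model.transform(X)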
Example #3
    def inverse_transform(self, X, Y=None):
        """
        Transform data back to its original space.

        Note: This is not exact!

        Parameters
        ----------
        X : array, shape (n_samples, n_components)
            Transformed X data.
        Y : array, shape (n_samples, n_components) or None (default=None)
            Transformed Y data. If Y=None, only the X data are transformed back
            to the original space.

        Returns
        -------
        X_original : array, shape (n_samples, n_features)
            X data transformed back into original space.
        Y_original : array, shape (n_samples, n_targets)
            Y data transformed back into original space. If Y=None, only
            X_original is returned.
        """
        self._check_is_fitted()

        # Check X is in transformed space
        X = check_array(X, ndim=2, dtype='numeric', force_all_finite=True)
        if X.shape[1] != self.n_components_:
            raise ValueError("X has {} features per sample."
                             "Expecting {}".format(X.shape[1],
                                                   self.n_components_))

        # Invert X into original space
        X_original = np.dot(X, self.components_) + self.mean_

        if Y is None:
            return X_original
        else:
            # Check Y is in transformed space
            Y = check_array(Y, ndim=2, dtype='numeric', force_all_finite=True)
            if Y.shape[1] != self.n_components_:
                raise ValueError("Y has {} features per sample."
                                 "Expecting {}".format(Y.shape[1],
                                                       self.n_components_))

            # Invert Y into original space
            Y_original = np.dot(Y, self.components_y_) + self.mean_y_

            return X_original, Y_original
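
A round-trip sketch illustrating the "not exact" caveat; `model` is an assumed
fitted instance, and the relative norm is just one way to measure the loss:

    import numpy as np

    X_new, Y_new = model.transform(X, Y)
    X_approx, Y_approx = model.inverse_transform(X_new, Y_new)

    # Nonzero whenever n_components < n_features (information was discarded).
    rel_err = np.linalg.norm(X - X_approx) / np.linalg.norm(X)
    print(rel_err)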
Example #4
    def fit(self, X, y):
        """
        Fit PLDA model according to the given training data and parameters.

        Parameters
        ----------
        X : array, shape (n_samples, n_features)
            Training data.
        y : array, shape (n_samples,)
            Target values.

        Returns
        -------
        self : object
            Returns the fitted estimator.
        """
        # Check input arrays
        X = check_array(X, ndim=2, dtype='numeric', force_all_finite=True)
        y = check_array(np.asarray(y).astype(int), ndim=1, dtype='numeric',
                        force_all_finite=True)

        # Check input arrays are same length
        if X.shape[0] != y.size:
            raise ValueError("Number of samples in X and y must be the same: "
                             "{} vs {}".format(X.shape[0], y.size))

        # Check that n_components does not exceed maximum possible
        max_components = min(X.shape)
        if self.n_components_ is None:
            self.n_components_ = max_components
        elif self.n_components_ > max_components:
            self.n_components_ = max_components
            warnings.warn("n_components exceeds maximum possible components. "
                          "Setting n_components = {}".format(max_components))

        # Set useful data attributes
        self.classes_ = np.unique(y)
        self.mean_ = X.mean(axis=0)
        self.class_means_ = self._class_means(X, y)

        self._solve_eigen(X, y)

        # Adjust coefficients and intercept for binary classification problems
        if self.classes_.size == 2:
            self.coef_ = np.array(self.coef_[1, :] - self.coef_[0, :], ndmin=2)
            self.intercept_ = np.array(self.intercept_[1] - self.intercept_[0],
                                       ndmin=1)

        # Transform data so we can get the explained variance
        self.explained_variance_ = np.dot(X, self.components_.T).var(axis=0)

        self.is_fitted = True
        return self
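
A sketch of the clamping behaviour: when n_components exceeds
min(n_samples, n_features), fit() caps it and warns rather than raising
(the class name PLDA is an assumption):

    import numpy as np

    clf = PLDA(n_components=50)                 # hypothetical class / signature
    clf.fit(np.random.randn(20, 5), np.random.randint(0, 3, size=20))

    # A warning was emitted and n_components_ was clamped:
    print(clf.n_components_)                    # 5, i.e. min(20, 5)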
Example #5
    def decision_function(self, X):
        """
        Predict confidence scores for samples.

        The confidence score for a sample is the signed distance of that
        sample to the hyperplane.

        Parameters
        ----------
        X : array, shape (n_samples, n_features)
            Input data.

        Returns
        -------
        scores : array, shape (n_samples,) if n_classes == 2,
                 else (n_samples, n_classes)
            Confidence scores per (sample, class) combination. In the binary
            case, confidence score for self.classes_[1] where >0 means this
            class would be predicted.
        """
        self._check_is_fitted()

        # Check input array
        X = check_array(X, ndim=2, dtype='numeric', force_all_finite=True)

        # Check number of features is correct
        n_feat = self.components_.shape[1]
        if X.shape[1] != n_feat:
            raise ValueError("X has {} features per sample."
                             "Expecting {}".format(X.shape[1], n_feat))

        scores = np.dot(X, self.coef_.T) + self.intercept_
        return scores.ravel() if scores.shape[1] == 1 else scores
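
A sketch of how the returned shape drives prediction in the binary versus
multiclass case; `clf` is an assumed fitted instance:

    scores = clf.decision_function(X_test)

    if scores.ndim == 1:
        # Binary: shape (n_samples,); scores > 0 selects classes_[1].
        y_pred = clf.classes_[(scores > 0).astype(int)]
    else:
        # Multiclass: shape (n_samples, n_classes); take the argmax.
        y_pred = clf.classes_[scores.argmax(axis=1)]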
Example #6
    def inverse_transform(self, X):
        """
        Transform data back to its original space.

        Note: If n_components is less than the maximum, information will be
        lost, so reconstructed data will not exactly match the original data.

        Parameters
        ----------
        X : array, shape (n_samples, n_components)
            Data in the transformed space.

        Returns
        -------
        X_original : array, shape (n_samples, n_features)
            Data transformed back into original space.
        """
        self._check_is_fitted()

        X = check_array(X, ndim=2)

        # Check data dimensions
        if X.shape[1] != self.n_components_:
            raise ValueError("X has {} features per sample."
                             "Expecting {}".format(X.shape[1],
                             self.n_components_))

        # Inverse transform
        X_original = self.mean_ + np.dot(X, self.components_)

        return X_original
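
The body is a single affine map; the same reconstruction written as one
expression (`model` is an assumed fitted instance):

    # Undo the projection, then re-add the training mean.
    X_original = np.dot(X_new, model.components_) + model.mean_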
Example #7
    def transform(self, X):
        """
        Transform data.

        Parameters
        ----------
        X : array, shape (n_samples, n_features)
            Input data.

        Returns
        -------
        X_new : array, shape (n_samples, n_components)
            Transformed data.
        """
        self._check_is_fitted()

        # Check input arrays
        X = check_array(X, ndim=2, dtype='numeric', force_all_finite=True)

        # Check number of features is correct
        n_feat = self.components_.shape[1]
        if X.shape[1] != n_feat:
            raise ValueError("X has {} features per sample."
                             "Expecting {}".format(X.shape[1], n_feat))

        # Transform data
        X_new = np.dot(X, self.components_.T)

        return X_new[:, :self.n_components_]
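
The same projection written as one expression (`model` assumed fitted):

    X_new = model.transform(X)              # shape (n_samples, n_components)

    # Equivalent: project onto the component rows, keep the leading columns.
    X_manual = np.dot(X, model.components_.T)[:, :model.n_components_]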
Example #8
    def fit(self, X, Y):
        """
        Fit model to data.

        Parameters
        ----------
        X : array, shape (n_samples, n_features)
            Training vectors, where n_samples is the number of samples and
            n_features is the number of predictors.
        Y : array, shape (n_samples, n_targets)
            Target vectors, where n_samples is the number of samples and
            n_targets is the number of response variables.

        Returns
        -------
        self : object
            Returns the fitted estimator.
        """
        X = check_array(X, ndim=2, dtype='numeric', force_all_finite=True)
        Y = check_array(Y, ndim=2, dtype='numeric', force_all_finite=True)

        if X.shape[0] != Y.shape[0]:
            raise ValueError("Number of samples in X and Y must be the same: "
                             "{} vs {}".format(X.shape[0], Y.shape[0]))

        if self.n_components_ > X.shape[1]:
            raise ValueError("n_components exceeds number of features in X: "
                             "{} > {}".format(self.n_components_, X.shape[1]))

        if self.n_components_ > Y.shape[1]:
            raise ValueError("n_components exceeds number of targets in Y: "
                             "{} > {}".format(self.n_components_, Y.shape[1]))

        self.cca.fit(X, Y)

        self.components_ = self.cca.x_weights_.T
        self.components_y_ = self.cca.y_weights_.T
        self.mean_ = self.cca.x_mean_
        self.mean_y_ = self.cca.y_mean_

        # Get the explained variance of the transformed data
        self.explained_variance_ = self.cca.x_scores_.var(axis=0)
        self.explained_variance_y_ = self.cca.y_scores_.var(axis=0)

        self.is_fitted = True
        return self
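
A minimal fit sketch; the wrapper class name CCAWrapper and its constructor
signature are assumptions for illustration:

    import numpy as np

    model = CCAWrapper(n_components=2)          # hypothetical class / signature
    X = np.random.randn(50, 6)
    Y = np.random.randn(50, 3)
    model.fit(X, Y)

    # Per-component variance of the training scores in each block.
    print(model.explained_variance_, model.explained_variance_y_)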
Example #9
    def predict_transformed(self, X_trans):
        """
        Predict class labels for data that have already been transformed by
        self.transform(X).

        This is useful for plotting classification boundaries.
        Note: Due to arithmetic discrepancies, this may return slightly
        different class labels from self.predict(X).

        Parameters
        ----------
        X_trans : array, shape (n_samples, n_components)
            Test samples that have already been transformed into PLDA space.

        Returns
        -------
        y : array, shape (n_samples,)
            Predicted class labels for X_trans.
        """
        self._check_is_fitted()

        # Check input array
        X_trans = check_array(X_trans, ndim=2)

        # Make sure this is a set of transformed data
        if X_trans.shape[1] != self.n_components_:
            raise ValueError("Number of features in X_trans must match "
                             "n_components: {}".format(self.n_components_))

        # Transform class means into PLDA space
        mean_trans = self.transform(self.class_means_)

        # Initialize useful values
        n_samples = X_trans.shape[0]
        n_classes = mean_trans.shape[0]
        dists = np.zeros((n_samples, n_classes))

        # Compute the distance between each data sample and each class mean
        for i, mean in enumerate(mean_trans):
            dists[:, i] = np.linalg.norm(X_trans - mean, axis=1)

        # Classification based on shortest distance to class mean
        ind = dists.argmin(axis=1)

        return self.classes_[ind]
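
Since classification is by nearest transformed class mean, the method pairs
naturally with transform(); a short sketch (`clf` assumed fitted):

    X_trans = clf.transform(X_test)

    # May differ slightly from clf.predict(X_test) due to floating-point
    # arithmetic, as noted in the docstring.
    y_pred = clf.predict_transformed(X_trans)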