def fit(self, y_train, priors=(0.5, 0.5), ε=1e-6, n_epoch=100, **kwargs):
    self.θ = priors
    for idx_epoch in range(n_epoch):
        # Stop as soon as one iteration changes the estimate by less than ε
        if self.single_iter(y_train, **kwargs) < ε:
            return idx_epoch
    # ConvergenceWarning is a Warning subclass: warn (not raise) so the
    # epoch count below is still returned; assumes `import warnings`
    warnings.warn('Did not converge within {} epochs'.format(n_epoch),
                  ConvergenceWarning)
    return n_epoch
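Because ConvergenceWarning derives from Warning, a caller controls it with the standard warnings filters rather than a bare try/except. A minimal sketch of promoting the warning above to an error, where `model` and `y_train` are placeholder names for an instance of this class and its training data:

import warnings
from sklearn.exceptions import ConvergenceWarning

with warnings.catch_warnings():
    # Turn convergence warnings into exceptions inside this block only
    warnings.simplefilter('error', ConvergenceWarning)
    try:
        model.fit(y_train)  # `model`/`y_train` are placeholders
    except ConvergenceWarning as exc:
        print('fit did not converge:', exc)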
Example #2
    def fit(self, X, y, sample_weight=None, zero_coef_alert=True):
        """Fit non-negative linear model.
        Parameters
        ----------
        X : numpy array or sparse matrix of shape [n_samples, n_features]
            Training data
        y : numpy array of shape [n_samples,]
            Target values
        sample_weight : numpy array of shape [n_samples]
            Individual weights for each sample
        Returns
        -------
        self : returns an instance of self.
        """
        X, y = check_X_y(X, y, y_numeric=True, multi_output=False)

        if sample_weight is not None and np.atleast_1d(sample_weight).ndim > 1:
            raise ValueError("Sample weights must be 1D array or scalar")

        X, y, X_offset, y_offset, X_scale = self._preprocess_data(
            X,
            y,
            fit_intercept=self.fit_intercept,
            normalize=self.normalize,
            copy=self.copy_X,
            sample_weight=sample_weight)

        if sample_weight is not None:
            # Sample weight can be implemented via a simple rescaling.
            X, y = _rescale_data(X, y, sample_weight)

        self.coef_, result = nnls(X, y.squeeze())

        if np.all(self.coef_ == 0) and zero_coef_alert:
            # Warn rather than raise: ConvergenceWarning is a Warning, and
            # the intercept should still be set below
            warnings.warn("All coefficients estimated to be zero in"
                          " the non-negative least squares fit.",
                          ConvergenceWarning)

        self._set_intercept(X_offset, y_offset, X_scale)
        self.opt_result_ = OptimizeResult(success=True,
                                          status=0,
                                          x=self.coef_,
                                          fun=result)
        return self
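The solver behind this fit is scipy.optimize.nnls, which minimizes ||Ax - b||_2 subject to x >= 0 and also returns the residual norm. A self-contained sketch of that call on a toy system:

import numpy as np
from scipy.optimize import nnls

A = np.array([[1.0, 0.0],
              [1.0, 1.0],
              [0.0, 1.0]])
b = np.array([1.0, 2.0, 1.5])

coef, residual = nnls(A, b)
print(coef)      # every entry is >= 0 by construction
print(residual)  # Euclidean norm of A @ coef - b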
Example #3
    def _partial_fit(self, currentItem, X, topK):
        # Ignore convergence warnings triggered by the capped max_iter below;
        # assumes `import warnings` at module level
        warnings.simplefilter('ignore', ConvergenceWarning)
        model = ElasticNet(alpha=1.0,
                           l1_ratio=self.l1_ratio,
                           positive=self.positive_only,
                           fit_intercept=False,
                           copy_X=False,
                           precompute=True,
                           selection='random',
                           max_iter=100,
                           tol=1e-4)

        # WARNING: make a copy of X to avoid race conditions on column j
        # TODO: We can probably come up with something better here.
        X_j = X.copy()
        # get the target column
        y = X_j[:, currentItem].toarray()
        # set the j-th column of X to zero
        X_j.data[X_j.indptr[currentItem]:X_j.indptr[currentItem + 1]] = 0.0
        # fit one ElasticNet model per column
        model.fit(X_j, y)
        # self.model.coef_ contains the coefficient of the ElasticNet model
        # let's keep only the non-zero values
        # nnz_idx = model.coef_ > 0.0

        relevant_items_partition = (-model.coef_).argpartition(topK)[0:topK]
        relevant_items_partition_sorting = np.argsort(
            -model.coef_[relevant_items_partition])
        ranking = relevant_items_partition[relevant_items_partition_sorting]

        notZerosMask = model.coef_[ranking] > 0.0
        ranking = ranking[notZerosMask]

        values = model.coef_[ranking]
        rows = ranking
        cols = [currentItem] * len(ranking)

        return values, rows, cols
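Each call returns COO-style triplets for one item column; a driver loop would typically stack the triplets from all items into a sparse item-item weight matrix. A minimal sketch, assuming a `partial_fit(item, X, topK)` callable with the signature above and a sparse input matrix:

import scipy.sparse as sps

def build_similarity(X, partial_fit, topK=100):
    # Collect (value, row, col) triplets for every item column
    values, rows, cols = [], [], []
    for item in range(X.shape[1]):
        v, r, c = partial_fit(item, X, topK)
        values.extend(v)
        rows.extend(r)
        cols.extend(c)
    # Assemble the item-item weight matrix W
    return sps.csr_matrix((values, (rows, cols)),
                          shape=(X.shape[1], X.shape[1]))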
Example #4
    def _final_centroids(self, Xs, centroids):
        r'''
        Computes the final cluster centroids based on consensus samples across
        both views. Consensus samples are those that are assigned to the same
        partition in both views.

        Parameters
        ----------
        Xs : list of array-likes or numpy.ndarray
            - Xs length: n_views
            - Xs[i] shape: (n_samples, n_features_i)

            This list must be of size 2, corresponding to the two views of
            the data. The two views can each have a different number of
            features, but they must have the same number of samples.

        centroids : list of array-likes
            - centroids length: n_views
            - centroids[i] shape: (n_clusters, n_features_i)

            The cluster centroids for each of the two views. centroids[0]
            corresponds to the centroids of view 1 and centroids[1] corresponds
            to the centroids of view 2. These are not yet the final cluster
            centroids.
        '''

        # Compute consensus vectors for final clustering
        v1_consensus = list()
        v2_consensus = list()

        # The nearest-centroid assignments are the same for every cluster
        # index, so compute the distances and partitions once per view
        v1_distances = self._compute_dist(Xs[0], centroids[0])
        v1_partitions = np.argmin(v1_distances, axis=1).flatten()
        v2_distances = self._compute_dist(Xs[1], centroids[1])
        v2_partitions = np.argmin(v2_distances, axis=1).flatten()

        for clust in range(self.n_clusters):

            # Find data points in the same partition in both views
            part_indices = (v1_partitions == clust) & (v2_partitions == clust)

            # Recompute centroids based on these data points
            if (np.sum(part_indices) != 0):
                cent1 = np.mean(Xs[0][part_indices], axis=0)
                v1_consensus.append(cent1)

                cent2 = np.mean(Xs[1][part_indices], axis=0)
                v2_consensus.append(cent2)

        # Check if there are no consensus vectors
        self.centroids_ = [None, None]
        if (len(v1_consensus) == 0):
            msg = 'No distinct cluster centroids have been found.'
            raise ConvergenceWarning(msg)
        else:
            self.centroids_[0] = np.vstack(v1_consensus)
            self.centroids_[1] = np.vstack(v2_consensus)

            # Check if the number of consensus clusters is less than n_clusters
            if self.centroids_[0].shape[0] < self.n_clusters:
                msg = ('Number of distinct cluster centroids ({}) found is '
                       'smaller than n_clusters ({}).'.format(
                           self.centroids_[0].shape[0], self.n_clusters))
                # Warn instead of raising so n_clusters is updated below
                warnings.warn(msg, ConvergenceWarning)

            # Updates k if number of consensus clusters less than original
            # n_clusters value
            self.n_clusters = self.centroids_[0].shape[0]
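The consensus mask keeps only samples whose nearest centroid agrees across both views. A toy illustration with made-up partition labels:

import numpy as np

v1_partitions = np.array([0, 1, 1, 2, 0])
v2_partitions = np.array([0, 1, 2, 2, 1])

for clust in range(3):
    # True only where both views assign the sample to cluster `clust`
    mask = (v1_partitions == clust) & (v2_partitions == clust)
    print(clust, np.flatnonzero(mask))
# -> cluster 0 keeps sample 0, cluster 1 sample 1, cluster 2 sample 3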
Example #5
import logging
import warnings
from copy import deepcopy
from typing import Callable, Tuple

import numpy as np
from sklearn.exceptions import ConvergenceWarning
from sklearn.linear_model import LogisticRegression

from pandas_ml_common import Typing
from pandas_ml_utils.ml.data.extraction import FeaturesAndLabels
from pandas_ml_utils.ml.summary import Summary
from .base_model import Model

_log = logging.getLogger(__name__)
# Silence sklearn convergence warnings for the whole module
warnings.simplefilter('ignore', ConvergenceWarning)


class SkModel(Model):
    def __init__(self,
                 skit_model,
                 features_and_labels: FeaturesAndLabels,
                 summary_provider: Callable[[Typing.PatchedDataFrame],
                                            Summary] = Summary,
                 **kwargs):
        super().__init__(features_and_labels, summary_provider, **kwargs)
        self.skit_model = skit_model
        self.label_shape = None

    def fit_fold(self, fold_nr: int, x: np.ndarray, y: np.ndarray,
                 x_val: np.ndarray, y_val: np.ndarray,