Exemplo n.º 1
0
    def fit(self, X, y, src_index, tgt_index,
            tgt_index_labeled=None, **fit_params):
        """
        Fit KLIEP.

        When several candidate values are given in ``self.sigmas``, the
        kernel parameter is selected by cross-validation over the target
        indexes (``self.cv`` folds): for each candidate the mean J-score
        over the folds is stored in ``j_scores_`` and the candidate with
        the highest score is kept as ``sigma_``. Source importance weights
        are then computed with ``sigma_`` and the estimator is fitted on
        the (re-weighted) source data.

        Parameters
        ----------
        X : numpy array
            Input data.

        y : numpy array
            Output data.

        src_index : iterable
            indexes of source labeled data in X, y.

        tgt_index : iterable
            indexes of target unlabeled data in X, y.

        tgt_index_labeled : iterable, optional (default=None)
            indexes of target labeled data in X, y. When given, these
            samples are appended to the source samples.

        fit_params : key, value arguments
            Arguments given to the fit method of the estimator
            (epochs, batch_size...).

        Returns
        -------
        self : returns an instance of self

        Notes
        -----
        Sets the attributes ``j_scores_``, ``sigma_``, ``alphas_``,
        ``centers_``, ``weights_`` and ``estimator_``.
        """
        check_indexes(src_index, tgt_index, tgt_index_labeled)

        if tgt_index_labeled is None:
            labeled_index = src_index
        else:
            labeled_index = np.concatenate((src_index, tgt_index_labeled))
        Xs = X[labeled_index]
        ys = y[labeled_index]
        Xt = X[tgt_index]

        self.j_scores_ = []

        if hasattr(self.sigmas, "__len__") and len(self.sigmas) > 1:
            # The folds only depend on tgt_index and cv, not on sigma:
            # build them once instead of once per candidate.
            split = len(tgt_index) // self.cv
            folds = []
            for i in range(self.cv):
                if i == self.cv - 1:
                    # The last fold takes the remainder of the indexes.
                    test_index = tgt_index[i * split:]
                else:
                    test_index = tgt_index[i * split:(i + 1) * split]
                train_index = np.array(
                    list(set(tgt_index) - set(test_index))
                )
                folds.append((train_index, test_index))

            for sigma in self.sigmas:
                j_scores = []
                for train_index, test_index in folds:
                    alphas, centers = self._fit(Xs, X[train_index], sigma)

                    # Mean log of the estimated density ratio on the
                    # held-out target samples.
                    j_score = (1 / len(test_index)) * np.sum(np.log(
                        np.dot(
                            np.transpose(alphas),
                            pairwise.rbf_kernel(centers,
                                                X[test_index],
                                                sigma)
                        )
                    ))
                    j_scores.append(j_score)
                # Average over ALL folds (not only the last fold's score).
                self.j_scores_.append(np.mean(j_scores))
            self.sigma_ = self.sigmas[np.argmax(self.j_scores_)]
        else:
            # ``sigmas`` is either a scalar or a one-element sequence.
            try:
                self.sigma_ = self.sigmas[0]
            except (TypeError, IndexError):
                self.sigma_ = self.sigmas

        self.alphas_, self.centers_ = self._fit(Xs, Xt, self.sigma_)

        self.weights_ = np.dot(
            np.transpose(self.alphas_),
            pairwise.rbf_kernel(self.centers_, Xs, self.sigma_)
        ).ravel()

        self.estimator_ = check_estimator(self.get_estimator, **self.kwargs)

        try:
            self.estimator_.fit(Xs, ys,
                                sample_weight=self.weights_,
                                **fit_params)
        except Exception:
            # Best-effort fallback when the weighted fit fails (e.g. the
            # estimator does not accept ``sample_weight``): emulate the
            # weighting with a weighted bootstrap of the training set.
            bootstrap_index = np.random.choice(
                len(Xs), size=len(Xs), replace=True,
                p=self.weights_ / self.weights_.sum())
            self.estimator_.fit(Xs[bootstrap_index], ys[bootstrap_index],
                                **fit_params)
        return self
Exemplo n.º 2
0
    def fit(self,
            X,
            y,
            src_index,
            tgt_index,
            tgt_index_labeled=None,
            sample_weight=None,
            **fit_params):
        """
        Perform correlation alignment on input source data to match
        input target data (given by ``tgt_index``).
        Then fit estimator on the aligned source data and the labeled
        target ones (given by ``tgt_index_labeled``).

        The source features are multiplied by the inverse square root of
        the regularized source covariance ``Cs_`` and then by the square
        root of the regularized target covariance ``Ct_``. Both
        covariances are regularized by adding ``self.lambdap`` times the
        identity matrix.

        Parameters
        ----------
        X : numpy array
            Input data.

        y : numpy array
            Output data.

        src_index : iterable
            indexes of source labeled data in X, y.

        tgt_index : iterable
            indexes of target unlabeled data in X, y.

        tgt_index_labeled : iterable, optional (default=None)
            indexes of target labeled data in X, y.

        sample_weight : numpy array, optional (default=None)
            Individual weights for each sample. Indexed with the same
            indexes as X and y.

        fit_params : key, value arguments
            Arguments given to the fit method of the estimator
            (epochs, batch_size...).

        Returns
        -------
        self : returns an instance of self
        """
        check_indexes(src_index, tgt_index, tgt_index_labeled)

        Xs = X[src_index]
        ys = y[src_index]
        Xt = X[tgt_index]

        self.estimator_ = check_estimator(self.get_estimator, **self.kwargs)

        self.Cs_ = np.cov(Xs,
                          rowvar=False) + self.lambdap * np.eye(Xs.shape[1])
        self.Ct_ = np.cov(Xt,
                          rowvar=False) + self.lambdap * np.eye(Xt.shape[1])

        # Whiten the source features, then re-color them with the target
        # covariance.
        Xs = np.matmul(Xs, linalg.inv(linalg.sqrtm(self.Cs_)))
        Xs = np.matmul(Xs, linalg.sqrtm(self.Ct_))

        # Labeled target samples (if any) are appended un-transformed.
        if tgt_index_labeled is None:
            X_train = Xs
            y_train = ys
        else:
            X_train = np.concatenate((Xs, X[tgt_index_labeled]))
            y_train = np.concatenate((ys, y[tgt_index_labeled]))

        if sample_weight is None:
            self.estimator_.fit(X_train, y_train, **fit_params)
        else:
            # Select the weights in the same order as the training rows.
            if tgt_index_labeled is None:
                train_weight = sample_weight[src_index]
            else:
                train_weight = np.concatenate(
                    (sample_weight[src_index],
                     sample_weight[tgt_index_labeled]))
            self.estimator_.fit(X_train,
                                y_train,
                                sample_weight=train_weight,
                                **fit_params)

        return self
Exemplo n.º 3
0
    def fit(self,
            X,
            y,
            src_index,
            tgt_index,
            fit_params_src=None,
            **fit_params_tgt):
        """
        Fit the regularized transfer linear classifier.

        A source linear classifier is obtained from ``get_estimator``
        (and fitted on the source data when ``self.fit_source`` is true).
        Target parameters are then found by minimizing the logistic loss
        on the target data plus ``self.lambdap`` times the squared
        distance to the source parameters, starting from the source
        parameters.

        Parameters
        ----------
        X : numpy array
            Input data.

        y : numpy array
            Output data. Binary {-1, 1}

        src_index : iterable
            indexes of source labeled data in X, y.

        tgt_index : iterable
            indexes of target data in X, y. The labels ``y[tgt_index]``
            are used in the target loss.

        fit_params_src : dict, optional (default=None)
            Arguments given to the fit method of the
            source estimator (epochs, batch_size...).
            If None, ``fit_params_src = fit_params_tgt``

        fit_params_tgt : key, value arguments
            Arguments given to the fit method of the
            target estimator (epochs, batch_size...).

        Returns
        -------
        self : returns an instance of self

        Raises
        ------
        ValueError
            If some selected values of ``y`` are not in {-1, 1}, or if
            ``get_estimator`` does not return a ``LogisticRegression``
            or ``RidgeClassifier`` instance.
        """
        if fit_params_src is None:
            fit_params_src = fit_params_tgt

        check_indexes(src_index, tgt_index)

        if not np.all(
                np.isin(y[np.concatenate((src_index, tgt_index))], [-1., 1.])):
            raise ValueError("y values should be in {-1, 1}")

        self.estimator_src_ = check_estimator(self.get_estimator,
                                              **self.kwargs)
        if not isinstance(self.estimator_src_,
                          (LogisticRegression, RidgeClassifier)):
            raise ValueError("'get_estimator' should return a"
                             " LogisticRegression or RidgeClassifier"
                             " instance.")
        if self.fit_source:
            self.estimator_src_.fit(X[src_index], y[src_index],
                                    **fit_params_src)

        if self.intercept:
            # Fold the intercept into the parameter vector by adding a
            # constant column of ones to the target inputs.
            beta_src = np.concatenate(
                (np.array([self.estimator_src_.intercept_[0]]),
                 self.estimator_src_.coef_[0]))
            Xt = np.concatenate((np.ones((len(tgt_index), 1)), X[tgt_index]),
                                axis=1)
        else:
            beta_src = self.estimator_src_.coef_[0]
            Xt = X[tgt_index]
        yt = y[tgt_index]

        def func(beta):
            # log(1 + exp(-m)) computed with logaddexp so that large
            # negative margins do not overflow in exp().
            margins = yt * Xt.dot(beta.reshape(-1, 1)).ravel()
            return (np.sum(np.logaddexp(0., -margins))
                    + self.lambdap * np.linalg.norm(beta - beta_src)**2)

        beta_tgt = minimize(func, beta_src)['x']

        if self.intercept:
            self.intercept_ = beta_tgt[0]
            self.coef_ = beta_tgt[1:]
        else:
            self.intercept_ = 0.
            self.coef_ = beta_tgt
        return self
Exemplo n.º 4
0
    def fit(self,
            X,
            y,
            src_index,
            tgt_index,
            fit_params_src=None,
            **fit_params_tgt):
        """
        Fit RegularTransferLR

        A source linear regressor is obtained from ``get_estimator``
        (and fitted on the source data when ``self.fit_source`` is true).
        Target parameters are then found by minimizing the squared error
        on the target data plus ``self.lambdap`` times the squared
        distance to the source parameters, starting from the source
        parameters.

        Parameters
        ----------
        X : numpy array
            Input data.

        y : numpy array
            Output data.

        src_index : iterable
            indexes of source labeled data in X, y.

        tgt_index : iterable
            indexes of target data in X, y. The values ``y[tgt_index]``
            are used in the target loss.

        fit_params_src : dict, optional (default=None)
            Arguments given to the fit method of the
            source estimator (epochs, batch_size...).
            If None, ``fit_params_src = fit_params_tgt``

        fit_params_tgt : key, value arguments
            Arguments given to the fit method of the
            target estimator (epochs, batch_size...).

        Returns
        -------
        self : returns an instance of self

        Raises
        ------
        ValueError
            If ``get_estimator`` does not return a ``LinearRegression``
            or ``Ridge`` instance.
        """
        if fit_params_src is None:
            fit_params_src = fit_params_tgt

        check_indexes(src_index, tgt_index)

        self.estimator_src_ = check_estimator(self.get_estimator,
                                              **self.kwargs)
        if not isinstance(self.estimator_src_, (LinearRegression, Ridge)):
            raise ValueError("'get_estimator' should return a"
                             " LinearRegression or Ridge instance.")
        if self.fit_source:
            self.estimator_src_.fit(X[src_index], y[src_index],
                                    **fit_params_src)

        if self.intercept:
            # Fold the intercept into the parameter vector by adding a
            # constant column of ones to the target inputs.
            beta_src = np.concatenate(
                (np.array([self.estimator_src_.intercept_]),
                 self.estimator_src_.coef_))
            Xt = np.concatenate((np.ones((len(tgt_index), 1)), X[tgt_index]),
                                axis=1)
        else:
            beta_src = self.estimator_src_.coef_
            Xt = X[tgt_index]
        # Flatten the targets so that the residual below is elementwise.
        # Subtracting a 1-D ``y`` from an (n, 1) prediction column would
        # broadcast to an (n, n) matrix and yield a wrong loss.
        yt = np.ravel(y[tgt_index])

        def func(beta):
            residual = Xt.dot(beta) - yt
            return (np.linalg.norm(residual)**2 +
                    self.lambdap * np.linalg.norm(beta - beta_src)**2)

        beta_tgt = minimize(func, beta_src)['x']

        if self.intercept:
            self.intercept_ = beta_tgt[0]
            self.coef_ = beta_tgt[1:]
        else:
            self.intercept_ = 0.
            self.coef_ = beta_tgt
        return self
Exemplo n.º 5
0
    def fit(self,
            X,
            y,
            src_index,
            tgt_index,
            tgt_index_labeled=None,
            fit_params_ae=None,
            **fit_params):
        """
        Fit mSDA.

        An autoencoder (Gaussian noise layer, encoder, decoder) is
        trained to reconstruct the source and target inputs. The
        estimator is then fitted on the encoded labeled inputs.

        Parameters
        ----------
        X : numpy array
            Input data.

        y : numpy array
            Output data.

        src_index : iterable
            indexes of source labeled data in X, y.

        tgt_index : iterable
            indexes of target unlabeled data in X, y.

        tgt_index_labeled : iterable, optional (default=None)
            indexes of target labeled data in X, y.

        fit_params_ae : dict, optional (default=None)
            Arguments given to the fit process of the autoencoder
            (epochs, batch_size...).
            If None, ``fit_params_ae = fit_params``

        fit_params : key, value arguments
            Arguments given to the fit method of the estimator
            (epochs, batch_size...).

        Returns
        -------
        self : returns an instance of self
        """
        check_indexes(src_index, tgt_index, tgt_index_labeled)

        ae_fit_kwargs = fit_params if fit_params_ae is None else fit_params_ae

        # Rows used for reconstruction (source + target) and rows used
        # for the supervised task (source + labeled target, if any).
        reconstruction_rows = np.concatenate((src_index, tgt_index))
        task_rows = (src_index if tgt_index_labeled is None else
                     np.concatenate((src_index, tgt_index_labeled)))

        sample_shape = X.shape[1:]

        self.encoder_ = check_network(self.get_encoder,
                                      "get_encoder",
                                      input_shape=sample_shape,
                                      **self.enc_params)
        self.decoder_ = check_network(
            self.get_decoder,
            "get_decoder",
            input_shape=self.encoder_.output_shape[1:],
            output_shape=sample_shape,
            **self.dec_params)
        self.estimator_ = check_estimator(self.get_estimator,
                                          **self.est_params)

        # Chain noise -> encoder -> decoder into a single model.
        ae_input = Input(sample_shape)
        ae_output = self.decoder_(
            self.encoder_(GaussianNoise(self.noise_lvl)(ae_input)))
        self.autoencoder_ = Model(ae_input, ae_output, name="AutoEncoder")

        # Work on a copy so the user-provided compile parameters are
        # left untouched; fill in the loss / optimizer if not given.
        compile_kwargs = copy.deepcopy(self.compil_params)
        compile_kwargs.setdefault("loss", "mean_squared_error")
        compile_kwargs.setdefault("optimizer", "adam")
        self.autoencoder_.compile(**compile_kwargs)

        reconstruction_data = X[reconstruction_rows]
        self.autoencoder_.fit(reconstruction_data, reconstruction_data,
                              **ae_fit_kwargs)

        encoded_task_inputs = self.encoder_.predict(X[task_rows])
        self.estimator_.fit(encoded_task_inputs, y[task_rows], **fit_params)
        return self
Exemplo n.º 6
0
    def fit(self,
            X,
            y,
            src_index,
            tgt_index,
            tgt_index_labeled=None,
            **fit_params):
        """
        Fit KMM.

        Source importance weights are obtained by minimizing a quadratic
        objective built from the source/source kernel matrix and the
        source/target kernel matrix, with every weight bounded in
        ``[0, self.B]`` and the sum of the weights constrained to
        ``n_s * (1 +/- epsilon)``. The estimator is then fitted on the
        (re-weighted) source data.

        Parameters
        ----------
        X : numpy array
            Input data.

        y : numpy array
            Output data.

        src_index : iterable
            indexes of source labeled data in X, y.

        tgt_index : iterable
            indexes of target unlabeled data in X, y.

        tgt_index_labeled : iterable, optional (default=None)
            indexes of target labeled data in X, y. When given, these
            samples are appended to the source samples.

        fit_params : key, value arguments
            Arguments given to the fit method of the estimator
            (epochs, batch_size...).

        Returns
        -------
        self : returns an instance of self

        Notes
        -----
        Sets the attributes ``weights_`` and ``estimator_``. If
        ``self.epsilon`` is None it is replaced in place by
        ``(sqrt(n_s) - 1) / sqrt(n_s)``.
        """
        check_indexes(src_index, tgt_index, tgt_index_labeled)

        if tgt_index_labeled is None:
            labeled_index = src_index
        else:
            labeled_index = np.concatenate((src_index, tgt_index_labeled))
        Xs = X[labeled_index]
        ys = y[labeled_index]
        Xt = X[tgt_index]

        n_s = len(Xs)
        n_t = len(Xt)

        # Get epsilon
        # NOTE(review): this overwrites the constructor parameter, so a
        # later fit with a different n_s reuses the first value. Kept
        # as-is because callers may read ``self.epsilon`` after fit.
        if self.epsilon is None:
            self.epsilon = (np.sqrt(n_s) - 1) / np.sqrt(n_s)

        # Compute Kernel Matrix (symmetrized)
        K = pairwise.pairwise_kernels(Xs,
                                      Xs,
                                      metric=self.kernel,
                                      **self.kernel_params)
        K = (1 / 2) * (K + K.transpose())

        # Compute q: scaled row sums of the source/target kernel, kept
        # 1-D so that the objective below returns a scalar.
        kappa = pairwise.pairwise_kernels(Xs,
                                          Xt,
                                          metric=self.kernel,
                                          **self.kernel_params)
        kappa = (n_s / n_t) * np.dot(kappa, np.ones(n_t))

        constraints = LinearConstraint(np.ones((1, n_s)),
                                       lb=n_s * (1 - self.epsilon),
                                       ub=n_s * (1 + self.epsilon))

        def func(x):
            return (1 / 2) * x @ (K @ x) - kappa @ x

        # ``x0`` must be one-dimensional: recent SciPy releases raise a
        # ValueError for a 2-D starting point.
        weights = minimize(func,
                           x0=np.ones(n_s),
                           bounds=[(0, self.B)] * n_s,
                           constraints=constraints)['x']

        self.weights_ = np.array(weights).ravel()

        self.estimator_ = check_estimator(self.get_estimator, **self.kwargs)

        try:
            self.estimator_.fit(Xs,
                                ys,
                                sample_weight=self.weights_,
                                **fit_params)
        except Exception:
            # Best-effort fallback when the weighted fit fails (e.g. the
            # estimator does not accept ``sample_weight``): emulate the
            # weighting with a weighted bootstrap of the training set.
            bootstrap_index = np.random.choice(len(Xs),
                                               size=len(Xs),
                                               replace=True,
                                               p=self.weights_ /
                                               self.weights_.sum())
            self.estimator_.fit(Xs[bootstrap_index], ys[bootstrap_index],
                                **fit_params)
        return self
Exemplo n.º 7
0
    def _boost(self, iboost, X, y, src_index, tgt_index, sample_weight_src,
               sample_weight_tgt, **fit_params):
        """
        Run one boosting iteration.

        A new estimator is fitted on the weighted source and target
        samples, its error is computed, and the sample weights are
        updated for the next iteration. The estimator and its error are
        appended to ``self.estimators_`` and ``self.estimator_errors_``.

        Parameters
        ----------
        iboost : int
            Index of the current boosting iteration. Weights are not
            updated on the last one (``self.n_estimators - 1``).

        X : numpy array
            Input data.

        y : numpy array
            Output data.

        src_index : iterable
            indexes of source data in X, y.

        tgt_index : iterable
            indexes of target data in X, y.

        sample_weight_src : numpy array
            Current weights of the source samples.

        sample_weight_tgt : numpy array
            Current weights of the target samples.

        fit_params : key, value arguments
            Arguments given to the fit method of the estimator.

        Returns
        -------
        sample_weight_src, sample_weight_tgt : numpy arrays or None
            Updated weights, or ``(None, None)`` when the estimator
            error is greater than or equal to 0.5 (in which case the
            estimator is not stored).
        """
        index = np.concatenate((src_index, tgt_index))
        sample_weight = np.concatenate((sample_weight_src, sample_weight_tgt))

        estimator = check_estimator(self.get_estimator, **self.kwargs)

        try:
            estimator.fit(X[index],
                          y[index],
                          sample_weight=sample_weight,
                          **fit_params)
        except Exception:
            # Best-effort fallback when the weighted fit fails (e.g. the
            # estimator does not accept ``sample_weight``): emulate the
            # weighting with a weighted bootstrap. The probabilities are
            # normalized because np.random.choice requires them to sum
            # to one.
            bootstrap_index = np.random.choice(
                index,
                size=len(index),
                replace=True,
                p=sample_weight / sample_weight.sum())
            estimator.fit(X[bootstrap_index], y[bootstrap_index], **fit_params)

        error_vect_src = np.abs(
            estimator.predict(X[src_index]).ravel() - y[src_index])
        error_vect_tgt = np.abs(
            estimator.predict(X[tgt_index]).ravel() - y[tgt_index])
        error_vect = np.concatenate((error_vect_src, error_vect_tgt))

        is_adaboost_r2 = isinstance(self, _AdaBoostR2)

        if isinstance(self, (TrAdaBoostR2, _AdaBoostR2)):
            # Regression variants: scale the errors by the largest one.
            error_max = error_vect.max()
            if error_max != 0:
                error_vect /= error_max
                error_vect_src /= error_max
                error_vect_tgt /= error_max

        if is_adaboost_r2:
            estimator_error = (sample_weight * error_vect).sum()
        else:
            estimator_error = ((sample_weight_tgt * error_vect_tgt).sum() /
                               (2 * sample_weight_tgt.sum()))

        # Signal the caller with (None, None) rather than crashing: the
        # former debug assertion made this branch unreachable.
        if estimator_error >= 0.5:
            return None, None

        beta_t = estimator_error / (1. - estimator_error)

        beta_s = 1. / (
            1. + np.sqrt(2. * np.log(len(src_index)) / self.n_estimators))

        if not iboost == self.n_estimators - 1:
            if is_adaboost_r2:
                sample_weight_tgt = (sample_weight_tgt *
                                     np.power(beta_t, (1 - error_vect_tgt)))

                # Rescale so that source + target weights sum to one.
                sample_weight_tgt *= ((1. - sample_weight_src.sum()) /
                                      sample_weight_tgt.sum())
            else:
                # Source updating weights
                sample_weight_src *= np.power(beta_s, error_vect_src)

                # Target updating weights
                sample_weight_tgt *= np.power(beta_t, -error_vect_tgt)

        self.estimators_.append(estimator)
        self.estimator_errors_.append(estimator_error)

        return sample_weight_src, sample_weight_tgt