import numpy as np
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE


def show_clusters(data, y, name, params=None):
    """Embed ``data`` in 2-D with t-SNE, scatter-plot the clusters coloured
    by ``y``, save the figure, and return the t-SNE parameters used."""
    model = TSNE(n_components=2, random_state=0)
    np.set_printoptions(suppress=True)
    if params is not None:
        model.set_params(**params)
    X = model.fit_transform(data)
    print(X)
    p = model.get_params()
    print("X.shape = ", X.shape)
    print("y.shape = ", y.shape)
    print(y)
    plt.scatter(X[:, 0], X[:, 1], c=y)
    plt.gray()
    plt.axis('off')
    # Save before show(): on many backends show() clears the figure,
    # which would leave an empty .png.
    plt.savefig("ClustersUntrained{}.png".format(name), dpi=600)
    plt.show()
    plt.clf()
    return p
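# --- Usage sketch (not from the original source): a minimal, hedged example
# on sklearn's digits set; the dataset choice and the perplexity override are
# illustrative assumptions.
from sklearn.datasets import load_digits

digits = load_digits()
used_params = show_clusters(digits.data, digits.target, "Digits",
                            params={"perplexity": 25.0})
print(used_params)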
class TSNERepresentation(Representation):

    @staticmethod
    def default_config():
        default_config = Representation.default_config()

        # parameters
        default_config.parameters = Dict()
        default_config.parameters.perplexity = 30.0
        default_config.parameters.init = "random"
        default_config.parameters.random_state = None

        return default_config

    def __init__(self, n_features=28 * 28, n_latents=10, config={}, **kwargs):
        Representation.__init__(self, config=config, **kwargs)

        # input size (flattened)
        self.n_features = n_features
        # latent size
        self.n_latents = n_latents
        # feature range used for min-max normalization
        self.feature_range = (0.0, 1.0)

        # NOTE: scikit-learn's TSNE only supports n_components < 4 with the
        # default 'barnes_hut' method; n_latents=10 requires method='exact'.
        self.algorithm = TSNE(n_components=self.n_latents)
        self.update_algorithm_parameters()

    def fit(self, X_train, update_range=True):
        '''X_train: array-like (n_samples, n_features)'''
        X_train = np.nan_to_num(X_train)
        if update_range:
            # save (min, max) for normalization
            self.feature_range = (X_train.min(axis=0), X_train.max(axis=0))
        X_train = (X_train - self.feature_range[0]) \
            / (self.feature_range[1] - self.feature_range[0])
        self.algorithm.fit(X_train)

    def calc_embedding(self, x):
        # NOTE: scikit-learn's TSNE has no transform(); this call assumes a
        # t-SNE implementation with out-of-sample support (e.g. openTSNE),
        # otherwise fit_transform() must be used on the full data instead.
        x = (x - self.feature_range[0]) \
            / (self.feature_range[1] - self.feature_range[0])
        x = self.algorithm.transform(x)
        return x

    def update_algorithm_parameters(self):
        self.algorithm.set_params(**self.config.parameters, verbose=False)
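# --- Usage sketch (hedged: ``Representation`` and ``Dict`` come from the
# surrounding framework and are not shown here; shapes are illustrative, and
# calc_embedding assumes an out-of-sample-capable t-SNE backend, see the
# note in the class above).
import numpy as np

X_train = np.random.rand(500, 28 * 28)   # stand-in flattened images
config = TSNERepresentation.default_config()
config.parameters.perplexity = 40.0
rep = TSNERepresentation(n_features=28 * 28, n_latents=2, config=config)
rep.fit(X_train)
Z = rep.calc_embedding(X_train)          # (500, 2) embedding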
import os
import random
import shutil

import h5py
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D  # noqa: F401 (enables 3-D projection)
from sklearn.decomposition import NMF, PCA, TruncatedSVD
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.manifold import TSNE
from sklearn.preprocessing import StandardScaler

# ``mf`` is the project's local helper module (get_categories,
# folders_creator, is_outlier, imagesfolder_to_gif); its import is assumed.


def dimensionality_reduction(TrainFeatures, TestFeatures, Method, params):
    """
    Performs dimensionality reduction of a training and a test feature
    matrix, each stored in an .h5 file. Five different dimensionality
    reduction methods are available.
    _____________________________________________________________________________________
    Parameters:

    - TrainFeatures: string
        Path of an .h5 file with the training features. It contains at least
        the following datasets:
            - 'feats': array-like, shape (n_samples, n_features)
            - 'labels': array-like, shape (n_samples,)
            - 'img_id': array-like, shape (n_samples,)

    - TestFeatures: string
        Path of an .h5 file with the test features. It contains at least the
        same datasets.

    - Method: string
        Possible values are:
            - 'PCA': Principal Component Analysis
            - 't-SNE': t-distributed Stochastic Neighbor Embedding
            - 'TruncatedSVD': Truncated SVD
            - 'NMF': Non-Negative Matrix Factorization
            - 'LDA': Linear Discriminant Analysis

    - params: dict
        A dictionary containing parameters for the selected estimator. Keys
        and possible values are listed on the following websites:
        http://scikit-learn.org/stable/modules/generated/sklearn.decomposition.PCA.html
        http://scikit-learn.org/stable/modules/generated/sklearn.discriminant_analysis.LinearDiscriminantAnalysis.html
        http://scikit-learn.org/stable/modules/generated/sklearn.manifold.TSNE.html
        http://scikit-learn.org/stable/modules/generated/sklearn.decomposition.NMF.html
        http://scikit-learn.org/stable/modules/generated/sklearn.decomposition.TruncatedSVD.html

        For t-SNE an additional key is needed: params['reduce'], with
        possible values 'TruncatedSVD', 'PCA' and 'None'. If the number of
        features is very high, it is highly recommended to first apply
        another dimensionality reduction method (e.g. PCA for dense data or
        TruncatedSVD for sparse data) to reduce the number of dimensions to a
        reasonable amount (e.g. 50). This suppresses some noise and speeds up
        the computation of pairwise distances between samples.
            - params['reduce'] = 'TruncatedSVD': Truncated SVD --> t-SNE
            - params['reduce'] = 'PCA': PCA --> t-SNE
            - params['reduce'] = 'None': t-SNE directly

    Returns:

    - X_train: array-like, shape (n_samples, n_components)
    - X_test: array-like, shape (n_samples, n_components)
    - ax: matplotlib.axes._subplots.AxesSubplot object (if n_components <= 3)
      or None (if n_components > 3)

    Furthermore, two new .h5 files containing three datasets each (reduced
    features, labels and img_id) are automatically generated in the folder
    Results/ReducedFeatures; if n_components <= 3, a scatter plot is also
    saved in the folder Results/Plots.

    Example usage:

    import FeaturesReduction as fr
    import matplotlib.pyplot as plt
    params = {'n_components': 3}
    X_train, X_test, ax = fr.dimensionality_reduction(
        'TrainingFeatures.h5', 'TestFeatures.h5', 'PCA', params)
    plt.show()
    """
    s = os.sep
    # Load training features file
    train = h5py.File(TrainFeatures, 'r')
    train_features = train['feats']
    train_labels = train['labels']
    train_labels = np.squeeze(train_labels)
    train_img_ids = train['img_id']
    # Get categories of the training set from feature ids
    categories = mf.get_categories(train_img_ids)
    # Load test features file
    test = h5py.File(TestFeatures, 'r')
    test_features = test['feats']
    test_labels = test['labels']
    test_labels = np.squeeze(test_labels)
    test_img_ids = test['img_id']
    n_comp = params['n_components']
    if Method != 'NMF':
        # Standardize features by removing the mean and scaling to unit
        # variance (skipped for NMF, which requires non-negative input)
        scaler = StandardScaler().fit(train_features)
        train_features = scaler.transform(train_features)
        test_features = scaler.transform(test_features)
    if Method == 'PCA':
        # Get PCA model
        pca = PCA()
        # Set parameters
        pca.set_params(**params)
        # Fit the model with the training features and
        # apply dimensionality reduction to them
        X_train = pca.fit_transform(train_features)
        # Apply dimensionality reduction to test features
        X_test = pca.transform(test_features)
    elif Method == 'NMF':
        params['verbose'] = True
        # Get NMF model
        nmf = NMF()
        # Set parameters
        nmf.set_params(**params)
        # Fit the model with the training features and
        # apply dimensionality reduction to them
        X_train = nmf.fit_transform(train_features)
        # Apply dimensionality reduction to test features
        X_test = nmf.transform(test_features)
    elif Method == 'LDA':
        # Get LDA model
        lda = LDA()
        # Set parameters
        lda.set_params(**params)
        # Fit the model with the training features (LDA is supervised, so it
        # also needs the labels) and apply dimensionality reduction to them
        X_train = lda.fit_transform(train_features, train_labels)
        # Apply dimensionality reduction to test features
        X_test = lda.transform(test_features)
    elif Method == 't-SNE':
        red = params['reduce']
        del params['reduce']
        print(red)
        params['verbose'] = True
        # Use another dimensionality reduction method (PCA for dense
        # data or TruncatedSVD for sparse data) to reduce the number of
        # dimensions to a reasonable amount (e.g. 50) if the number of
        # features is very high. This will suppress some noise and speed
        # up the computation of pairwise distances between samples.
        if n_comp < 50:
            K = 50
        else:
            K = n_comp * 2
        if red == 'TruncatedSVD':
            # Get TruncatedSVD model
            svd = TruncatedSVD(n_components=K)
            # Fit the model with the training features and
            # apply dimensionality reduction to them
            train_features = svd.fit_transform(train_features)
            # Apply dimensionality reduction to test features
            test_features = svd.transform(test_features)
        elif red == 'PCA':
            # Get PCA model
            pca = PCA(n_components=K)
            # Fit the model with the training features and
            # apply dimensionality reduction to them
            train_features = pca.fit_transform(train_features)
            # Apply dimensionality reduction to test features
            test_features = pca.transform(test_features)
        else:
            pass
        # Get t-SNE model
        tsne = TSNE()
        # Set parameters
        tsne.set_params(**params)
        # Concatenate training and test set: t-SNE has no transform(),
        # so both sets must be embedded in a single fit_transform call
        n_train = train_features.shape[0]
        features = np.concatenate((train_features, test_features), axis=0)
        # Fit the model with the data and apply dimensionality reduction
        X = tsne.fit_transform(features)
        # Separate training and test set
        X_train = X[:n_train, :]
        X_test = X[n_train:, :]
    elif Method == 'TruncatedSVD':
        # Get TruncatedSVD model
        svd = TruncatedSVD()
        # Set parameters
        svd.set_params(**params)
        # Fit the model with the training features and
        # apply dimensionality reduction to them
        X_train = svd.fit_transform(train_features)
        # Apply dimensionality reduction to test features
        X_test = svd.transform(test_features)
    else:
        raise ValueError(
            "Invalid method: possible methods are 'PCA', 't-SNE', "
            "'TruncatedSVD', 'NMF' and 'LDA'")
    # Create folder in which to save the reduced features
    mf.folders_creator('Results', ['ReducedFeatures'])
    # Create an .h5 file and store the reduced training set in it
    name = ('Results' + s + 'ReducedFeatures' + s + Method + str(n_comp) +
            '_' + TrainFeatures.split(s)[-1].split('.')[0] + '.h5')
    f = h5py.File(name, "w")
    f.create_dataset('img_id', data=train_img_ids[:], dtype="S40")
    f.create_dataset('labels', data=train_labels.T, compression="gzip")
    if Method == 'PCA':
        f.create_dataset('pca', data=X_train.T, compression="gzip")
    elif Method == 't-SNE':
        f.create_dataset('tsne', data=X_train.T, compression="gzip")
    elif Method == 'TruncatedSVD':
        f.create_dataset('tsvd', data=X_train.T, compression="gzip")
    elif Method == 'LDA':
        f.create_dataset('lda', data=X_train.T, compression="gzip")
    elif Method == 'NMF':
        f.create_dataset('nmf', data=X_train.T, compression="gzip")
    f.close()
    # Create an .h5 file and store the reduced test set in it
    name = ('Results' + s + 'ReducedFeatures' + s + Method + str(n_comp) +
            '_' + TestFeatures.split(s)[-1].split('.')[0] + '.h5')
    f = h5py.File(name, "w")
    f.create_dataset('img_id', data=test_img_ids[:], dtype="S40")
    f.create_dataset('labels', data=test_labels.T, compression="gzip")
    if Method == 'PCA':
        f.create_dataset('pca', data=X_test.T, compression="gzip")
    elif Method == 't-SNE':
        f.create_dataset('tsne', data=X_test.T, compression="gzip")
    elif Method == 'TruncatedSVD':
        f.create_dataset('tsvd', data=X_test.T, compression="gzip")
    elif Method == 'LDA':
        f.create_dataset('lda', data=X_test.T, compression="gzip")
    elif Method == 'NMF':
        f.create_dataset('nmf', data=X_test.T, compression="gzip")
    f.close()
    if n_comp < 4:
        # Get folders list of the test set from feature ids
        test_folders = mf.get_categories(test_img_ids)
        # Get number of folders
        n_folders_test = len(test_folders)
        # Make some names for the plot legend
        tf = []
        for i in range(n_folders_test):
            tf.append('Test' + str(i))
        # Define a list of colors in hexadecimal format
        if len(categories) + n_folders_test < 9:
            colors = [
                '#FF0000', '#00FF00', '#0000FF', '#FFFF00', '#00FFFF',
                '#808080', '#FF00FF', '#000000'
            ]
        else:
            n = 250
            max_value = 255**3
            interval = int(max_value / n)
            colors = [
                '#' + hex(i)[2:].zfill(6)
                for i in range(0, max_value, interval)
            ]
            colors = colors[:int((n + 1) / 10 * 9)]
            random.shuffle(colors)
        # Create a folder to save images
        mf.folders_creator('Results', ['Plots'])
        # Create a name to save the image
        name = Method + str(n_comp) + '_' + \
            TrainFeatures.split(s)[-1].split('.')[0]
        name = name.split('_')
        name = '_'.join(name[:-1])
        print(X_train.shape)
        print(X_test.shape)
        if n_comp == 1:
            # Plot 1-D data with different colors
            fig, ax = plt.subplots()
            for i in range(len(categories)):
                ax.scatter(X_train[train_labels == i, 0],
                           np.ones(X_train[train_labels == i, 0].shape),
                           c=colors[i],
                           label=categories[i])
            k = len(categories)
            for i in range(n_folders_test):
                ax.scatter(X_test[test_labels == i, 0],
                           np.ones(X_test[test_labels == i, 0].shape),
                           c=colors[k],
                           label=tf[i])
                k += 1
            ax.legend()
            # Save image in .png format
            plt.savefig('Results' + s + 'Plots' + s + name + '.png')
        if n_comp == 2:
            # Plot 2-D data with different colors
            fig, ax = plt.subplots()
            for i in range(len(categories)):
                ax.scatter(X_train[train_labels == i, 0],
                           X_train[train_labels == i, 1],
                           c=colors[i],
                           label=categories[i])
            k = len(categories)
            for i in range(n_folders_test):
                ax.scatter(X_test[test_labels == i, 0],
                           X_test[test_labels == i, 1],
                           c=colors[k],
                           label=tf[i])
                k += 1
            ax.legend()
            # Save image in .png format
            plt.savefig('Results' + s + 'Plots' + s + name + '.png')
            # Remove outliers
            out_train = mf.is_outlier(X_train, thresh=3.5)
            out_test = mf.is_outlier(X_test, thresh=3.5)
            out_train = np.logical_not(out_train)
            out_test = np.logical_not(out_test)
            X_train2 = X_train[out_train, :]
            X_test2 = X_test[out_test, :]
            if (X_train2.shape[0] != X_train.shape[0]
                    or X_test2.shape[0] != X_test.shape[0]):
                train_labels2 = train_labels[out_train]
                test_labels2 = test_labels[out_test]
                # Plot 2-D data without outliers with different colors
                fig, ax = plt.subplots()
                for i in range(len(categories)):
                    ax.scatter(X_train2[train_labels2 == i, 0],
                               X_train2[train_labels2 == i, 1],
                               c=colors[i],
                               label=categories[i])
                k = len(categories)
                for i in range(n_folders_test):
                    ax.scatter(X_test2[test_labels2 == i, 0],
                               X_test2[test_labels2 == i, 1],
                               c=colors[k],
                               label=tf[i])
                    k += 1
                ax.legend()
                # Save image in .png format
                plt.savefig('Results' + s + 'Plots' + s + name +
                            '_noOutliers.png')
        if n_comp == 3:
            mf.folders_creator('Results' + s + 'Plots', ['tmp'])
            # Plot 3-D data with different colors
            ax = plt.subplot(111, projection='3d')
            for i in range(len(categories)):
                ax.scatter(X_train[train_labels == i, 0],
                           X_train[train_labels == i, 1],
                           X_train[train_labels == i, 2],
                           c=colors[i],
                           label=categories[i])
            k = len(categories)
            for i in range(n_folders_test):
                ax.scatter(X_test[test_labels == i, 0],
                           X_test[test_labels == i, 1],
                           X_test[test_labels == i, 2],
                           c=colors[k],
                           label=tf[i])
                k += 1
            ax.legend(loc='upper left',
                      numpoints=1,
                      ncol=3,
                      fontsize=8,
                      bbox_to_anchor=(0, 0))
            # Rotate through 360 degrees and save a frame every 10 degrees
            for angle in range(0, 360, 10):
                ax.view_init(30, angle)
                plt.savefig('Results' + s + 'Plots' + s + 'tmp' + s + name +
                            str(angle) + '.png')
            # Save as a .gif image
            mf.imagesfolder_to_gif(
                'Results' + s + 'Plots' + s + name + '.gif',
                'Results' + s + 'Plots' + s + 'tmp', 0.2)
            shutil.rmtree('Results' + s + 'Plots' + s + 'tmp')
    else:
        ax = None
    return X_train, X_test, ax
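# --- Sketch (not from the original source): the t-SNE pre-reduction chain
# implemented above, shown standalone. PCA first brings dense features down
# to ~50 dimensions, then t-SNE embeds train and test jointly (t-SNE has no
# transform(), so both sets go through a single fit_transform call).
import numpy as np
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE


def pca_then_tsne(train, test, n_components=2, k=50):
    pca = PCA(n_components=k)
    train_r = pca.fit_transform(train)
    test_r = pca.transform(test)
    X = TSNE(n_components=n_components).fit_transform(
        np.concatenate((train_r, test_r), axis=0))
    return X[:len(train_r)], X[len(train_r):]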
import pickle

import numpy as np

# ``model``, ``tr_X``, ``y``, ``te``, ``valid``, ``model_1``, ``model_2`` and
# ``pkl_filename`` are defined earlier in the script.

# Save to file
with open(pkl_filename, 'wb') as file:
    pickle.dump(model, file)

# Load from file
with open(pkl_filename, 'rb') as file:
    pickle_model = pickle.load(file)

#################################################
#############       Ensemble       #############

# bagging - changing seed & averaging
n_bags = 10
seed = 1
bagged_pred = np.zeros(te.shape[0])  # accumulator for the averaged test predictions

for i in range(n_bags):
    model.set_params(random_state=seed + i)
    model.fit(tr_X, y)
    preds = model.predict(te)
    bagged_pred += preds
bagged_pred /= n_bags

# stacking
# predictions on the validation set become level-two training features
valid_pred_1 = model_1.predict(valid)
valid_pred_2 = model_2.predict(valid)
stacked_valid = np.column_stack((valid_pred_1, valid_pred_2))

# predictions on the test set
te_pred_1 = model_1.predict(te)
te_pred_2 = model_2.predict(te)
stacked_te = np.column_stack((te_pred_1, te_pred_2))
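# --- Sketch (not from the original source): the stacking block above builds
# the level-one prediction matrices but never fits a meta-learner. A minimal,
# hedged completion, assuming ``y_valid`` holds the validation targets and a
# linear meta-model is acceptable:
from sklearn.linear_model import LinearRegression

meta_model = LinearRegression()
meta_model.fit(stacked_valid, y_valid)         # level-two model on validation predictions
stacked_pred = meta_model.predict(stacked_te)  # final test-set predictions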
import joblib
import numpy as np
from scipy.stats import mode
from sklearn.cluster import KMeans
from sklearn.manifold import TSNE
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import minmax_scale


class Kmeans_fine_grained():

    def __init__(self, axis, action_name, data_category):
        self.axis = axis
        self.action_name = action_name
        self.data_category = data_category
        fusion_features_path = f'src/fine_grained_features/conv1d_2d_features/{data_category}_features_{axis}_{action_name}.npy'
        self.fusion_features = np.load(fusion_features_path)
        self.Tsne = TSNE(n_components=3, init='pca', random_state=0)
        self.Kmeans = KMeans(n_clusters=7, random_state=0)

    def get_tsne_data(self):
        """Get the t-SNE-reduced node data."""
        print(
            f'======== getting t-SNE reduced data {self.data_category}_{self.axis}_{self.action_name} ============='
        )
        data = []
        targets = []
        for fusion_feature in self.fusion_features:
            # action data
            fusion_feature = fusion_feature.reshape(-1, int(self.axis[0]),
                                                    36)  # (7, 6, 36)
            for label, feature in enumerate(fusion_feature):
                feature = feature.flatten()
                targets.append(label)
                data.append(feature)
        data = np.array(data)
        targets = np.array(targets)
        print(data.shape)
        if self.data_category == 'train':
            tsne_fit = self.Tsne.fit(data)
            tsne_params = tsne_fit.get_params()
            np.save(
                f'src/fine_grained_features/tsne_model/tsne_params_{self.axis}.npy',
                tsne_params)
        else:
            tsne_params_value = np.load(
                f'src/fine_grained_features/tsne_model/tsne_params_{self.axis}.npy',
                allow_pickle=True).item()
            self.Tsne.set_params(**tsne_params_value)
        # NOTE: t-SNE has no transform() for new data; fit_transform below
        # re-embeds from scratch, so reloading the saved parameters only
        # reproduces the hyper-parameter settings, not a trained embedding.
        tsne_data = self.Tsne.fit_transform(data)
        # self.matplotlib(tsne_data)
        tsne_data_targets = []
        for index, tsne in enumerate(tsne_data):
            data_target = np.append(tsne, targets[index])
            tsne_data_targets.append(data_target.tolist())
        tsne_data_targets = np.array(tsne_data_targets)
        print(tsne_data_targets.shape)
        print(
            f'{self.data_category}_tsne_data_{self.axis}_{self.action_name} shape:{tsne_data_targets.shape}'
        )
        np.save(
            f'src/fine_grained_features/tsne_data/{self.data_category}_tsne_data_{self.axis}_{self.action_name}.npy',
            tsne_data_targets)

    def train_kmeans(self):
        data_targets_path = f'src/fine_grained_features/tsne_data/train_tsne_data_{self.axis}_{self.action_name}.npy'
        data_targets = np.load(data_targets_path)
        tsne_data = data_targets[:, :3]
        tsne_targets = data_targets[:, 3]
        tsne_data = minmax_scale(tsne_data)
        kmeans_model = self.Kmeans.fit(tsne_data)
        joblib.dump(
            kmeans_model,
            f'src/fine_grained_features/kmeans_model/kmeans_model_{self.axis}_{self.action_name}.pkl'
        )
        kmeans_cluster = kmeans_model.cluster_centers_  # cluster centroids
        predicted = kmeans_model.predict(tsne_data)
        kmeans_cluster_label_dict = {}  # centroids keyed by their majority label
        # clustering result
        kmeans_data = np.c_[tsne_data, predicted]
        # map each cluster id to the majority ground-truth label
        # (scipy < 1.11: mode() returns arrays, hence the [0] indexing)
        labels = np.zeros_like(predicted)
        for i in range(7):
            mask = (predicted == i)
            labels[mask] = mode(tsne_targets[mask])[0]
            kmeans_cluster_label = int(mode(tsne_targets[mask])[0][0])
            kmeans_cluster_label_dict[
                f'sensor-{kmeans_cluster_label}'] = kmeans_cluster[i]
        # print(kmeans_cluster_label_dict)
        np.save(
            f'src/fine_grained_features/cluster_label_dict/cluster_label_dict_{self.axis}_{self.action_name}.npy',
            kmeans_cluster_label_dict)
        np.save(
            f'src/fine_grained_features/kmeans_data/{self.data_category}_kmeans_data_{self.axis}_{self.action_name}.npy',
            kmeans_data)
        # compute accuracy
        accuracy = accuracy_score(tsne_targets, labels)
        print(f'train_{self.axis}_{self.action_name} accuracy:{accuracy}')

    def predict_kmeans(self):
        data_targets_path = f'src/fine_grained_features/tsne_data/test_tsne_data_{self.axis}_{self.action_name}.npy'
        data_targets = np.load(data_targets_path)
        tsne_data = data_targets[:, :3]
        tsne_targets = data_targets[:, 3]
        tsne_data = minmax_scale(tsne_data)
        kmeans_model = joblib.load(
            f'src/fine_grained_features/kmeans_model/kmeans_model_{self.axis}_{self.action_name}.pkl'
        )
        predicted = kmeans_model.predict(tsne_data)
        # clustering result
        kmeans_data = np.c_[tsne_data, predicted]
        # map each cluster id to the majority ground-truth label
        labels = np.zeros_like(predicted)
        for i in range(7):
            mask = (predicted == i)
            labels[mask] = mode(tsne_targets[mask])[0]
        # compute accuracy
        accuracy = accuracy_score(tsne_targets, labels)
        print(f'test_{self.axis}_{self.action_name} accuracy:{accuracy}')
        np.save(
            f'src/fine_grained_features/kmeans_data/{self.data_category}_kmeans_data_{self.axis}_{self.action_name}.npy',
            kmeans_data)
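# --- Usage sketch (not from the original source; the axis and action names
# are illustrative guesses at the expected format, e.g. ``axis='6axis'`` so
# that ``int(axis[0])`` yields the sensor count used in the reshape).
km = Kmeans_fine_grained(axis='6axis', action_name='walk',
                         data_category='train')
km.get_tsne_data()   # writes the t-SNE data .npy for the train split
km.train_kmeans()    # fits k-means on it and reports train accuracy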
from collections import defaultdict
from math import ceil

import numpy as np
from imblearn.over_sampling.base import BaseOverSampler
from imblearn.utils import check_neighbors_object
from sklearn.base import clone
from sklearn.manifold import TSNE
from sklearn.utils import check_random_state

# ``safe_random_state`` is a project-local helper (pyloras._common); its
# import is assumed here.


class LORAS(BaseOverSampler):
    """Localized Random Affine Shadowsampling (LoRAS).

    This class implements the LoRAS oversampling technique for imbalanced
    datasets. This technique generates Gaussian noise in small neighborhoods
    around the minority class samples; the final synthetic samples are then
    obtained by a convex combination of multiple noisy data points
    (shadowsamples).

    Parameters
    ----------
    {sampling_strategy}

    n_neighbors : int or estimator object, default=None
        If ``int``, number of nearest neighbours used to construct synthetic
        samples. If object, an estimator that inherits from
        :class:`~sklearn.neighbors.base.KNeighborsMixin` that will be used to
        find the k_neighbors.

    n_shadow : int, default=None
        The number of shadow samples to generate per minority class data
        point.

    std : float or sequence, default=0.005
        The standard deviation of the Normal distribution to add to each
        feature when generating shadow samples. If the input is a sequence,
        its size must be equal to the number of features of ``X`` when
        calling the ``fit_resample`` method. If ``float``, the same standard
        deviation will be used for all shadow samples generated.

    n_affine : int, default=None
        The number of shadow samples to use when generating the synthetic
        samples through random affine combinations. If given, the value must
        be between ``2`` and the number of features used in the fitting data.
        If not given, the value will be set to the total number of features
        in the fitting data.

    manifold_learner : object, default=None
        An instance of an object used to perform a 2-dimensional embedding of
        a dataset. It must implement the scikit-learn estimator interface;
        the ``fit_transform`` and ``set_params`` methods must be implemented.
        If not given, the :class:`~sklearn.manifold.TSNE` class is used to
        obtain the 2d manifold of the data. Defaults to None.

    manifold_learner_params : dict, default=None
        A dictionary of additional parameters to pass to the instance of the
        ``manifold_learner`` (or TSNE if ``manifold_learner`` is None) when
        creating a 2D manifold of the fitting data. The keys are the
        parameter names and the values are the values. If not given, the
        default values are used.

    {random_state}

    {n_jobs}

    References
    ----------
    .. [1] Bej, S., Davtyan, N., Wolfien, M. et al. LoRAS: an oversampling
       approach for imbalanced datasets. Mach Learn 110, 279-301 (2021).
       https://doi.org/10.1007/s10994-020-05913-4

    Examples
    --------
    >>> from pyloras import LORAS
    >>> from sklearn.datasets import make_classification
    >>> from collections import Counter
    >>> l = LORAS()
    >>> X, y = make_classification(n_classes=3, class_sep=3,
    ... weights=[0.7, 0.2, 0.1], n_informative=3, n_redundant=1, flip_y=0,
    ... n_features=20, n_clusters_per_class=1, n_samples=2000, random_state=10)
    >>> print('Original dataset shape %s' % Counter(y))
    Original dataset shape Counter({{1: 400, 2: 200, 0: 1400}})
    >>> X_res, y_res = l.fit_resample(X, y)
    >>> print('Resampled dataset shape %s' % Counter(y_res))
    Resampled dataset shape Counter({{1: 1400, 2: 1400, 0: 1400}})
    """

    def __init__(self, *, sampling_strategy="auto", n_neighbors=None,
                 n_shadow=None, std=0.005, n_affine=None,
                 manifold_learner=None, manifold_learner_params=None,
                 random_state=None, n_jobs=None):
        super().__init__(sampling_strategy=sampling_strategy)
        self.n_neighbors = n_neighbors
        self.n_shadow = n_shadow
        self.std = std
        self.n_affine = n_affine
        self.manifold_learner = manifold_learner
        self.manifold_learner_params = manifold_learner_params
        self.random_state = random_state
        self.n_jobs = n_jobs

    def _check_2d_manifold_learner(self):
        if (not hasattr(self.manifold_learner, "fit_transform")
                or not hasattr(self.manifold_learner, "set_params")):
            raise ValueError(
                "The 2d manifold learner must implement the "
                "``fit_transform`` and ``set_params`` methods")
        return clone(self.manifold_learner)

    def _initialize_params(self, X, y, rng):
        """Initialize the parameter values to their appropriate values."""
        f_size = X.shape[1]
        self.n_affine_ = f_size if self.n_affine is None else self.n_affine
        if self.manifold_learner:
            self.manifold_learner_ = self._check_2d_manifold_learner()
        else:
            self.manifold_learner_ = TSNE(n_components=2)
        if self.manifold_learner_params is not None:
            self.manifold_learner_.set_params(**self.manifold_learner_params)
        try:
            self.manifold_learner_.set_params(
                random_state=safe_random_state(rng))
        except ValueError:
            pass

        _, y_counts = np.unique(y, return_counts=True)
        if self.n_neighbors is None:
            n_neighbors = 30 if y_counts.min() >= 100 else 5
        else:
            n_neighbors = self.n_neighbors
        self.nn_ = check_neighbors_object("n_neighbors", n_neighbors)
        if self.n_jobs is not None:
            self.nn_.set_params(n_jobs=self.n_jobs)

        if self.n_shadow is None:
            self.n_shadow_ = max(ceil(2 * f_size / self.nn_.n_neighbors), 40)
        else:
            self.n_shadow_ = self.n_shadow

        if self.n_affine_ >= self.nn_.n_neighbors * self.n_shadow_:
            raise ValueError(
                "The number of shadow samples used to create an affine "
                "random combination must be less than "
                "`n_neighbors * n_shadow`.")

        try:
            iter(self.std)
            self.std_ = self.std
        except TypeError:
            self.std_ = [self.std] * f_size

    def _fit_resample(self, X, y):
        random_state = check_random_state(self.random_state)
        self._initialize_params(X, y, random_state)
        n_features = X.shape[1]
        X_res = [X.copy()]
        y_res = [y.copy()]
        dirichlet_param = [1] * self.n_affine_
        loras_samples = defaultdict(list)

        for minority_class, samples_to_make in self.sampling_strategy_.items():
            if samples_to_make == 0:
                continue
            X_minority = X[y == minority_class]
            X_embedded = self.manifold_learner_.fit_transform(X_minority)
            self.nn_.fit(X_embedded)
            neighborhoods = self.nn_.kneighbors(X_embedded,
                                                return_distance=False)
            num_loras = ceil(samples_to_make / X_embedded.shape[0])
            for neighbor_group in neighborhoods:
                # jitter each neighborhood point with Gaussian noise to
                # obtain the shadow samples
                shadow_sample_size = (self.n_shadow_, self.nn_.n_neighbors,
                                      n_features)
                total_shadow_samples = (
                    X_minority[neighbor_group] + random_state.normal(
                        scale=self.std_, size=shadow_sample_size)).reshape(
                            self.n_shadow_ * self.nn_.n_neighbors, n_features)
                # ``RandomState`` exposes ``randint``; ``integers`` only
                # exists on the newer ``Generator`` API
                random_index = random_state.randint(
                    0, total_shadow_samples.shape[0],
                    size=(num_loras, self.n_affine_))
                # convex (Dirichlet-weighted) combinations of the selected
                # shadow samples yield the synthetic points
                weights = random_state.dirichlet(dirichlet_param,
                                                 size=num_loras)
                loras_samples[minority_class].append(
                    (weights[:, None] @
                     total_shadow_samples[random_index]).reshape(
                         num_loras, n_features))
            # keep only ``samples_to_make`` synthetic samples from the
            # generated ones
            samples_to_drop = X_embedded.shape[0] * num_loras - samples_to_make
            random_state.shuffle(loras_samples[minority_class])
            X_res.append(
                np.concatenate(
                    loras_samples[minority_class])[samples_to_drop:])
            y_res.append([minority_class] * samples_to_make)

        return np.concatenate(X_res), np.concatenate(y_res)
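# --- Illustration (not part of pyloras): the core LoRAS step in isolation.
# Shadow samples are Gaussian-jittered copies of a neighborhood; one
# synthetic point is a convex (Dirichlet-weighted) combination of a few of
# them. All sizes below are illustrative.
import numpy as np

rng = np.random.RandomState(0)
neighborhood = rng.rand(5, 3)            # 5 minority points, 3 features
shadows = (neighborhood +
           rng.normal(scale=0.005, size=(40, 5, 3))).reshape(200, 3)
idx = rng.randint(0, 200, size=4)        # pick n_affine = 4 shadow samples
w = rng.dirichlet([1] * 4)               # convex weights summing to 1
synthetic = w @ shadows[idx]             # one LoRAS sample, shape (3,)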