def preprocess(x_train: np.ndarray, y_train: np.ndarray, x_test: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
    """
    Preprocesses data by scaling it and then embedding it with LLE.

    The scaler and the embedding are each fitted on the training data only
    and then applied to the test data. Unless PLOTTING_MODE is 'none', the
    embedded training data is additionally handed to the plotter as a
    scatter plot.

    :param x_train: the training data.
    :param y_train: the training labels; only passed on to the scatter plot.
    :param x_test: the test data.

    :return: Preprocessed x_train and x_test.
    """
    logger.log('Prepocessing...')

    # Scaling step: fit on the training split, reuse that fit for the test split.
    logger.log('\tScaling data with params:')
    min_max = MinMaxScaler()
    logger.log('\t' + str(min_max.get_params()))
    train_scaled = min_max.fit_transform(x_train)
    test_scaled = min_max.transform(x_test)

    # LLE step: same fit-on-train / transform-on-test pattern as the scaler.
    logger.log('\tApplying LLE with params:')
    lle = LocallyLinearEmbedding(n_neighbors=100, n_jobs=-1, random_state=0)
    logger.log('\t{}'.format(lle.get_params()))
    train_embedded = lle.fit_transform(train_scaled)
    test_embedded = lle.transform(test_scaled)

    # Nothing to draw when plotting is switched off.
    if PLOTTING_MODE == 'none':
        return train_embedded, test_embedded

    # Configure the shared plotter, then draw the embedded training data.
    plotter.subfolder = 'graphs/LLE'
    plotter.filename = 'embedding'
    plotter.xlabel = 'first feature'
    plotter.ylabel = 'second feature'
    plotter.title = 'LLE'
    plotter.scatter(train_embedded, y_train, class_labels=helpers.datasets.get_gene_name)

    return train_embedded, test_embedded
#Use iso_model.transform(x_test) to fit the isomap from the training set onto the test set ''' ------------------------------------------------------------------------------- -------------------------------Modified LLE------------------------------------ ------------------------------------------------------------------------------- ''' #Apply modified LLE, keeping n components < the number of original features #method = 'standard' for LLE, 'hessian' for HELLE, or 'modified' for modified LLE mlle_model = LocallyLinearEmbedding(n_neighbors=5, n_components=2, method='modified', random_state=seed) mlle_model.fit_transform(x_std) print(mlle_model.get_params()) mlle_dim = mlle_model.embedding_ print(mlle_dim.shape) #There should be 2 latent variables represented #Plot first 2 extracted features and the observation class plt.figure(figsize=(10, 5)) plt.xlabel('Latent Variable 1 (explains most variance)') plt.ylabel('Latent Variable 2 (explains second most variance)') plt.title('Modified LLE 2-Dimension Plot with Observation Class, 5 neighbors') plt.scatter(mlle_dim[:, 0], mlle_dim[:, 1], c=y) plt.colorbar() plt.show() #Try a different number of neighbors mlle_model = LocallyLinearEmbedding(n_neighbors=15, n_components=2,