def run(self):
    """Run the selected dimensionality-reduction method on the selected data.

    The method name and data key are read from the two combo boxes and the
    method parameters come from ``getMethodParams``. ``dim_red`` is called
    with ``'wvl'`` as the column argument and ``load_fit`` set to False.
    For ``'LDA'`` and ``'LFDA'`` a third value from ``getMethodParams`` is
    forwarded as the ``ycol`` keyword.

    The returned model object is stored in ``self.dimred`` under the key
    ``'<datakey>-<method>'`` and that key is appended to
    ``self.dimredkeys``.
    """
    method = self.chooseMethodComboBox.currentText()
    datakey = self.chooseDataComboBox.currentText()
    col = 'wvl'
    load_fit = False

    method_settings = self.getMethodParams(
        self.chooseMethodComboBox.currentIndex())
    if method in ('LDA', 'LFDA'):
        # These two methods get an additional ycol keyword.
        params, _modelkey, ycol = method_settings
        extra_kwargs = {'ycol': ycol}
    else:
        params, _modelkey = method_settings
        extra_kwargs = {}

    _df, dimred_obj = dim_red(self.data[datakey].df, col, method, [],
                              params, load_fit, **extra_kwargs)

    dimredkey = '-'.join([datakey, method])
    # NOTE(review): the key is appended on every run, so repeating the same
    # data/method pair leaves duplicate entries in dimredkeys while the dict
    # entry below is simply overwritten -- confirm this is intended.
    self.dimredkeys.append(dimredkey)
    self.dimred[dimredkey] = dimred_obj
def dim_red(self, col, method, params, kws, load_fit):
    """Run ``dim_red.dim_red`` on ``self.df`` and store both results.

    All arguments are forwarded by keyword. The call is expected to return
    a pair: the first item replaces ``self.df`` and the second is stored as
    ``self.dim_red``. Nothing is returned.
    """
    options = {
        'col': col,
        'method': method,
        'params': params,
        'kws': kws,
        'load_fit': load_fit,
    }
    outcome = dim_red.dim_red(self.df, **options)
    # NOTE(review): if this function is a method on a class, storing the
    # second result as self.dim_red shadows the method on this instance, so
    # a later self.dim_red(...) would call the stored object instead --
    # confirm callers rely on this attribute name before renaming it.
    self.df, self.dim_red = outcome
def test_dimred_LDA():
    """Check LDA output against stored reference values.

    Loads ``test_data.csv``, runs K-Means clustering on it, then runs LDA
    with 3 components using ``'K-Means'`` as ``ycol``. Verifies the shape of
    ``df['LDA']``, the first column of ``coef_`` and the first row of scores.
    """
    kmeans_kws = {
        'n_clusters': 5,
        'n_init': 10,
        'max_iter': 100,
        'tol': 0.01,
        'n_jobs': 1,
        'random_state': 1,
    }
    lda_params = {'n_components': 3}
    expected_coefs = [
        -0.02209121, -0.0016516, -0.01139357, -0.06448139, 0.07085655
    ]
    expected_scores = [-11.89340048, 0.41598425, 0.22964169]

    df = pd.read_csv(get_path('test_data.csv'), header=[0, 1])
    # The return value is discarded; presumably cluster.cluster adds the
    # 'K-Means' labels to df in place, since they are used as ycol below
    # (TODO confirm).
    cluster.cluster(df, 'wvl', 'K-Means', [], kmeans_kws)
    df, dimred_obj = dim_red.dim_red(df, 'wvl', 'LDA', [], lda_params,
                                     ycol='K-Means')

    lda_scores = df['LDA']
    assert lda_scores.shape == (103, 3)
    np.testing.assert_array_almost_equal(expected_coefs,
                                         dimred_obj.coef_[:, 0])
    first_row = np.array(lda_scores.iloc[0, :])
    np.testing.assert_array_almost_equal(expected_scores, first_row)
def run(self):
    """Run the selected dimensionality-reduction method on the selected data.

    The method name and data key are read from the combo boxes and the
    method parameters come from ``getMethodParams``. ``dim_red`` is called
    with ``'wvl'`` as the column argument and ``load_fit`` set to False.
    The two values it returns are not stored on ``self``.
    """
    col = 'wvl'
    load_fit = False

    method = self.chooseMethodComboBox.currentText()
    datakey = self.chooseDataComboBox.currentText()
    method_settings = self.getMethodParams(
        self.chooseMethodComboBox.currentIndex())
    params, _modelkey = method_settings

    source_df = self.data[datakey].df
    _df, _model = dim_red(source_df, col, method, [], params, load_fit)
def test_dimred_PCA():
    """Check PCA output against stored reference values.

    Runs PCA with 3 components on ``test_data.csv`` and verifies the shape
    of ``df['PCA']``, the explained variance ratios and the first row of
    scores.
    """
    expected_expl_var = [0.96051211, 0.01683739, 0.01471955]
    expected_scores = [10092.96265442, -628.16699776, -359.06894452]
    pca_params = {'n_components': 3}

    df = pd.read_csv(get_path('test_data.csv'), header=[0, 1])
    df, dimred_obj = dim_red.dim_red(df, 'wvl', 'PCA', [], pca_params)

    pca_scores = df['PCA']
    assert pca_scores.shape == (103, 3)
    np.testing.assert_array_almost_equal(
        expected_expl_var, dimred_obj.explained_variance_ratio_)
    first_row = np.array(pca_scores.iloc[0, :])
    np.testing.assert_array_almost_equal(expected_scores, first_row)
def test_dimred_FastICA():
    """Check FastICA output against stored reference values.

    Runs FastICA with 3 components and ``random_state`` 1 on
    ``test_data.csv`` and verifies the shape of ``df['FastICA']``, the first
    column of ``components_`` and the first row of scores.
    """
    expected_comps = [-2.190278e-05, 1.498101e-06, 9.082887e-07]
    expected_scores = [0.03252833, -0.03749623, -0.11434307]
    ica_params = {'n_components': 3, 'random_state': 1}

    df = pd.read_csv(get_path('test_data.csv'), header=[0, 1])
    df, dimred_obj = dim_red.dim_red(df, 'wvl', 'FastICA', [], ica_params)

    ica_scores = df['FastICA']
    assert ica_scores.shape == (103, 3)
    np.testing.assert_array_almost_equal(expected_comps,
                                         dimred_obj.components_[:, 0])
    first_row = np.array(ica_scores.iloc[0, :])
    np.testing.assert_array_almost_equal(expected_scores, first_row)
def test_dimred_NMF():
    """Check NMF output (with ``add_constant``) against reference values.

    Subtracts 1000 from the ``'wvl'`` columns of ``test_data.csv``, calls
    ``check_positive`` on them, then runs NMF with 3 components,
    ``random_state`` 0 and ``add_constant`` enabled. Verifies the shape of
    ``df['NMF']``, the first column of ``components_`` and the first row of
    scores.
    """
    expected_comps = [10.27191532, 34.62489686, 3.06822373]
    expected_scores = [49.42458628, 3.9910722, 27.03100371]
    nmf_params = {'n_components': 3, 'random_state': 0, 'add_constant': True}

    df = pd.read_csv(get_path('test_data.csv'), header=[0, 1])
    # Shift the data down so some values go negative; this exercises the
    # add-a-constant path.
    shifted = df['wvl'] - 1000
    df['wvl'] = shifted
    # Result is intentionally unused; the call itself is what is exercised.
    dim_red.check_positive(df['wvl'])

    df, dimred_obj = dim_red.dim_red(df, 'wvl', 'NMF', [], nmf_params)

    nmf_scores = df['NMF']
    assert nmf_scores.shape == (103, 3)
    np.testing.assert_array_almost_equal(expected_comps,
                                         dimred_obj.components_[:, 0])
    first_row = np.array(nmf_scores.iloc[0, :])
    np.testing.assert_array_almost_equal(expected_scores, first_row)
def test_dimred_JADE():
    """Check JADE-ICA output against stored reference values.

    Runs ``'JADE-ICA'`` with 3 components on ``test_data.csv`` and verifies
    the shape of ``df['JADE-ICA']``, the first column of
    ``ica_jade_loadings`` and the first row of scores.
    """
    expected_loadings = [0.56247385, 0.19292341, 3.42289881]
    expected_scores = [174708.34499912, 125682.55985134, 145155.40758151]
    jade_params = {'n_components': 3}

    df = pd.read_csv(get_path('test_data.csv'), header=[0, 1])
    df, dimred_obj = dim_red.dim_red(df, 'wvl', 'JADE-ICA', [], jade_params)

    jade_scores = df['JADE-ICA']
    assert jade_scores.shape == (103, 3)

    # Flatten the first loading column to 1-D before comparing.
    first_loading_col = np.squeeze(
        np.array(dimred_obj.ica_jade_loadings[:, 0]))
    np.testing.assert_almost_equal(expected_loadings, first_loading_col)

    first_row = np.array(jade_scores.iloc[0, :])
    np.testing.assert_array_almost_equal(expected_scores, first_row)
def test_dimred_LLE():
    """Check LLE output against stored reference values.

    Runs LLE with 3 components, 10 neighbors and ``reg`` 1e-3 on
    ``test_data.csv`` and verifies the shape of ``df['LLE']``, the
    reconstruction error and the first row of scores. Scores are compared
    by absolute value to 4 decimals.
    """
    expected_err = 2.0687806439705738e-05
    expected_scores = [0.11088153, 0.01215013, -0.03551393]
    lle_params = {'n_components': 3, 'n_neighbors': 10, 'reg': 1e-3}

    df = pd.read_csv(get_path('test_data.csv'), header=[0, 1])
    df, dimred_obj = dim_red.dim_red(df, 'wvl', 'LLE', [], lle_params)

    lle_scores = df['LLE']
    assert lle_scores.shape == (103, 3)
    np.testing.assert_almost_equal(expected_err,
                                   dimred_obj.reconstruction_error_)

    # Magnitudes only: the comparison ignores the sign of each score.
    expected_magnitudes = np.abs(expected_scores)
    actual_magnitudes = np.abs(np.array(lle_scores.iloc[0, :]))
    np.testing.assert_array_almost_equal(expected_magnitudes,
                                         actual_magnitudes,
                                         decimal=4)
def test_dimred_tSNE():
    """Check t-SNE output against stored reference values.

    Runs ``'t-SNE'`` with 2 components on ``test_data.csv`` and verifies the
    shape of ``df['t-SNE']``, the KL divergence and the first row of scores.
    """
    expected_div = 0.38829776644706726
    expected_scores = [9938.469727, -802.161682]
    # NOTE(review): no random_state is passed here -- confirm the reference
    # values are reproducible across runs.
    tsne_params = {
        'n_components': 2,
        'learning_rate': 200.0,
        'n_iter': 1000,
        'n_iter_without_progress': 300,
        'perplexity': 30,
        'init': 'pca',
    }

    df = pd.read_csv(get_path('test_data.csv'), header=[0, 1])
    df, dimred_obj = dim_red.dim_red(df, 'wvl', 't-SNE', [], tsne_params)

    tsne_scores = df['t-SNE']
    assert tsne_scores.shape == (103, 2)
    np.testing.assert_almost_equal(expected_div, dimred_obj.kl_divergence_)
    first_row = np.array(tsne_scores.iloc[0, :])
    np.testing.assert_array_almost_equal(expected_scores, first_row)