def test_feature_agglomeration():
    # Fit, transform and inverse-transform a single 3-feature sample with
    # both mean and median pooling, collapsing all features into one cluster.
    n_clusters = 1
    X = np.array([0, 0, 1]).reshape(1, 3)  # (n_samples, n_features)

    agglo_mean = FeatureAgglomeration(n_clusters=n_clusters,
                                      pooling_func=np.mean)
    agglo_median = FeatureAgglomeration(n_clusters=n_clusters,
                                        pooling_func=np.median)
    estimators = (agglo_mean, agglo_median)

    # Fitting must not emit any warning.
    for agglo in estimators:
        assert_no_warnings(agglo.fit, X)

    # One label per feature, and exactly n_clusters distinct labels.
    for agglo in estimators:
        assert np.size(np.unique(agglo.labels_)) == n_clusters
    for agglo in estimators:
        assert np.size(agglo.labels_) == X.shape[1]

    # Test transform
    Xt_mean = agglo_mean.transform(X)
    Xt_median = agglo_median.transform(X)
    for reduced in (Xt_mean, Xt_median):
        assert reduced.shape[1] == n_clusters
    assert Xt_mean == np.array([1 / 3.])
    assert Xt_median == np.array([0.])

    # Test inverse transform
    X_full_mean = agglo_mean.inverse_transform(Xt_mean)
    X_full_median = agglo_median.inverse_transform(Xt_median)
    for restored in (X_full_mean, X_full_median):
        assert np.unique(restored[0]).size == n_clusters

    # Transforming the restored data must give back the reduced data.
    round_trips = (
        (agglo_mean, X_full_mean, Xt_mean),
        (agglo_median, X_full_median, Xt_median),
    )
    for agglo, restored, reduced in round_trips:
        assert_array_almost_equal(agglo.transform(restored), reduced)
def fa_dim_red(x_train_scaled, dataset_name, features_num=2):
    """Analyse and apply FeatureAgglomeration to ``x_train_scaled``.

    For every cluster count from 1 to the number of features, the data is
    reduced and reconstructed, and the mean squared reconstruction error is
    recorded. The errors are expressed as percentages of their total,
    printed, drawn as a bar chart, saved to
    ``'<dataset_name> fa analysis.png'`` and shown. A final model with
    ``features_num`` clusters is then fitted.

    Parameters
    ----------
    x_train_scaled : array-like
        Data to reduce; ``x_train_scaled.shape[1]`` is used as the number
        of features (assumed to be a 2-D numeric array -- TODO confirm).
    dataset_name : object
        Label inserted (via ``str``) into the plot title and file name.
    features_num : int, default 2
        Number of clusters for the final model.

    Returns
    -------
    tuple
        ``(fa_result, x_projected_fa)``: the reduced data and its
        reconstruction from the final model.
    """
    n_features = x_train_scaled.shape[1]

    # Reconstruction error for each candidate number of clusters.
    losses = []
    for k in range(1, n_features + 1):
        fa = FeatureAgglomeration(n_clusters=k)
        fa_result = fa.fit_transform(x_train_scaled)
        x_projected_fa = fa.inverse_transform(fa_result)
        losses.append(((x_train_scaled - x_projected_fa) ** 2).mean())

    losses = np.asarray(losses, dtype=float)
    total_loss = np.sum(losses)
    if total_loss > 0:
        np_feature_losses_percent = np.multiply(100, losses / total_loss)
    else:
        # Every reconstruction was exact (e.g. a single feature); avoid 0/0.
        np_feature_losses_percent = np.zeros_like(losses)

    # Report the first cluster count at which the running share exceeds 90%.
    print('num of clustrs < 10% loss')
    cumulative_percent = 0
    for cluster_count, percent in enumerate(np_feature_losses_percent, start=1):
        cumulative_percent += percent
        if cumulative_percent > 90:
            print(cluster_count)
            break
    print(np_feature_losses_percent)

    plt.bar(list(range(1, len(np_feature_losses_percent) + 1)),
            np_feature_losses_percent)
    plt.title("FeatureAgglomeration Projection Losses % ("+str(dataset_name)+")")
    plt.ylabel("Mean Squared Error (% of Total)")
    plt.xlabel("Features")
    plt.savefig((str(dataset_name))+' fa analysis.png')
    plt.show()

    # Final model. The original passed an undefined ``y_train`` here; the
    # estimator is unsupervised, so no target is needed.
    fa = FeatureAgglomeration(n_clusters=features_num)
    fa_result = fa.fit_transform(x_train_scaled)
    print(fa_result.shape)
    x_projected_fa = fa.inverse_transform(fa_result)
    # The original printed the shape of an undefined ``x_projected_ica``.
    print(x_projected_fa.shape)
    print(x_train_scaled.shape)
    loss = ((x_train_scaled - x_projected_fa) ** 2).mean()
    print('loss')
    print(loss)
    return fa_result, x_projected_fa
def test_feature_agglomeration():
    # Fit, transform and inverse-transform a single 3-feature sample with
    # both mean and median pooling, collapsing all features into one cluster.
    n_clusters = 1
    X = np.array([0, 0, 1]).reshape(1, 3)  # (n_samples, n_features)

    agglo_mean = FeatureAgglomeration(n_clusters=n_clusters,
                                      pooling_func=np.mean)
    agglo_median = FeatureAgglomeration(n_clusters=n_clusters,
                                        pooling_func=np.median)
    estimators = (agglo_mean, agglo_median)

    for agglo in estimators:
        agglo.fit(X)

    # One label per feature, and exactly n_clusters distinct labels.
    for agglo in estimators:
        assert_true(np.size(np.unique(agglo.labels_)) == n_clusters)
    for agglo in estimators:
        assert_true(np.size(agglo.labels_) == X.shape[1])

    # Test transform
    Xt_mean = agglo_mean.transform(X)
    Xt_median = agglo_median.transform(X)
    for reduced in (Xt_mean, Xt_median):
        assert_true(reduced.shape[1] == n_clusters)
    assert_true(Xt_mean == np.array([1 / 3.]))
    assert_true(Xt_median == np.array([0.]))

    # Test inverse transform
    X_full_mean = agglo_mean.inverse_transform(Xt_mean)
    X_full_median = agglo_median.inverse_transform(Xt_median)
    for restored in (X_full_mean, X_full_median):
        assert_true(np.unique(restored[0]).size == n_clusters)

    # Transforming the restored data must give back the reduced data.
    round_trips = (
        (agglo_mean, X_full_mean, Xt_mean),
        (agglo_median, X_full_median, Xt_median),
    )
    for agglo, restored, reduced in round_trips:
        assert_array_almost_equal(agglo.transform(restored), reduced)
def test_ward_agglomeration():
    """
    Check that we obtain the correct solution in a simplistic case
    """
    rng = np.random.RandomState(0)
    # Builtin ``bool`` replaces the ``np.bool`` alias, which NumPy removed.
    # The mask is only used for its shape.
    mask = np.ones([10, 10], dtype=bool)
    X = rng.randn(50, 100)
    connectivity = grid_to_graph(*mask.shape)

    # Constructing the deprecated estimator must warn.
    assert_warns(DeprecationWarning, WardAgglomeration)

    with ignore_warnings():
        ward = WardAgglomeration(n_clusters=5, connectivity=connectivity)
        ward.fit(X)

    # The replacement estimator must give the same labelling.
    agglo = FeatureAgglomeration(n_clusters=5, connectivity=connectivity)
    agglo.fit(X)
    assert_array_equal(agglo.labels_, ward.labels_)
    assert_true(np.size(np.unique(agglo.labels_)) == 5)

    X_red = agglo.transform(X)
    assert_true(X_red.shape[1] == 5)
    X_full = agglo.inverse_transform(X_red)
    assert_true(np.unique(X_full[0]).size == 5)
    assert_array_almost_equal(agglo.transform(X_full), X_red)

    # Check that fitting with no samples raises a ValueError
    assert_raises(ValueError, agglo.fit, X[:0])
def test_ward_agglomeration():
    """
    Check that we obtain the correct solution in a simplistic case
    """
    rnd = np.random.RandomState(0)
    # Builtin ``bool`` replaces the ``np.bool`` alias, which NumPy removed.
    # The mask is only used for its shape.
    mask = np.ones([10, 10], dtype=bool)
    X = rnd.randn(50, 100)
    connectivity = grid_to_graph(*mask.shape)

    # Constructing the deprecated estimator must warn.
    assert_warns(DeprecationWarning, WardAgglomeration)

    with ignore_warnings():
        ward = WardAgglomeration(n_clusters=5, connectivity=connectivity)
        ward.fit(X)

    # The replacement estimator must give the same labelling.
    agglo = FeatureAgglomeration(n_clusters=5, connectivity=connectivity)
    agglo.fit(X)
    assert_array_equal(agglo.labels_, ward.labels_)
    assert_true(np.size(np.unique(agglo.labels_)) == 5)

    X_red = agglo.transform(X)
    assert_true(X_red.shape[1] == 5)
    X_full = agglo.inverse_transform(X_red)
    assert_true(np.unique(X_full[0]).size == 5)
    assert_array_almost_equal(agglo.transform(X_full), X_red)

    # Check that fitting with no samples raises a ValueError
    assert_raises(ValueError, agglo.fit, X[:0])
def test_ward_agglomeration():
    """
    Check that we obtain the correct solution in a simplistic case
    """
    rnd = np.random.RandomState(0)
    # Builtin ``bool`` replaces the ``np.bool`` alias, which NumPy removed.
    # The mask is only used for its shape.
    mask = np.ones([10, 10], dtype=bool)
    X = rnd.randn(50, 100)
    connectivity = grid_to_graph(*mask.shape)

    # Constructing the deprecated estimator must warn.
    assert_warns(DeprecationWarning, WardAgglomeration)

    with warnings.catch_warnings(record=True) as warning_list:
        warnings.simplefilter("always", DeprecationWarning)
        if hasattr(np, 'VisibleDeprecationWarning'):
            # Let's not catch the numpy internal DeprecationWarnings
            warnings.simplefilter('ignore', np.VisibleDeprecationWarning)
        ward = WardAgglomeration(n_clusters=5, connectivity=connectivity)
        ward.fit(X)
        # Exactly one warning is expected from building and fitting.
        assert_equal(len(warning_list), 1)

    # The replacement estimator must give the same labelling.
    agglo = FeatureAgglomeration(n_clusters=5, connectivity=connectivity)
    agglo.fit(X)
    assert_array_equal(agglo.labels_, ward.labels_)
    assert_true(np.size(np.unique(agglo.labels_)) == 5)

    X_red = agglo.transform(X)
    assert_true(X_red.shape[1] == 5)
    X_full = agglo.inverse_transform(X_red)
    assert_true(np.unique(X_full[0]).size == 5)
    assert_array_almost_equal(agglo.transform(X_full), X_red)

    # Check that fitting with no samples raises a ValueError
    assert_raises(ValueError, agglo.fit, X[:0])
def test_ward_agglomeration():
    """
    Check that we obtain the correct solution in a simplistic case
    """
    rnd = np.random.RandomState(0)
    # Builtin ``bool`` replaces the ``np.bool`` alias, which NumPy removed.
    # The mask is only used for its shape.
    mask = np.ones([10, 10], dtype=bool)
    X = rnd.randn(50, 100)
    connectivity = grid_to_graph(*mask.shape)

    # Constructing the deprecated estimator must warn.
    assert_warns(DeprecationWarning, WardAgglomeration)

    with warnings.catch_warnings(record=True) as warning_list:
        warnings.simplefilter("always", DeprecationWarning)
        if hasattr(np, 'VisibleDeprecationWarning'):
            # Let's not catch the numpy internal DeprecationWarnings
            warnings.simplefilter('ignore', np.VisibleDeprecationWarning)
        ward = WardAgglomeration(n_clusters=5, connectivity=connectivity)
        ward.fit(X)
        # Exactly one warning is expected from building and fitting.
        assert_equal(len(warning_list), 1)

    # The replacement estimator must give the same labelling.
    agglo = FeatureAgglomeration(n_clusters=5, connectivity=connectivity)
    agglo.fit(X)
    assert_array_equal(agglo.labels_, ward.labels_)
    assert_true(np.size(np.unique(agglo.labels_)) == 5)

    X_red = agglo.transform(X)
    assert_true(X_red.shape[1] == 5)
    X_full = agglo.inverse_transform(X_red)
    assert_true(np.unique(X_full[0]).size == 5)
    assert_array_almost_equal(agglo.transform(X_full), X_red)

    # Check that fitting with no samples raises a ValueError
    assert_raises(ValueError, agglo.fit, X[:0])
def test_feature_agglomeration():
    # Fit, transform and inverse-transform a single 3-feature sample with
    # both mean and median pooling, collapsing all features into one cluster.
    #
    # Imported locally so this block does not rely on a module-level import.
    import warnings

    n_clusters = 1
    X = np.array([0, 0, 1]).reshape(1, 3)  # (n_samples, n_features)

    agglo_mean = FeatureAgglomeration(n_clusters=n_clusters, pooling_func=np.mean)
    agglo_median = FeatureAgglomeration(n_clusters=n_clusters, pooling_func=np.median)

    # ``pytest.warns(None)`` is no longer accepted by pytest, so record
    # warnings with the standard library and assert that none were emitted.
    with warnings.catch_warnings(record=True) as record:
        warnings.simplefilter("always")
        agglo_mean.fit(X)
    assert not [w.message for w in record]
    with warnings.catch_warnings(record=True) as record:
        warnings.simplefilter("always")
        agglo_median.fit(X)
    assert not [w.message for w in record]

    # One label per feature, and exactly n_clusters distinct labels.
    assert np.size(np.unique(agglo_mean.labels_)) == n_clusters
    assert np.size(np.unique(agglo_median.labels_)) == n_clusters
    assert np.size(agglo_mean.labels_) == X.shape[1]
    assert np.size(agglo_median.labels_) == X.shape[1]

    # Test transform
    Xt_mean = agglo_mean.transform(X)
    Xt_median = agglo_median.transform(X)
    assert Xt_mean.shape[1] == n_clusters
    assert Xt_median.shape[1] == n_clusters
    assert Xt_mean == np.array([1 / 3.0])
    assert Xt_median == np.array([0.0])

    # Test inverse transform
    X_full_mean = agglo_mean.inverse_transform(Xt_mean)
    X_full_median = agglo_median.inverse_transform(Xt_median)
    assert np.unique(X_full_mean[0]).size == n_clusters
    assert np.unique(X_full_median[0]).size == n_clusters

    # Transforming the restored data must give back the reduced data.
    assert_array_almost_equal(agglo_mean.transform(X_full_mean), Xt_mean)
    assert_array_almost_equal(agglo_median.transform(X_full_median), Xt_median)
def test_ward_agglomeration():
    # Check that we obtain the correct solution in a simplistic case
    rng = np.random.RandomState(0)
    # Builtin ``bool`` replaces the ``np.bool`` alias, which NumPy removed.
    # The mask is only used for its shape.
    mask = np.ones([10, 10], dtype=bool)
    X = rng.randn(50, 100)
    connectivity = grid_to_graph(*mask.shape)

    agglo = FeatureAgglomeration(n_clusters=5, connectivity=connectivity)
    agglo.fit(X)
    assert np.size(np.unique(agglo.labels_)) == 5

    X_red = agglo.transform(X)
    assert X_red.shape[1] == 5
    X_full = agglo.inverse_transform(X_red)
    assert np.unique(X_full[0]).size == 5
    # Transforming the restored data must give back the reduced data.
    assert_array_almost_equal(agglo.transform(X_full), X_red)

    # Check that fitting with no samples raises a ValueError
    with pytest.raises(ValueError):
        agglo.fit(X[:0])
goods = goods + [goodness] goods = pd.DataFrame(goods) avg = pd.concat([avg,goods],axis=1) print(avg) ''' ''' fa = FeatureAgglomeration(n_clusters=7).fit(X) newdata = fa.fit_transform(X) newdata = pd.DataFrame(newdata) print(X.head(10)) print(newdata.head(10)) ''' fa = FeatureAgglomeration(n_clusters=5).fit(X) newdata = fa.transform(X) recon = fa.inverse_transform(newdata) recon = pd.DataFrame(recon) print(reconError(X, recon)) print(pd.DataFrame(fa.labels_)) print(fa.n_leaves_) print(fa.n_components) print(pd.DataFrame(fa.children_)) ''' #Finds the K that maximizes AR score goods = [] for i in range(2,20): labels = KMeans(n_clusters=i).fit(newdata).labels_ labels_true = Y.tolist() goodness = metrics.adjusted_rand_score(labels_true,labels) goods.append([i,goodness]) print(pd.DataFrame(goods))
first_plot = plot_roi(
    labels_img,
    mean_func_img,
    title="Ward parcellation",
    display_mode='xz',
)

# labels_img is a Nifti1Image object; it can be saved to file with the
# following code:
labels_img.to_filename('parcellation.nii')

# Display the original data, reusing the cut coordinates of the
# parcellation plot.
plot_epi(
    nifti_masker.inverse_transform(fmri_masked[0]),
    cut_coords=first_plot.cut_coords,
    title='Original (%i voxels)' % fmri_masked.shape[1],
    display_mode='xz',
)

# A reduced dataset can be created by taking the parcel-level average.
# Note that, like many objects in scikit-learn, the ward object exposes
# a transform method that modifies input features. Here it reduces their
# dimension.
fmri_reduced = ward.transform(fmri_masked)

# Display the corresponding data compressed using the parcellation
fmri_compressed = ward.inverse_transform(fmri_reduced)
compressed_img = nifti_masker.inverse_transform(fmri_compressed[0])

plot_epi(
    compressed_img,
    cut_coords=first_plot.cut_coords,
    title='Compressed representation (2000 parcels)',
    display_mode='xz',
)

plt.show()
# Second, we illustrate the effect that the clustering has on the
# signal. We show the original data, and the approximation provided by
# the clustering by averaging the signal on each parcel.
#
# As you can see below, this approximation is very good, although there
# are only 2000 parcels, instead of the original 60000 voxels

# Display the original data; the colour range is pinned to the data's
# min/max so that both figures share the same scale.
plot_epi(
    nifti_masker.inverse_transform(fmri_masked[0]),
    cut_coords=cut_coords,
    title='Original (%i voxels)' % fmri_masked.shape[1],
    vmax=fmri_masked.max(),
    vmin=fmri_masked.min(),
    display_mode='xz',
)

# A reduced dataset can be created by taking the parcel-level average.
# Note that, like many objects in scikit-learn, the ward object exposes
# a transform method that modifies input features. Here it reduces their
# dimension.
fmri_reduced = ward.transform(fmri_masked)

# Display the corresponding data compressed using the parcellation
fmri_compressed = ward.inverse_transform(fmri_reduced)
compressed_img = nifti_masker.inverse_transform(fmri_compressed[0])

plot_epi(
    compressed_img,
    cut_coords=cut_coords,
    title='Compressed representation (2000 parcels)',
    vmax=fmri_masked.max(),
    vmin=fmri_masked.min(),
    display_mode='xz',
)

show()