def var():
    """Demonstrate feature selection with VarianceThreshold.

    Features whose variance is smaller than ``threshold`` are removed. The
    reduced matrix is printed, then passed through ``inverse_transform`` and
    printed again.
    """
    samples = [[0, 2, 0, 3], [0, 1, 4, 3], [0, 1, 1, 3]]
    selector = VarianceThreshold(threshold=0.5)

    reduced = selector.fit_transform(samples)
    print(reduced)

    restored = selector.inverse_transform(reduced)
    print(restored)
def test_VarianceThreshold():
    """Exercise VarianceThreshold on a small sample and print what it reports.

    Prints the fitted variances, the transformed data, the indices returned
    by ``get_support(True)`` and the result of ``inverse_transform``.

    :return: None
    """
    samples = [
        [100, 1, 2, 3],
        [100, 4, 5, 6],
        [100, 7, 8, 9],
        [101, 11, 12, 13],
    ]
    vt = VarianceThreshold(1)
    vt.fit(samples)
    print("Variances is %s" % vt.variances_)

    # Transform once and reuse the result for the round trip below.
    reduced = vt.transform(samples)
    print("After transform is %s" % reduced)
    print("The surport is %s" % vt.get_support(True))
    print("After reverse transform is %s" % vt.inverse_transform(reduced))
def varianceFilter(train_data, train_classes, threshold):
    """Variance filter.

    Vectorizes ``train_data`` with a ``DictVectorizer``, filters the result
    with a ``VarianceThreshold`` whose cut-off is
    ``threshold * (1 - threshold)``, and maps the filtered matrix back
    through both inverse transforms.

    :param train_data: input handed to ``DictVectorizer.fit_transform``.
    :param train_classes: accepted but not used by this function.
    :param threshold: value plugged into ``threshold * (1 - threshold)``.
    :return: the output of ``DictVectorizer.inverse_transform`` for the
        filtered data.
    """
    dict_vectorizer = DictVectorizer()
    # Fit the vectorizer on the training data and vectorize it in one step.
    vectorized = dict_vectorizer.fit_transform(train_data)

    variance_cutoff = threshold * (1 - threshold)
    selector = VarianceThreshold(threshold=variance_cutoff)
    filtered = selector.fit_transform(vectorized)

    restored = selector.inverse_transform(filtered)
    return dict_vectorizer.inverse_transform(restored)
def test_varianceThreshold():
    """Run VarianceThreshold on a fixed 4x4 sample and print its outputs."""
    from sklearn.feature_selection import VarianceThreshold

    rows = [
        [100, 1, 2, 3],
        [100, 4, 5, 6],
        [100, 7, 8, 9],
        [101, 11, 12, 13],
    ]
    threshold_selector = VarianceThreshold(1)
    threshold_selector.fit(rows)
    print("Variances is %s" % threshold_selector.variances_)

    kept = threshold_selector.transform(rows)
    print("After transform is %s" % kept)
    print("The support is %s" % threshold_selector.get_support(True))

    round_trip = threshold_selector.inverse_transform(kept)
    print("After reverse transform is %s" % round_trip)
def test_VarianceThreshold():
    """Show how VarianceThreshold is used.

    :return: None
    """
    data_rows = [
        [100, 1, 2, 3],
        [100, 4, 5, 6],
        [100, 7, 8, 9],
        [101, 11, 12, 13],
    ]
    # Attributes whose variance is below the threshold are removed.
    low_variance_filter = VarianceThreshold(threshold=1)
    low_variance_filter.fit(data_rows)

    print("Variances is %s" % low_variance_filter.variances_)
    transformed = low_variance_filter.transform(data_rows)
    print("After transform is %s" % transformed)
    print("The surport is %s" % low_variance_filter.get_support(True))
    print("After reverse transform is %s"
          % low_variance_filter.inverse_transform(transformed))
def test_VarianceThreshold():
    """Show how VarianceThreshold is used.

    :return: None
    """
    # Four features in total.
    matrix = [[100, 1, 2, 3],
              [100, 4, 5, 6],
              [100, 7, 8, 9],
              [101, 11, 12, 13]]
    sel = VarianceThreshold(1)
    sel.fit(matrix)

    # Variances is [ 0.1875 13.6875 13.6875 13.6875]
    print("Variances is %s" % sel.variances_)

    # The first feature is removed.
    shrunk = sel.transform(matrix)
    print("After transform is %s" % shrunk)

    # Indices of the features that were kept.
    print("The surport is %s" % sel.get_support(True))

    # Removed features are filled with 0.
    print("After reverse transform is %s" % sel.inverse_transform(shrunk))
def variance_threshold():
    """Print a VarianceThreshold walk-through on a fixed 4x5 sample.

    Output, in order: the input, the fitted variances, the transformed data,
    the indices from ``get_support(True)`` and the ``inverse_transform``
    result, each of the last four preceded by a blank line.
    """
    sample = [[1, -2, 3, 4, 5.],
              [3, 4, -5, 6, 7],
              [1, 7, 2, -6, 2],
              [3, 8, 6, 2, -8]]
    print(sample)

    vt = VarianceThreshold(threshold=2)
    vt.fit(sample)

    # print("", value, sep="\n") emits an empty line followed by the value,
    # i.e. the same bytes as print() followed by print(value).
    print("", vt.variances_, sep="\n")

    reduced = vt.transform(sample)
    print("", reduced, sep="\n")
    print("", vt.get_support(True), sep="\n")
    print("", vt.inverse_transform(reduced), sep="\n")
def variance(train, validate, test, threshold=1):
    """Fit a VarianceThreshold on ``train`` and apply it to all three splits.

    The selector is fitted on ``train`` only; ``validate`` and ``test`` are
    transformed with that same fitted selector. Diagnostic information
    (variances, retained indices, transformed training data and its inverse
    transform) is printed along the way.

    :param train: data used to fit the selector.
    :param validate: data transformed with the fitted selector.
    :param test: data transformed with the fitted selector.
    :param threshold: variance cut-off passed to ``VarianceThreshold``.
        Defaults to 1, the value that used to be hard-coded.
    :return: tuple ``(train, validate, test)`` after ``selector.transform``.
    """
    selector = VarianceThreshold(threshold)
    selector.fit(train)
    train1 = selector.transform(train)

    print("Variances is %s" % selector.variances_)
    print("The support is %s" % selector.get_support(True))
    print("After transform is %s" % train1)
    # Reuse the already transformed training data instead of transforming
    # ``train`` a second time.
    print("After reverse transform is %s" % selector.inverse_transform(train1))

    validate = selector.transform(validate)
    test = selector.transform(test)
    return train1, validate, test
oasis_dataset.white_matter_maps[0]) # 3D data ############################################################################# # Preprocess data # ---------------- nifti_masker = NiftiMasker(standardize=False, smoothing_fwhm=2, memory='nilearn_cache') # cache options gm_maps_masked = nifti_masker.fit_transform(gm_imgs_train) # The features with too low between-subject variance are removed using # :class:`sklearn.feature_selection.VarianceThreshold`. from sklearn.feature_selection import VarianceThreshold variance_threshold = VarianceThreshold(threshold=.01) gm_maps_thresholded = variance_threshold.fit_transform(gm_maps_masked) gm_maps_masked = variance_threshold.inverse_transform(gm_maps_thresholded) # Then we convert the data back to the mask image in order to use it for # decoding process mask = nifti_masker.inverse_transform(variance_threshold.get_support()) ############################################################################ # Prediction pipeline with ANOVA and SVR using # :class:`nilearn.decoding.DecoderRegressor` Object # In nilearn we can benefit from the built-in DecoderRegressor object to # do ANOVA with SVR instead of manually defining the whole pipeline. # This estimator also uses Cross Validation to select best models and ensemble # them. Furthermore, you can pass n_jobs=<some_high_value> to the # DecoderRegressor class to take advantage of a multi-core system. # To save time (because these are anat images with many voxels), we include
('anova', feature_selection), ('svr', svr)]) ### Fit and predict anova_svr.fit(gm_maps_masked, age) age_pred = anova_svr.predict(gm_maps_masked) ############################################################################# # Visualization # -------------- # Look at the SVR's discriminating weights coef = svr.coef_ # reverse feature selection coef = feature_selection.inverse_transform(coef) # reverse variance threshold coef = variance_threshold.inverse_transform(coef) # reverse masking weight_img = nifti_masker.inverse_transform(coef) # Create the figure from nilearn.plotting import plot_stat_map, show bg_filename = gray_matter_map_filenames[0] z_slice = 0 fig = plt.figure(figsize=(5.5, 7.5), facecolor='k') # Hard setting vmax to highlight weights more display = plot_stat_map(weight_img, bg_img=bg_filename, display_mode='z', cut_coords=[z_slice], figure=fig, vmax=1) display.title('SVM weights', y=1.2)
# Mass-univariate inference: mask the grey-matter maps, drop features with
# VarianceThreshold, run a permutation test and plot the signed
# -log10 p-values.
# NOTE(review): a fresh NiftiMasker() is fitted here, while the pre-existing
# `nifti_masker` is used for the inverse transform further down -- confirm
# both describe the same mask.
gm_maps_masked = NiftiMasker().fit_transform(gray_matter_map_filenames)
data = variance_threshold.fit_transform(gm_maps_masked)

# Statistical inference
from nilearn.mass_univariate import permuted_ols
neg_log_pvals, t_scores_original_data, _ = permuted_ols(
    age, data,  # + intercept as a covariate by default
    n_perm=2000,  # 2,000 in the interest of time; 10000 would be better
    verbose=1,  # display progress bar
    n_jobs=1)  # can be changed to use more CPUs
# Give each -log10 p-value the sign of its t-score.
signed_neg_log_pvals = neg_log_pvals * np.sign(t_scores_original_data)
# Undo the variance threshold first, then the masking.
signed_neg_log_pvals_unmasked = nifti_masker.inverse_transform(
    variance_threshold.inverse_transform(signed_neg_log_pvals))

# Show results
threshold = -np.log10(0.1)  # 10% corrected

fig = plt.figure(figsize=(5.5, 7.5), facecolor='k')

display = plot_stat_map(signed_neg_log_pvals_unmasked, bg_img=bg_filename,
                        threshold=threshold, cmap=plt.cm.RdBu_r,
                        display_mode='z', cut_coords=[z_slice],
                        figure=fig)
title = ('Negative $\\log_{10}$ p-values'
         '\n(Non-parametric + max-type correction)')
print(df.head())
print(df.shape)
print(df.columns)

# First 13 columns are used as features, column 13 as the target.
array = df.values
X = array[:, :13]
Y = array[:, 13]

# VarianceThreshold is a feature selector that removes all low-variance
# features. It looks only at the features (X), not the desired outputs (y),
# and can thus be used for unsupervised learning.
from sklearn.feature_selection import VarianceThreshold

# Cut-off of .9 * (1 - .9), described by the author as a 90% threshold.
sel = VarianceThreshold(threshold=(.9 * (1 - .9)))
varTh = sel.fit(X, Y)

numpy.set_printoptions(precision=3)
print(sel.variances_)
# Print the variances again, one by one, separated by spaces.
for feature_variance in sel.variances_:
    print(feature_variance, end=" ")
print(X.shape)

featTransformed = sel.transform(X)
print(df.columns)
print(featTransformed.shape)
#print(featTransformed[0:5,:])

featBack = sel.inverse_transform(featTransformed)
#print(featBack[0:5,:])
# -*-coding:utf-8-*-
# @auth ivan
# @time 20200611
# @goal test 054.Test_Feature_selection

from sklearn.feature_selection import SelectKBest, VarianceThreshold, f_classif

# Walk-through of VarianceThreshold on a 4x4 sample; the expected console
# output is reproduced in the comment block below.
X = [
    [100, 1, 2, 3],
    [100, 4, 5, 6],
    [100, 7, 8, 9],
    [101, 11, 12, 13],
]
selector = VarianceThreshold(1)
selector.fit(X)
print('Variances is %s' % selector.variances_)

# Transform once and reuse the result for the inverse transform.
reduced = selector.transform(X)
print('After transform is \n%s' % reduced)

# get_support(True) yields indices, get_support(False) a boolean mask.
print('The surport is %s' % selector.get_support(True))
print('The surport is %s' % selector.get_support(False))
print('After reverse transform is \n%s' % selector.inverse_transform(reduced))

# Variances is [ 0.1875 13.6875 13.6875 13.6875]
# After transform is
# [[ 1 2 3]
# [ 4 5 6]
# [ 7 8 9]
# [11 12 13]]
# The surport is [1 2 3]
# The surport is [False True True True]
# After reverse transform is
# [[ 0 1 2 3]
# [ 0 4 5 6]
# [ 0 7 8 9]
# [ 0 11 12 13]]

X = [
    [1, 2, 3, 4, 5],
    [5, 4, 3, 2, 1],
    [3, 3, 3, 3, 3],
    [1, 1, 1, 1, 1],
]
__author__ = 'ctiwary'

import numpy as np
from sklearn.feature_selection import VarianceThreshold

# http://scikit-learn.org/stable/modules/classes.html#module-sklearn.feature_selection
# user guide http://scikit-learn.org/stable/modules/feature_selection.html#feature-selection

# Feature selection based on VarianceThreshold.
# All output uses single-argument print(...) calls, which behave the same on
# Python 2 and Python 3 (the former print statements were Python 2 only).
X = [[0, 0, 1], [0, 1, 0], [1, 0, 0], [0, 1, 1], [0, 1, 0], [0, 1, 1]]
print(np.shape(X))

sel = VarianceThreshold(threshold=(.8 * (1 - .8)))
features_list = sel.fit_transform(X)
print(features_list)
print(sel.inverse_transform(features_list))
print(sel.get_support())
print(dir(sel))

# Univariate feature selection
from sklearn.datasets import load_iris
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import chi2

iris = load_iris()
X, y = iris.data, iris.target
print(X.shape)

# Keep the two features selected by SelectKBest with the chi2 score function.
X_new = SelectKBest(chi2, k=2).fit_transform(X, y)
print(X_new.shape)
from sklearn.feature_selection import VarianceThreshold
import DataReader
import pandas as pd

# Load the control recording and wrap the selected slice in a DataFrame with
# one named column per feature.
control_data = DataReader.DataShow("./data/control_rawdata.npy")
control_data.load_data()
features = ['C', 'I', 'O', 'F', 'M', 'LTXE', 'CIOFM', 'CIOFMLTXE', 'Scorr',
            'Var', 'EyeMvt']
data = pd.DataFrame(control_data.data[0][0][17:, 3:14], columns=features)

selector = VarianceThreshold(1)
X = data.to_numpy()
selector.fit(X)

# Query the selector once instead of once per feature name.
selected_indices = selector.get_support(True)
support_mask = selector.get_support(False)
# Pair each feature name with its mask entry; this yields the same names as
# testing every index for membership in the selected indices.
selected_features = [name for name, keep in zip(features, support_mask) if keep]

print('Variances is %s' % selector.variances_)
print('After transform is \n%s' % selector.transform(X))
# With True, the indices of the selected features are returned.
print('The surport is %s' % selected_indices)
# With False, a boolean list is returned that tells whether each column was
# selected.
print('The surport is %s' % support_mask)
print('The feature is %s' % selected_features)
print('After reverse transform is \n%s'
      % selector.inverse_transform(selector.transform(X)))
# Exported IPython session history (note the get_ipython().run_line_magic
# calls): an interactive exploration of VarianceThreshold followed by a
# LinearRegression fit. `boston`, `df` and `X` come from earlier in the
# session. Statements are kept in their original order; bare expressions
# were used to display values in the interactive shell.
y = boston.target
from sklearn.linear_model import LinearRegression
# Fraction given by the 'CHAS' column sum over the row count, then p * (1 - p).
p = df['CHAS'].sum() / df.shape[0]
p * (1 - p)
get_ipython().run_line_magic('pinfo', 'VarianceThreshold')
vt = VarianceThreshold(50)
x_lt = vt.fit_transform(X)
x_lt
x_lt.shape
# Fit and score a linear regression on the variance-filtered features.
lr = LinearRegression()
lr.fit(x_lt, y)
get_ipython().run_line_magic('pinfo', 'lr.score')
lr.score(x_lt, y)
get_ipython().run_line_magic('pinfo', 'VarianceThreshold')
vt.variances_
# NOTE(review): inverse_transform is given the full X rather than the reduced
# x_lt -- presumably this fails with a shape mismatch if any feature was
# dropped; confirm before reusing.
x_lv = vt.inverse_transform(X)
X.shape
vt.fit_transform(X)
x_lv = vt.inverse_transform(X)
get_ipython().run_line_magic('pinfo', 'vt.inverse_transform')
get_ipython().run_line_magic('ls', '')
get_ipython().run_line_magic('pinfo', 'VarianceThreshold')
vt
vt.get_support
# NOTE(review): `np` is used here before the `import numpy as np` below --
# presumably this first attempt failed unless numpy was imported earlier in
# the session.
vt.get_support(np.arange(X.shape[1]))
import numpy as np
vt.get_support(np.arange(X.shape[1]))
vt.get_support()
# NOTE(review): `_` is presumably IPython's last-result variable, i.e. the
# value displayed by the previous get_support() call -- this line would not
# work the same way as a plain script.
x_lv = X[:, _]
x_lv.shape
get_ipython().run_line_magic('whos', '')