def Laplacian_score(diheds):
    import scipy.io
    import numpy
    import os
    # os.chdir('/home/anu/Downloads/scikit-feature-1.0.0')
    from skfeature.function.similarity_based import lap_score
    from skfeature.utility import construct_W
    from numpy import mean

    kwargs_W = {"metric": "euclidean", "neighbor_mode": "knn",
                "weight_mode": "heat_kernel", "k": 5, 't': 1}
    idx = []
    # change the path for every system to be run.
    # os.chdir('/home/anu/Downloads/traj_benz_trypsin/')
    for i in range(len(diheds)):
        X = diheds[i]
        W = construct_W.construct_W(X, **kwargs_W)
        score = lap_score.lap_score(X, W=W)
        idx.append(score)
    col_mean = mean(idx, axis=0)
    imp_features = numpy.argsort(col_mean)
    return col_mean, imp_features
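# A minimal, hypothetical driver for Laplacian_score above. It assumes
# `diheds` is a list of per-trajectory feature matrices of shape
# (n_frames, n_features) with a shared feature count, and that skfeature
# is installed; the random arrays stand in for real dihedral data.
import numpy as np

rng = np.random.default_rng(0)
diheds = [rng.normal(size=(100, 20)) for _ in range(3)]  # 3 mock trajectories

col_mean, imp_features = Laplacian_score(diheds)
# scores are averaged over trajectories; argsort is ascending and a smaller
# Laplacian score marks a more important feature
print("five most important features:", imp_features[:5])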
def utilize_selection_method(self, options):
    logging.info(' Unsupervised Feature Selection : Start')
    self.parse_options(options)
    normalize_feature = SupervisedFs.normalize_feature(self.data_feature)
    feature_amount = len(self.data_feature[0])
    selection_result = {}
    if self.options['v'] == 1:
        widget = ['Calculating Variance : ', pb.Percentage(), ' ',
                  pb.Bar(marker=pb.RotatingMarker()), ' ', pb.ETA()]
        timer = pb.ProgressBar(widgets=widget, maxval=feature_amount).start()
        variance = []
        for n in range(0, feature_amount):
            variance.append([np.var(normalize_feature[:, n]), n + 1])
            timer.update(n)
        timer.finish()
        selection_result['variance'] = sorted(variance, reverse=True)
    if self.options['l'] == 1:
        logging.info(' -----Calculating Laplacian score---- ')
        # construct_W expects the key 'neighbor_mode' (not 'neighbor')
        kwargs_w = {'metric': 'euclidean', 'neighbor_mode': 'knn',
                    'weight_mode': 'heat_kernel', 'k': 5, 't': 1}
        W = construct_W.construct_W(self.data_feature, **kwargs_w)
        score = lap_score.lap_score(self.data_feature, W=W)
        lap = []
        for n in range(0, feature_amount):
            lap.append([score[n], n + 1])
        # a smaller Laplacian score is better, hence the ascending sort
        selection_result['laplacian'] = sorted(lap, reverse=False)
        logging.info(' -----Calculating Laplacian score---- ==> Done')
    if self.options['s'] == 1:
        logging.info(' -----Calculating Spectral score---- ')
        kwargs_w = {'metric': 'euclidean', 'neighbor_mode': 'knn',
                    'weight_mode': 'heat_kernel', 'k': 5, 't': 1}
        W = construct_W.construct_W(self.data_feature, **kwargs_w)
        kwargs_s = {'style': 2, 'W': W}
        score = SPEC.spec(self.data_feature, **kwargs_s)
        spec = []
        for n in range(0, feature_amount):
            spec.append([score[n], n + 1])
        selection_result['spectral'] = sorted(spec, reverse=True)
        logging.info(' -----Calculating Spectral score---- ==> Done')
    return selection_result
def selectFeatureLapScore(filename, num_feature, num_cluster):
    # Load the pickles saved to disk with the samples and ALL the features
    # extracted by TSFresh. This is what we will work on.
    all_features_train = pd.read_pickle(
        "./pickle/feature_complete/TRAIN/{0}_TRAIN_FeatureComplete.pkl".format(filename))
    all_features_test = pd.read_pickle(
        "./pickle/feature_complete/TEST/{0}_TEST_FeatureComplete.pkl".format(filename))

    # Drop columns containing NaN values
    all_features_train = all_features_train.dropna(axis=1)
    all_features_test = all_features_test.dropna(axis=1)

    # Build the affinity matrix W for the Laplacian score
    kwargs_W = {"metric": "euclidean", "neighbor_mode": "knn",
                "weight_mode": "heat_kernel", "k": 5, 't': 1}
    W = construct_W.construct_W(all_features_train.values, **kwargs_W)

    # Run the Laplacian score algorithm; we obtain a weight for each feature
    featurePesate = lap_score.lap_score(all_features_train.values, W=W)

    # Rank the features (most important first)
    idx = lap_score.feature_ranking(featurePesate)
    idxSelected = idx[0:num_feature]  # keep the desired number of features

    # Extract the names of the selected features
    nomiFeatureSelezionate = []
    for i in idxSelected:
        nomiFeatureSelezionate.append(all_features_train.columns[i])

    # Build the dataframe containing only the selected features
    dataframeFeatureSelezionate = all_features_train.loc[:, nomiFeatureSelezionate]

    # Restrict the test set to the selected features as well
    all_features_test = all_features_test.loc[:, nomiFeatureSelezionate]

    # Extract the known class labels
    labelConosciute = estrattoreClassiConosciute.estraiLabelConosciute(
        "./UCRArchive_2018/{0}/{0}_TEST.tsv".format(filename))

    # K-means on the selected features
    print("\nResults with features selected by us with Lap Score")
    print("Number of features: {0}".format(all_features_test.shape[1]))
    testFeatureSelection(X_selected=dataframeFeatureSelezionate.values,
                         X_test=all_features_test.values,
                         num_clusters=num_cluster,
                         y=labelConosciute)
def lap():
    before = datetime.datetime.now()
    # lap_score overwrites its input, so pass copies
    # (note: the mode= argument relies on a lap_score variant; the stock
    # skfeature lap_score only takes X and an optional W)
    result = lap_score.lap_score(data.copy(), labels.copy(), mode="index")
    after = datetime.datetime.now()
    print("Laplacian")
    result = result[:treshold]
    print(len(result))
    print("time: " + str(after - before))
    print('\n')
    if len(result) < len(header):
        transform_and_save(result, "Laplacian")
def calc_lap_score(data):
    kwargs_W = {"metric": "euclidean", "neighbor_mode": "knn",
                "weight_mode": "heat_kernel", "k": 5, 't': 1}
    W = construct_W.construct_W(data, **kwargs_W)
    return lap_score.lap_score(data, W=W)
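# Illustrative call to calc_lap_score on synthetic data (shapes made up);
# assumes construct_W and lap_score from skfeature are imported as above.
import numpy as np

X = np.random.rand(100, 25)        # 100 samples, 25 features
scores = calc_lap_score(X)
best_first = np.argsort(scores)    # ascending: smaller score = better feature
print(best_first[:5])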
def SKF_lap(X, y):
    # construct affinity matrix
    kwargs_W = {"metric": "euclidean", "neighbor_mode": "knn",
                "weight_mode": "heat_kernel", "k": 5, 't': 1}
    W = construct_W(X, **kwargs_W)
    # obtain the scores of features
    score = lap_score.lap_score(X, W=W)
    return lap_score.feature_ranking(score)
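# Hypothetical use of SKF_lap: y is accepted only to match a common selector
# interface; the Laplacian score itself is unsupervised and never reads it.
import numpy as np

X = np.random.rand(120, 30)
y = np.random.randint(0, 3, 120)   # ignored by the Laplacian score
ranking = SKF_lap(X, y)            # feature indices, most important first
X_sel = X[:, ranking[:10]]         # keep the 10 top-ranked features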
def get_lap_score(data, k=5, t=1, top_feature=30):
    kwargs_W = {"metric": "euclidean", "neighbor_mode": "knn",
                "weight_mode": "heat_kernel", "k": k, 't': t}
    W = construct_W.construct_W(data, **kwargs_W)
    score = lap_score.lap_score(data, W=W)
    ranking = lap_score.feature_ranking(score)
    dfscores = pd.DataFrame(score)
    dfcolumns = pd.DataFrame(data.columns)
    featureScores = pd.concat([dfcolumns, dfscores], axis=1)
    featureScores.columns = ['Feature', 'Score']  # name the dataframe columns
    # lap_score treats smaller scores as more important (feature_ranking
    # sorts ascending), so take the smallest scores rather than the largest
    result = featureScores.nsmallest(top_feature, 'Score')
    return result, ranking
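# get_lap_score reads data.columns, so it expects a pandas DataFrame. A
# hypothetical call on synthetic data, mirroring how the function is used:
import numpy as np
import pandas as pd

df = pd.DataFrame(np.random.rand(100, 12),
                  columns=["f{}".format(i) for i in range(12)])
top, ranking = get_lap_score(df, k=5, t=1, top_feature=5)
print(top)          # the 5 best features (smallest Laplacian scores)
print(ranking[:5])  # same ordering as skfeature's feature_ranking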
def laplacian_score(X, y=None, **kwargs):
    # construct affinity matrix
    kwargs_W = {"metric": "euclidean", "neighbor_mode": "knn",
                "weight_mode": "heat_kernel", "k": 5, 't': 1}
    W = construct_W.construct_W(X, **kwargs_W)
    # obtain the scores of features
    score = lap_score.lap_score(X, W=W)
    # rank the features in ascending order of their scores
    idx = lap_score.feature_ranking(score)
    return idx
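# Usage sketch for the laplacian_score wrapper above; the data shapes are
# illustrative only.
import numpy as np

X = np.random.rand(150, 40)   # 150 samples, 40 features
idx = laplacian_score(X)      # full ranking, most important feature first
X_top10 = X[:, idx[:10]]      # keep the 10 best-ranked features
print(X_top10.shape)          # (150, 10)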
def main():
    # load data
    mat = scipy.io.loadmat('../data/COIL20.mat')
    X = mat['X']  # data
    X = X.astype(float)
    y = mat['Y']  # label
    y = y[:, 0]

    # construct affinity matrix (skfeature's module is construct_W)
    kwargs_W = {"metric": "euclidean", "neighbor_mode": "knn",
                "weight_mode": "heat_kernel", "k": 5, 't': 1}
    W = construct_W.construct_W(X, **kwargs_W)

    # obtain the scores of features
    score = lap_score.lap_score(X, W=W)

    # rank the features in ascending order of their scores
    idx = lap_score.feature_ranking(score)

    # perform evaluation on clustering task
    num_fea = 100      # number of selected features
    num_cluster = 20   # usually set to the number of classes in the ground truth

    # obtain the dataset on the selected features
    selected_features = X[:, idx[0:num_fea]]

    # perform k-means clustering on the selected features, repeated 20 times
    nmi_total = 0
    acc_total = 0
    for i in range(0, 20):
        nmi, acc = unsupervised_evaluation.evaluation(
            X_selected=selected_features, n_clusters=num_cluster, y=y)
        nmi_total += nmi
        acc_total += acc

    # output the average NMI and average ACC
    print('NMI:', old_div(float(nmi_total), 20))
    print('ACC:', old_div(float(acc_total), 20))
def lap_score_filtering(self, vt_data, num_features):
    vt_numpy = vt_data.to_numpy()
    # construct affinity matrix
    kwargs_W = {"metric": "cosine", "neighbor_mode": "knn",
                "weight_mode": "cosine", "k": 40, 't': 500}
    print("We perform Laplacian score filtering using the following parameters: "
          + str(kwargs_W))
    W = construct_W.construct_W(vt_numpy, **kwargs_W)
    score = lap_score.lap_score(vt_numpy, W=W)
    idx = lap_score.feature_ranking(score)  # rank features
    filtered_data = vt_data.iloc[:, idx[0:num_features]].copy()
    print("\nThe data now has " + str(len(filtered_data.T))
          + " features after Laplacian score filtering.")
    return filtered_data
def plot_ls_after_vt_filtering(self, threshold):
    data = self.test_reddy_dataset.expression_data.copy()
    vt_data = self.variance_threshold_selector(data, threshold)

    # perform Laplacian score filtering
    vt_numpy = vt_data.to_numpy()
    # construct affinity matrix
    kwargs_W = {"metric": "cosine", "neighbor_mode": "knn",
                "weight_mode": "cosine", "k": 40, 't': 500}
    print("We plot the Laplacian scores of the features using the following "
          "affinity matrix parameters: " + str(kwargs_W))
    W = construct_W.construct_W(vt_numpy, **kwargs_W)
    # compute the Laplacian score of each remaining feature
    score = lap_score.lap_score(vt_numpy, W=W)
    self.plot_lap_scores(score)
def main():
    # load data
    mat = scipy.io.loadmat("../data/COIL20.mat")
    X = mat["X"]  # data
    X = X.astype(float)
    y = mat["Y"]  # label
    y = y[:, 0]

    # construct affinity matrix
    kwargs_W = {"metric": "euclidean", "neighbor_mode": "knn",
                "weight_mode": "heat_kernel", "k": 5, "t": 1}
    W = construct_W.construct_W(X, **kwargs_W)

    # obtain the scores of features
    score = lap_score.lap_score(X, W=W)

    # rank the features in ascending order of their scores
    idx = lap_score.feature_ranking(score)

    # perform evaluation on clustering task
    num_fea = 100      # number of selected features
    num_cluster = 20   # usually set to the number of classes in the ground truth

    # obtain the dataset on the selected features
    selected_features = X[:, idx[0:num_fea]]

    # perform k-means clustering on the selected features, repeated 20 times
    nmi_total = 0
    acc_total = 0
    for i in range(0, 20):
        nmi, acc = unsupervised_evaluation.evaluation(
            X_selected=selected_features, n_clusters=num_cluster, y=y)
        nmi_total += nmi
        acc_total += acc

    # output the average NMI and average ACC (print() for Python 3)
    print("NMI:", float(nmi_total) / 20)
    print("ACC:", float(acc_total) / 20)
def predict(self, X):
    """
    :param X: shape [n_row*n_clm, n_band]
    :return: the data restricted to the selected bands
    """
    # n_row, n_column, __n_band = X.shape
    # XX = X.reshape((n_row * n_column, -1))  # n_sample * n_band
    XX = X
    kwargs_W = {"metric": "euclidean", "neighbor_mode": "knn",
                "weight_mode": "heat_kernel", "k": 5, 't': 1}
    W = construct_W.construct_W(XX, **kwargs_W)
    # obtain the scores of features (score XX, the same matrix W was built from)
    score = lap_score.lap_score(XX, W=W)
    # rank the features in ascending order of their scores
    idx = lap_score.feature_ranking(score)
    # obtain the dataset on the selected features
    selected_features = X[:, idx[0:self.n_band]]
    # selected_features.reshape((self.n_band, n_row, n_column))
    # selected_features = np.transpose(selected_features, axes=(1, 2, 0))
    return selected_features
print('fs')

# ----- Apply feature selection methods: ReliefF, Laplacian score & Fisher -----
# ReliefF
score_rel = reliefF.reliefF(X_train, y_train)
idx_rel = reliefF.feature_ranking(score_rel)

# Laplacian score
kwargs_W = {"metric": "euclidean", "neighbor_mode": "knn",
            "k": 7, 't': 1, 'reliefF': True}
W = construct_W.construct_W(X_train, **kwargs_W)
score_lap = lap_score.lap_score(X_train, W=W)
idx_lap = lap_score.feature_ranking(score_lap)

# Fisher
score_fish = fisher_score.fisher_score(X_train, y_train)
print(score_fish)
idx_fish = fisher_score.feature_ranking(score_fish)

# ----- Feature integration -----
idxM = idx_rel[:threshold]
idxN = idx_lap[:threshold]
idxO = idx_fish[:threshold]
if combination_method == 1:  # AND
    idx_and = reduce(np.intersect1d, (idxO, idxM, idxN))
    idx = idx_and
    print("number of selected features (bins) = ", idx.shape[0])
def lapscore_main():
    # iterate the whole process over the 10 subsamples
    for index, subsample in enumerate(X_testset):
        # construct affinity matrix
        kwargs_W = {"metric": "euclidean", "neighbor_mode": "knn",
                    "weight_mode": "heat_kernel", "k": 5, 't': 1}
        W = construct_W.construct_W(subsample, **kwargs_W)

        # obtain the feature ranking (note: mode="rank" relies on a lap_score
        # variant; the stock skfeature lap_score returns scores, not ranks)
        idx = lap_score.lap_score(subsample, mode="rank", W=W)

        # obtain the array of variables through ranking
        X_col_list = X_test_full.columns.values.tolist()
        prepare_list['lap_ranked_Xtestset' + str(index)] = get_variable_rank(idx, X_col_list)
        ranked_var_filename = 'lap_ranked_Xtestset' + str(index) + '.txt'
        f_rank = open(ranked_var_filename, 'w')
        f_rank.write(str(prepare_list['lap_ranked_Xtestset' + str(index)]))
        f_rank.close()

        # perform evaluation on clustering task
        range_num_fea = range(10, 210, 10)            # number of selected features
        range_n_clusters = [3, 4, 5, 6, 7, 8, 9, 10]  # number of clusters

        # dynamically generate dictionaries to store results
        prepare_list['lapscore_criteria' + str(index)] = {
            'silhouette_score': [], 'ch_score': [], 'db_score': []}

        # decide the optimal num_cluster and the optimal number of selected features
        for n_cluster in range_n_clusters:
            for num_features in range_num_fea:
                # obtain the dataset on the selected features
                selected_features = subsample[:, idx[0:num_features]]

                # initialize the clusterer with n_clusters value and a random
                # generator seed of 10 for reproducibility
                clusterer = KMeans(n_clusters=n_cluster, random_state=10)
                cluster_labels = clusterer.fit_predict(selected_features)

                # the silhouette score gives the average value for all samples;
                # this reflects the density and separation of the formed clusters
                silhouette_avg = metrics.silhouette_score(
                    selected_features, cluster_labels, metric='euclidean')
                prepare_list['lapscore_criteria' + str(index)]['silhouette_score'].append(silhouette_avg)

                # the Calinski-Harabasz index is applied to the clustering result
                ch_idx = metrics.calinski_harabasz_score(selected_features, cluster_labels)
                prepare_list['lapscore_criteria' + str(index)]['ch_score'].append(ch_idx)

                # the Davies-Bouldin index is applied to the clustering result
                db_idx = davies_bouldin_score(selected_features, cluster_labels)
                prepare_list['lapscore_criteria' + str(index)]['db_score'].append(db_idx)

                print("subset No.", index, ","
                      "For n_clusters =", n_cluster, ","
                      "For num_features =", num_features, ","
                      "the average silhouette_score is: ", silhouette_avg, ","
                      "the Calinski-Harabasz index is: ", ch_idx, ","
                      "the Davies-Bouldin index is: ", db_idx)

    lapscore_silhouette_score = generate_criteria_tb(
        dict_name='lapscore_criteria', col_name='silhouette_score')
    lapscore_Calinski_Harabasz_index = generate_criteria_tb(
        dict_name='lapscore_criteria', col_name='ch_score')
    lapscore_Davies_Bouldin_index = generate_criteria_tb(
        dict_name='lapscore_criteria', col_name='db_score')
    lapscore_silhouette_score.to_csv('lapscore_silhouette_score.csv', index=False)
    lapscore_Calinski_Harabasz_index.to_csv('lapscore_Calinski_Harabasz_index.csv', index=False)
    lapscore_Davies_Bouldin_index.to_csv('lapscore_Davies_Bouldin_index.csv', index=False)
def bench(self, X, X_norm, y, n=2):
    num_feats = 20
    output_data = {'method': list(), 'features': list(), 'time': list(),
                   self.test_att: list(), 'supervised': list()}

    # ----------------------------------------------------------------
    # CFS
    # start = time.perf_counter()
    # idx = cfs(X_norm.to_numpy(), y.to_numpy())[0]
    # print(idx)
    # selected_features = X_norm.iloc[:, idx[0: num_feats]].columns.tolist()
    # output_data['method'].append('CFS')
    # output_data['time'].append(time.perf_counter() - start)
    # output_data['features'].append(selected_features)
    # output_data[self.test_att].append(self.train_real_data(selected_features, X))

    # LA: Laplacian Score
    start = time.perf_counter()
    kwargs_W = {"metric": "euclidean", "neighbor_mode": "knn",
                "weight_mode": "heat_kernel", "k": 5, 't': 1}
    W = construct_W.construct_W(X_norm.to_numpy(), **kwargs_W)
    score = lap_score.lap_score(X_norm.to_numpy(), W=W)
    idx = lap_score.feature_ranking(score)
    selected_features = X_norm.iloc[:, idx[0: num_feats]].columns.tolist()
    output_data['method'].append('Laplacian Score')
    output_data['time'].append(time.perf_counter() - start)
    output_data['features'].append(selected_features)
    output_data['supervised'].append(False)
    output_data[self.test_att].append(self.train_real_data(selected_features, X))
    print(output_data)

    # FCBF: Fast Correlation-Based Filter
    # start = time.perf_counter()
    # idx = fcbf(X_norm.to_numpy(), y.to_numpy(), n_selected_features=num_feats)[0]
    # selected_features = X_norm.iloc[:, idx[0: num_feats]].columns.tolist()
    # output_data['method'].append('FCBF')
    # output_data['time'].append(time.perf_counter() - start)
    # output_data['features'].append(selected_features)
    # output_data['supervised'].append(True)
    # output_data[self.test_att].append(self.train_real_data(selected_features, X))
    # print(output_data)
    # output_data['method'].append('FCBF')
    # output_data['time'].append(9999999)
    # output_data['features'].append([])
    # output_data['supervised'].append(True)
    # output_data[self.test_att].append(0.0)

    # UDFS: Unsupervised Discriminative Feature Selection
    start = time.perf_counter()
    Weight = udfs(X_norm.to_numpy(), gamma=0.1, n_clusters=n)
    idx = feature_ranking(Weight)
    selected_features = X_norm.iloc[:, idx[0: num_feats]].columns.tolist()
    output_data['method'].append('UDFS')
    output_data['time'].append(time.perf_counter() - start)
    output_data['features'].append(selected_features)
    output_data['supervised'].append(False)
    output_data[self.test_att].append(self.train_real_data(selected_features, X))
    print(output_data)

    # SPEC: Spectral Feature Selection
    start = time.perf_counter()
    score = spec(X_norm.to_numpy())
    idx = feature_ranking_spec(score)
    selected_features = X_norm.iloc[:, idx[0: num_feats]].columns.tolist()
    output_data['method'].append('SPEC')
    output_data['time'].append(time.perf_counter() - start)
    output_data['features'].append(selected_features)
    output_data['supervised'].append(False)
    output_data[self.test_att].append(self.train_real_data(selected_features, X))
    print(output_data)

    # mRMR: minimum redundancy maximum relevance (MIQ variant)
    start = time.perf_counter()
    mrmr = pymrmr.mRMR(X_norm, 'MIQ', num_feats)
    output_data['method'].append('MRMR(MIQ)')
    output_data['time'].append(time.perf_counter() - start)
    output_data['features'].append(mrmr)
    output_data['supervised'].append(False)
    output_data[self.test_att].append(self.train_real_data(mrmr, X))
    print(output_data)

    # mRMR: minimum redundancy maximum relevance (MID variant)
    start = time.perf_counter()
    mrmr = pymrmr.mRMR(X_norm, 'MID', num_feats)
    output_data['method'].append('MRMR(MID)')
    output_data['time'].append(time.perf_counter() - start)
    output_data['features'].append(mrmr)
    output_data['supervised'].append(False)
    output_data[self.test_att].append(self.train_real_data(mrmr, X))
    print(output_data)

    # recursive feature elimination (RFE)
    from sklearn.feature_selection import RFE
    from sklearn.linear_model import LogisticRegression
    rfe_selector = RFE(estimator=LogisticRegression(), n_features_to_select=num_feats,
                       step=10, verbose=5)
    start = time.perf_counter()
    rfe_selector.fit(X_norm, y)
    rfe_support = rfe_selector.get_support()
    rfe_feature = X_norm.loc[:, rfe_support].columns.tolist()
    output_data['method'].append('RFE')
    output_data['time'].append(time.perf_counter() - start)
    output_data['features'].append(rfe_feature)
    output_data['supervised'].append(True)
    output_data[self.test_att].append(self.train_real_data(rfe_feature, X))
    print(output_data)

    # ----------------------------------------------------------------
    # Lasso: SelectFromModel
    from sklearn.feature_selection import SelectFromModel
    # liblinear is required because the default lbfgs solver does not support l1
    embeded_lr_selector = SelectFromModel(
        LogisticRegression(penalty="l1", solver="liblinear"), max_features=num_feats)
    start = time.perf_counter()
    embeded_lr_selector.fit(X_norm, y)
    embeded_lr_support = embeded_lr_selector.get_support()
    embeded_lr_feature = X_norm.loc[:, embeded_lr_support].columns.tolist()
    output_data['method'].append('Lasso')
    output_data['time'].append(time.perf_counter() - start)
    output_data['features'].append(embeded_lr_feature)
    output_data['supervised'].append(True)
    output_data[self.test_att].append(self.train_real_data(embeded_lr_feature, X))
    print(output_data)
    print(str(len(embeded_lr_feature)), 'selected features')

    # -----------------------------------------------------------------------------
    # Tree-based: SelectFromModel with a random forest
    from sklearn.ensemble import RandomForestClassifier
    embeded_rf_selector = SelectFromModel(RandomForestClassifier(n_estimators=100),
                                          max_features=num_feats)
    start = time.perf_counter()
    embeded_rf_selector.fit(X_norm, y)
    embeded_rf_support = embeded_rf_selector.get_support()
    embeded_rf_feature = X_norm.loc[:, embeded_rf_support].columns.tolist()
    output_data['method'].append('Tree_Based_RF')
    output_data['time'].append(time.perf_counter() - start)
    output_data['features'].append(embeded_rf_feature)
    output_data['supervised'].append(True)
    output_data[self.test_att].append(self.train_real_data(embeded_rf_feature, X))
    print(output_data)
    print(str(len(embeded_rf_feature)), 'selected features')

    # -------------------------------------------------------------------------------
    # Also tree-based: SelectFromModel with LightGBM
    from lightgbm import LGBMClassifier
    lgbc = LGBMClassifier(n_estimators=500, learning_rate=0.05, num_leaves=32,
                          colsample_bytree=0.2, reg_alpha=3, reg_lambda=1,
                          min_split_gain=0.01, min_child_weight=40)
    embeded_lgb_selector = SelectFromModel(lgbc, max_features=num_feats)
    start = time.perf_counter()
    embeded_lgb_selector.fit(X_norm, y)
    embeded_lgb_support = embeded_lgb_selector.get_support()
    embeded_lgb_feature = X_norm.loc[:, embeded_lgb_support].columns.tolist()
    output_data['method'].append('Tree_Based_lightGBM')
    output_data['time'].append(time.perf_counter() - start)
    output_data['supervised'].append(True)
    output_data['features'].append(embeded_lgb_feature)
    output_data[self.test_att].append(self.train_real_data(embeded_lgb_feature, X))
    print(output_data)
    print(str(len(embeded_lgb_feature)), 'selected features')

    return output_data
def Scoreseries():
    # Score-like algorithms
    # init
    n = 120
    test_alpha = 0.325
    f_features = SelectKBest(f_classif, k=n).fit_transform(X_transed, y)
    mi_features = SelectKBest(mutual_info_classif, k=n).fit_transform(X_transed, y)

    # the Laplacian score is unsupervised (y is not used); lap_score returns
    # scores, so rank them before indexing the columns
    lap_scores = lap_score.lap_score(X_transed)
    lap_featureindex = lap_score.feature_ranking(lap_scores)
    lap_features = X_transed[:, lap_featureindex[0:n]]

    fdr_features = SelectFdr(alpha=0.335).fit_transform(X_transed, y)
    print("fdr_features shape:", fdr_features.shape)
    fpr_features = SelectFpr(alpha=0.33).fit_transform(X_transed, y)
    print("fpr_features shape:", fpr_features.shape)
    fwe_features = SelectFwe(alpha=test_alpha).fit_transform(X_transed, y)
    print("fwe_features shape:", fwe_features.shape)

    baseresult = cross_val_score(cls, X, y, cv=5, scoring='accuracy')
    # chi2result = cross_val_score(cls, chi2_features, y, cv=5, scoring='accuracy')
    # print(baseresult, sum(baseresult) / 5)
    # print(chi2result, sum(chi2result) / 5)

    print("f")
    fresult = cross_val_score(cls, f_features, y, cv=5, scoring='accuracy')
    print(baseresult, sum(baseresult) / 5)
    print(fresult, sum(fresult) / 5)

    print("mutual information")
    miresult = cross_val_score(cls, mi_features, y, cv=5, scoring='accuracy')
    print(baseresult, sum(baseresult) / 5)
    print(miresult, sum(miresult) / 5)

    print("lap score")
    lapresult = cross_val_score(cls, lap_features, y, cv=5, scoring='accuracy')
    print(baseresult, sum(baseresult) / 5)
    print(lapresult, sum(lapresult) / 5)

    print("fdr")
    if fdr_features.shape[1] > 0:
        fdrresult = cross_val_score(cls, fdr_features, y, cv=5, scoring='accuracy')
        print(baseresult, sum(baseresult) / 5)
        print(fdrresult, sum(fdrresult) / 5)

    print("fpr")
    if fpr_features.shape[1] > 0:
        fprresult = cross_val_score(cls, fpr_features, y, cv=5, scoring='accuracy')
        print(baseresult, sum(baseresult) / 5)
        print(fprresult, sum(fprresult) / 5)

    if fwe_features.shape[1] > 0:
        print("fwe")
        fweresult = cross_val_score(cls, fwe_features, y, cv=5, scoring='accuracy')
        print(baseresult, sum(baseresult) / 5)
        print(fweresult, sum(fweresult) / 5)
    return
def compare_methods(x, y, num_select, pctg=0.1, pack_size=1, num_clusters=5, two_sided=False):
    n, d = x.shape
    idx = np.random.permutation(n)
    x, y = x[idx], y[idx]

    # ######## split train and test ########
    X = x
    Y = y
    train_num = int(n * 0.7)
    test_num = n - int(n * 0.7)
    x = X[:train_num, :]
    y = Y[:train_num]
    x_test = X[-test_num:, :]
    y_test = Y[-test_num:]

    # ########## other methods #####################
    '''
    Similarity based:
        lap_score
        SPEC
    '''
    # time.clock() was removed in Python 3.8; perf_counter() replaces it
    start_time = time.perf_counter()
    lap_score_result = lap_score.lap_score(x)
    lap_score_result = np.argsort(lap_score_result)[:num_select]  # find minimum
    print('lap_score running time:', time.perf_counter() - start_time)

    # _, stepwise = backward_distance_selection(x, num_select, pctg, pack_size)  # pctg controls sensitivity to outliers

    start_time = time.perf_counter()
    rf_result = random_selection(x, num_select, N=300, num_use=int(d / 2),
                                 pctg=pctg, two_sided=two_sided)
    print('rf running time:', time.perf_counter() - start_time)

    start_time = time.perf_counter()
    SPEC_result = SPEC.spec(x)
    print('SPEC running time:', time.perf_counter() - start_time)
    SPEC_result = np.argsort(SPEC_result)[:num_select]  # find minimum

    start_time = time.perf_counter()
    CSPEC_result = cut_spec(x, pctg=0.15)
    print('cut-SPEC running time:', time.perf_counter() - start_time)
    CSPEC_result = np.argsort(CSPEC_result)[:num_select]  # find minimum

    '''sparse learning based'''
    start_time = time.perf_counter()
    MCFS_W = MCFS.mcfs(x, num_select)
    print('MCFS running time:', time.perf_counter() - start_time)
    # use a loop variable that does not shadow the data matrix x
    MCFS_result = [np.max(np.abs(w)) for w in MCFS_W]  # find maximum
    MCFS_result = np.argsort(MCFS_result)[-num_select:]

    # start_time = time.perf_counter()
    # NDFS_W = NDFS.ndfs(x, **{'n_clusters': num_clusters})
    # print('NDFS running time:', time.perf_counter() - start_time)
    # NDFS_result = [np.sqrt(np.sum(w ** 2)) for w in NDFS_W]  # find maximum
    # NDFS_result = np.argsort(NDFS_result)[-num_select:]
    #
    # start_time = time.perf_counter()
    # UDFS_W = UDFS.udfs(x, **{'n_clusters': num_clusters})
    # print('UDFS running time:', time.perf_counter() - start_time)
    # UDFS_result = [np.sqrt(np.sum(w ** 2)) for w in UDFS_W]  # find minimum?
    # UDFS_result = np.argsort(UDFS_result)[:num_select]

    # prop_x = x[:, list(stepwise)]
    rf_x = x[:, list(rf_result)]
    lap_score_x = x[:, list(lap_score_result)]
    SPEC_x = x[:, list(SPEC_result)]
    CSPEC_x = x[:, list(CSPEC_result)]
    MCFS_x = x[:, list(MCFS_result)]
    # NDFS_x = x[:, list(NDFS_result)]
    # UDFS_x = x[:, list(UDFS_result)]

    print('\n')
    print('Class Separability')
    # print('prop', ef.class_seperability(prop_x, y))
    print('rf', ef.class_seperability(rf_x, y))
    print('lap_score', ef.class_seperability(lap_score_x, y))
    print('SPEC', ef.class_seperability(SPEC_x, y))
    print('cut-SPEC', ef.class_seperability(CSPEC_x, y))
    print('MCFS', ef.class_seperability(MCFS_x, y))
    # print('NDFS', ef.class_seperability(NDFS_x, y))
    # print('UDFS', ef.class_seperability(UDFS_x, y))

    print('\n')
    print('KNN accuracy')
    # print('prop', ef.knn_accuracy(prop_x, y))
    print('rf', ef.knn_accuracy(x_test, y_test, rf_result))
    print('lap_score', ef.knn_accuracy(x_test, y_test, lap_score_result))
    print('SPEC', ef.knn_accuracy(x_test, y_test, SPEC_result))
    print('cut-SPEC', ef.knn_accuracy(x_test, y_test, CSPEC_result))
    print('MCFS', ef.knn_accuracy(x_test, y_test, MCFS_result))
    # print('NDFS', ef.knn_accuracy(x_test, y_test, NDFS_result))
    # print('UDFS', ef.knn_accuracy(x_test, y_test, UDFS_result), '\n')

    print('\n')
    print('connectivity')
    # print('prop', ef.knn_accuracy(prop_x, y))
    print('rf', ef.connectivity(x, rf_x, pctg, two_sided))
    print('lap_score', ef.connectivity(x, lap_score_x, pctg, two_sided))
    print('SPEC', ef.connectivity(x, SPEC_x, pctg, two_sided))
    print('cut-SPEC', ef.connectivity(x, CSPEC_x, pctg, two_sided))
    print('MCFS', ef.connectivity(x, MCFS_x, pctg, two_sided))
# URL for the Pima Indians Diabetes dataset (UCI Machine Learning Repository)
url = "http://archive.ics.uci.edu/ml/machine-learning-databases/pima-indians-diabetes/pima-indians-diabetes.data"
# download the file (urllib.request replaces Python 2's urllib2)
raw_data = urllib.request.urlopen(url)
# load the CSV file as a numpy matrix
dataset = np.loadtxt(raw_data, delimiter=",")
X = dataset[:, :8]   # first 8 columns are the features; column 8 is the label
y = dataset[:, 8]

kwargs_W = {"metric": "euclidean", "neighbor_mode": "knn",
            "weight_mode": "heat_kernel", "k": 5, 't': 1}
W = construct_W.construct_W(X, **kwargs_W)

from skfeature.function.similarity_based import lap_score
score = lap_score.lap_score(X, W=W)
print(score)
idx = lap_score.feature_ranking(score)

fig = plt.figure()
plt.plot(score, label='Laplacian Score')
plt.legend(loc='upper center', shadow=True)  # 'upper middle' is not a valid loc
plt.show()
print(idx)

num_fea = 3
# selected_features = X[:, idx[0:num_fea]]
# print(selected_features)
selected_features1 = X[:, 0:1]
process = LinearCombination.kernel_GramSchmidtProcess(rbf_kernel)
# np.random.seed(42)
# data = np.random.random(size=[10, 5])
# process.fit(data)
# print(process.kmatrix)
# print(process.basisweight)

Xg, yg = datasets.make_gaussian_quantiles(n_features=10, random_state=42)
print(yg.shape)
Xp, yp = datasets.make_multilabel_classification(n_features=10, random_state=42, n_classes=1)
print(yp.shape)

Rf1 = relief.Relief()
print(Rf1.fit(Xg, yg).w_)
Rf2 = relief.ReliefF()
print(Rf2.fit(Xg, yg).w_)
Rf3 = relief.RReliefF()
print(Rf3.fit(Xg, yg).w_)

L_score = lap_score.lap_score(Xg)
print(L_score)

MI = feature_selection.mutual_info_classif(Xg, yg)
print(MI)

数学 = "aaa"
print(数学)

# a = 154476802108746166441951315019919837485664325669565431700026634898253202035277999
# b = 36875131794129999827197811565225474825492979968971970996283137471637224634055579
# c = 4373612677928697257861252602371390152816537558161613618621437993378423467772036
# print((a / (b + c)) + (b / (a + c)) + (c / (a + b)))
def lap_ours(train, test, K):
    # rank features by Laplacian score on the training data and keep the top K
    scores = lap_score(train[0])
    indices = lap_score_ranking(scores)[:K]
    return train[0][:, indices], test[0][:, indices]
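# Hypothetical call to lap_ours: train and test are assumed to be (X, y)
# tuples, with lap_score and lap_score_ranking already in scope.
import numpy as np

train = (np.random.rand(80, 30), np.random.randint(0, 2, 80))
test = (np.random.rand(20, 30), np.random.randint(0, 2, 20))
X_tr_sel, X_te_sel = lap_ours(train, test, K=10)
print(X_tr_sel.shape, X_te_sel.shape)  # (80, 10) (20, 10)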
def laplacian_score(data):
    W = construct_W(data)
    return lap_score(data, W=W)
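# To make explicit what lap_score computes: following He et al. (2005),
# given affinity W, degree matrix D = diag(W @ 1) and Laplacian L = D - W,
# each feature f_r is D-mean-centered and scored as
#     L_r = (f~' L f~) / (f~' D f~),
# where smaller is better. The dense, loop-based sketch below is for
# illustration only; skfeature's implementation is vectorized and works on
# a sparse W.
import numpy as np

def laplacian_score_manual(X, W):
    """Reference Laplacian score; W is a dense symmetric affinity matrix."""
    D = np.diag(W.sum(axis=1))                 # degree matrix
    L = D - W                                  # graph Laplacian
    one = np.ones(X.shape[0])
    scores = np.empty(X.shape[1])
    for r in range(X.shape[1]):
        f = X[:, r]
        # subtract the D-weighted mean of the feature (removes the trivial component)
        f_tilde = f - (f @ D @ one) / (one @ D @ one) * one
        scores[r] = (f_tilde @ L @ f_tilde) / (f_tilde @ D @ f_tilde)
    return scores  # smaller score = feature better respects local structure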
def compare_methods(x, y, num_select, pctg=0.5, sample_pctg=1, num_clusters=5,
                    zero_mean=False, dim=1, t=0.8, thresh=0.1):
    if zero_mean == False:
        x = normalize(x, axis=0)
    else:
        x = standardize_feature(x)
    n, d = x.shape

    # idx = np.random.permutation(n)
    # x, y = x[idx], y[idx]
    #
    # ######### split train and test #########
    # X = x; Y = y
    # train_num = int(n * 0.6)
    # test_num = n - int(n * 0.6)
    # x = X[:train_num, :]; y = Y[:train_num]
    # x_test = X[-test_num:, :]; y_test = Y[-test_num:]

    # ########## calculate #####################
    # time.clock() was removed in Python 3.8; perf_counter() replaces it
    start_time = time.perf_counter()
    rf_result = random_selection(x, num_select, N=500, num_use=int(0.5 * d),
                                 pctg=pctg, two_sided=False)
    print('rf running time:', time.perf_counter() - start_time)

    start_time = time.perf_counter()
    rank_result, l1, l2, lmax = ranking_selection(x, num_select, N=500,
                                                  num_use=int(0.5 * d),
                                                  sample_pctg=1,
                                                  preserve_pctg=pctg)
    print('rank running time:', time.perf_counter() - start_time)

    start_time = time.perf_counter()
    lap_score_result = lap_score.lap_score(x)
    lap_score_result = np.argsort(lap_score_result)[:num_select]  # find minimum
    print('lap_score running time:', time.perf_counter() - start_time)

    start_time = time.perf_counter()
    SPEC_result = SPEC.spec(x)
    print('SPEC running time:', time.perf_counter() - start_time)
    SPEC_result = np.argsort(SPEC_result)[:num_select]  # find minimum

    '''sparse learning based'''
    start_time = time.perf_counter()
    MCFS_W = MCFS.mcfs(x, num_select, **{'n_clusters': num_clusters})
    print('MCFS running time:', time.perf_counter() - start_time)
    # use a loop variable that does not shadow the data matrix x
    MCFS_result = [np.max(np.abs(w)) for w in MCFS_W]  # find maximum
    MCFS_result = np.argsort(MCFS_result)[-num_select:]

    # start_time = time.perf_counter()
    # NDFS_W = NDFS.ndfs(x, **{'n_clusters': num_clusters})
    # print('NDFS running time:', time.perf_counter() - start_time)
    # NDFS_result = [np.sqrt(np.sum(w ** 2)) for w in NDFS_W]  # find maximum
    # NDFS_result = np.argsort(NDFS_result)[-num_select:]
    #
    # start_time = time.perf_counter()
    # UDFS_W = UDFS.udfs(x, **{'n_clusters': num_clusters})
    # print('UDFS running time:', time.perf_counter() - start_time)
    # UDFS_result = [np.sqrt(np.sum(w ** 2)) for w in UDFS_W]  # find minimum?
    # UDFS_result = np.argsort(UDFS_result)[:num_select]

    # prop_x = x[:, list(stepwise)]
    rf_x = x[:, list(rf_result)]
    rank_x = x[:, list(rank_result)]
    l1_x = x[:, list(l1)]
    l2_x = x[:, list(l2)]
    lmax_x = x[:, list(lmax)]
    lap_score_x = x[:, list(lap_score_result)]
    SPEC_x = x[:, list(SPEC_result)]
    MCFS_x = x[:, list(MCFS_result)]
    # NDFS_x = x[:, list(NDFS_result)]
    # UDFS_x = x[:, list(UDFS_result)]

    # '''[KNN purity NMI dgm0 dgm1], each one is a matrix'''
    # methods = ['rf', 'rank', 'lap_score', 'SPEC', 'MCFS']
    # for method in methods:
    #     if method == 'rf':
    #         selected_feature = list(rf_result).reverse()
    #     elif method == 'rank':
    #         selected_feature = list(rank_result).reverse()
    #     elif method == 'lap_score':
    #         selected_feature = list(lap_score_result)
    #     elif method == 'SPEC':
    #         selected_feature = list(SPEC_result)
    #     else:
    #         selected_feature = list(MCFS_result).reverse()
    #
    # if num_select <= 50:  # the dimension
    #     start_dim = 5; step = 2
    # else:
    #     start_dim = 10; step = 5

    print('KNN accuracy')
    print('rf', ef.knn_accuracy(x, y, rf_result))
    print('rank', ef.knn_accuracy(x, y, rank_result))
    print('l1', ef.knn_accuracy(x, y, l1))
    print('l2', ef.knn_accuracy(x, y, l2))
    print('lmax', ef.knn_accuracy(x, y, lmax))
    print('lap_score', ef.knn_accuracy(x, y, lap_score_result))
    print('SPEC', ef.knn_accuracy(x, y, SPEC_result))
    print('MCFS', ef.knn_accuracy(x, y, MCFS_result))
    # print('NDFS', ef.knn_accuracy(x_test, y_test, NDFS_result))
    # print('UDFS', ef.knn_accuracy(x_test, y_test, UDFS_result), '\n')

    # print('connectivity')
    # print('rf', ef.connectivity(x, rf_x, pctg, two_sided))
    # print('rank', ef.connectivity(x, rank_x, pctg, two_sided))
    # print('lap_score', ef.connectivity(x, lap_score_x, pctg, two_sided))
    # print('SPEC', ef.connectivity(x, SPEC_x, pctg, two_sided))
    # print('cut-SPEC', ef.connectivity(x, CSPEC_x, pctg, two_sided))
    # print('MCFS', ef.connectivity(x, MCFS_x, pctg, two_sided))
    # print('NDFS', ef.connectivity(x, NDFS_x, pctg, two_sided))
    # print('UDFS', ef.connectivity(x, UDFS_x, pctg, two_sided), '\n')

    print('purity score | NMI')
    print('origin', ef.purity_score(x, y))
    print('rf', ef.purity_score(rf_x, y))
    print('rank', ef.purity_score(rank_x, y))
    print('lap_score', ef.purity_score(lap_score_x, y))
    print('SPEC', ef.purity_score(SPEC_x, y))
    print('MCFS', ef.purity_score(MCFS_x, y))

    dgm = ef.compute_dgm(x, t, dim, thresh)
    dgm_rf = ef.compute_dgm(rf_x, t, dim, thresh)
    dgm_rank = ef.compute_dgm(rank_x, t, dim, thresh)
    dgm_l1 = ef.compute_dgm(l1_x, t, dim, thresh)
    dgm_l2 = ef.compute_dgm(l2_x, t, dim, thresh)
    dgm_lmax = ef.compute_dgm(lmax_x, t, dim, thresh)
    dgm_lap_score = ef.compute_dgm(lap_score_x, t, dim, thresh)
    dgm_SPEC = ef.compute_dgm(SPEC_x, t, dim, thresh)
    dgm_MCFS = ef.compute_dgm(MCFS_x, t, dim, thresh)

    # plt.figure(); plt.plot(dgm[:, -2:], 'ro')
    # plt.figure(); plt.plot(dgm_rf[:, -2:], 'ro')
    # plt.figure(); plt.plot(dgm_rank[:, -2:], 'ro')
    # plt.figure(); plt.plot(dgm_SPEC[:, -2:], 'ro')
    # plt.figure(); plt.plot(dgm_MCFS[:, -2:], 'ro')

    print('dgm distance')
    print('rf', ef.dgm_distance(dgm, dgm_rf, 'W', dim), ' ', ef.dgm_distance(dgm, dgm_rf, 'B', dim))
    print('rank', ef.dgm_distance(dgm, dgm_rank, 'W', dim), ' ', ef.dgm_distance(dgm, dgm_rank, 'B', dim))
    print('l1', ef.dgm_distance(dgm, dgm_l1, 'W', dim), ' ', ef.dgm_distance(dgm, dgm_l1, 'B', dim))
    print('l2', ef.dgm_distance(dgm, dgm_l2, 'W', dim), ' ', ef.dgm_distance(dgm, dgm_l2, 'B', dim))
    print('lmax', ef.dgm_distance(dgm, dgm_lmax, 'W', dim), ' ', ef.dgm_distance(dgm, dgm_lmax, 'B', dim))
    print('lap_score', ef.dgm_distance(dgm, dgm_lap_score, 'W', dim), ' ', ef.dgm_distance(dgm, dgm_lap_score, 'B', dim))
    print('SPEC', ef.dgm_distance(dgm, dgm_SPEC, 'W', dim), ' ', ef.dgm_distance(dgm, dgm_SPEC, 'B', dim))
    print('MCFS', ef.dgm_distance(dgm, dgm_MCFS, 'W', dim), ' ', ef.dgm_distance(dgm, dgm_MCFS, 'B', dim))
def generate_result_dist(dataset, x, y, num_select, zero_mean=False, N=1000, t=0.6, thresh=0.1):
    if zero_mean == False:
        x = normalize(x, axis=0)
    else:
        x = standardize_feature(x)
    n, d = x.shape

    if num_select == 300:
        start_dim = 20; step = 20
    elif num_select == 200:  # the dimension
        start_dim = 20; step = 10
    elif num_select == 100:
        start_dim = 10; step = 10
    elif num_select == 50:
        start_dim = 10; step = 5
    elif num_select == 20:
        start_dim = 4; step = 2
    else:
        start_dim = 5; step = 1
    dimension_list = list(range(start_dim, num_select + 1, step))

    # ######## rank: parameters preserve_pctg, num_use ########
    D0 = compute_dist(x)
    preserve_pctg_list = [0.2, 0.4, 0.6, 0.8, 1]   # dimension 0
    num_use_list = [0.1, 0.2, 0.3, 0.4, 0.5]       # dimension 1
    rank_result = np.zeros([len(preserve_pctg_list), len(num_use_list), 7, len(dimension_list)])
    rank_result_l1 = np.zeros([len(preserve_pctg_list), len(num_use_list), 7, len(dimension_list)])
    rank_result_l2 = np.zeros([len(preserve_pctg_list), len(num_use_list), 7, len(dimension_list)])
    rank_result_lmax = np.zeros([len(preserve_pctg_list), len(num_use_list), 7, len(dimension_list)])
    for i, preserve_pctg in enumerate(preserve_pctg_list):
        for j, num_use in enumerate(num_use_list):
            print(i, j)
            rank_selected, rank_selected_l1, rank_selected_l2, rank_selected_lmax = ranking_selection(
                x, num_select, N=N, num_use=int(num_use * d + 1), sample_pctg=1,
                preserve_pctg=preserve_pctg)
            rank_selected = list(rank_selected)[::-1]
            for k, dimension in enumerate(dimension_list):
                # performance using different numbers of features
                s = rank_selected[:dimension]
                rank_x = x[:, s]
                D_rank = compute_dist(rank_x)
                rank_result[i, j, 0, k] = ef.dif_dist(D0, D_rank, 'l1')
                rank_result[i, j, 1, k] = ef.dif_dist(D0, D_rank, 'l2')
                rank_result[i, j, 2, k] = ef.dif_dist(D0, D_rank, 'lmax')

                s_l1 = rank_selected_l1[:dimension]
                rank_l1_x = x[:, s_l1]
                D1 = compute_dist(rank_l1_x)
                rank_result_l1[i, j, 0, k] = ef.dif_dist(D0, D1, 'l1')
                rank_result_l1[i, j, 1, k] = ef.dif_dist(D0, D1, 'l2')
                rank_result_l1[i, j, 2, k] = ef.dif_dist(D0, D1, 'lmax')

                s_l2 = rank_selected_l2[:dimension]
                rank_l2_x = x[:, s_l2]
                D2 = compute_dist(rank_l2_x)
                rank_result_l2[i, j, 0, k] = ef.dif_dist(D0, D2, 'l1')
                rank_result_l2[i, j, 1, k] = ef.dif_dist(D0, D2, 'l2')
                rank_result_l2[i, j, 2, k] = ef.dif_dist(D0, D2, 'lmax')

                s_lmax = rank_selected_lmax[:dimension]
                rank_lmax_x = x[:, s_lmax]
                D_max = compute_dist(rank_lmax_x)
                rank_result_lmax[i, j, 0, k] = ef.dif_dist(D0, D_max, 'l1')
                rank_result_lmax[i, j, 1, k] = ef.dif_dist(D0, D_max, 'l2')
                rank_result_lmax[i, j, 2, k] = ef.dif_dist(D0, D_max, 'lmax')
    np.save('./result/' + dataset + '/rank_dist', rank_result)
    np.save('./result/' + dataset + '/rank_l1_dist', rank_result_l1)
    np.save('./result/' + dataset + '/rank_l2_dist', rank_result_l2)
    np.save('./result/' + dataset + '/rank_lmax_dist', rank_result_lmax)

    # ####### lap_score ##########
    lap_score_result = np.zeros([7, len(dimension_list)])
    lap_score_selected = lap_score.lap_score(x)
    lap_score_selected = list(np.argsort(lap_score_selected)[:num_select])  # find minimum
    for k, dimension in enumerate(dimension_list):
        # performance using different numbers of features
        s = lap_score_selected[:dimension]
        lap_score_x = x[:, s]
        D1 = compute_dist(lap_score_x)
        lap_score_result[0, k] = ef.dif_dist(D0, D1, 'l1')
        lap_score_result[1, k] = ef.dif_dist(D0, D1, 'l2')
        lap_score_result[2, k] = ef.dif_dist(D0, D1, 'lmax')
    np.save('./result/' + dataset + '/lap_score_dist', lap_score_result)

    # ####### SPEC ##########
    SPEC_result = np.zeros([7, len(dimension_list)])
    SPEC_selected = SPEC.spec(x)
    SPEC_selected = list(np.argsort(SPEC_selected)[:num_select])  # find minimum
    for k, dimension in enumerate(dimension_list):
        # performance using different numbers of features
        s = SPEC_selected[:dimension]
        SPEC_x = x[:, s]
        D1 = compute_dist(SPEC_x)
        SPEC_result[0, k] = ef.dif_dist(D0, D1, 'l1')
        SPEC_result[1, k] = ef.dif_dist(D0, D1, 'l2')
        SPEC_result[2, k] = ef.dif_dist(D0, D1, 'lmax')
    np.save('./result/' + dataset + '/SPEC_dist', SPEC_result)

    # ###### MCFS parameter: num_clusters #############
    num_clusters_list = [5, 10, 20, 30]
    MCFS_result = np.zeros([len(num_clusters_list), 7, len(dimension_list)])
    for i, num_clusters in enumerate(num_clusters_list):
        MCFS_W = MCFS.mcfs(x, num_select, **{'n_clusters': num_clusters})
        MCFS_selected = [np.max(np.abs(w)) for w in MCFS_W]  # find maximum
        MCFS_selected = np.argsort(MCFS_selected)[-num_select:]
        MCFS_selected = list(MCFS_selected)[::-1]
        for k, dimension in enumerate(dimension_list):
            # performance using different numbers of features
            s = MCFS_selected[:dimension]
            MCFS_x = x[:, s]
            D1 = compute_dist(MCFS_x)
            MCFS_result[i, 0, k] = ef.dif_dist(D0, D1, 'l1')
            MCFS_result[i, 1, k] = ef.dif_dist(D0, D1, 'l2')
            MCFS_result[i, 2, k] = ef.dif_dist(D0, D1, 'lmax')
    np.save('./result/' + dataset + '/MCFS_dist', MCFS_result)

    return rank_result, rank_result_l1, rank_result_l2, rank_result_lmax, lap_score_result, SPEC_result, MCFS_result
# symmetrize the sparse affinity matrix: keep the larger of W_ij and W_ji
bigger = np.transpose(W) > W
W = W - W.multiply(bigger) + np.transpose(W).multiply(bigger)
print('Sparse Affinity Matrix:', W)

## Logging
# with open('output.txt', 'a') as f:
#     print("W", file=f)
#     print(W, file=f)

## Euclidean Laplacian result
numTrainData = trainData.values
# construct_W expects the key 'neighbor_mode' (not 'neighbour_mode')
kwargs_W = {"metric": "euclidean", "neighbor_mode": "knn"}
W = construct_W.construct_W(numTrainData, **kwargs_W)

## Calculate Laplacian Score
score = lap_score.lap_score(numTrainData, W=W)
print('Laplacian Score:', score)

## Logging
with open('output.txt', 'a') as f:
    print("Laplacian Score", file=f)
    print(score, file=f)

# Laplacian HEOM result hardcoded
"""score = np.array(
    [np.nan, np.nan, np.nan, np.nan, 0.25866548, 0.25866548, np.nan, 0.25946108,
     np.nan, np.nan, np.nan, np.nan, 0.67265115, 0.73108302, np.nan, np.nan,
     np.nan, 0.86144223, np.nan, 0.6201575, np.nan, np.nan, np.nan, np.nan,
     np.nan, np.nan, np.nan, np.nan, np.nan, 0.8655987, 0.85803891, 0.87968564,
     0.88995775, 0.87647355, 0.86576088, 0.87689691, 0.8832944, 0.8750145,
     0.85803891, 0.87919727, 0.89337948, 0.668559, 1, 0.63601804, 0.64669977,
     1, 0.87252428, 0.86959342, 0.83178639, 1, 0.78901017, 0.6930278,
     0.81462815, 0.84261471, 0.84425971, 0.86648025, 0.6385317, np.nan,
     0.57706172, 0.85893685, np.nan, 0.85893685, 0.63022226, np.nan,
     0.56493291, 0.7190018,
x_train = X[train_idx]
x_test = X[test_idx]
y_train = to_onehot(map(lambda x: mods.index(lbl[x][0]), train_idx))
y_test = to_onehot(map(lambda x: mods.index(lbl[x][0]), test_idx))

# compute Laplacian scores (the two signal channels are concatenated
# into one feature vector per sample)
x_train = np.append(x_train[:, 0, :], x_train[:, 1, :], axis=1)
kwargs_W = {"metric": "euclidean", "neighbor_mode": "knn",
            "weight_mode": "heat_kernel", "k": 5, 't': 1}
W = construct_W.construct_W(x_train, **kwargs_W)
score = lap_score.lap_score(x_train, W=W)
idx = lap_score.feature_ranking(score)
np.save('features/laplacian.npy', idx)
print('Features saved')
# idx = np.load('features/laplacian.npy')

x_train = x_train.transpose()
x_train = np.split(x_train, 2)
x_train = np.array(x_train).transpose((2, 0, 1))

# In[4]:
in_shp = list(x_train.shape[1:])
print(x_train.shape, in_shp, snrs)
classes = mods

# create copies of the data
x_train_copy = x_train
X_train, X_test = features[train_index], features[test_index]
y_train, y_test = labels[train_index], labels[test_index]
start_time = str(datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'))
acc = []

# lap_score
method = 'lap_score'
kwargs_W = {"metric": "euclidean", "neighbor_mode": "knn",
            "weight_mode": "heat_kernel", "k": 5, 't': 1}
W = construct_W.construct_W(X_train, **kwargs_W)
score = lap_score.lap_score(X_train, W=W)
idx = lap_score.feature_ranking(score)
selected_fea_train = X_train[:, idx[0:num_features]]
selected_fea_test = X_test[:, idx[0:num_features]]
clf.fit(selected_fea_train, y_train)
acc.append(accuracy_score(y_test, clf.predict(selected_fea_test)))

# fisher_score
score = fisher_score.fisher_score(X_train, y_train)
idx = fisher_score.feature_ranking(score)
selected_fea_train = X_train[:, idx[0:num_features]]
selected_fea_test = X_test[:, idx[0:num_features]]
clf.fit(selected_fea_train, y_train)
acc.append(accuracy_score(y_test, clf.predict(selected_fea_test)))

# reliefF
print "Data Preparation finished." timeStart = datetime.datetime.now() # feature selection if methodType == 0: # Laplacian Score kwrags_W = { "metric": "euclidean", "neighbor_mode": "knn", "weight_mode": "heat_kernel", "k": 5, "t": 1 } W = construct_W(data, **kwrags_W) result = lap_score.lap_score(data, W=W) print result elif methodType == 1: # MCFS kwrags_W = { "metric": "euclidean", "neighbor_mode": "knn", "weight_mode": "heat_kernel", "k": 5, "t": 1 } W = construct_W(data, **kwrags_W) # 参数n_selected_features用于控制LARs算法解的稀疏性,也就是result每一列中非零元素的个数 # 参数n_clusters用于控制LE降维的目标维数,也就是result的列数 result = MCFS.mcfs(data, n_selected_features=2, W=W, n_clusters=2) print result