Example #1
def Laplacian_score(diheds):
    import scipy.io
    import numpy
    import os
    #os.chdir('/home/anu/Downloads/scikit-feature-1.0.0')
    from skfeature.function.similarity_based import lap_score
    from skfeature.utility import construct_W
    from numpy import mean
    kwargs_W = {
        "metric": "euclidean",
        "neighbor_mode": "knn",
        "weight_mode": "heat_kernel",
        "k": 5,
        't': 1
    }
    idx = []
    #change the path for every system to be run.
    #os.chdir('/home/anu/Downloads/traj_benz_trypsin/')
    for i in range(len(diheds)):
        X = diheds[i]
        W = construct_W.construct_W(X, **kwargs_W)
        score = lap_score.lap_score(X, W=W)
        idx.append(score)
    col_mean = mean(idx, axis=0)
    imp_features = numpy.argsort(col_mean)
    return col_mean, imp_features
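A minimal usage sketch for the function above (a sketch, assuming scikit-feature is installed; the random arrays are illustrative stand-ins for per-trajectory dihedral matrices, not data from the original):

import numpy as np

# three hypothetical trajectories, each with 100 frames and 12 dihedral features
diheds = [np.random.rand(100, 12) for _ in range(3)]
col_mean, imp_features = Laplacian_score(diheds)
print(col_mean.shape)      # (12,): mean Laplacian score per dihedral
print(imp_features[:5])    # lowest-scoring (most informative) dihedrals first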
Example #2
    def utilize_selection_method(self, options):
        logging.info('     Unsupervised Feature Selection : Start')
        self.parse_options(options)
        normalize_feature = SupervisedFs.normalize_feature(self.data_feature)
        feature_amount = len(self.data_feature[0])
        selection_result = {}

        if self.options['v'] == 1:
            widget = [
                'Calculating Variance             : ',
                pb.Percentage(), ' ',
                pb.Bar(marker=pb.RotatingMarker()), ' ',
                pb.ETA()
            ]
            timer = pb.ProgressBar(widgets=widget,
                                   maxval=feature_amount).start()
            variance = []
            for n in range(0, feature_amount):
                variance.append([np.var(normalize_feature[:, n]), n + 1])
                timer.update(n)
            timer.finish()
            selection_result['variance'] = sorted(variance, reverse=True)

        if self.options['l'] == 1:
            logging.info('   -----Calculating Laplacian score---- ')
            kwargs_w = {
                'metric': 'euclidean',
                'neighbor_mode': 'knn',
                'weight_mode': 'heat_kernel',
                'k': 5,
                't': 1
            }
            W = construct_W.construct_W(self.data_feature, **kwargs_w)
            score = lap_score.lap_score(self.data_feature, W=W)
            lap = []
            for n in range(0, feature_amount):
                lap.append([score[n], n + 1])
            selection_result['laplacian'] = sorted(lap, reverse=False)
            logging.info('   -----Calculating Laplacian score---- ==> Done')

        if self.options['s'] == 1:
            logging.info('   -----Calculating Spectral score---- ')
            kwargs_w = {
                'metric': 'euclidean',
                'neighbor_mode': 'knn',
                'weight_mode': 'heat_kernel',
                'k': 5,
                't': 1
            }
            W = construct_W.construct_W(self.data_feature, **kwargs_w)
            kwargs_s = {'style': 2, 'W': W}
            score = SPEC.spec(self.data_feature, **kwargs_s)
            spec = []
            for n in range(0, feature_amount):
                spec.append([score[n], n + 1])
            selection_result['spectral'] = sorted(spec, reverse=True)
            logging.info('   -----Calculating Spectral score---- ==> Done')
        return selection_result
Example #3
def selectFeatureLapScore(filename, num_feature, num_cluster):

    # Load the pickle saved to disk with the samples and ALL the features extracted by TSFresh. THIS IS WHAT WE WORK ON
    all_features_train = pd.read_pickle(
        "./pickle/feature_complete/TRAIN/{0}_TRAIN_FeatureComplete.pkl".format(
            filename))
    all_features_test = pd.read_pickle(
        "./pickle/feature_complete/TEST/{0}_TEST_FeatureComplete.pkl".format(
            filename))

    # Drop columns containing NaN values
    all_features_train = all_features_train.dropna(axis=1)
    all_features_test = all_features_test.dropna(axis=1)

    # Build the affinity matrix W for the Laplacian score
    kwargs_W = {
        "metric": "euclidean",
        "neighbor_mode": "knn",
        "weight_mode": "heat_kernel",
        "k": 5,
        't': 1
    }
    W = construct_W.construct_W(all_features_train.values, **kwargs_W)

    # Run the Laplacian score algorithm: one score per feature
    featurePesate = lap_score.lap_score(all_features_train.values, W=W)

    # Rank the features (ascending score: lower Laplacian score = more important)
    idx = lap_score.feature_ranking(featurePesate)

    idxSelected = idx[0:num_feature]  # keep the desired number of features

    # Extract the names of the selected features
    nomiFeatureSelezionate = []

    for i in idxSelected:
        nomiFeatureSelezionate.append(all_features_train.columns[i])

    # Build the dataframe containing only the selected features
    dataframeFeatureSelezionate = all_features_train.loc[:, nomiFeatureSelezionate]

    # Restrict the test set to the selected features as well
    all_features_test = all_features_test.loc[:, nomiFeatureSelezionate]

    # Extract the known class labels
    labelConosciute = estrattoreClassiConosciute.estraiLabelConosciute(
        "./UCRArchive_2018/{0}/{0}_TEST.tsv".format(filename))

    # K-means on the selected features
    print("\nResults with features selected by us with Lap Score")
    print("Number of features: {0}".format(all_features_test.shape[1]))
    testFeatureSelection(X_selected=dataframeFeatureSelezionate.values,
                         X_test=all_features_test.values,
                         num_clusters=num_cluster,
                         y=labelConosciute)
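A hypothetical invocation of the function above, assuming the pickle and UCR archive paths exist; "Coffee" is only an illustrative UCR dataset name:

selectFeatureLapScore(filename="Coffee", num_feature=20, num_cluster=2)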
Example #4
def lap():
    before = datetime.datetime.now()
    result = lap_score.lap_score(data.copy(), labels.copy(), mode="index")  # lap_score overwrites its input, so we pass copies
    after = datetime.datetime.now()
    print("Laplacian")
    result = result[:treshold]
    print(len(result))
    print("cas: " + str(after - before))
    print('\n')
    if len(result) < len(header):
        transform_and_save(result, "Laplacian")
Example #5
def calc_lap_score(data):
    kwargs_W = {
        "metric": "euclidean",
        "neighbor_mode": "knn",
        "weight_mode": "heat_kernel",
        "k": 5,
        't': 1
    }
    W = construct_W.construct_W(data, **kwargs_W)

    return lap_score.lap_score(data, W=W)
def SKF_lap(X, y):
    # construct affinity matrix
    kwargs_W = {
        "metric": "euclidean",
        "neighbor_mode": "knn",
        "weight_mode": "heat_kernel",
        "k": 5,
        't': 1
    }
    W = construct_W.construct_W(X, **kwargs_W)
    # obtain the scores of features
    score = lap_score.lap_score(X, W=W)
    return lap_score.feature_ranking(score)
def get_lap_score(data, k=5, t=1, top_feature=30):
    kwargs_W = {"metric": "euclidean", "neighbor_mode": "knn", "weight_mode": "heat_kernel", "k": k, 't': t}
    W = construct_W.construct_W(data, **kwargs_W)
    score = lap_score.lap_score(data, W=W)
    ranking = lap_score.feature_ranking(score)

    dfscores = pd.DataFrame(score)
    dfcolumns = pd.DataFrame(data.columns)

    featureScores = pd.concat([dfcolumns, dfscores], axis=1)
    featureScores.columns = ['Feature', 'Score']  # name the dataframe columns
    # lower Laplacian scores mark more important features, so take the smallest
    result = featureScores.nsmallest(top_feature, 'Score')

    return result, ranking
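For reference, a small self-contained sketch of the ranking convention these examples rely on: lower Laplacian scores mark features that better preserve local neighborhood structure, which is why lap_score.feature_ranking sorts ascending (random data here; scikit-feature assumed installed):

import numpy as np
from skfeature.function.similarity_based import lap_score
from skfeature.utility import construct_W

X = np.random.rand(50, 10)  # 50 samples, 10 features
W = construct_W.construct_W(X, metric="euclidean", neighbor_mode="knn",
                            weight_mode="heat_kernel", k=5, t=1)
score = lap_score.lap_score(X, W=W)       # one score per feature
idx = lap_score.feature_ranking(score)    # ascending: best features first
assert np.all(np.diff(score[idx]) >= 0)   # scores never decrease along the ranking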
Example #8
def laplacian_score(X, y=None, **kwargs):
    # construct affinity matrix
    kwargs_W = {
        "metric": "euclidean",
        "neighbor_mode": "knn",
        "weight_mode": "heat_kernel",
        "k": 5,
        't': 1
    }
    W = construct_W.construct_W(X, **kwargs_W)

    # obtain the scores of features
    score = lap_score.lap_score(X, W=W)

    # rank the features in ascending order of their Laplacian scores (lower is better)
    idx = lap_score.feature_ranking(score)

    return idx
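A short usage example for the helper above (a sketch; X and num_fea are illustrative):

import numpy as np

X = np.random.rand(200, 30)
idx = laplacian_score(X)          # ranked feature indices, best first
num_fea = 10
X_selected = X[:, idx[:num_fea]]  # keep the 10 best features
print(X_selected.shape)           # (200, 10)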
Example #9
def main():
    # load data
    mat = scipy.io.loadmat('../data/COIL20.mat')
    X = mat['X']  # data
    X = X.astype(float)
    y = mat['Y']  # label
    y = y[:, 0]

    # construct affinity matrix
    kwargs_W = {
        "metric": "euclidean",
        "neighbor_mode": "knn",
        "weight_mode": "heat_kernel",
        "k": 5,
        't': 1
    }
    W = construct_W.construct_W(X, **kwargs_W)

    # obtain the scores of features
    score = lap_score.lap_score(X, W=W)

    # rank the features in ascending order of their Laplacian scores (lower is better)
    idx = lap_score.feature_ranking(score)

    # perform evaluation on clustering task
    num_fea = 100  # number of selected features
    num_cluster = 20  # number of clusters; usually set to the number of classes in the ground truth

    # obtain the dataset on the selected features
    selected_features = X[:, idx[0:num_fea]]

    # perform kmeans clustering based on the selected features and repeats 20 times
    nmi_total = 0
    acc_total = 0
    for i in range(0, 20):
        nmi, acc = unsupervised_evaluation.evaluation(
            X_selected=selected_features, n_clusters=num_cluster, y=y)
        nmi_total += nmi
        acc_total += acc

    # output the average NMI and average ACC
    print('NMI:', old_div(float(nmi_total), 20))
    print('ACC:', old_div(float(acc_total), 20))
Example #10
 def lap_score_filtering(self, vt_data, num_features):
     vt_numpy = vt_data.to_numpy()
     # construct affinity matrix
     kwargs_W = {
         "metric": "cosine",
         "neighbor_mode": "knn",
         "weight_mode": "cosine",
         "k": 40,
         't': 500
     }
     print(
         "We perform Laplacian score filtering using the following parameters: "
         + str(kwargs_W))
     W = construct_W.construct_W(vt_numpy, **kwargs_W)
     score = lap_score.lap_score(vt_numpy, W=W)
     idx = lap_score.feature_ranking(score)  # rank features
     filtered_data = vt_data.iloc[:, idx[0:num_features]].copy()
     print("\nThe data now has " + str(len(filtered_data.T)) +
           " features after Laplacian score filtering.")
     return filtered_data
Example #11
 def plot_ls_after_vt_filtering(self, threshold):
     data = self.test_reddy_dataset.expression_data.copy()
     vt_data = self.variance_threshold_selector(data, threshold)
     # perform ls filtering
     vt_numpy = vt_data.to_numpy()
     # construct affinity matrix
     kwargs_W = {
         "metric": "cosine",
         "neighbor_mode": "knn",
         "weight_mode": "cosine",
         "k": 40,
         't': 500
     }
     print(
         "We plot the Laplacian scores of the features using the following affinity matrix parameters: "
         + str(kwargs_W))
     W = construct_W.construct_W(vt_numpy, **kwargs_W)
     # compute lap score of each remaining features
     score = lap_score.lap_score(vt_numpy, W=W)
     self.plot_lap_scores(score)
Example #12
def main():
    # load data
    mat = scipy.io.loadmat("../data/COIL20.mat")
    X = mat["X"]  # data
    X = X.astype(float)
    y = mat["Y"]  # label
    y = y[:, 0]

    # construct affinity matrix
    kwargs_W = {"metric": "euclidean", "neighbor_mode": "knn", "weight_mode": "heat_kernel", "k": 5, "t": 1}
    W = construct_W.construct_W(X, **kwargs_W)

    # obtain the scores of features
    score = lap_score.lap_score(X, W=W)

    # rank the features in ascending order of their Laplacian scores (lower is better)
    idx = lap_score.feature_ranking(score)

    # perform evaluation on clustering task
    num_fea = 100  # number of selected features
    num_cluster = 20  # number of clusters; usually set to the number of classes in the ground truth

    # obtain the dataset on the selected features
    selected_features = X[:, idx[0:num_fea]]

    # perform kmeans clustering based on the selected features and repeats 20 times
    nmi_total = 0
    acc_total = 0
    for i in range(0, 20):
        nmi, acc = unsupervised_evaluation.evaluation(X_selected=selected_features, n_clusters=num_cluster, y=y)
        nmi_total += nmi
        acc_total += acc

    # output the average NMI and average ACC
    print "NMI:", float(nmi_total) / 20
    print "ACC:", float(acc_total) / 20
Example #13
    def predict(self, X):
        """
        :param X: shape [n_row*n_clm, n_band]
        :return:
        """
        # n_row, n_column, __n_band = X.shape
        # XX = X.reshape((n_row * n_column, -1))  # n_sample * n_band
        XX = X

        kwargs_W = {"metric": "euclidean", "neighbor_mode": "knn", "weight_mode": "heat_kernel", "k": 5, 't': 1}
        W = construct_W.construct_W(XX, **kwargs_W)

        # obtain the scores of features
        score = lap_score.lap_score(X, W=W)

        # rank the features in ascending order of their Laplacian scores (lower is better)
        idx = lap_score.feature_ranking(score)

        # obtain the dataset on the selected features
        selected_features = X[:, idx[0:self.n_band]]

        # selected_features.reshape((self.n_band, n_row, n_column))
        # selected_features = np.transpose(selected_features, axes=(1, 2, 0))
        return selected_features
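A hypothetical way to drive the predict method above; BandSelector stands in for whatever class hosts it and is not from the original, only the n_band attribute is assumed:

import numpy as np

selector = BandSelector(n_band=8)     # hypothetical host class exposing n_band
cube = np.random.rand(64 * 64, 120)   # (n_row*n_clm, n_band), as the docstring describes
selected = selector.predict(cube)
print(selected.shape)                 # (4096, 8)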
Example #14
    print('fs')

    ########################### Apply Feature Selection methods :ReliefF, Laplacian score & Fisher
    #ReliefF
    score_rel = reliefF.reliefF(X_train, y_train)
    idx_rel = reliefF.feature_ranking(score_rel)
    #Laplacian score
    kwargs_W = {
        "metric": "euclidean",
        "neighbor_mode": "knn",
        "k": 7,
        't': 1,
        'reliefF': True
    }
    W = construct_W.construct_W(X_train, **kwargs_W)
    score_lap = lap_score.lap_score(X_train, W=W)
    idx_lap = lap_score.feature_ranking(score_lap)
    #Fisher
    score_fish = fisher_score.fisher_score(X_train, y_train)
    print(score_fish)
    idx_fish = fisher_score.feature_ranking(score_fish)
    ###################################### Feature Integration
    idxM = idx_rel[:threshold]
    idxN = idx_lap[:threshold]
    idxO = idx_fish[:threshold]

    if combination_method == 1:
        #AND
        idx_and = reduce(np.intersect1d, (idxO, idxM, idxN))
        idx = idx_and
        print("number of selectes features (bins) = ", idx.shape[0])
def lapscore_main():

    # repeat the whole process 10 times, once per subsample
    for index, subsample in enumerate(X_testset):

        # construct affinity matrix
        kwargs_W = {"metric": "euclidean", "neighbor_mode": "knn",
                    "weight_mode": "heat_kernel", "k": 5, 't': 1}
        W = construct_W.construct_W(subsample, **kwargs_W)

        # obtain the scores of features
        idx = lap_score.lap_score(subsample, mode="rank", W=W)
        # obtain the array of variables through ranking
        X_col_list = X_test_full.columns.values.tolist()
        prepare_list['lap_ranked_Xtestset' +
                     str(index)] = get_variable_rank(idx, X_col_list)
        ranked_var_filename = 'lap_ranked_Xtestset' + str(index) + '.txt'
        f_rank = open(ranked_var_filename, 'w')
        f_rank.write(str(prepare_list['lap_ranked_Xtestset' + str(index)]))
        f_rank.close()

        # perform evaluation on clustering task
        range_num_fea = range(10, 210, 10)  # number of selected features
        range_n_clusters = [3, 4, 5, 6, 7, 8, 9, 10]  # number of clusters

        # dynamic generating dictionaries to store results
        prepare_list['lapscore_criteria' +
                     str(index)] = {'silhouette_score': [], 'ch_score': [], 'db_score': []}

        # decide the optimal number of clusters and the optimal number of selected features

        for n_cluster in range_n_clusters:

            for num_features in range_num_fea:
                # obtain the dataset on the selected features
                selected_features = subsample[:, idx[0:num_features]]

                # initialize the clusterer with n_clusters value and a random generator
                # seed of 10 for reproducibility
                clusterer = KMeans(
                    n_clusters=n_cluster, random_state=10)
                cluster_labels = clusterer.fit_predict(selected_features)

                # the silhouette_score gives the average value for all the samples
                # this gives a perspective into the density and separation of the formed clusters
                silhouette_avg = metrics.silhouette_score(
                    selected_features, cluster_labels, metric='euclidean')
                # write the content into the dict
                prepare_list['lapscore_criteria' +
                             str(index)]['silhouette_score'].append(silhouette_avg)
                # in normal usage, the Calinski-Harabasz index is applied to the results of a cluster analysis
                ch_idx = metrics.calinski_harabasz_score(
                    selected_features, cluster_labels)
                # write the content into the dict
                prepare_list['lapscore_criteria' + str(index)
                             ]['ch_score'].append(ch_idx)
                # in normal usage, the Davies-Bouldin index is applied to the results of a cluster analysis
                db_idx = davies_bouldin_score(
                    selected_features, cluster_labels)
                # write the content into the dict
                prepare_list['lapscore_criteria' +
                             str(index)]['db_score'].append(db_idx)

                print("subset No.", index, ","
                      "For n_clusters =", n_cluster, ","
                      "For num_features =", num_features, ","
                      "the average silhouette_score is: ", silhouette_avg, ","
                      "the Calinski-Harabasz index is: ", ch_idx, ","
                      "the Davies-Bouldin index is: ", db_idx)

    lapscore_silhouette_score = generate_criteria_tb(
        dict_name='lapscore_criteria', col_name='silhouette_score')
    lapscore_Calinski_Harabasz_index = generate_criteria_tb(
        dict_name='lapscore_criteria', col_name='ch_score')
    lapscore_Davies_Bouldin_index = generate_criteria_tb(
        dict_name='lapscore_criteria', col_name='db_score')

    lapscore_silhouette_score.to_csv(
        'lapscore_silhouette_score.csv', index=False)
    lapscore_Calinski_Harabasz_index.to_csv(
        'lapscore_Calinski_Harabasz_index.csv', index=False)
    lapscore_Davies_Bouldin_index.to_csv(
        'lapscore_Davies_Bouldin_index.csv', index=False)
Example #16
    def bench(self, X, X_norm, y, n=2):
        num_feats = 20
        output_data = {'method': list(), 'features': list(), 'time': list(), self.test_att: list(), 'supervised': list()}

        # ----------------------------------------------------------------
        # CFS
        # start = time.perf_counter()
        # idx = cfs(X_norm.to_numpy(), y.to_numpy())[0]
        # print(idx)
        # selected_features = X_norm.iloc[:, idx[0: num_feats]].columns.tolist()
        # output_data['method'].append('CFS')
        # output_data['time'].append(time.perf_counter() - start)
        # output_data['features'].append(selected_features)
        # output_data[self.test_att].append(self.train_real_data(selected_features, X))

        # LA: Laplacian Score
        start = time.perf_counter()
        kwargs_W = {"metric": "euclidean", "neighbor_mode": "knn", "weight_mode": "heat_kernel", "k": 5, 't': 1}
        W = construct_W.construct_W(X_norm.to_numpy(), **kwargs_W)
        score = lap_score.lap_score(X_norm.to_numpy(), W=W)
        idx = lap_score.feature_ranking(score)
        selected_features = X_norm.iloc[:, idx[0: num_feats]].columns.tolist()
        output_data['method'].append('Laplacian Score')
        output_data['time'].append(time.perf_counter() - start)
        output_data['features'].append(selected_features)
        output_data['supervised'].append(False)
        output_data[self.test_att].append(self.train_real_data(selected_features, X))
        print(output_data)

        # FCBF: fast correlation-based filter
        # start = time.perf_counter()
        # idx = fcbf(X_norm.to_numpy(), y.to_numpy(), n_selected_features=num_feats)[0]
        # selected_features = X_norm.iloc[:, idx[0: num_feats]].columns.tolist()
        # output_data['method'].append('FCBF')
        # output_data['time'].append(time.perf_counter() - start)
        # output_data['features'].append(selected_features)
        # output_data['supervised'].append(True)
        # output_data[self.test_att].append(self.train_real_data(selected_features, X))
        # print(output_data)
        # output_data['method'].append('FCBF')
        # output_data['time'].append(9999999)
        # output_data['features'].append([])
        # output_data['supervised'].append(True)
        # output_data[self.test_att].append(0.0)

        # UDFS: Unsupervised Discriminative Feature Selection
        start = time.perf_counter()
        Weight = udfs(X_norm.to_numpy(), gamma=0.1, n_clusters=n)
        idx = feature_ranking(Weight)
        selected_features = X_norm.iloc[:, idx[0: num_feats]].columns.tolist()
        output_data['method'].append('UDFS')
        output_data['time'].append(time.perf_counter() - start)
        output_data['features'].append(selected_features)
        output_data['supervised'].append(False)
        output_data[self.test_att].append(self.train_real_data(selected_features, X))
        print(output_data)

        # SPEC: Spectral Feature Selection
        start = time.perf_counter()
        score = spec(X_norm.to_numpy())
        idx = feature_ranking_spec(score)
        selected_features = X_norm.iloc[:, idx[0: num_feats]].columns.tolist()
        output_data['method'].append('SPEC')
        output_data['time'].append(time.perf_counter() - start)
        output_data['features'].append(selected_features)
        output_data['supervised'].append(False)
        output_data[self.test_att].append(self.train_real_data(selected_features, X))
        print(output_data)

        # mRMR: minimum redundancy maximum relevance (MIQ)
        start = time.perf_counter()
        mrmr = pymrmr.mRMR(X_norm, 'MIQ', num_feats)
        output_data['method'].append('MRMR(MIQ)')
        output_data['time'].append(time.perf_counter() - start)
        output_data['features'].append(mrmr)
        output_data['supervised'].append(False)
        output_data[self.test_att].append(self.train_real_data(mrmr, X))
        print(output_data)

        # mRMR: minimum redundancy maximum relevance (MID)
        start = time.perf_counter()
        mrmr = pymrmr.mRMR(X_norm, 'MID', num_feats)
        output_data['method'].append('MRMR(MID)')
        output_data['time'].append(time.perf_counter() - start)
        output_data['features'].append(mrmr)
        output_data['supervised'].append(False)
        output_data[self.test_att].append(self.train_real_data(mrmr, X))
        print(output_data)

        # Recursive feature elimination (RFE):

        from sklearn.feature_selection import RFE
        from sklearn.linear_model import LogisticRegression
        rfe_selector = RFE(estimator=LogisticRegression(), n_features_to_select=num_feats, step=10, verbose=5)
        start = time.perf_counter()
        rfe_selector.fit(X_norm, y)
        rfe_support = rfe_selector.get_support()
        rfe_feature = X_norm.loc[:, rfe_support].columns.tolist()
        output_data['method'].append('RFE')
        output_data['time'].append(time.perf_counter() - start)
        output_data['features'].append(rfe_feature)
        output_data['supervised'].append(True)
        output_data[self.test_att].append(self.train_real_data(rfe_feature, X))
        print(output_data)

        # ----------------------------------------------------------------
        # Lasso: SelectFromModel:

        from sklearn.feature_selection import SelectFromModel
        from sklearn.linear_model import LogisticRegression

        embeded_lr_selector = SelectFromModel(LogisticRegression(penalty="l1"), max_features=num_feats)
        start = time.perf_counter()
        embeded_lr_selector.fit(X_norm, y)

        embeded_lr_support = embeded_lr_selector.get_support()
        embeded_lr_feature = X_norm.loc[:, embeded_lr_support].columns.tolist()
        output_data['method'].append('Lasso')
        output_data['time'].append(time.perf_counter() - start)
        output_data['features'].append(embeded_lr_feature)
        output_data['supervised'].append(True)
        output_data[self.test_att].append(self.train_real_data(embeded_lr_feature, X))
        print(output_data)
        print(str(len(embeded_lr_feature)), 'selected features')

        # -----------------------------------------------------------------------------
        # Tree - based: SelectFromModel:

        from sklearn.feature_selection import SelectFromModel
        from sklearn.ensemble import RandomForestClassifier

        embeded_rf_selector = SelectFromModel(RandomForestClassifier(n_estimators=100), max_features=num_feats)
        start = time.perf_counter()
        embeded_rf_selector.fit(X_norm, y)

        embeded_rf_support = embeded_rf_selector.get_support()
        embeded_rf_feature = X_norm.loc[:, embeded_rf_support].columns.tolist()
        output_data['method'].append('Tree_Based_RF')
        output_data['time'].append(time.perf_counter() - start)
        output_data['features'].append(embeded_rf_feature)
        output_data['supervised'].append(True)
        output_data[self.test_att].append(self.train_real_data(embeded_rf_feature, X))
        print(output_data)
        print(str(len(embeded_rf_feature)), 'selected features')

        # -------------------------------------------------------------------------------
        # also tree based:

        from sklearn.feature_selection import SelectFromModel
        from lightgbm import LGBMClassifier

        lgbc = LGBMClassifier(n_estimators=500, learning_rate=0.05, num_leaves=32, colsample_bytree=0.2,
                              reg_alpha=3, reg_lambda=1, min_split_gain=0.01, min_child_weight=40)

        embeded_lgb_selector = SelectFromModel(lgbc, max_features=num_feats)
        start = time.perf_counter()
        embeded_lgb_selector.fit(X_norm, y)

        embeded_lgb_support = embeded_lgb_selector.get_support()
        embeded_lgb_feature = X_norm.loc[:, embeded_lgb_support].columns.tolist()
        output_data['method'].append('Tree_Based_lightGBM')
        output_data['time'].append(time.perf_counter() - start)
        output_data['supervised'].append(True)
        output_data['features'].append(embeded_lgb_feature)
        output_data[self.test_att].append(self.train_real_data(embeded_lgb_feature, X))
        print(output_data)
        print(str(len(embeded_lgb_feature)), 'selected features')

        return output_data
Example #17
def Scoreseries():

    # score-based filter methods
    # init
    n = 120
    test_alpha = 0.325

    f_features = SelectKBest(f_classif, k=n).fit_transform(X_transed, y)
    mi_features = SelectKBest(mutual_info_classif,
                              k=n).fit_transform(X_transed, y)

    lap_featureindex = lap_score.lap_score(X_transed, y)
    lap_features = X_transed[:, lap_featureindex[0:n]]

    fdr_features = SelectFdr(alpha=0.335).fit_transform(X_transed, y)
    print("fdr_features shape:", fdr_features.shape)
    fpr_features = SelectFpr(alpha=0.33).fit_transform(X_transed, y)
    print("fpr_features shape:", fpr_features.shape)
    fwe_features = SelectFwe(alpha=test_alpha).fit_transform(X_transed, y)
    print("fwe_features shape:", fwe_features.shape)

    baseresult = cross_val_score(cls, X, y, cv=5, scoring='accuracy')

    #     chi2result = cross_val_score(cls, chi2_features, y, cv = 5, scoring = 'accuracy' )
    #     print(baseresult,sum(baseresult)/5)
    #     print(chi2result,sum(chi2result)/5)

    print("f")
    fresult = cross_val_score(cls, f_features, y, cv=5, scoring='accuracy')
    print(baseresult, sum(baseresult) / 5)
    print(fresult, sum(fresult) / 5)
    print("mutual information")
    miresult = cross_val_score(cls, mi_features, y, cv=5, scoring='accuracy')
    print(baseresult, sum(baseresult) / 5)
    print(miresult, sum(miresult) / 5)
    print("lap score")
    lapresult = cross_val_score(cls, lap_features, y, cv=5, scoring='accuracy')
    print(baseresult, sum(baseresult) / 5)
    print(lapresult, sum(lapresult) / 5)
    print("fdr")
    if fdr_features.shape[1] > 0:
        fdrresult = cross_val_score(cls,
                                    fdr_features,
                                    y,
                                    cv=5,
                                    scoring='accuracy')
        print(baseresult, sum(baseresult) / 5)
        print(fdrresult, sum(fdrresult) / 5)
        print("fpr")
    if fpr_features.shape[1] > 0:
        fprresult = cross_val_score(cls,
                                    fpr_features,
                                    y,
                                    cv=5,
                                    scoring='accuracy')
        print(baseresult, sum(baseresult) / 5)
        print(fprresult, sum(fprresult) / 5)
    print("fwe")
    if fwe_features.shape[1] > 0:
        fweresult = cross_val_score(cls,
                                    fwe_features,
                                    y,
                                    cv=5,
                                    scoring='accuracy')
        print(baseresult, sum(baseresult) / 5)
        print(fweresult, sum(fweresult) / 5)

    return
Example #18
def compare_methods(x,
                    y,
                    num_select,
                    pctg=0.1,
                    pack_size=1,
                    num_clusters=5,
                    two_sided=False):

    n, d = x.shape
    idx = np.random.permutation(n)
    x, y = x[idx], y[idx]

    #########  split train and test  #########
    X = x
    Y = y
    train_num = int(n * 0.7)
    test_num = n - int(n * 0.7)
    x = X[:train_num, :]
    y = Y[:train_num]
    x_test = X[-test_num:, :]
    y_test = Y[-test_num:]

    ###########  other methods  ######################
    '''    Similarity based: lap_score  SPEC          '''
    start_time = time.perf_counter()
    lap_score_result = lap_score.lap_score(x)
    lap_score_result = np.argsort(lap_score_result)[:num_select]
    print('lap_score running time:', time.perf_counter() - start_time)

    #    _,stepwise = backward_distance_selection(x,num_select,pctg,pack_size)   #pctg controls sensitivity to outliers

    start_time = time.perf_counter()
    rf_result = random_selection(x,
                                 num_select,
                                 N=300,
                                 num_use=int(d / 2),
                                 pctg=pctg,
                                 two_sided=two_sided)
    print('rf running time:', time.perf_counter() - start_time)

    start_time = time.perf_counter()
    SPEC_result = SPEC.spec(x)
    print('SPEC running time:', time.perf_counter() - start_time)
    SPEC_result = np.argsort(SPEC_result)[:num_select]  # find minimum

    start_time = time.perf_counter()
    CSPEC_result = cut_spec(x, pctg=0.15)
    print('cut-SPEC running time:', time.perf_counter() - start_time)
    CSPEC_result = np.argsort(CSPEC_result)[:num_select]  # find minimum
    '''sparse learning based'''
    start_time = time.perf_counter()
    MCFS_W = MCFS.mcfs(x, num_select)
    print('MCFS running time:', time.perf_counter() - start_time)
    MCFS_result = [np.max(np.abs(w)) for w in MCFS_W]  # find maximum
    MCFS_result = np.argsort(MCFS_result)[-num_select:]

    #    start_time = time.clock()
    #    NDFS_W = NDFS.ndfs(x,**{'n_clusters':num_clusters})
    #    print('NDFS running time:',time.clock()-start_time)
    #    NDFS_result = [np.sqrt(np.sum(x**2)) for x in NDFS_W]     #find maximum
    #    NDFS_result= np.argsort(NDFS_result)[-num_select:]
    #
    #    start_time = time.clock()
    #    UDFS_W = UDFS.udfs(x,**{'n_clusters':num_clusters})
    #    print('UDFS running time:',time.clock()-start_time)
    #    UDFS_result = [np.sqrt(np.sum(x**2)) for x in UDFS_W]     #find minimum ??????????????????????
    #    UDFS_result= np.argsort(UDFS_result)[:num_select]

    #    prop_x = x[:,list(stepwise)]
    rf_x = x[:, list(rf_result)]
    lap_score_x = x[:, list(lap_score_result)]
    SPEC_x = x[:, list(SPEC_result)]
    CSPEC_x = x[:, list(CSPEC_result)]
    MCFS_x = x[:, list(MCFS_result)]
    #    NDFS_x = x[:,list(NDFS_result)]
    #    UDFS_x = x[:,list(UDFS_result)]

    print('\n')
    print('Class Separability')
    #    print('prop', ef.class_seperability(prop_x,y))
    print('rf', ef.class_seperability(rf_x, y))
    print('lap_score', ef.class_seperability(lap_score_x, y))
    print('SPEC', ef.class_seperability(SPEC_x, y))
    print('cut-SPEC', ef.class_seperability(CSPEC_x, y))
    print('MCFS', ef.class_seperability(MCFS_x, y))
    #    print('NDFS',ef.class_seperability(NDFS_x,y))
    #    print('UDFS',ef.class_seperability(UDFS_x,y))

    print('\n')
    print('KNN accuracy')
    #    print('prop', ef.knn_accuracy(prop_x,y))
    print('rf', ef.knn_accuracy(x_test, y_test, rf_result))
    print('lap_score', ef.knn_accuracy(x_test, y_test, lap_score_result))
    print('SPEC', ef.knn_accuracy(x_test, y_test, SPEC_result))
    print('cut-SPEC', ef.knn_accuracy(x_test, y_test, CSPEC_result))
    print('MCFS', ef.knn_accuracy(x_test, y_test, MCFS_result))
    #    print('NDFS',ef.knn_accuracy(x_test,y_test,NDFS_result))
    #    print('UDFS',ef.knn_accuracy(x_test,y_test,UDFS_result),'\n')

    print('\n')
    print('connectivity')
    #    print('prop', ef.knn_accuracy(prop_x,y))
    print('rf', ef.connectivity(x, rf_x, pctg, two_sided))
    print('lap_score', ef.connectivity(x, lap_score_x, pctg, two_sided))
    print('SPEC', ef.connectivity(x, SPEC_x, pctg, two_sided))
    print('cut-SPEC', ef.connectivity(x, CSPEC_x, pctg, two_sided))
    print('MCFS', ef.connectivity(x, MCFS_x, pctg, two_sided))
Example #19
# URL for the Pima Indians Diabetes dataset (UCI Machine Learning Repository)
url = "http://archive.ics.uci.edu/ml/machine-learning-databases/pima-indians-diabetes/pima-indians-diabetes.data"
# download the file
import urllib.request

raw_data = urllib.request.urlopen(url)
# load the CSV file as a numpy matrix
dataset = np.loadtxt(raw_data, delimiter=",")
X = dataset[:, :8]  # the first 8 columns are features; column 8 is the class label
y = dataset[:, 8]


kwargs_W = {"metric": "euclidean", "neighbor_mode": "knn", "weight_mode": "heat_kernel", "k": 5, 't': 1}
W = construct_W.construct_W(X, **kwargs_W)
from skfeature.function.similarity_based import lap_score
score = lap_score.lap_score(X, W=W)
print(score)
idx = lap_score.feature_ranking(score)

fig = plt.figure()
plt.plot(score, label='Laplacian Score')

plt.legend(loc='upper center', shadow=True)
plt.show()
print(idx)
num_fea = 3

#selected_features = X[:, idx[0:num_fea]]
#print(selected_features)
selected_features1 = X[:, 0:1]
Example #20

process = LinearCombination.kernel_GramSchmidtProcess(rbf_kernel)
#np.random.seed(42)
#data = np.random.random(size=[10,5])
#process.fit(data)
#print(process.kmatrix)
#print(process.basisweight)
Xg,yg = datasets.make_gaussian_quantiles(n_features=10,random_state=42)
print(yg.shape)
Xp,yp = datasets.make_multilabel_classification(n_features=10,random_state=42,n_classes= 1)
print(yp.shape)

Rf1 = relief.Relief()
print(Rf1.fit(Xg,yg).w_)
Rf2 = relief.ReliefF()
print(Rf2.fit(Xg,yg).w_)
Rf3 = relief.RReliefF()
print(Rf3.fit(Xg,yg).w_)
L_score = lap_score.lap_score(Xg)
print(L_score)
MI = feature_selection.mutual_info_classif(Xg,yg)
print(MI)

数学 = "aaa"
print(数学)

# a = 154476802108746166441951315019919837485664325669565431700026634898253202035277999
# b = 36875131794129999827197811565225474825492979968971970996283137471637224634055579
# c = 4373612677928697257861252602371390152816537558161613618621437993378423467772036
# print( (a/(b+c)) + (b/(a+c)) + (c/(a+b)) )
def lap_ours(train, test, K):
    scores = lap_score(train[0])
    indices = lap_score_ranking(scores)[:K]
    return train[0][:, indices], test[0][:, indices]
Example #22
def laplacian_score(data):
    W = construct_W(data)
    return lap_score(data, W=W)
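A quick sketch exercising the two-line wrapper above, assuming construct_W and lap_score were imported as bare functions (e.g. from skfeature.utility.construct_W import construct_W and from skfeature.function.similarity_based.lap_score import lap_score):

import numpy as np

data = np.random.rand(80, 15)
scores = laplacian_score(data)
best = np.argsort(scores)[:5]  # five lowest scores = most informative features
print(best)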
def compare_methods(x, y, num_select, pctg=0.5, sample_pctg=1, num_clusters=5, zero_mean=False, dim=1, t=0.8, thresh=0.1):
    if not zero_mean:
        x = normalize(x, axis=0)
    else:
        x = standardize_feature(x)

    n, d = x.shape
    
#    idx = np.random.permutation(n)
#    x,y = x[idx], y[idx]
#    
#    #########  split train and test  #########
#    X=x;Y=y
#    train_num = int(n*0.6)
#    test_num = n-int(n*0.6)
#    x=X[:train_num,:]; y=Y[:train_num]
#    x_test = X[-test_num:,:];y_test = Y[-test_num:]
    
    ###########  calculate  ######################

    start_time = time.perf_counter()
    rf_result = random_selection(x, num_select, N=500, num_use=int(0.5 * d), pctg=pctg, two_sided=False)
    print('rf running time:', time.perf_counter() - start_time)

    start_time = time.perf_counter()
    rank_result, l1, l2, lmax = ranking_selection(x, num_select, N=500, num_use=int(0.5 * d), sample_pctg=1, preserve_pctg=pctg)
    print('rank running time:', time.perf_counter() - start_time)

    start_time = time.perf_counter()
    lap_score_result = lap_score.lap_score(x)
    lap_score_result = np.argsort(lap_score_result)[:num_select]    # find minimum
    print('lap_score running time:', time.perf_counter() - start_time)

    start_time = time.perf_counter()
    SPEC_result = SPEC.spec(x)
    print('SPEC running time:', time.perf_counter() - start_time)
    SPEC_result = np.argsort(SPEC_result)[:num_select]     # find minimum

    '''sparse learning based'''
    start_time = time.perf_counter()
    MCFS_W = MCFS.mcfs(x, num_select, **{'n_clusters': num_clusters})
    print('MCFS running time:', time.perf_counter() - start_time)
    MCFS_result = [np.max(np.abs(w)) for w in MCFS_W]     # find maximum
    MCFS_result = np.argsort(MCFS_result)[-num_select:]

#    start_time = time.clock()
#    NDFS_W = NDFS.ndfs(x,**{'n_clusters':num_clusters})
#    print('NDFS running time:',time.clock()-start_time)
#    NDFS_result = [np.sqrt(np.sum(x**2)) for x in NDFS_W]     #find maximum
#    NDFS_result= np.argsort(NDFS_result)[-num_select:]
#
#    start_time = time.clock()
#    UDFS_W = UDFS.udfs(x,**{'n_clusters':num_clusters}) 
#    print('UDFS running time:',time.clock()-start_time)             
#    UDFS_result = [np.sqrt(np.sum(x**2)) for x in UDFS_W]     #find minimum ??????????????????????
#    UDFS_result= np.argsort(UDFS_result)[:num_select]
    
#    prop_x = x[:,list(stepwise)]
    rf_x = x[:,list(rf_result)]
    rank_x = x[:,list(rank_result)]
    l1_x = x[:,list(l1)]
    l2_x = x[:,list(l2)]
    lmax_x = x[:,list(lmax)]
    lap_score_x = x[:,list(lap_score_result)]
    SPEC_x = x[:,list(SPEC_result)]
    MCFS_x = x[:,list(MCFS_result)]
#    NDFS_x = x[:,list(NDFS_result)]
#    UDFS_x = x[:,list(UDFS_result)]
    
#    '''[KNN purity NMI dgm0 dgm1], each one is a matrix'''
#    methods = ['rf','rank','lap_score','SPEC','MCFS']
#    for method in methods:
#        if method=='rf':
#            selected_feature = list(rf_result).reverse()
#        elif method=='rank':
#            selected_feature = list(rank_result).reverse()
#        elif method=='lap_score':
#            selected_feature = list(lap_score_result)
#        elif method=='SPEC':
#            selected_feature = list(SPEC_result)
#        else:
#            selected_feature = list(MCFS_result).reverse()
#        
#        if num_select<=50:         # the dimension
#            start_dim = 5; step = 2
#        else:
#            start_dim = 10; step = 5
        
    print('KNN accuracy')
    print('rf', ef.knn_accuracy(x,y,rf_result))
    print('rank', ef.knn_accuracy(x,y,rank_result))
    print('l1', ef.knn_accuracy(x,y,l1))
    print('l2', ef.knn_accuracy(x,y,l2))
    print('lmax', ef.knn_accuracy(x,y,lmax))
    print('lap_score', ef.knn_accuracy(x,y,lap_score_result))
    print('SPEC', ef.knn_accuracy(x,y,SPEC_result))
    print('MCFS',ef.knn_accuracy(x,y,MCFS_result))
#    print('NDFS',ef.knn_accuracy(x_test,y_test,NDFS_result))
#    print('UDFS',ef.knn_accuracy(x_test,y_test,UDFS_result),'\n')  

#    print('connectivity')
#    print('rf', ef.connectivity(x,rf_x,pctg, two_sided))
#    print('rank', ef.connectivity(x,rank_x,pctg, two_sided))
#    print('lap_score', ef.connectivity(x,lap_score_x,pctg, two_sided))
#    print('SPEC', ef.connectivity(x,SPEC_x,pctg, two_sided))
#    print('cut-SPEC', ef.connectivity(x,CSPEC_x,pctg, two_sided))
#    print('MCFS',ef.connectivity(x,MCFS_x,pctg, two_sided))
    
#    print('NDFS',ef.connectivity(x,NDFS_x,pctg, two_sided))
#    print('UDFS',ef.connectivity(x,UDFS_x,pctg, two_sided),'\n')  

    print('purity score | NMI')
    print('origin', ef.purity_score(x,y))
    print('rf', ef.purity_score(rf_x,y))
    print('rank', ef.purity_score(rank_x,y))
    print('lap_score', ef.purity_score(lap_score_x,y))
    print('SPEC', ef.purity_score(SPEC_x,y)  )
    print('MCFS', ef.purity_score(MCFS_x,y))
   
    dgm = ef.compute_dgm(x, t, dim, thresh)
    dgm_rf = ef.compute_dgm(rf_x, t, dim, thresh)
    dgm_rank = ef.compute_dgm(rank_x, t, dim, thresh)
    dgm_l1 = ef.compute_dgm(l1_x, t, dim, thresh)
    dgm_l2 = ef.compute_dgm(l2_x, t, dim, thresh)
    dgm_lmax = ef.compute_dgm(lmax_x, t, dim, thresh)
    dgm_lap_score = ef.compute_dgm(lap_score_x, t, dim, thresh)
    dgm_SPEC = ef.compute_dgm(SPEC_x, t, dim, thresh)
    dgm_MCFS = ef.compute_dgm(MCFS_x, t, dim, thresh)
#    plt.figure()
#    plt.plot(dgm[:,-2:], 'ro')
#    plt.figure()
#    plt.plot(dgm_rf[:,-2:], 'ro')
#    plt.figure()
#    plt.plot(dgm_rank[:,-2:], 'ro')
#    plt.figure()
#    plt.plot(dgm_SPEC[:,-2:], 'ro')
#    plt.figure()
#    plt.plot(dgm_MCFS[:,-2:], 'ro')
    
    print('dgm distance')
    print('rf', ef.dgm_distance(dgm,dgm_rf,'W', dim),'  ',ef.dgm_distance(dgm,dgm_rf,'B', dim))
    print('rank', ef.dgm_distance(dgm,dgm_rank,'W', dim),'  ',ef.dgm_distance(dgm,dgm_rank,'B', dim))
    print('l1', ef.dgm_distance(dgm,dgm_l1,'W', dim),'  ',ef.dgm_distance(dgm,dgm_l1,'B', dim))
    print('l2', ef.dgm_distance(dgm,dgm_l2,'W', dim),'  ',ef.dgm_distance(dgm,dgm_l2,'B', dim))
    print('lmax', ef.dgm_distance(dgm,dgm_lmax,'W', dim),'  ',ef.dgm_distance(dgm,dgm_lmax,'B', dim))
    print('lap_score', ef.dgm_distance(dgm,dgm_lap_score,'W', dim),'  ',ef.dgm_distance(dgm,dgm_lap_score,'B', dim))
    print('SPEC', ef.dgm_distance(dgm,dgm_SPEC,'W', dim),'  ',ef.dgm_distance(dgm,dgm_SPEC,'B', dim))
    print('MCFS', ef.dgm_distance(dgm,dgm_MCFS,'W', dim),'  ',ef.dgm_distance(dgm,dgm_MCFS,'B', dim))
def generate_result_dist(dataset, x, y, num_select, zero_mean=False, N=1000, t=0.6, thresh=0.1):
    if not zero_mean:
        x = normalize(x, axis=0)
    else:
        x = standardize_feature(x)

    n, d = x.shape
    
    if num_select==300:
        start_dim = 20; step = 20
    elif num_select==200:         # the dimension
        start_dim = 20; step = 10
    elif num_select==100:
        start_dim = 10; step = 10
    elif num_select==50:
        start_dim = 10; step = 5
    elif num_select == 20:
        start_dim = 4; step = 2
    else:
        start_dim = 5; step = 1
           
    dimension_list = list(range(start_dim,num_select+1,step))
    
    #########  rank: parameter  preserve_pctg, num_use  #########
    D0 = compute_dist(x)
    
    preserve_pctg_list = [0.2,0.4,0.6,0.8,1]   #dimension 0
    num_use_list = [0.1,0.2,0.3,0.4,0.5]    #dimension 1
        
    rank_result = np.zeros([len(preserve_pctg_list),len(num_use_list),7,len(dimension_list)])
    rank_result_l1 = np.zeros([len(preserve_pctg_list),len(num_use_list),7,len(dimension_list)])
    rank_result_l2 = np.zeros([len(preserve_pctg_list),len(num_use_list),7,len(dimension_list)])
    rank_result_lmax = np.zeros([len(preserve_pctg_list),len(num_use_list),7,len(dimension_list)])
    
    for i,preserve_pctg in enumerate(preserve_pctg_list):
        for j,num_use in enumerate(num_use_list):
            print(i,j)
            rank_selected, rank_selected_l1, rank_selected_l2, rank_selected_lmax= ranking_selection(x, num_select, N=N, num_use=int(num_use*d+1),sample_pctg=1, preserve_pctg=preserve_pctg)
            rank_selected = list(rank_selected)[::-1]

            for k, dimension in enumerate(dimension_list):      # performance using a different number of features
                s = rank_selected[:dimension]
                rank_x = x[:,s]
                D_rank = compute_dist(rank_x)
                rank_result[i,j,0,k] = ef.dif_dist(D0,D_rank,'l1')
                rank_result[i,j,1,k] = ef.dif_dist(D0,D_rank,'l2')
                rank_result[i,j,2,k] = ef.dif_dist(D0,D_rank,'lmax')
                
                s_l1 = rank_selected_l1[:dimension]
                rank_l1_x = x[:,s_l1]
                D1 = compute_dist(rank_l1_x)
                
                rank_result_l1[i,j,0,k] = ef.dif_dist(D0,D1,'l1')
                rank_result_l1[i,j,1,k] = ef.dif_dist(D0,D1,'l2')
                rank_result_l1[i,j,2,k] = ef.dif_dist(D0,D1,'lmax')               

                s_l2 = rank_selected_l2[:dimension]
                rank_l2_x = x[:,s_l2]
                D2 = compute_dist(rank_l2_x)
                
                rank_result_l2[i,j,0,k] = ef.dif_dist(D0,D2,'l1')
                rank_result_l2[i,j,1,k] = ef.dif_dist(D0,D2,'l2')
                rank_result_l2[i,j,2,k] = ef.dif_dist(D0,D2,'lmax')  
                
                s_lmax = rank_selected_lmax[:dimension]
                rank_lmax_x = x[:,s_lmax]
                D_max = compute_dist(rank_lmax_x)
                
                rank_result_lmax[i,j,0,k] = ef.dif_dist(D0,D_max,'l1')
                rank_result_lmax[i,j,1,k] = ef.dif_dist(D0,D_max,'l2')
                rank_result_lmax[i,j,2,k] = ef.dif_dist(D0,D_max,'lmax')                 

    
    np.save('./result/'+dataset+'/rank_dist',rank_result)
    np.save('./result/'+dataset+'/rank_l1_dist',rank_result_l1)
    np.save('./result/'+dataset+'/rank_l2_dist',rank_result_l2)
    np.save('./result/'+dataset+'/rank_lmax_dist',rank_result_lmax)
    
    ########  lap_score  ###########
    lap_score_result = np.zeros([7,len(dimension_list)])
    lap_score_selected = lap_score.lap_score(x)
    lap_score_selected = list(np.argsort(lap_score_selected)[:num_select])    #find minimum
    
    for k, dimension in enumerate(dimension_list):      # performance using a different number of features
        s = lap_score_selected[:dimension]
        lap_score_x = x[:,s]
        D1 = compute_dist(lap_score_x)
        
        lap_score_result[0,k] = ef.dif_dist(D0,D1,'l1')
        lap_score_result[1,k] = ef.dif_dist(D0,D1,'l2')
        lap_score_result[2,k] = ef.dif_dist(D0,D1,'lmax')

    np.save('./result/'+dataset+'/lap_score_dist',lap_score_result)
    
    ########  SPEC  ###########
    SPEC_result = np.zeros([7,len(dimension_list)])
    SPEC_selected = SPEC.spec(x)
    SPEC_selected = list(np.argsort(SPEC_selected)[:num_select])    #find minimum
    
    for k, dimension in enumerate(dimension_list):      # performance using a different number of features
        s = SPEC_selected[:dimension]
        SPEC_x = x[:,s]
        D1 = compute_dist(SPEC_x)
        
        SPEC_result[0,k] = ef.dif_dist(D0,D1,'l1')
        SPEC_result[1,k] = ef.dif_dist(D0,D1,'l2')
        SPEC_result[2,k] = ef.dif_dist(D0,D1,'lmax')

    np.save('./result/'+dataset+'/SPEC_dist',SPEC_result)
    
    #######  MCFS  parameter: num_clusters  ##############   
    num_clusters_list = [5,10,20,30]     
    MCFS_result = np.zeros([len(num_clusters_list),7,len(dimension_list)])
    for i,num_clusters in enumerate(num_clusters_list):
        MCFS_W = MCFS.mcfs(x,num_select,**{'n_clusters':num_clusters})
        MCFS_selected = [np.max(np.abs(w)) for w in MCFS_W]     # find maximum
        MCFS_selected = np.argsort(MCFS_selected)[-num_select:]
        MCFS_selected = list(MCFS_selected)[::-1]
        for k, dimension in enumerate(dimension_list):      # performance using a different number of features
            s = MCFS_selected[:dimension]
            MCFS_x = x[:,s]
            D1 = compute_dist(MCFS_x)
            
            MCFS_result[i,0,k] = ef.dif_dist(D0,D1,'l1')
            MCFS_result[i,1,k] = ef.dif_dist(D0,D1,'l2')
            MCFS_result[i,2,k] = ef.dif_dist(D0,D1,'lmax')
           
        
    np.save('./result/'+dataset+'/MCFS_dist',MCFS_result)   
    
    return rank_result, rank_result_l1, rank_result_l2,rank_result_lmax,lap_score_result, SPEC_result, MCFS_result
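A hypothetical call of the function above, assuming x and y hold the data matrix and labels and that ./result/madelon/ exists ("madelon" is only an illustrative dataset name):

rank_dist, rank_l1, rank_l2, rank_lmax, lap_dist, spec_dist, mcfs_dist = generate_result_dist(
    "madelon", x, y, num_select=100)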
    bigger = np.transpose(W) > W
    W = W - W.multiply(bigger) + np.transpose(W).multiply(bigger)
    print('Sparse Affinity Matrix:', W)

    ## Logging
    #    with open('output.txt', 'a') as f:
    #        print("W", file=f)
    #        print(W, file=f)

    ## Euclidean Laplacian result
    numTrainData = trainData.values
    kwargs_W = {"metric": "euclidean", "neighbor_mode": "knn"}
    W = construct_W.construct_W(numTrainData, **kwargs_W)

    ## Calculate Laplacian Score
    score = lap_score.lap_score(numTrainData, W=W)
    print('Laplacian Score:', score)

    ## Logging
    with open('output.txt', 'a') as f:
        print("Laplacian Score", file=f)
        print(score, file=f)

    # Laplacian HEOM result hardcoded
    """score = np.array(
        [np.nan, np.nan, np.nan, np.nan, 0.25866548, 0.25866548, np.nan, 0.25946108, np.nan, np.nan, np.nan, np.nan,
         0.67265115, 0.73108302, np.nan, np.nan, np.nan, 0.86144223, np.nan, 0.6201575, np.nan, np.nan, np.nan, np.nan,
         np.nan, np.nan, np.nan, np.nan, np.nan, 0.8655987, 0.85803891, 0.87968564, 0.88995775, 0.87647355, 0.86576088,
         0.87689691, 0.8832944, 0.8750145, 0.85803891, 0.87919727, 0.89337948, 0.668559, 1, 0.63601804, 0.64669977, 1,
         0.87252428, 0.86959342, 0.83178639, 1, 0.78901017, 0.6930278, 0.81462815, 0.84261471, 0.84425971, 0.86648025,
         0.6385317, np.nan, 0.57706172, 0.85893685, np.nan, 0.85893685, 0.63022226, np.nan, 0.56493291, 0.7190018,
Example #26
x_train = X[train_idx]
x_test = X[test_idx]
y_train = to_onehot(list(map(lambda x: mods.index(lbl[x][0]), train_idx)))
y_test = to_onehot(list(map(lambda x: mods.index(lbl[x][0]), test_idx)))

# compute Laplacian scores
x_train = np.append(x_train[:, 0, :], x_train[:, 1, :], axis=1)
kwargs_W = {
    "metric": "euclidean",
    "neighbor_mode": "knn",
    "weight_mode": "heat_kernel",
    "k": 5,
    't': 1
}
W = construct_W.construct_W(x_train, **kwargs_W)
score = lap_score.lap_score(x_train, W=W)
idx = lap_score.feature_ranking(score)
np.save('features/laplacian.npy', idx)
print('Features saved')
#idx = np.load('features/laplacian.npy')
x_train = x_train.transpose()
x_train = np.split(x_train, 2)
x_train = np.array(x_train).transpose((2, 0, 1))

in_shp = list(x_train.shape[1:])
print(x_train.shape, in_shp, snrs)
classes = mods

# create a copy of the data (plain assignment would only alias the array)
x_train_copy = x_train.copy()
Example #27
        X_train, X_test = features[train_index], features[test_index]
        y_train, y_test = labels[train_index], labels[test_index]
        start_time = str(datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'))
        acc = []

        # lap_score
        method = 'lap_score'
        kwargs_W = {
            "metric": "euclidean",
            "neighbor_mode": "knn",
            "weight_mode": "heat_kernel",
            "k": 5,
            't': 1
        }
        W = construct_W.construct_W(X_train, **kwargs_W)
        score = lap_score.lap_score(X_train, W=W)
        idx = lap_score.feature_ranking(score)
        selected_fea_train = X_train[:, idx[0:num_features]]
        selected_fea_test = X_test[:, idx[0:num_features]]
        clf.fit(selected_fea_train, y_train)
        acc.append(accuracy_score(y_test, clf.predict(selected_fea_test)))

        # fisher_score
        score = fisher_score.fisher_score(X_train, y_train)
        idx = fisher_score.feature_ranking(score)
        selected_fea_train = X_train[:, idx[0:num_features]]
        selected_fea_test = X_test[:, idx[0:num_features]]
        clf.fit(selected_fea_train, y_train)
        acc.append(accuracy_score(y_test, clf.predict(selected_fea_test)))

        # reliefF
Example #28
print "Data Preparation finished."

timeStart = datetime.datetime.now()

# feature selection
if methodType == 0:
    # Laplacian Score
    kwrags_W = {
        "metric": "euclidean",
        "neighbor_mode": "knn",
        "weight_mode": "heat_kernel",
        "k": 5,
        "t": 1
    }
    W = construct_W(data, **kwrags_W)
    result = lap_score.lap_score(data, W=W)
    print result
elif methodType == 1:
    # MCFS
    kwrags_W = {
        "metric": "euclidean",
        "neighbor_mode": "knn",
        "weight_mode": "heat_kernel",
        "k": 5,
        "t": 1
    }
    W = construct_W(data, **kwrags_W)
    # 参数n_selected_features用于控制LARs算法解的稀疏性,也就是result每一列中非零元素的个数
    # 参数n_clusters用于控制LE降维的目标维数,也就是result的列数
    result = MCFS.mcfs(data, n_selected_features=2, W=W, n_clusters=2)
    print result
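To turn the MCFS weight matrix into a feature ranking, the recipe used in the earlier examples takes each feature's maximum absolute weight across clusters; a sketch, assuming result is the matrix returned by MCFS.mcfs above:

import numpy as np

# one importance value per feature: its strongest contribution to any cluster
mcfs_importance = np.max(np.abs(result), axis=1)
ranked = np.argsort(mcfs_importance)[::-1]  # descending: most important first
print(ranked)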