Example #1
def ls_l21_FS(X_train, y, train_index):
    # build the label matrix for all samples, then keep only the training rows
    Y = construct_label_matrix_pan(y)
    Y_train = Y[train_index]
    # least-squares loss with l2,1-norm regularization (z = 0.1)
    W, obj, value_gamma = ls_l21.proximal_gradient_descent(X_train,
                                                           Y_train,
                                                           0.1,
                                                           verbose=False)
    # rank features in descending order according to ||w_i||
    idx = feature_ranking(W)

    return (idx, W)
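
A minimal driver for this helper, assuming the skfeature package provides ls_l21, construct_label_matrix_pan, and feature_ranking (the module paths below follow skfeature's layout); the data and the KFold split are purely illustrative:

import numpy as np
from sklearn.model_selection import KFold
from skfeature.function.sparse_learning_based import ls_l21
from skfeature.utility.sparse_learning import construct_label_matrix_pan, feature_ranking

X = np.random.rand(100, 50)        # toy data: 100 samples, 50 features
y = np.random.randint(0, 3, 100)   # toy labels from 3 classes

for train_index, _ in KFold(n_splits=5, shuffle=True).split(X):
    idx, W = ls_l21_FS(X[train_index], y, train_index)
    print(idx[:10])                # indices of the 10 top-ranked features
    break                          # one fold is enough for the sketch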
Example #2
import scipy.io
from sklearn import svm
from sklearn.metrics import accuracy_score
from sklearn.model_selection import KFold
from skfeature.function.sparse_learning_based import ls_l21
from skfeature.utility.sparse_learning import construct_label_matrix_pan, feature_ranking


def main():
    # load data
    mat = scipy.io.loadmat('../data/COIL20.mat')
    X = mat['X']  # data
    X = X.astype(float)
    y = mat['Y']  # label
    y = y[:, 0]
    Y = construct_label_matrix_pan(y)
    n_samples, n_features = X.shape  # number of samples and number of features

    # split data into 10 folds (sklearn's cross_validation module was replaced by model_selection)
    ss = KFold(n_splits=10, shuffle=True)

    # perform evaluation on classification task
    num_fea = 100  # number of selected features
    clf = svm.LinearSVC()  # linear SVM

    correct = 0
    for train, test in ss.split(X):
        # obtain the feature weight matrix
        Weight, obj, value_gamma = ls_l21.proximal_gradient_descent(
            X[train], Y[train], 0.1, verbose=False)

        # rank features in descending order according to ||w_i||
        idx = feature_ranking(Weight)

        # obtain the dataset on the selected features
        selected_features = X[:, idx[0:num_fea]]

        # train a classification model with the selected features on the training dataset
        clf.fit(selected_features[train], y[train])

        # predict the class labels of test data
        y_predict = clf.predict(selected_features[test])

        # obtain the classification accuracy on the test data
        acc = accuracy_score(y[test], y_predict)
        correct = correct + acc

    # output the average classification accuracy over all 10 folds
    print('Accuracy:', float(correct) / 10)
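
The script can be made directly executable with the usual entry-point guard (this assumes ../data/COIL20.mat is available relative to the working directory):

if __name__ == '__main__':
    main()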
Example #3
def main_opp2opp_fs(tgt_num_samp_per_class,
                    src_type,
                    sim_type,
                    k,
                    tgtsbj,
                    lambda_l21=1e-2):

    TGT_NUM_SAMP_PER_CLASS = int(tgt_num_samp_per_class)

    # Dataset information
    SRC_NUM_CLASSES = 10
    TGT_NUM_CLASSES = 7
    NUM_DIM = 77

    # Initialize random seed
    #init_random_seed(1)

    # Load data
    print("=== Data loading ===")
    src_loader,tgt_loader_train,tgt_loader_test = \
    get_dataloader_fs('opp',tgt_num_samp_per_class,src_type,k,tgtsbj)

    # NOTE: the function returns inside the innermost loop, so only the first
    # batch yielded by each loader is ever processed
    for data_src, labels_src in src_loader:

        for data_train_tgt, labels_train_tgt in tgt_loader_train:

            for data_test_tgt, labels_test_tgt in tgt_loader_test:

                # Model construction
                print("=== Model Construction ===")
                # Initialize encoder_init
                print(">>> Encoder Initialization <<<")
                model_path = "./model_init/opp/" + str(tgtsbj) + "/"
                if not os.path.exists(model_path):
                    os.makedirs(model_path)
                if not os.path.exists(model_path + "encoder_init.pkl"):
                    # Construct encoder_init
                    encoder_init = \
                    FFLSTMEncoder1(lstm_input_size=NUM_DIM,
                                   lstm_hidden_size=params.lstm_hidden_size_opp,
                                   lstm_num_layers=params.lstm_num_layers_opp,
                                   fc2_size=params.fc2_size_opp)
                    # Save encoder_init
                    torch.save(encoder_init, model_path + "encoder_init.pkl")
                # Initialize classifier_tgt_init
                print(">>> Target Classifier Initialization <<<")
                if not os.path.exists(model_path + "classifier_tgt_init.pkl"):
                    # Construct classifier_tgt_init
                    classifier_tgt_init = \
                    FFLSTMClassifier(fc2_size=params.fc2_size_opp,
                                     num_classes=TGT_NUM_CLASSES)
                    # Save classifier_tgt_init
                    torch.save(classifier_tgt_init,
                               model_path + "classifier_tgt_init.pkl")
                # Initialize classifier_src_init
                print(">>> Source Classifier Initialization <<<")
                if not os.path.exists(model_path + "classifier_src_init.pkl"):
                    # Construct classifier_src_init
                    classifier_src_init = \
                    FFLSTMClassifier(fc2_size=params.fc2_size_opp,
                                     num_classes=SRC_NUM_CLASSES)
                    # Save classifier_src_init
                    torch.save(classifier_src_init,
                               model_path + "classifier_src_init.pkl")

                # Source Only
                print("=== Source Only ===")
                if (not os.path.exists(model_path+"encoder_src_"+src_type+".pkl")) \
                or (not os.path.exists(model_path+"classifier_src_"+src_type+".pkl")):
                    # Train encoder_src and classifier_src
                    encoder_init = torch.load(model_path + "encoder_init.pkl")
                    classifier_src_init = torch.load(model_path +
                                                     "classifier_src_init.pkl")
                    encoder_src,classifier_src = \
                    train_single_domain_source(encoder_init,classifier_src_init,
                                               data_src,labels_src,1e-3)
                    # Save encoder_src and classifier_src
                    torch.save(encoder_src,
                               model_path + "encoder_src_" + src_type + ".pkl")
                    torch.save(
                        classifier_src,
                        model_path + "classifier_src_" + src_type + ".pkl")

                # Parameter Transfer
                print("=== Parameter Transfer ===")
                f1score_list = np.zeros(4)
                confusemat_list = np.zeros(
                    (TGT_NUM_CLASSES, TGT_NUM_CLASSES, 4))
                # Target Only
                print("=== Target Only ===")
                # Train encoder_tgt and classifier_tgt
                encoder_init = torch.load(model_path + "encoder_init.pkl")
                classifier_tgt_init = torch.load(model_path +
                                                 "classifier_tgt_init.pkl")
                encoder_tgt,classifier_tgt,f1score,confusemat = \
                train_single_domain_target(encoder_init,classifier_tgt_init,
                                           data_train_tgt,labels_train_tgt,
                                           data_test_tgt,labels_test_tgt,
                                           1e-3)
                f1score_list[0] = f1score
                confusemat_list[:, :, 0] = confusemat

                # Sample Selection
                print("=== Sample Selection ===")
                if sim_type == "l21":
                    opp_path = "./results(f1)_"+sim_type+"_"+\
                                str(int(np.log10(lambda_l21)))+"/opp/"+\
                                src_type+"/tgtsbj="+str(tgtsbj)+"/num_samp="+\
                                str(tgt_num_samp_per_class)+"/"
                else:
                    opp_path = "./results(f1)_"+sim_type+"/opp/"+src_type+\
                                "/tgtsbj="+str(tgtsbj)+"/num_samp="+\
                                str(tgt_num_samp_per_class)+"/"
                if not os.path.exists(opp_path):
                    os.makedirs(opp_path)
                # Source feature extraction
                encoder_src = torch.load(model_path + "encoder_src_" +
                                         src_type + ".pkl")
                feat_src = encoder_src(data_src).detach().numpy()
                print("Source features are with size " + str(feat_src.shape))
                # Target feature extraction
                feat_train_tgt = encoder_src(data_train_tgt).detach().numpy()
                print("Target features are with size " +
                      str(feat_train_tgt.shape))
                if sim_type == "SR":
                    A,_,_ = \
                    proximal_gradient_descent(np.transpose(feat_src),
                                              np.transpose(feat_train_tgt),
                                              1e-2)
                    APos = abs(A)
                    NUM_SAMP_SRC = A.shape[0]
                    SRC_NUM_SAMP_PER_CLASS = int(NUM_SAMP_SRC /
                                                 SRC_NUM_CLASSES)
                    AProbPrime = np.zeros((SRC_NUM_CLASSES, TGT_NUM_CLASSES))
                    for i in range(SRC_NUM_CLASSES):
                        for j in range(TGT_NUM_CLASSES):
                            APosij = APos[i * SRC_NUM_SAMP_PER_CLASS:(i + 1) *
                                          SRC_NUM_SAMP_PER_CLASS,
                                          j * TGT_NUM_SAMP_PER_CLASS:(j + 1) *
                                          TGT_NUM_SAMP_PER_CLASS]
                            AProbPrime[i, j] = np.sum(APosij)
                elif sim_type == "NGD":
                    f_ngd = h5py.File('sim_ngd_opp.h5', 'r')  # explicit read-only mode
                    AProbPrime = f_ngd.get('sim_ngd')[()]
                    f_ngd.close()
                    print(AProbPrime.shape)
                elif sim_type == "Cos":
                    feat_src_norm = np.zeros(feat_src.shape)
                    for i in range(feat_src.shape[0]):
                        x = np.squeeze(feat_src[i, :])
                        feat_src_norm[i, :] = x / np.linalg.norm(x)
                    feat_train_tgt_norm = np.zeros(feat_train_tgt.shape)
                    for i in range(feat_train_tgt.shape[0]):
                        x = np.squeeze(feat_train_tgt[i, :])
                        feat_train_tgt_norm[i, :] = x / np.linalg.norm(x)
                    A = np.exp(
                        np.dot(feat_src_norm,
                               np.transpose(feat_train_tgt_norm)))
                    NUM_SAMP_SRC = A.shape[0]
                    SRC_NUM_SAMP_PER_CLASS = int(NUM_SAMP_SRC /
                                                 SRC_NUM_CLASSES)
                    AProbPrime = np.zeros((SRC_NUM_CLASSES, TGT_NUM_CLASSES))
                    for i in range(SRC_NUM_CLASSES):
                        for j in range(TGT_NUM_CLASSES):
                            Aij = A[i * SRC_NUM_SAMP_PER_CLASS:(i + 1) *
                                    SRC_NUM_SAMP_PER_CLASS,
                                    j * TGT_NUM_SAMP_PER_CLASS:(j + 1) *
                                    TGT_NUM_SAMP_PER_CLASS]
                            AProbPrime[i, j] = np.sum(Aij)

                # Source Classifier Combination
                # Initialize classifier_tgt_init
                classifier_src = torch.load(model_path + "classifier_src_" +
                                            src_type + ".pkl")
                classifier_src_weight = classifier_src.fc.weight.detach().numpy()
                # Combination1
                AProbSoft = np.zeros((SRC_NUM_CLASSES, TGT_NUM_CLASSES))
                for j in range(TGT_NUM_CLASSES):
                    AProbPrimej = np.squeeze(AProbPrime[:, j])
                    AProbSoft[:, j] = AProbPrimej / sum(AProbPrimej)
                if (k + 1) % 20 == 0:
                    # Save statistics as file
                    f = h5py.File(opp_path + "AProbSoft_" + str(k) + ".h5")
                    f.create_dataset("AProbSoft", data=AProbSoft)
                    f.close()
                print(">>> Combined Classifier 1 Initialization <<<")
                classifier_comb1_weight = np.matmul(np.transpose(AProbSoft),
                                                    classifier_src_weight)
                classifier_comb1 = \
                FFLSTMClassifier(fc2_size=params.fc2_size_opp,
                                 num_classes=TGT_NUM_CLASSES)
                classifier_comb1.fc.weight.data.copy_(
                    torch.tensor(classifier_comb1_weight))
                # Combination2
                AProbHard = np.zeros((SRC_NUM_CLASSES, TGT_NUM_CLASSES))
                for j in range(TGT_NUM_CLASSES):
                    AProbPrimej = np.squeeze(AProbPrime[:, j])
                    AProbHard[np.argmax(AProbPrimej).astype(int), j] = 1.0
                if (k + 1) % 20 == 0:
                    # Save statistics as file
                    f = h5py.File(opp_path + "AProbHard_" + str(k) + ".h5")
                    f.create_dataset("AProbHard", data=AProbHard)
                    f.close()
                print(">>> Combined Classifier 2 Initialization <<<")
                classifier_comb2_weight = np.matmul(np.transpose(AProbHard),
                                                    classifier_src_weight)
                classifier_comb2 = \
                FFLSTMClassifier(fc2_size=params.fc2_size_opp,
                                 num_classes=TGT_NUM_CLASSES)
                classifier_comb2.fc.weight.data.copy_(
                    torch.tensor(classifier_comb2_weight))

                # Fine Tuning
                print("=== Fine Tuning (Target Only) ===")
                # Initialize with encoder_src and classifier_tgt_init
                if sim_type == "SR" and lambda_l21 == 1e-2:
                    encoder_src = torch.load(model_path + "encoder_src_" +
                                             src_type + ".pkl")
                    classifier_tgt_init = torch.load(model_path +
                                                     "classifier_tgt_init.pkl")
                    encoder_tgt_ft,classifier_tgt_ft,f1score_ft,confusemat_ft = \
                    train_single_domain_target(encoder_src,classifier_tgt_init,
                                               data_train_tgt,labels_train_tgt,
                                               data_test_tgt,labels_test_tgt,
                                               5e-4)
                    f1score_list[1] = f1score_ft
                    confusemat_list[:, :, 1] = confusemat_ft
                else:
                    path = "./results(f1)_SR/opp/"+src_type+"/tgtsbj="+\
                           str(tgtsbj)+"/num_samp="+\
                           str(tgt_num_samp_per_class)+"/"
                    f = h5py.File(path + "f1score.h5", "r")
                    f1score_list_general = f.get("list")[()]
                    f1score_list[1] = f1score_list_general[1, k]

                # Initialize with encoder_src and classifier_comb1
                print("=== Fine Tuning (Combined Classifier 1) ===")
                encoder_src = torch.load(model_path + "encoder_src_" +
                                         src_type + ".pkl")
                print("The size of source classifier is " +
                      str(classifier_comb1.fc.weight.shape))
                encoder_tgt_comb1,classifier_tgt_comb1,f1score_comb1,confusemat_comb1 = \
                train_single_domain_target(encoder_src,classifier_comb1,
                                           data_train_tgt,labels_train_tgt,
                                           data_test_tgt,labels_test_tgt,
                                           5e-4)
                f1score_list[2] = f1score_comb1
                confusemat_list[:, :, 2] = confusemat_comb1
                # Initialize with encoder_src and classifier_comb2
                print("=== Fine Tuning (Combined Classifier 2) ===")
                encoder_src = torch.load(model_path + "encoder_src_" +
                                         src_type + ".pkl")
                print("The size of source classifier is " +
                      str(classifier_comb2.fc.weight.shape))
                encoder_tgt_comb2,classifier_tgt_comb2,f1score_comb2,confusemat_comb2 = \
                train_single_domain_target(encoder_src,classifier_comb2,
                                           data_train_tgt,labels_train_tgt,
                                           data_test_tgt,labels_test_tgt,
                                           5e-4)
                f1score_list[3] = f1score_comb2
                confusemat_list[:, :, 3] = confusemat_comb2

                return f1score_list, confusemat_list
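
A hypothetical invocation of this pipeline; the argument values below are illustrative only, and get_dataloader_fs, params, and the FFLSTM* models must be importable from the surrounding project:

# All values are placeholders, not settings from the original experiments
f1scores, confusemats = main_opp2opp_fs(tgt_num_samp_per_class=5,
                                        src_type='full',
                                        sim_type='SR',
                                        k=0,
                                        tgtsbj=1)
print(f1scores)  # F1 of: target-only, fine-tuned, combined classifier 1, combined classifier 2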
Example #4
    def fit(self, X, y):

        idx = []

        if self.tp == 'ITB':

            if self.name == 'MRMR':
                idx = MRMR.mrmr(X,
                                y,
                                n_selected_features=self.params['num_feats'])

        elif self.tp == 'filter':

            if self.name == 'Relief':
                score = reliefF.reliefF(X, y, k=self.params['k'])
                idx = reliefF.feature_ranking(score)

            if self.name == 'Fisher':
                # obtain the score of each feature on the training set
                score = fisher_score.fisher_score(X, y)

                # rank features in descending order according to score
                idx = fisher_score.feature_ranking(score)

            if self.name == 'MI':
                idx = np.argsort(
                    mutual_info_classif(
                        X, y, n_neighbors=self.params['n_neighbors']))[::-1]

        elif self.tp == 'wrapper':

            model_fit = self.model.fit(X, y)
            model = SelectFromModel(model_fit, prefit=True)
            idx = model.get_support(indices=True)
        elif self.tp == 'SLB':

            # one-hot-encode on target
            y = construct_label_matrix(y)

            if self.name == 'SMBA':
                scba = fs.SCBA(data=X,
                               alpha=self.params['alpha'],
                               norm_type=self.params['norm_type'],
                               verbose=self.params['verbose'],
                               thr=self.params['thr'],
                               max_iter=self.params['max_iter'],
                               affine=self.params['affine'],
                               normalize=self.params['normalize'],
                               step=self.params['step'],
                               PCA=self.params['PCA'],
                               GPU=self.params['GPU'],
                               device=self.params['device'])

                nrmInd, sInd, repInd, _ = scba.admm()
                if self.params['type_indices'] == 'nrmInd':
                    idx = nrmInd
                elif self.params['type_indices'] == 'repInd':
                    idx = repInd
                else:
                    idx = sInd

            if self.name == 'RFS':
                W = RFS.rfs(X, y, gamma=self.params['gamma'])
                idx = feature_ranking(W)

            if self.name == 'll_l21':
                # obtain the feature weight matrix
                W, _, _ = ll_l21.proximal_gradient_descent(X,
                                                           y,
                                                           z=self.params['z'],
                                                           verbose=False)
                # rank features in descending order according to ||w_i||
                idx = feature_ranking(W)
            if self.name == 'ls_l21':
                # obtain the feature weight matrix
                W, _, _ = ls_l21.proximal_gradient_descent(X,
                                                           y,
                                                           z=self.params['z'],
                                                           verbose=False)

                # rank features in descending order according to ||w_i||
                idx = feature_ranking(W)

            if self.name == 'LASSO':

                lasso = Lasso(alpha=self.params['alpha'], positive=True).fit(X, y)

                # coef_ is 1-D for a single-output target, 2-D for multi-output
                if lasso.coef_.ndim == 1:
                    coeff = lasso.coef_
                else:
                    coeff = np.asarray(lasso.coef_[0, :])

                idx = np.argsort(-coeff)

            if self.name == 'EN':  # elastic net with l1_ratio=1 (pure L1 penalty)

                enet = ElasticNet(alpha=self.params['alpha'],
                                  l1_ratio=1,
                                  positive=True).fit(X, y)

                if enet.coef_.ndim == 1:
                    coeff = enet.coef_
                else:
                    coeff = np.asarray(enet.coef_[0, :])

                idx = np.argsort(-coeff)

        return idx
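
A hypothetical usage sketch, given a feature matrix X and label vector y; the selector class wrapping this fit method is not shown, so the FeatureSelector name and its constructor are assumptions for illustration:

# Hypothetical wrapper; only tp, name, and params are known from fit() above
selector = FeatureSelector(tp='SLB', name='ls_l21', params={'z': 0.1})
idx = selector.fit(X, y)        # ranked feature indices
X_selected = X[:, idx[:100]]    # keep the 100 top-ranked features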
Example #5
    def fit(self, X, y):

        if self.name == 'LASSO':

            lasso = Lasso(alpha=self.params['alpha'], positive=True).fit(X, y)

            # coef_ is 1-D for a single-output target, 2-D for multi-output
            if lasso.coef_.ndim == 1:
                coeff = lasso.coef_
            else:
                coeff = np.asarray(lasso.coef_[0, :])

            idx = np.argsort(-coeff)

        if self.name == 'EN':  # elastic net with l1_ratio=1 (pure L1 penalty)

            enet = ElasticNet(alpha=self.params['alpha'],
                              l1_ratio=1,
                              positive=True).fit(X, y)

            if enet.coef_.ndim == 1:
                coeff = enet.coef_
            else:
                coeff = np.asarray(enet.coef_[0, :])

            idx = np.argsort(-coeff)

        if self.name == 'RFS':

            W = RFS.rfs(X,
                        construct_label_matrix(y),
                        gamma=self.params['gamma'])
            idx = feature_ranking(W)

        if self.name == 'll_l21':
            # obtain the feature weight matrix
            W, _, _ = ll_l21.proximal_gradient_descent(
                X,
                construct_label_matrix(y),
                z=self.params['z'],
                verbose=False)
            # rank features in descending order according to ||w_i||
            idx = feature_ranking(W)

        if self.name == 'ls_l21':
            # obtain the feature weight matrix
            W, _, _ = ls_l21.proximal_gradient_descent(
                X,
                construct_label_matrix(y),
                z=self.params['z'],
                verbose=False)

            # rank features in descending order according to ||w_i||
            idx = feature_ranking(W)

        if self.tp == 'ITB':

            if self.name == 'MRMR':
                idx = MRMR.mrmr(X,
                                y,
                                n_selected_features=self.params['num_feats'])

        if self.name == 'Relief':

            score = reliefF.reliefF(X, y, k=self.params['k'])
            idx = reliefF.feature_ranking(score)

        if self.name == 'MI':
            idx = np.argsort(
                mutual_info_classif(
                    X, y, n_neighbors=self.params['n_neighbors']))[::-1]

        return idx
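
The same assumed wrapper works for the filter-type selectors in this variant, for example Relief (again, FeatureSelector is a hypothetical name for the enclosing class):

selector = FeatureSelector(tp='filter', name='Relief', params={'k': 5})
idx = selector.fit(X, y)        # indices ranked by reliefF score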