Example #1
    def save_fig(sel, out_name):
        # Save ROC and Classification 2D figure
        acc, sens, spec, auc = eval_performance(
            sel.label_all, sel.prediction, sel.decision,
            sel.accuracy, sel.sensitivity, sel.specificity, sel.AUC,
            verbose=0, is_showfig=1, legend1='HC', legend2='SZ', is_savefig=1,
            out_name=out_name
        )
    def main_function(self, i, label_all_perm, feature_all):
        """The training data, validation data and  test data are randomly splited
        """
        print(f"Permutaion {i}...\n")
        # KFold Cross Validation
        accuracy, sensitivity, specificity, AUC = (
            np.array([]), np.array([]), np.array([]), np.array([]))
        kf = KFold(n_splits=self.cv, shuffle=True, random_state=0)
        for tr_ind, te_ind in kf.split(feature_all):
            feature_train = feature_all[tr_ind, :]
            label_train = label_all_perm[tr_ind]
            feature_test = feature_all[te_ind, :]
            label_test = label_all_perm[te_ind]

            # normalization
            prep = el_preprocessing.Preprocessing(
                data_preprocess_method='StandardScaler',
                data_preprocess_level='group')
            feature_train, feature_test = prep.data_preprocess(
                feature_train, feature_test)

            # dimension reduction
            if self.is_dim_reduction:
                feature_train, feature_test, model_dim_reduction = el_dimreduction.pca_apply(
                    feature_train, feature_test, self.components)

            # train
            model = self.training(feature_train, label_train)

            # test
            pred, dec = self.testing(model, feature_test)

            # Evaluating classification performances
            acc, sens, spec, auc = eval_performance(label_test,
                                                    pred,
                                                    dec,
                                                    accuracy_kfold=None,
                                                    sensitivity_kfold=None,
                                                    specificity_kfold=None,
                                                    AUC_kfold=None,
                                                    verbose=0,
                                                    is_showfig=0)
            accuracy = np.append(accuracy, acc)
            sensitivity = np.append(sensitivity, sens)
            specificity = np.append(specificity, spec)
            AUC = np.append(AUC, auc)

        # return np.mean(accuracy),np.mean(sensitivity), np.mean(specificity), np.mean(AUC)
        return accuracy, sensitivity, specificity, AUC
Example #3
 def save_fig(self):
     # Save ROC and Classification 2D figure
     acc, sens, spec, auc = eval_performance(
         self.label_test_all,
         self.prediction,
         self.decision,
         self.accuracy,
         self.sensitivity,
         self.specificity,
         self.AUC,
         verbose=0,
         is_showfig=self.is_showfig_finally,
         is_savefig=1,
         out_name=os.path.join(self.path_out,
                               'Classification_performances.pdf'))
 def save_fig(selftest, label_validation, prediction, decision, accuracy,
              sensitivity, specificity, AUC, outname):
     # Save ROC and Classification 2D figure
     acc, sens, spec, auc = eval_performance(label_validation,
                                             prediction,
                                             decision,
                                             accuracy,
                                             sensitivity,
                                             specificity,
                                             AUC,
                                             verbose=0,
                                             is_showfig=1,
                                             is_savefig=1,
                                             out_name=os.path.join(
                                                 selftest.path_out, outname),
                                             legend1='Healthy',
                                             legend2='Unhealthy')
 def save_fig(selftest):
     # Save ROC and Classification 2D figure
     acc, sens, spec, auc = eval_performance(
         selftest.label_test,
         selftest.prediction,
         selftest.decision,
         selftest.accuracy,
         selftest.sensitivity,
         selftest.specificity,
         selftest.AUC,
         verbose=0,
         is_showfig=selftest.is_showfig_finally,
         is_savefig=1,
         out_name=os.path.join(selftest.path_out,
                               'Classification_performances_test.pdf'),
         legend1='Healthy',
         legend2='Unhealthy')
Example #6
    def main_svc_rfe_cv(sel):
        print('Training model and testing...\n')
        # Load data
        uid_550, feature_550, label_550 = sel._load_data(sel.dataset_our_center_550)
        uid_206, feature_206, label_206 = sel._load_data(sel.dataset_206)
        uid_COBRE, feature_COBRE, label_COBRE = sel._load_data(sel.data_COBRE)
        uid_UCAL, feature_UCAL, label_UCAL = sel._load_data(sel.data_UCAL)
        uid_all = np.concatenate([uid_550, uid_206, uid_COBRE, uid_UCAL])
        feature_all = [feature_550, feature_206, feature_COBRE, feature_UCAL]
        sel.label_all = [label_550, label_206, label_COBRE, label_UCAL]
        name = ['550','206','COBRE','UCLA']

        # Leave one site CV
        n_site = len(sel.label_all)
        test_index = np.array([], dtype=np.int16)
        sel.decision = np.array([], dtype=np.int16)
        sel.prediction = np.array([], dtype=np.int16)
        sel.accuracy = np.array([], dtype=np.float16)
        sel.sensitivity = np.array([], dtype=np.float16)
        sel.specificity = np.array([], dtype=np.float16)
        sel.AUC = np.array([], dtype=np.float16)
        sel.coef = []
        for i in range(n_site):
            print('-'*40)
            print(f'{i+1}/{n_site}: test dataset is {name[i]}...')
            feature_train, label_train = feature_all.copy(), sel.label_all.copy()
            feature_test, label_test = feature_train.pop(i), label_train.pop(i)
            feature_train = np.concatenate(feature_train, axis=0)
            label_train = np.concatenate(label_train, axis=0)

            # Resampling training data
            # feature_train, label_train = sel.re_sampling(feature_train, label_train)
            # Normalization
            prep = el_preprocessing.Preprocessing(data_preprocess_method='StandardScaler', data_preprocess_level='subject')
            feature_train, feature_test = prep.data_preprocess(feature_train, feature_test)

            # Dimension reduction
            if sel.is_dim_reduction:
                feature_train, feature_test, model_dim_reduction = el_dimreduction.pca_apply(
                    feature_train, feature_test, sel.components
                )

                print(f'After dimension reduction, the feature number is {feature_train.shape[1]}')
            else:
                print('No dimension reduction performed\n')

            # Train and test
            print('training and testing...\n')
            model = sel.training(feature_train, label_train, sel.cv)
        
            if sel.is_dim_reduction:
                sel.coef.append(model_dim_reduction.inverse_transform(model.coef_))  # save coef
            else:
                sel.coef.append(model.coef_)  # save coef

            pred, dec = sel.testing(model, feature_test)
            sel.prediction = np.append(sel.prediction, np.array(pred))
            sel.decision = np.append(sel.decision, np.array(dec))
            
            # Evaluating classification performances
            acc, sens, spec, auc = eval_performance(label_test, pred, dec, 
                accuracy_kfold=None, sensitivity_kfold=None, specificity_kfold=None, AUC_kfold=None,
                 verbose=1, is_showfig=0)
            sel.accuracy = np.append(sel.accuracy, acc)
            sel.sensitivity = np.append(sel.sensitivity, sens)
            sel.specificity = np.append(sel.specificity, spec)
            sel.AUC = np.append(sel.AUC, auc)
            print(f'performances = {acc, sens, spec, auc}')
        
        sel.label_all = np.concatenate(sel.label_all)
        sel.special_result = np.concatenate( [uid_all, sel.label_all, sel.decision, sel.prediction], axis=0).reshape(4, -1).T
        return sel
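The pop-based leave-one-site-out loop above can also be expressed with scikit-learn's LeaveOneGroupOut, which generates the same splits from a per-subject site label. A minimal sketch, assuming stacked arrays features, labels and a sites vector (these names are illustrative):

import numpy as np
from sklearn.model_selection import LeaveOneGroupOut

# features: (n_subjects, n_features); labels: (n_subjects,);
# sites: (n_subjects,) array of site names, e.g. '550', '206', 'COBRE', 'UCLA'
logo = LeaveOneGroupOut()
for tr_ind, te_ind in logo.split(features, labels, groups=sites):
    held_out_site = np.unique(sites[te_ind])[0]
    print(f'test site: {held_out_site}, n_train={len(tr_ind)}, n_test={len(te_ind)}')
    # train on features[tr_ind], labels[tr_ind]; test on features[te_ind], labels[te_ind]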
Example #7
    def main_function(self):
        """
        This function is the main function.
        """

        # Load data and mask
        data_all, label_all, self.orig_shape, self.mask_obj, self.mask_all = self._load_nii_and_gen_label(
        )

        # KFold Cross Validation
        self.label_test_all = np.array([], dtype=np.int16)
        train_index = np.array([], dtype=np.int16)
        test_index = np.array([], dtype=np.int16)
        self.decision = np.array([], dtype=np.int16)
        self.prediction = np.array([], dtype=np.int16)
        self.accuracy = np.array([], dtype=np.float16)
        self.sensitivity = np.array([], dtype=np.float16)
        self.specificity = np.array([], dtype=np.float16)
        self.AUC = np.array([], dtype=np.float16)
        self.coef = []
        kf = KFold(n_splits=self.num_of_fold_outer,
                   shuffle=True,
                   random_state=0)
        for i, (tr_ind, te_ind) in enumerate(kf.split(data_all)):
            print(f'------{i+1}/{self.num_of_fold_outer}...------\n')
            train_index = np.int16(np.append(train_index, tr_ind))
            test_index = np.int16(np.append(test_index, te_ind))
            feature_train = data_all[tr_ind, :]
            label_train = label_all[tr_ind]
            feature_test = data_all[te_ind, :]
            label_test = label_all[te_ind]
            self.label_test_all = np.int16(
                np.append(self.label_test_all, label_test))

            # Resampling training data
            feature_train, label_train = self.re_sampling(
                feature_train, label_train)

            # data_preprocess
            prep = elprep.Preprocessing(self.data_preprocess_method,
                                        self.data_preprocess_level)
            feature_train, feature_test = prep.data_preprocess(
                feature_train, feature_test)

            # dimension reduction using univariate feature selection
            # feature_train, feature_test, mask_selected = self.dimReduction_filter(
            #         feature_train, label_train, feature_test, 0.05)

            # Dimension reduction using PCA
            if self.is_dim_reduction:
                feature_train, feature_test, model_dim_reduction = self.dimReduction_PCA(
                    feature_train, feature_test, self.components)
                print(
                    f'After dimension reduction, the feature number is {feature_train.shape[1]}'
                )
            else:
                print('No dimension reduction performed\n')
                print(f'The feature number is {feature_train.shape[1]}')

            # Train: inner feature selection using RFECV
            print('Training...\n')
            model, weight = self.rfeCV_training(feature_train, label_train,
                                                self.step,
                                                self.num_fold_of_inner_rfeCV,
                                                self.n_jobs)

            if self.is_dim_reduction:
                self.coef.append(model_dim_reduction.inverse_transform(weight))
            else:
                self.coef.append(weight)

            # Testing
            print('Testing...\n')
            pred, dec = self.testing(model, feature_test)
            self.prediction = np.append(self.prediction, np.array(pred))
            self.decision = np.append(self.decision, np.array(dec))

            # Evaluating classification performances
            acc, sens, spec, auc = eval_performance(
                label_test,
                pred,
                dec,
                accuracy_kfold=None,
                sensitivity_kfold=None,
                specificity_kfold=None,
                AUC_kfold=None,
                verbose=1,
                is_showfig=self.is_showfig_in_each_fold)

            self.accuracy = np.append(self.accuracy, acc)
            self.sensitivity = np.append(self.sensitivity, sens)
            self.specificity = np.append(self.specificity, spec)
            self.AUC = np.append(self.AUC, auc)

        # Save results and fig to local path
        self.save_results()
        self._weight2nii(dimension_nii_data=(61, 73, 61))
        self.save_fig()

        print("--" * 10 + "Done!" + "--" * 10)
        return self
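The per-fold sequence above (standardization, optional PCA, then RFECV-based training) can also be sketched as a scikit-learn Pipeline evaluated with cross_validate. The estimator and parameter values below are assumptions for illustration, not the settings used by the original class.

from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.feature_selection import RFECV
from sklearn.svm import LinearSVC
from sklearn.model_selection import StratifiedKFold, cross_validate

# Assumed settings: linear SVM, PCA keeping 95% of the variance,
# 5 outer folds and 5 inner RFECV folds.
pipe = Pipeline([
    ('scale', StandardScaler()),
    ('pca', PCA(n_components=0.95)),
    ('rfecv', RFECV(LinearSVC(max_iter=5000), step=0.2, cv=5, n_jobs=1)),
])
outer_cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=0)
scores = cross_validate(pipe, data_all, label_all, cv=outer_cv,
                        scoring=['accuracy', 'roc_auc'])
print(scores['test_accuracy'].mean(), scores['test_roc_auc'].mean())

RFECV delegates decision_function to the fitted linear SVM, which is what the roc_auc scorer needs here.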
Example #8
    def main_function(self):
        """
        """
        print('Training model and testing...\n')

        # load data and mask
        data_all, label_all, self.orig_shape, self.mask_obj, self.mask_all = self._load_nii_and_gen_label(
        )

        # KFold Cross Validation
        self.label_test_all = np.array([], dtype=np.int16)
        train_index = np.array([], dtype=np.int16)
        test_index = np.array([], dtype=np.int16)
        self.decision = np.array([], dtype=np.int16)
        self.prediction = np.array([], dtype=np.int16)
        self.accuracy = np.array([], dtype=np.float16)
        self.sensitivity = np.array([], dtype=np.float16)
        self.specificity = np.array([], dtype=np.float16)
        self.AUC = np.array([], dtype=np.float16)
        self.coef = []
        kf = KFold(n_splits=self.num_of_kfold, shuffle=True, random_state=0)
        for i, (tr_ind, te_ind) in enumerate(kf.split(data_all)):
            print(f'------{i+1}/{self.num_of_kfold}...------\n')
            train_index = np.int16(np.append(train_index, tr_ind))
            test_index = np.int16(np.append(test_index, te_ind))
            feature_train = data_all[tr_ind, :]
            label_train = label_all[tr_ind]
            feature_test = data_all[te_ind, :]
            label_test = label_all[te_ind]
            self.label_test_all = np.int16(
                np.append(self.label_test_all, label_test))

            # Resampling training data
            feature_train, label_train = self.re_sampling(
                feature_train, label_train)

            # data_preprocess
            feature_train, feature_test = elprep.Preprocessing(
            ).data_preprocess(feature_train, feature_test,
                              self.data_preprocess_method,
                              self.data_preprocess_level)

            # Dimension reduction using PCA
            if self.is_dim_reduction:
                feature_train, feature_test, model_dim_reduction = self.dimReduction_PCA(
                    feature_train, feature_test, self.components)
                print(
                    f'After dimension reduction, the feature number is {feature_train.shape[1]}'
                )
            else:
                print('No dimension reduction performed\n')
                print(f'The feature number is {feature_train.shape[1]}')

            # Feature selection
            if self.is_feature_selection:
                feature_train, feature_test, mask, n_features_origin = self.feature_selection_relief(
                    feature_train, label_train, feature_test,
                    self.n_features_to_select)
            # Train and test
            print('training and testing...\n')
            model = self.training(feature_train, label_train)

            # Get weight
            if self.is_feature_selection:
                coef = np.zeros([
                    n_features_origin,
                ])
                coef[mask] = model.coef_
            else:
                coef = model.coef_

            if self.is_dim_reduction:
                self.coef.append(model_dim_reduction.inverse_transform(coef))
            else:
                self.coef.append(coef)

            pred, dec = self.testing(model, feature_test)
            self.prediction = np.append(self.prediction, np.array(pred))
            self.decision = np.append(self.decision, np.array(dec))

            # Evaluating classification performances
            acc, sens, spec, auc = eval_performance(
                label_test,
                pred,
                dec,
                accuracy_kfold=None,
                sensitivity_kfold=None,
                specificity_kfold=None,
                AUC_kfold=None,
                verbose=1,
                is_showfig=self.is_showfig_in_each_fold)

            self.accuracy = np.append(self.accuracy, acc)
            self.sensitivity = np.append(self.sensitivity, sens)
            self.specificity = np.append(self.specificity, spec)
            self.AUC = np.append(self.AUC, auc)

        # Save results and fig to local path
        self.save_results()
        self._weight2nii(dimension_nii_data=(61, 73, 61))
        self.save_fig()

        print("--" * 10 + "Done!" + "--" * 10)
        return self
Example #9
    def main_function(sel):
        """
        The training data, validation data, and test data are randomly split.
        """
        print('Training model and testing...\n')

        # load data
        dataset_our_center_550 = np.load(sel.dataset_our_center_550)
        dataset_206 = np.load(sel.dataset_206)
        dataset_COBRE = np.load(sel.dataset_COBRE)
        dataset_UCAL = np.load(sel.dataset_UCAL)

        # Extracting features and label
        features_our_center_550 = dataset_our_center_550[:, 2:]
        features_206 = dataset_206[:, 2:]
        features_COBRE = dataset_COBRE[:, 2:]
        features_UCAL = dataset_UCAL[:, 2:]

        label_our_center_550 = dataset_our_center_550[:, 1]
        label_206 = dataset_206[:, 1]
        label_COBRE = dataset_COBRE[:, 1]
        label_UCAL = dataset_UCAL[:, 1]

        # Generate training data and test data
        data_all = np.concatenate(
            [features_our_center_550, features_206, features_UCAL, features_COBRE], axis=0)
        label_all = np.concatenate(
            [label_our_center_550, label_206, label_UCAL, label_COBRE], axis=0)

        # Unique ID
        uid_our_center_550 = np.int32(dataset_our_center_550[:, 0])
        uid_206 = np.int32(dataset_206[:, 0])
        uid_all = np.concatenate([uid_our_center_550, uid_206,
                                  np.zeros(len(label_UCAL)) - 1,
                                  np.zeros(len(label_COBRE)) - 1], axis=0)
        uid_all = np.int32(uid_all)

        # KFold Cross Validation
        sel.label_test_all = np.array([], dtype=np.int16)
        train_index = np.array([], dtype=np.int16)
        test_index = np.array([], dtype=np.int16)
        sel.decision = np.array([], dtype=np.int16)
        sel.prediction = np.array([], dtype=np.int16)
        sel.accuracy = np.array([], dtype=np.float16)
        sel.sensitivity = np.array([], dtype=np.float16)
        sel.specificity = np.array([], dtype=np.float16)
        sel.AUC = np.array([], dtype=np.float16)
        sel.coef = []        
        kf = KFold(n_splits=sel.cv, shuffle=True, random_state=0)
        for i, (tr_ind, te_ind) in enumerate(kf.split(data_all)):
            print(f'------{i+1}/{sel.cv}...------\n')
            train_index = np.int16(np.append(train_index, tr_ind))
            test_index = np.int16(np.append(test_index, te_ind))
            feature_train = data_all[tr_ind, :]
            label_train = label_all[tr_ind]
            feature_test = data_all[te_ind, :]
            label_test = label_all[te_ind]
            sel.label_test_all = np.int16(np.append(sel.label_test_all, label_test))

            # resampling training data
            # feature_train, label_train = sel.re_sampling(feature_train, label_train)

            # normalization
            prep = elprep.Preprocessing(data_preprocess_method='StandardScaler', data_preprocess_level='subject')
            feature_train, feature_test = prep.data_preprocess(feature_train, feature_test)

            # dimension reduction
            if sel.is_dim_reduction:
                feature_train, feature_test, model_dim_reduction = sel.dimReduction(
                    feature_train, feature_test, sel.components)
                print(f'After dimension reduction, the feature number is {feature_train.shape[1]}')
            else:
                print('No dimension reduction performed\n')
                
            # train
            print('training and testing...\n')
            # model, weight = rfeCV(feature_train, label_train, step=0.2, cv=3, n_jobs=-1, permutation=0)
            model = sel.training(feature_train, label_train)
            coef = model.coef_
            # coef = weight
            
            # Weight
            if sel.is_dim_reduction:
                sel.coef.append(model_dim_reduction.inverse_transform(coef))  # save coef
            else:
                sel.coef.append(coef)  # save coef
                
            # test
            pred, dec = sel.testing(model, feature_test)
            sel.prediction = np.append(sel.prediction, np.array(pred))
            sel.decision = np.append(sel.decision, np.array(dec))

            # Evaluating classification performances
            acc, sens, spec, auc = eval_performance(label_test, pred, dec, 
                accuracy_kfold=None, sensitivity_kfold=None, specificity_kfold=None, AUC_kfold=None,
                 verbose=1, is_showfig=0)
            sel.accuracy = np.append(sel.accuracy, acc)
            sel.sensitivity = np.append(sel.sensitivity, sens)
            sel.specificity = np.append(sel.specificity, spec)
            sel.AUC = np.append(sel.AUC, auc)

        uid_all_sorted = np.int32(uid_all[test_index])
        sel.special_result = np.concatenate(
            [uid_all_sorted, sel.label_test_all, sel.decision, sel.prediction], axis=0).reshape(4, -1).T
        print('Done!')
        return sel
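The concatenate/reshape(4, -1).T idiom used above to assemble special_result can be written more directly with np.column_stack, which produces the same subjects-by-columns table. A sketch using the same variables:

import numpy as np

# One row per test subject; columns: UID, true label, decision value, prediction
sel.special_result = np.column_stack(
    [uid_all_sorted, sel.label_test_all, sel.decision, sel.prediction])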
Example #10
    def main_function(sel):
        """
        The training data, validation data, and test data are randomly split.
        """
        print('training model and testing...\n')

        # load data
        data = np.load(sel.data)

        # Extracting features and label
        features_our_center_550 = data[:, 2:]
        label_our_center_550 = data[:, 1]
        
        # Generate training data and test data	
        data_all = features_our_center_550
        label_all = label_our_center_550

        # Unique ID

        # KFold Cross Validation
        sel.label_test_all = np.array([], dtype=np.int16)
        train_index = np.array([], dtype=np.int16)
        test_index = np.array([], dtype=np.int16)
        sel.decision = np.array([], dtype=np.int16)
        sel.prediction = np.array([], dtype=np.int16)
        sel.accuracy = np.array([], dtype=np.float16)
        sel.sensitivity = np.array([], dtype=np.float16)
        sel.specificity = np.array([], dtype=np.float16)
        sel.AUC = np.array([], dtype=np.float16)
        sel.coef = []
        kf = KFold(n_splits=sel.cv, shuffle=True, random_state=0)
        for i, (tr_ind, te_ind) in enumerate(kf.split(data_all)):
            print(f'------{i+1}/{sel.cv}...------\n')
            train_index = np.int16(np.append(train_index, tr_ind))
            test_index = np.int16(np.append(test_index, te_ind))
            feature_train = data_all[tr_ind, :]
            label_train = label_all[tr_ind]
            feature_test = data_all[te_ind, :]
            label_test = label_all[te_ind]
            sel.label_test_all = np.int16(np.append(sel.label_test_all, label_test))

            # resampling training data
            # feature_train, label_train = sel.re_sampling(feature_train, label_train)

            # normalization
            prep = elprep.Preprocessing(data_preprocess_method='StandardScaler', data_preprocess_level='subject')
            feature_train, feature_test = prep.data_preprocess(feature_train, feature_test)

            # dimension reduction
            if sel.is_dim_reduction:
                feature_train, feature_test, model_dim_reduction = sel.dimReduction(feature_train, feature_test, sel.components)
                print(f'After dimension reduction, the feature number is {feature_train.shape[1]}')
            else:
                print('No dimension reduction performed\n')
            
            # train and test
            print('training and testing...\n')
            model = sel.training(feature_train, label_train)
            weight = model.coef_

            if sel.is_dim_reduction:
                sel.coef.append(model_dim_reduction.inverse_transform(weight))  # save coef
            else:
                sel.coef.append(weight)  # save coef
                
            pred, dec = sel.testing(model, feature_test)
            sel.prediction = np.append(sel.prediction, np.array(pred))
            sel.decision = np.append(sel.decision, np.array(dec))

            # Evaluating classification performances
            acc, sens, spec, auc = eval_performance(label_test, pred, dec, 
                accuracy_kfold=None, sensitivity_kfold=None, specificity_kfold=None, AUC_kfold=None,
                 verbose=1, is_showfig=0)
        
            sel.accuracy = np.append(sel.accuracy, acc)
            sel.sensitivity = np.append(sel.sensitivity, sens)
            sel.specificity = np.append(sel.specificity, spec)
            sel.AUC = np.append(sel.AUC, auc)
        sel.special_result = np.concatenate([sel.label_test_all, sel.decision, sel.prediction], axis=0).reshape(3, -1).T
        print('Done!')
        return sel
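After the loop, the per-fold metrics collected in sel.accuracy, sel.sensitivity, sel.specificity and sel.AUC are typically summarized before reporting. A minimal sketch:

import numpy as np

# Mean and standard deviation of each metric across the K folds
for metric_name, values in [('accuracy', sel.accuracy), ('sensitivity', sel.sensitivity),
                            ('specificity', sel.specificity), ('AUC', sel.AUC)]:
    print(f'{metric_name}: {np.mean(values):.3f} +/- {np.std(values):.3f}')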
Example #11
acc_chronic_medicated_SSD_550_18_figure3 = np.sum(
    data_chronic_medicated_SSD_550_18_figure3[1] -
    data_chronic_medicated_SSD_550_18_figure3[3] == 0) / len(
        data_chronic_medicated_SSD_550_18_figure3)
acc_firstepisode_medicated_SSD_550_18_figure3 = np.sum(
    data_firstepisode_medicated_SSD_550_18_figure3[1] -
    data_firstepisode_medicated_SSD_550_18_figure3[3] == 0) / len(
        data_firstepisode_medicated_SSD_550_18_figure3)
acc_first_episode_unmedicated_SSD_550_18_figure3 = np.sum(
    data_firstepisode_unmedicated_SSD_550_18_figure3[1] -
    data_firstepisode_unmedicated_SSD_550_18_figure3[3] == 0) / len(
        data_firstepisode_unmedicated_SSD_550_18_figure3)
accuracy_figure3, sensitivity_figure3, specificity_figure3, auc_figure3 = eval_performance(
    scale_550_selected_figure3[1].values,
    scale_550_selected_figure3[3].values,
    scale_550_selected_figure3[2].values,
    accuracy_kfold=None,
    sensitivity_kfold=None,
    specificity_kfold=None,
    AUC_kfold=None,
    verbose=True,
    is_showfig=False,
    legend1='HC',
    legend2='Patients',
    is_savefig=False,
    out_name=None)

#%% Statistics
# figure3
n = len(data_chronic_medicated_SSD_550_18_figure3)
acc = acc_chronic_medicated_SSD_550_18_figure3
k = np.int32(n * acc)
p, sum_prob, prob, randk = lc_binomialtest(n, k, 0.5, 0.5)
print(p)
n = len(data_firstepisode_medicated_SSD_550_18_figure3)
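The statistics block above uses lc_binomialtest to check whether the observed accuracy exceeds chance; SciPy's binomtest gives an equivalent one-sided check. A sketch, assuming SciPy >= 1.7 and a chance level of 0.5:

from scipy.stats import binomtest

# n subjects, k of them correctly classified; one-sided test against chance (p = 0.5)
n = len(data_chronic_medicated_SSD_550_18_figure3)
k = int(round(n * acc_chronic_medicated_SSD_550_18_figure3))
result = binomtest(k, n, p=0.5, alternative='greater')
print(result.pvalue)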
    def main_function(selftest):
        """
        """
        print('Training model and testing...\n')

        # load data
        feature_train, feature_validation, label_train, label_validation, colname = selftest._load_data(
        )
        n_features_orig = feature_train.shape[1]

        # Check data

        # Age encoding
        feature_train[:, 2] = selftest.age_encodeing(feature_train[:, 2],
                                                     feature_train[:, 2])
        feature_validation[:, 2] = selftest.age_encodeing(
            feature_train[:, 2], feature_validation[:, 2])

        # Data normalization: not needed, because all variables are discrete.

        # Feature selection: LassoCV
        if selftest.is_feature_selection:
            coef, mask_lassocv = selftest.feature_selection_lasso(
                feature_train, label_train)
            feature_train, feature_validation = feature_train[:,
                                                              mask_lassocv], feature_validation[:,
                                                                                                mask_lassocv]
            var_important = pd.DataFrame(np.array(colname)[mask_lassocv])
            var_important_coef = pd.concat(
                [var_important, pd.DataFrame(coef[coef != 0])], axis=1)
            var_important_coef.columns = ['variable', 'coefficient (lasso); positive = risk factor, negative = protective factor']
            var_important_coef.to_csv(os.path.join(selftest.path_out,
                                                   'important_variables.txt'),
                                      index=False)

        # Onehot encoding
        # onehot = OneHotEncoder()
        # onehot.fit(feature_train)
        # feature_train = onehot.transform(feature_train).toarray()
        # feature_validation= onehot.transform(feature_validation).toarray()

        # Train
        print('training and testing...\n')
        if selftest.is_feature_selection:
            model = selftest.training(feature_train, label_train)
        else:
            model, w = selftest.rfeCV(feature_train, label_train)

        # Save model
        with open(os.path.join(selftest.path_out, 'model_classification.pkl'),
                  'wb') as f_model:
            joblib.dump(model, f_model)

        # Validating
        prediction_train, decision_train = selftest.testing(
            model, feature_train)
        prediction_validation, decision_validation = selftest.testing(
            model, feature_validation)

        # Evaluating classification performances
        accuracy_train, sensitivity_train, specificity_train, AUC_train = eval_performance(
            label_train,
            prediction_train,
            decision_train,
            accuracy_kfold=None,
            sensitivity_kfold=None,
            specificity_kfold=None,
            AUC_kfold=None,
            verbose=1,
            is_showfig=0)

        accuracy_validation, sensitivity_validation, specificity_validation, AUC_validation = eval_performance(
            label_validation,
            prediction_validation,
            decision_validation,
            accuracy_kfold=None,
            sensitivity_kfold=None,
            specificity_kfold=None,
            AUC_kfold=None,
            verbose=1,
            is_showfig=0)

        # Save results and fig to local path
        selftest.save_results(accuracy_train, sensitivity_train,
                              specificity_train, AUC_train, decision_train,
                              prediction_train, label_train, 'train')

        selftest.save_results(accuracy_validation, sensitivity_validation,
                              specificity_validation, AUC_validation,
                              decision_validation, prediction_validation,
                              label_validation, 'validation')

        selftest.save_fig(label_train, prediction_train, decision_train,
                          accuracy_train, sensitivity_train, specificity_train,
                          AUC_train, 'classification_performances_train.pdf')

        selftest.save_fig(label_validation, prediction_validation,
                          decision_validation, accuracy_validation,
                          sensitivity_validation, specificity_validation,
                          AUC_validation,
                          'classification_performances_validation.pdf')

        print(
            f"MSE = {np.mean(np.power((decision_validation - label_validation), 2))}"
        )

        print("--" * 10 + "Done!" + "--" * 10)
        return selftest
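The routine above persists the fitted classifier with joblib inside an open() block; joblib.dump also accepts a path directly, which mirrors the joblib.load calls used by the test-phase routine that follows. A minimal sketch reusing the snippet's names (selftest.path_out, model):

import os
import joblib

# Save the fitted model and reload it later for the test phase
model_path = os.path.join(selftest.path_out, 'model_classification.pkl')
joblib.dump(model, model_path)      # a filename or an open file object both work
model_classification = joblib.load(model_path)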
    def main_function(selftest):
        """
        """
        print('Training model and testing...\n')

        # load data and mask
        mask_lassocv = joblib.load(
            os.path.join(selftest.path_out,
                         'mask_selected_features_lassocv.pkl'))
        model_feature_selection = joblib.load(
            os.path.join(selftest.models_path, 'model_feature_selection.pkl'))
        model_classification = joblib.load(
            os.path.join(selftest.models_path, 'model_classification.pkl'))
        feature_test, selftest.label_test, feature_train = selftest._load_data(
        )

        # Age encoding
        feature_test[:, 2] = ClassifyFourKindOfPersonTrain().age_encodeing(
            feature_train[:, 2], feature_test[:, 2])

        # Feature selection
        if selftest.is_feature_selection:
            feature_test = feature_test[:, mask_lassocv != 0]

        # Testing
        selftest.prediction, selftest.decision = selftest.testing(
            model_classification, feature_test)

        # Evaluating classification performances
        selftest.accuracy, selftest.sensitivity, selftest.specificity, selftest.AUC = eval_performance(
            selftest.label_test,
            selftest.prediction,
            selftest.decision,
            accuracy_kfold=None,
            sensitivity_kfold=None,
            specificity_kfold=None,
            AUC_kfold=None,
            verbose=1,
            is_showfig=0)

        # Save results and fig to local path
        selftest.save_results()
        selftest.save_fig()

        print("--" * 10 + "Done!" + "--" * 10)
        return selftest
Example #14
acc_chronic_medicated_SSD_550_18_pca70 = np.sum(
    data_chronic_medicated_SSD_550_18_pca70[1] -
    data_chronic_medicated_SSD_550_18_pca70[3] == 0) / len(
        data_chronic_medicated_SSD_550_18_pca70)
acc_firstepisode_medicated_SSD_550_18_pca70 = np.sum(
    data_firstepisode_medicated_SSD_550_18_pca70[1] -
    data_firstepisode_medicated_SSD_550_18_pca70[3] == 0) / len(
        data_firstepisode_medicated_SSD_550_18_pca70)
acc_first_episode_unmedicated_SSD_550_18_pca70 = np.sum(
    data_firstepisode_unmedicated_SSD_550_18_pca70[1] -
    data_firstepisode_unmedicated_SSD_550_18_pca70[3] == 0) / len(
        data_firstepisode_unmedicated_SSD_550_18_pca70)
accuracy_pca70, sensitivity_pca70, specificity_pca70, auc_pca70 = eval_performance(
    scale_550_selected_pca70[1].values,
    scale_550_selected_pca70[3].values,
    scale_550_selected_pca70[2].values,
    accuracy_kfold=None,
    sensitivity_kfold=None,
    specificity_kfold=None,
    AUC_kfold=None,
    verbose=True,
    is_showfig=False,
    legend1='HC',
    legend2='Patients',
    is_savefig=False,
    out_name=None)

# pca80
acc_chronic_medicated_SSD_550_18_pca80 = np.sum(
    data_chronic_medicated_SSD_550_18_pca80[1] -
    data_chronic_medicated_SSD_550_18_pca80[3] == 0) / len(
        data_chronic_medicated_SSD_550_18_pca80)
acc_firstepisode_medicated_SSD_550_18_pca80 = np.sum(
    data_firstepisode_medicated_SSD_550_18_pca80[1] -
    data_firstepisode_medicated_SSD_550_18_pca80[3] == 0) / len(