Python Preprocessing примеры использования

Язык программирования: Python

Пространство имен/Пакет: eslearn.utils.el_preprocessing

Метод/Функция: Preprocessing

Примеров на hotexamples.com: 5

Python Preprocessing - 5 примеров найдено. Это лучшие примеры Python кода для eslearn.utils.el_preprocessing.Preprocessing, полученные из open source проектов. Вы можете ставить оценку каждому примеру, чтобы помочь нам улучшить качество примеров.

Пример #1

Показать файл

    def main_function(self):
        """
        Run the outer K-fold cross-validation and save the results.

        For every outer fold the training split is passed through
        ``self.re_sampling``, both splits are preprocessed with
        ``elprep.Preprocessing``, optionally reduced with
        ``self.dimReduction_PCA`` (when ``self.is_dim_reduction`` is truthy),
        a model is fitted with ``self.rfeCV_training`` and the held-out split
        is scored with ``self.testing`` and ``eval_performance``.

        Side effects
        ------------
        Sets ``orig_shape``, ``mask_obj`` and ``mask_all`` from
        ``self._load_nii_and_gen_label()`` and accumulates, fold by fold,
        ``label_test_all``, ``prediction``, ``decision``, ``accuracy``,
        ``sensitivity``, ``specificity``, ``AUC`` (flat arrays) and ``coef``
        (a list with one entry per fold).  Finally calls
        ``self.save_results()``, ``self._weight2nii(...)`` and
        ``self.save_fig()``.

        Returns
        -------
        self
        """

        # Load data and mask
        (data_all, label_all, self.orig_shape, self.mask_obj,
         self.mask_all) = self._load_nii_and_gen_label()

        # Per-fold accumulators.  np.append promotes the dtype as needed, so
        # the initial dtypes only matter while the arrays are still empty.
        # (The former train_index/test_index accumulators were removed: they
        # were never read, and their int16 cast wrapped above 32767.)
        self.label_test_all = np.array([], dtype=np.int16)
        self.decision = np.array([], dtype=np.int16)
        self.prediction = np.array([], dtype=np.int16)
        self.accuracy = np.array([], dtype=np.float16)
        self.sensitivity = np.array([], dtype=np.float16)
        self.specificity = np.array([], dtype=np.float16)
        self.AUC = np.array([], dtype=np.float16)
        self.coef = []

        # KFold Cross Validation (fixed seed, so the split is reproducible)
        kf = KFold(n_splits=self.num_of_fold_outer,
                   shuffle=True,
                   random_state=0)
        for i, (tr_ind, te_ind) in enumerate(kf.split(data_all)):
            print(f'------{i+1}/{self.num_of_fold_outer}...------\n')
            feature_train = data_all[tr_ind, :]
            label_train = label_all[tr_ind]
            feature_test = data_all[te_ind, :]
            label_test = label_all[te_ind]
            self.label_test_all = np.int16(
                np.append(self.label_test_all, label_test))

            # Resampling training data (the test split is left untouched)
            feature_train, label_train = self.re_sampling(
                feature_train, label_train)

            # data_preprocess
            prep = elprep.Preprocessing(self.data_preprocess_method,
                                        self.data_preprocess_level)
            feature_train, feature_test = prep.data_preprocess(
                feature_train, feature_test)

            # dimension reduction using univariate feature selection
            # feature_train, feature_test, mask_selected = self.dimReduction_filter(
            #         feature_train, label_train, feature_test, 0.05)

            # Dimension reduction using PCA
            if self.is_dim_reduction:
                feature_train, feature_test, model_dim_reduction = self.dimReduction_PCA(
                    feature_train, feature_test, self.components)
                print(
                    f'After dimension reduction, the feature number is {feature_train.shape[1]}'
                )
            else:
                print('No dimension reduction perfromed\n')
                print(f'The feature number is {feature_train.shape[1]}')

            # Train: inner feature selection using RFECV
            print('Training...\n')
            model, weight = self.rfeCV_training(feature_train, label_train,
                                                self.step,
                                                self.num_fold_of_inner_rfeCV,
                                                self.n_jobs)

            # Store the weights; when the features were reduced, map them
            # back through the reduction model's inverse_transform first.
            if self.is_dim_reduction:
                self.coef.append(model_dim_reduction.inverse_transform(weight))
            else:
                self.coef.append(weight)

            # Testing
            print('Testting...\n')
            pred, dec = self.testing(model, feature_test)
            self.prediction = np.append(self.prediction, np.array(pred))
            self.decision = np.append(self.decision, np.array(dec))

            # Evaluating classification performances
            acc, sens, spec, auc = eval_performance(
                label_test,
                pred,
                dec,
                accuracy_kfold=None,
                sensitivity_kfold=None,
                specificity_kfold=None,
                AUC_kfold=None,
                verbose=1,
                is_showfig=self.is_showfig_in_each_fold)

            self.accuracy = np.append(self.accuracy, acc)
            self.sensitivity = np.append(self.sensitivity, sens)
            self.specificity = np.append(self.specificity, spec)
            self.AUC = np.append(self.AUC, auc)

        # Save results and fig to local path
        self.save_results()
        # NOTE(review): the volume shape is hard-coded here although
        # self.orig_shape was loaded above -- confirm they are meant to agree.
        self._weight2nii(dimension_nii_data=(61, 73, 61))
        self.save_fig()

        print("--" * 10 + "Done!" + "--" * 10)
        return self

Пример #2

Показать файл

    def main_svc_rfe_cv(sel):
        """
        Leave-one-site-out evaluation over the four loaded datasets.

        Each dataset is held out once as the test set while the remaining
        three are concatenated into the training set.  Both sets are
        preprocessed with ``elprep.Preprocessing`` ('StandardScaler' at
        'subject' level), optionally reduced with ``sel.dimReduction``, then a
        model is fitted with ``sel.training`` and scored with ``sel.testing``
        and ``eval_performance``.

        Per-site outputs are accumulated on the instance in
        ``label_test_all``, ``prediction``, ``decision``, ``accuracy``,
        ``sensitivity``, ``specificity``, ``AUC`` and ``coef``.

        Returns
        -------
        sel
            The instance itself.
        """
        print('Training model and testing...\n')

        # Load data: every loader call yields a (features, labels) pair
        loaded = [
            sel._load_data(sel.dataset_our_center_550),
            sel._load_data(sel.dataset_206),
            sel._load_data(sel.data_COBRE),
            sel._load_data(sel.data_UCAL),
        ]
        feature_all = [features for features, _ in loaded]
        label_all = [labels for _, labels in loaded]

        # Display names, in the same order as the datasets above
        site_names = ['550', '206', 'COBRE', 'UCLA']
        n_site = len(label_all)

        # Result accumulators
        sel.label_test_all = np.array([], dtype=np.int16)
        sel.decision = np.array([], dtype=np.int16)
        sel.prediction = np.array([], dtype=np.int16)
        sel.accuracy = np.array([], dtype=np.float16)
        sel.sensitivity = np.array([], dtype=np.float16)
        sel.specificity = np.array([], dtype=np.float16)
        sel.AUC = np.array([], dtype=np.float16)
        sel.coef = []

        # Leave one site CV
        for i in range(n_site):
            print('-' * 40)
            print(f'{i+1}/{n_site}: test dataset is {site_names[i]}...')

            # Site i is the test set; every other site goes into training
            feature_test = feature_all[i]
            label_test = label_all[i]
            sel.label_test_all = np.int16(
                np.append(sel.label_test_all, label_test))
            feature_train = np.concatenate(
                [feat for k, feat in enumerate(feature_all) if k != i],
                axis=0)
            label_train = np.concatenate(
                [lab for k, lab in enumerate(label_all) if k != i],
                axis=0)

            # Resampling training data
            # feature_train, label_train = sel.re_sampling(feature_train, label_train)

            # Normalization
            scaler = elprep.Preprocessing(
                data_preprocess_method='StandardScaler',
                data_preprocess_level='subject')
            feature_train, feature_test = scaler.data_preprocess(
                feature_train, feature_test)

            # Dimension reduction
            if sel.is_dim_reduction:
                feature_train, feature_test, model_dim_reduction = sel.dimReduction(
                    feature_train, feature_test, sel.components)
                print(
                    f'After dimension reduction, the feature number is {feature_train.shape[1]}'
                )
            else:
                print('No dimension reduction perfromed\n')

            # Train and test
            print('training and testing...\n')
            model = sel.training(feature_train, label_train, sel.cv)

            # Save coef, mapped back through the reduction model if one was used
            fold_coef = model.coef_
            if sel.is_dim_reduction:
                fold_coef = model_dim_reduction.inverse_transform(fold_coef)
            sel.coef.append(fold_coef)

            pred, dec = sel.testing(model, feature_test)
            sel.prediction = np.append(sel.prediction, np.array(pred))
            sel.decision = np.append(sel.decision, np.array(dec))

            # Evaluating classification performances
            acc, sens, spec, auc = eval_performance(
                label_test,
                pred,
                dec,
                accuracy_kfold=None,
                sensitivity_kfold=None,
                specificity_kfold=None,
                AUC_kfold=None,
                verbose=1,
                is_showfig=0)
            sel.accuracy = np.append(sel.accuracy, acc)
            sel.sensitivity = np.append(sel.sensitivity, sens)
            sel.specificity = np.append(sel.specificity, spec)
            sel.AUC = np.append(sel.AUC, auc)
            print(f'performances = {acc, sens, spec,auc}')

        return sel

Пример #3

Показать файл

    def main_function(sel):
        """
        Pool four datasets and evaluate a model with shuffled K-fold CV.

        The four ``.npy`` files named by ``sel.dataset_our_center_550``,
        ``sel.dataset_206``, ``sel.dataset_COBRE`` and ``sel.dataset_UCAL``
        are loaded and stacked row-wise.  In every array column 1 is used as
        the label and columns 2 onward as features; column 0 is used as a
        unique ID for the first two datasets only (the other two get the
        placeholder ID -1).

        For each of the ``sel.cv`` folds the data are preprocessed with
        ``elprep.Preprocessing`` ('StandardScaler' at 'subject' level),
        optionally reduced with ``sel.dimReduction``, fitted with
        ``sel.training`` and scored with ``sel.testing`` and
        ``eval_performance``.

        Side effects
        ------------
        Accumulates ``label_test_all``, ``prediction``, ``decision``,
        ``accuracy``, ``sensitivity``, ``specificity``, ``AUC`` and ``coef``
        on the instance, and sets ``special_result`` to an array with one row
        per tested sample and the columns
        (unique ID, true label, decision value, prediction).

        Returns
        -------
        sel
            The instance itself.
        """
        print('Training model and testing...\n')

        # load data
        dataset_our_center_550 = np.load(sel.dataset_our_center_550)
        dataset_206 = np.load(sel.dataset_206)
        dataset_COBRE = np.load(sel.dataset_COBRE)
        dataset_UCAL = np.load(sel.dataset_UCAL)

        # Extracting features and label
        features_our_center_550 = dataset_our_center_550[:, 2:]
        features_206 = dataset_206[:, 2:]
        features_COBRE = dataset_COBRE[:, 2:]
        features_UCAL = dataset_UCAL[:, 2:]

        label_our_center_550 = dataset_our_center_550[:, 1]
        label_206 = dataset_206[:, 1]
        label_COBRE = dataset_COBRE[:, 1]
        label_UCAL = dataset_UCAL[:, 1]

        # Generate training data and test data.
        # The stacking order (550, 206, UCAL, COBRE) must match uid_all below.
        data_all = np.concatenate(
            [features_our_center_550, features_206, features_UCAL, features_COBRE], axis=0)
        label_all = np.concatenate(
            [label_our_center_550, label_206, label_UCAL, label_COBRE], axis=0)

        # Unique ID: UCAL and COBRE rows get the placeholder -1
        uid_our_center_550 = np.int32(dataset_our_center_550[:, 0])
        uid_206 = np.int32(dataset_206[:, 0])
        uid_all = np.concatenate([uid_our_center_550, uid_206,
                                  np.zeros(len(label_UCAL)) - 1,
                                  np.zeros(len(label_COBRE)) - 1], axis=0)
        uid_all = np.int32(uid_all)

        # Result accumulators
        sel.label_test_all = np.array([], dtype=np.int16)
        sel.decision = np.array([], dtype=np.int16)
        sel.prediction = np.array([], dtype=np.int16)
        sel.accuracy = np.array([], dtype=np.float16)
        sel.sensitivity = np.array([], dtype=np.float16)
        sel.specificity = np.array([], dtype=np.float16)
        sel.AUC = np.array([], dtype=np.float16)
        sel.coef = []

        # Row indices of the tested samples, in test order.  Kept at the
        # native index width: the previous int16 cast wrapped around for
        # indices above 32767 and would then pick the wrong rows of uid_all.
        test_index = np.array([], dtype=np.intp)

        # KFold Cross Validation
        kf = KFold(n_splits=sel.cv, shuffle=True, random_state=0)
        for i, (tr_ind, te_ind) in enumerate(kf.split(data_all)):
            print(f'------{i+1}/{sel.cv}...------\n')
            test_index = np.append(test_index, te_ind)
            feature_train = data_all[tr_ind, :]
            label_train = label_all[tr_ind]
            feature_test = data_all[te_ind, :]
            label_test = label_all[te_ind]
            sel.label_test_all = np.int16(np.append(sel.label_test_all, label_test))

            # resampling training data
            # feature_train, label_train = sel.re_sampling(feature_train, label_train)

            # normalization
            prep = elprep.Preprocessing(data_preprocess_method='StandardScaler', data_preprocess_level='subject')
            feature_train, feature_test = prep.data_preprocess(feature_train, feature_test)

            # dimension reduction
            if sel.is_dim_reduction:
                feature_train, feature_test, model_dim_reduction = sel.dimReduction(
                    feature_train, feature_test, sel.components)
                print(f'After dimension reduction, the feature number is {feature_train.shape[1]}')
            else:
                print('No dimension reduction perfromed\n')

            # train
            print('training and testing...\n')
            # model, weight = rfeCV(feature_train, label_train, step=0.2, cv=3, n_jobs=-1, permutation=0)
            model = sel.training(feature_train, label_train)
            coef = model.coef_
            # coef = weight

            # Weight: map back through the reduction model when one was used
            if sel.is_dim_reduction:
                sel.coef.append(model_dim_reduction.inverse_transform(coef))  # save coef
            else:
                sel.coef.append(coef)  # save coef

            # test
            pred, dec = sel.testing(model, feature_test)
            sel.prediction = np.append(sel.prediction, np.array(pred))
            sel.decision = np.append(sel.decision, np.array(dec))

            # Evaluating classification performances
            acc, sens, spec, auc = eval_performance(
                label_test, pred, dec,
                accuracy_kfold=None, sensitivity_kfold=None,
                specificity_kfold=None, AUC_kfold=None,
                verbose=1, is_showfig=0)
            sel.accuracy = np.append(sel.accuracy, acc)
            sel.sensitivity = np.append(sel.sensitivity, sens)
            sel.specificity = np.append(sel.specificity, spec)
            sel.AUC = np.append(sel.AUC, auc)

        # IDs re-ordered to match the order in which samples were tested
        uid_test_order = uid_all[test_index]
        # Stack the four equally long vectors and transpose: one row per sample
        sel.special_result = np.concatenate(
            [uid_test_order, sel.label_test_all, sel.decision, sel.prediction], axis=0).reshape(4, -1).T
        print('Done!')
        return sel

Пример #4

Показать файл

    def main_function(self):
        """
        Run K-fold cross-validation with optional PCA and feature selection.

        For each of the ``self.num_of_kfold`` folds the training split is
        passed through ``self.re_sampling``, both splits are preprocessed with
        ``elprep.Preprocessing().data_preprocess``, optionally reduced with
        ``self.dimReduction_PCA`` (``self.is_dim_reduction``) and optionally
        filtered with ``self.feature_selection_relief``
        (``self.is_feature_selection``).  A model is then fitted with
        ``self.training`` and scored with ``self.testing`` and
        ``eval_performance``.

        Side effects
        ------------
        Sets ``orig_shape``, ``mask_obj`` and ``mask_all`` from
        ``self._load_nii_and_gen_label()`` and accumulates ``label_test_all``,
        ``prediction``, ``decision``, ``accuracy``, ``sensitivity``,
        ``specificity``, ``AUC`` and ``coef`` on the instance.  Finally calls
        ``self.save_results()``, ``self._weight2nii(...)`` and
        ``self.save_fig()``.

        Returns
        -------
        self
        """
        print('Training model and testing...\n')

        # load data and mask
        (data_all, label_all, self.orig_shape, self.mask_obj,
         self.mask_all) = self._load_nii_and_gen_label()

        # Per-fold accumulators.  (The former train_index/test_index
        # accumulators were removed: they were never read, and their int16
        # cast wrapped above 32767.)
        self.label_test_all = np.array([], dtype=np.int16)
        self.decision = np.array([], dtype=np.int16)
        self.prediction = np.array([], dtype=np.int16)
        self.accuracy = np.array([], dtype=np.float16)
        self.sensitivity = np.array([], dtype=np.float16)
        self.specificity = np.array([], dtype=np.float16)
        self.AUC = np.array([], dtype=np.float16)
        self.coef = []

        # KFold Cross Validation
        kf = KFold(n_splits=self.num_of_kfold, shuffle=True, random_state=0)
        for i, (tr_ind, te_ind) in enumerate(kf.split(data_all)):
            print(f'------{i+1}/{self.num_of_kfold}...------\n')
            feature_train = data_all[tr_ind, :]
            label_train = label_all[tr_ind]
            feature_test = data_all[te_ind, :]
            label_test = label_all[te_ind]
            self.label_test_all = np.int16(
                np.append(self.label_test_all, label_test))

            # Resampling training data
            feature_train, label_train = self.re_sampling(
                feature_train, label_train)

            # data_preprocess
            # NOTE(review): method and level are passed to data_preprocess()
            # here rather than to the Preprocessing constructor -- confirm
            # this matches the eslearn version in use.
            feature_train, feature_test = elprep.Preprocessing(
            ).data_preprocess(feature_train, feature_test,
                              self.data_preprocess_method,
                              self.data_preprocess_level)

            # Dimension reduction using PCA
            if self.is_dim_reduction:
                feature_train, feature_test, model_dim_reduction = self.dimReduction_PCA(
                    feature_train, feature_test, self.components)
                print(
                    f'After dimension reduction, the feature number is {feature_train.shape[1]}'
                )
            else:
                print('No dimension reduction perfromed\n')
                print(f'The feature number is {feature_train.shape[1]}')

            # Feature selection
            if self.is_feature_selection:
                feature_train, feature_test, mask, n_features_origin = self.feature_selection_relief(
                    feature_train, label_train, feature_test,
                    self.n_features_to_select)

            # Train and test
            print('training and testing...\n')
            model = self.training(feature_train, label_train)

            # Get weight: scatter the coefficients of the selected features
            # back into a zero vector of the pre-selection length.
            if self.is_feature_selection:
                coef = np.zeros([
                    n_features_origin,
                ])
                coef[mask] = model.coef_
            else:
                coef = model.coef_

            # Map back through the reduction model when one was used
            if self.is_dim_reduction:
                self.coef.append(model_dim_reduction.inverse_transform(coef))
            else:
                self.coef.append(coef)

            pred, dec = self.testing(model, feature_test)
            self.prediction = np.append(self.prediction, np.array(pred))
            self.decision = np.append(self.decision, np.array(dec))

            # Evaluating classification performances
            acc, sens, spec, auc = eval_performance(
                label_test,
                pred,
                dec,
                accuracy_kfold=None,
                sensitivity_kfold=None,
                specificity_kfold=None,
                AUC_kfold=None,
                verbose=1,
                is_showfig=self.is_showfig_in_each_fold)

            self.accuracy = np.append(self.accuracy, acc)
            self.sensitivity = np.append(self.sensitivity, sens)
            self.specificity = np.append(self.specificity, spec)
            self.AUC = np.append(self.AUC, auc)

        # Save results and fig to local path
        self.save_results()
        # NOTE(review): the volume shape is hard-coded here although
        # self.orig_shape was loaded above -- confirm they are meant to agree.
        self._weight2nii(dimension_nii_data=(61, 73, 61))
        self.save_fig()

        print("--" * 10 + "Done!" + "--" * 10)
        return self

Пример #5

Показать файл

    def main_function(sel):
        """
        Evaluate a model on a single dataset with shuffled K-fold CV.

        The ``.npy`` file named by ``sel.data`` is loaded; column 1 is used as
        the label and columns 2 onward as features.  For each of the
        ``sel.cv`` folds the data are preprocessed with
        ``elprep.Preprocessing`` ('StandardScaler' at 'subject' level),
        optionally reduced with ``sel.dimReduction``, fitted with
        ``sel.training`` and scored with ``sel.testing`` and
        ``eval_performance``.

        Side effects
        ------------
        Accumulates ``label_test_all``, ``prediction``, ``decision``,
        ``accuracy``, ``sensitivity``, ``specificity``, ``AUC`` and ``coef``
        on the instance, and sets ``special_result`` to an array with one row
        per tested sample and the columns
        (true label, decision value, prediction).

        Returns
        -------
        sel
            The instance itself.
        """
        print('training model and testing...\n')

        # load data
        data = np.load(sel.data)

        # Extracting features and label
        data_all = data[:, 2:]
        label_all = data[:, 1]

        # Result accumulators.  (The former train_index/test_index
        # accumulators were removed: they were never read, and their int16
        # cast wrapped above 32767.)
        sel.label_test_all = np.array([], dtype=np.int16)
        sel.decision = np.array([], dtype=np.int16)
        sel.prediction = np.array([], dtype=np.int16)
        sel.accuracy = np.array([], dtype=np.float16)
        sel.sensitivity = np.array([], dtype=np.float16)
        sel.specificity = np.array([], dtype=np.float16)
        sel.AUC = np.array([], dtype=np.float16)
        sel.coef = []

        # KFold Cross Validation
        kf = KFold(n_splits=sel.cv, shuffle=True, random_state=0)
        for i, (tr_ind, te_ind) in enumerate(kf.split(data_all)):
            print(f'------{i+1}/{sel.cv}...------\n')
            feature_train = data_all[tr_ind, :]
            label_train = label_all[tr_ind]
            feature_test = data_all[te_ind, :]
            label_test = label_all[te_ind]
            sel.label_test_all = np.int16(np.append(sel.label_test_all, label_test))

            # resampling training data
            # feature_train, label_train = sel.re_sampling(feature_train, label_train)

            # normalization
            prep = elprep.Preprocessing(data_preprocess_method='StandardScaler', data_preprocess_level='subject')
            feature_train, feature_test = prep.data_preprocess(feature_train, feature_test)

            # dimension reduction
            if sel.is_dim_reduction:
                feature_train, feature_test, model_dim_reduction = sel.dimReduction(
                    feature_train, feature_test, sel.components)
                print(f'After dimension reduction, the feature number is {feature_train.shape[1]}')
            else:
                print('No dimension reduction perfromed\n')

            # train and test
            print('training and testing...\n')
            model = sel.training(feature_train, label_train)
            weight = model.coef_

            # Map back through the reduction model when one was used
            if sel.is_dim_reduction:
                sel.coef.append(model_dim_reduction.inverse_transform(weight))  # save coef
            else:
                sel.coef.append(weight)  # save coef

            pred, dec = sel.testing(model, feature_test)
            sel.prediction = np.append(sel.prediction, np.array(pred))
            sel.decision = np.append(sel.decision, np.array(dec))

            # Evaluating classification performances
            acc, sens, spec, auc = eval_performance(
                label_test, pred, dec,
                accuracy_kfold=None, sensitivity_kfold=None,
                specificity_kfold=None, AUC_kfold=None,
                verbose=1, is_showfig=0)

            sel.accuracy = np.append(sel.accuracy, acc)
            sel.sensitivity = np.append(sel.sensitivity, sens)
            sel.specificity = np.append(sel.specificity, spec)
            sel.AUC = np.append(sel.AUC, auc)

        # Stack the three equally long vectors and transpose: one row per sample
        sel.special_result = np.concatenate(
            [sel.label_test_all, sel.decision, sel.prediction], axis=0).reshape(3, -1).T
        print('Done!')
        return sel