Python regression示例，libpysat.regression.regression.regression Python示例

示例#1

0

显示文件

文件： test_regression.py 项目： Kelvinrr/PySAT

def test_OMP_CV_false():
    """Smoke test: build an 'OMP' regression with the 'CV' parameter set to False.

    Nothing is asserted; the test passes when construction does not raise.
    """
    omp_params = {'fit_intercept': True, 'CV': False}
    model = regression(method=['OMP'], yrange=[0.0, 100.0],
                       params=[omp_params])

示例#2

0

显示文件

文件： RegressionTrain.py 项目： daneishdespot/PySAT_Point_Spectra_GUI

    def setup(self):
        """Register an untrained regression model built from the current UI state.

        Reads the selected algorithm, x/y variables and y-range from the
        widgets, asks the algorithm-specific sub-module (``self.alg[method]``)
        for its parameters, and stores a new ``regression.regression`` object
        under a descriptive model key.  Also makes sure the
        'Model Coefficients' entry is listed in ``self.datakeys``.

        This step is best-effort: any ordinary error (for example no y
        variable selected, so ``yvars[0]`` fails) leaves the state unchanged
        from the point of failure and is silently ignored.
        """
        method = self.chooseAlgorithmComboBox.currentText()
        xvars = [str(x.text()) for x in self.xVariableList.selectedItems()]
        yvars = [('comp', str(y.text()))
                 for y in self.yVariableList.selectedItems()]
        yrange = [
            self.yMinDoubleSpinBox.value(),
            self.yMaxDoubleSpinBox.value()
        ]
        try:
            # The sub-module returns its parameters and a short key suffix.
            params, modelkey = self.alg[method].run()
            modelkey = "{} - {} - ({}, {}) {}".format(method, yvars[0][-1],
                                                      yrange[0], yrange[1],
                                                      modelkey)
            self.list_amend(self.modelkeys, self.curr_count, modelkey)
            self.models[modelkey] = regression.regression([method], [yrange],
                                                          [params])
            self.model_xvars[modelkey] = xvars
            self.model_yvars[modelkey] = yvars

            if 'Model Coefficients' not in self.datakeys:
                self.datakeys.append('Model Coefficients')
        except Exception:
            # Deliberately best-effort, but no longer a bare ``except:`` so
            # KeyboardInterrupt / SystemExit still propagate.
            pass

示例#3

0

显示文件

文件： test_regression.py 项目： Kelvinrr/PySAT

def test_PLS():
    """Smoke test: build a 'PLS' regression with n_components=0 and scale=False.

    Nothing is asserted; the test passes when construction does not raise.
    """
    pls_params = {'n_components': 0, 'scale': False}
    model = regression(method=['PLS'], yrange=[0.0, 100.0],
                       params=[pls_params])

示例#4

0

显示文件

文件： test_regression.py 项目： cneubauerUSGS/PyHAT

def test_OMP():
    """Smoke test: build an 'OMP' regression with n_nonzero_coefs=615.

    Nothing is asserted; the test passes when construction does not raise.
    """
    omp_params = {'fit_intercept': True, 'n_nonzero_coefs': 615}
    model = regression(method=['OMP'], yrange=[0.0, 100.0],
                       params=[omp_params])

示例#5

0

显示文件

文件： test_regression.py 项目： tisaconundrum2/PySAT

def test_KRR():
    """Smoke test: build a 'KRR' regression with a linear kernel.

    Note that 'gamma' and 'kernel_params' are passed as the string 'None'.
    Nothing is asserted; the test passes when construction does not raise.
    """
    krr_params = {
        'alpha': 0,
        'kernel': 'linear',
        'gamma': 'None',
        'degree': 3.0,
        'coef0': 1.0,
        'kernel_params': 'None',
    }
    model = regression(method=['KRR'], yrange=[0.0, 100.0],
                       params=[krr_params])

示例#6

0

显示文件

文件： test_regression.py 项目： tisaconundrum2/PySAT

def test_GP():
    """Smoke test: build a 'GP' regression with 'reduce_dim' set to 'PCA'.

    Nothing is asserted; the test passes when construction does not raise.
    """
    gp_params = {
        'reduce_dim': 'PCA',
        'n_components': 0,
        'random_start': 1,
        'theta0': 1.0,
        'thetaL': 0.1,
        'thetaU': 100.0,
    }
    model = regression(method=['GP'], yrange=[0.0, 100.0],
                       params=[gp_params])

示例#7

0

显示文件

def test_LASSO_CV_none():
    """Smoke test: build a 'LASSO' regression without a 'CV' entry in params.

    Nothing is asserted; the test passes when construction does not raise.
    """
    lasso_params = {
        'alpha': 1.0,
        'fit_intercept': True,
        'max_iter': 1000,
        'tol': 0.0001,
        'positive': False,
        'selection': 'random',
    }
    model = regression(method=['LASSO'], yrange=[0.0, 100.0],
                       params=[lasso_params])

示例#8

0

显示文件

def test_KRR():
    """Smoke test: build a 'KRR' regression with a linear kernel.

    Note that 'gamma' and 'kernel_params' are passed as the string 'None'.
    Nothing is asserted; the test passes when construction does not raise.
    """
    krr_params = {
        'alpha': 0,
        'kernel': 'linear',
        'gamma': 'None',
        'degree': 3.0,
        'coef0': 1.0,
        'kernel_params': 'None',
    }
    model = regression(method=['KRR'], yrange=[0.0, 100.0],
                       params=[krr_params])

示例#9

0

显示文件

文件： test_regression.py 项目： Kelvinrr/PySAT

def test_OMP_CV_true():
    """Smoke test: build an 'OMP' regression with 'CV' and 'precompute' set to True.

    Nothing is asserted; the test passes when construction does not raise.
    """
    omp_params = {'fit_intercept': True, 'CV': True, 'precompute': True}
    model = regression(method=['OMP'], yrange=[0.0, 100.0],
                       params=[omp_params])

示例#10

0

显示文件

文件： test_regression.py 项目： Kelvinrr/PySAT

def test_Ridge_CV_true():
    """Smoke test: build a 'Ridge' regression with the 'CV' parameter set to True.

    Nothing is asserted; the test passes when construction does not raise.
    """
    ridge_params = {'fit_intercept': True, 'normalize': False, 'CV': True}
    model = regression(method=['Ridge'], yrange=[0.0, 100.0],
                       params=[ridge_params])

示例#11

0

显示文件

文件： test_regression.py 项目： USGS-Astrogeology/PySAT

def test_Lasso():
    """Smoke test: build a 'Lasso' regression with random coordinate selection.

    Nothing is asserted; the test passes when construction does not raise.
    """
    lasso_params = {
        'alpha': 1.0,
        'fit_intercept': True,
        'max_iter': 1000,
        'tol': 0.0001,
        'positive': False,
        'selection': 'random',
    }
    model = regression(method=['Lasso'], yrange=[0.0, 100.0],
                       params=[lasso_params])

示例#12

0

显示文件

def test_GP():
    """Smoke test: build a 'GP' regression with 'reduce_dim' set to 'PCA'.

    Nothing is asserted; the test passes when construction does not raise.
    """
    gp_params = {
        'reduce_dim': 'PCA',
        'n_components': 0,
        'random_start': 1,
        'theta0': 1.0,
        'thetaL': 0.1,
        'thetaU': 100.0,
    }
    model = regression(method=['GP'], yrange=[0.0, 100.0],
                       params=[gp_params])

示例#13

0

显示文件

def test_LARS2_CV_true():
    """Smoke test: build a 'LARS' regression with the 'CV' parameter set to True.

    Nothing is asserted; the test passes when construction does not raise.
    """
    lars_params = {
        'fit_intercept': True,
        'positive': False,
        'verbose': False,
        'normalize': False,
        'precompute': True,
        'copy_X': True,
        'eps': 2.220445,
        'CV': True,
    }
    model = regression(method=['LARS'], yrange=[0.0, 100.0],
                       params=[lars_params])

示例#14

0

显示文件

文件： test_regression.py 项目： tisaconundrum2/PySAT

def test_Ridge():
    """Smoke test: build a 'Ridge' regression with the 'auto' solver.

    Note that 'max_iter' is the string 'None' and 'random_state' is an
    empty string.  Nothing is asserted; the test passes when construction
    does not raise.
    """
    ridge_params = {
        'alpha': 1.0,
        'copy_X': True,
        'fit_intercept': True,
        'max_iter': 'None',
        'normalize': False,
        'solver': 'auto',
        'tol': 0.0,
        'random_state': '',
    }
    model = regression(method=['Ridge'], yrange=[0.0, 100.0],
                       params=[ridge_params])

示例#15

0

显示文件

def test_Ridge_CV_none():
    """Smoke test: build a 'Ridge' regression without a 'CV' entry in params.

    Note that 'max_iter' is the string 'None' and 'random_state' is an
    empty string.  Nothing is asserted; the test passes when construction
    does not raise.
    """
    ridge_params = {
        'alpha': 1.0,
        'copy_X': True,
        'fit_intercept': True,
        'max_iter': 'None',
        'normalize': False,
        'solver': 'auto',
        'tol': 0.0,
        'random_state': '',
    }
    model = regression(method=['Ridge'], yrange=[0.0, 100.0],
                       params=[ridge_params])

示例#16

0

显示文件

def test_Lasso_LARS_model_none():
    """Smoke test: build a 'Lasso LARS' regression with 'model' set to None.

    Nothing is asserted; the test passes when construction does not raise.
    """
    lasso_lars_params = {
        'fit_intercept': True,
        'positive': False,
        'verbose': False,
        'normalize': True,
        'copy_X': True,
        'precompute': 'Auto',
        'max_iter': 500,
        'model': None,
        'eps': 2.220446,
    }
    model = regression(method=['Lasso LARS'], yrange=[0.0, 100.0],
                       params=[lasso_lars_params])

示例#17

0

显示文件

def test_LARS_CV_none():
    """Smoke test: build a 'LARS' regression without a 'CV' entry in params.

    Nothing is asserted; the test passes when construction does not raise.
    """
    lars_params = {
        'n_nonzero_coefs': 500,
        'fit_intercept': True,
        'positive': False,
        'verbose': False,
        'normalize': False,
        'precompute': True,
        'copy_X': True,
        'eps': 2.220445,
        'fit_path': True,
    }
    model = regression(method=['LARS'], yrange=[0.0, 100.0],
                       params=[lars_params])

示例#18

0

显示文件

文件： test_regression.py 项目： tisaconundrum2/PySAT

def test_LARS():
    """Smoke test: build a 'LARS' regression with n_nonzero_coefs=500.

    Nothing is asserted; the test passes when construction does not raise.
    """
    lars_params = {
        'n_nonzero_coefs': 500,
        'fit_intercept': True,
        'positive': False,
        'verbose': False,
        'normalize': False,
        'precompute': True,
        'copy_X': True,
        'eps': 2.220445,
        'fit_path': True,
    }
    model = regression(method=['LARS'], yrange=[0.0, 100.0],
                       params=[lars_params])

示例#19

0

显示文件

文件： test_regression.py 项目： tisaconundrum2/PySAT

def test_SVR():
    """Smoke test: build an 'SVR' regression with an 'rbf' kernel.

    Nothing is asserted; the test passes when construction does not raise.
    """
    svr_params = {
        'C': 1.0,
        'epsilon': 0.1,
        'kernel': 'rbf',
        'degree': 0,
        'gamma': 'auto',
        'coef0': 0.0,
        'shrinking': False,
        'tol': 0.001,
        'cache_size': 200,
        'verbose': False,
        'max_iter': -1,
    }
    model = regression(method=['SVR'], yrange=[0.0, 100.0],
                       params=[svr_params])

示例#20

0

显示文件

def test_SVR():
    """Smoke test: build an 'SVR' regression with an 'rbf' kernel.

    Nothing is asserted; the test passes when construction does not raise.
    """
    svr_params = {
        'C': 1.0,
        'epsilon': 0.1,
        'kernel': 'rbf',
        'degree': 0,
        'gamma': 'auto',
        'coef0': 0.0,
        'shrinking': False,
        'tol': 0.001,
        'cache_size': 200,
        'verbose': False,
        'max_iter': -1,
    }
    model = regression(method=['SVR'], yrange=[0.0, 100.0],
                       params=[svr_params])

示例#21

0

显示文件

文件： test_regression.py 项目： tisaconundrum2/PySAT

def test_Lasso_LARS():
    """Smoke test: build a 'Lasso LARS' regression with 'model' set to 0.

    Nothing is asserted; the test passes when construction does not raise.
    """
    lasso_lars_params = {
        'alpha': 0.0,
        'fit_intercept': True,
        'positive': False,
        'verbose': False,
        'normalize': True,
        'copy_X': True,
        'precompute': 'Auto',
        'max_iter': 500,
        'model': 0,
        'eps': 2.220446,
        'fit_path': True,
    }
    model = regression(method=['Lasso LARS'], yrange=[0.0, 100.0],
                       params=[lasso_lars_params])

示例#22

0

显示文件

文件： test_regression.py 项目： tisaconundrum2/PySAT

def test_Bayesian_Ridge():
    """Smoke test: build a 'Bayesian Ridge' regression with n_iter=300.

    Nothing is asserted; the test passes when construction does not raise.
    """
    bayes_params = {
        'n_iter': 300,
        'tol': 0.001,
        'alpha_1': 0.001,
        'alpha_2': 1e-06,
        'lambda_1': 1e-06,
        'lambda_2': 1e-06,
        'compute_score': False,
        'fit_intercept': True,
        'normalize': False,
        'copy_X': True,
        'verbose': False,
    }
    model = regression(method=['Bayesian Ridge'], yrange=[0.0, 100.0],
                       params=[bayes_params])

示例#23

0

显示文件

def test_Bayesian_Ridge():
    """Smoke test: build a 'Bayesian Ridge' regression with n_iter=300.

    Nothing is asserted; the test passes when construction does not raise.
    """
    bayes_params = {
        'n_iter': 300,
        'tol': 0.001,
        'alpha_1': 0.001,
        'alpha_2': 1e-06,
        'lambda_1': 1e-06,
        'lambda_2': 1e-06,
        'compute_score': False,
        'fit_intercept': True,
        'normalize': False,
        'copy_X': True,
        'verbose': False,
    }
    model = regression(method=['Bayesian Ridge'], yrange=[0.0, 100.0],
                       params=[bayes_params])

示例#24

0

显示文件

def test_Elastic_Net_CV_true():
    """Smoke test: build an 'Elastic Net' regression with 'CV' set to True.

    Note that 'precompute' and 'random_state' are passed as the strings
    'False' and 'None'.  Nothing is asserted; the test passes when
    construction does not raise.
    """
    enet_params = {
        'l1_ratio': 0.5,
        'fit_intercept': True,
        'normalize': False,
        'precompute': 'False',
        'max_iter': 1000,
        'copy_X': True,
        'tol': 0.0001,
        'positive': False,
        'selection': 'cyclic',
        'random_state': 'None',
        'CV': True,
    }
    model = regression(method=['Elastic Net'], yrange=[0.0, 100.0],
                       params=[enet_params])

示例#25

0

显示文件

文件： RegressionTrain.py 项目： daneishdespot/PySAT_Point_Spectra_GUI

    def run(self):
        """Train a regression model from the current UI state and record its coefficients.

        Reads the algorithm, data set, x/y variables and y-range from the
        widgets, builds a ``regression.regression`` object, fits it on the
        rows whose y value lies strictly inside ``yrange``, and stores the
        model plus its x/y variable lists under a descriptive model key.

        Afterwards the fitted coefficients (and the intercept, when the model
        exposes one) are added to ``self.data['Model Coefficients']``.  That
        bookkeeping is best-effort: ordinary errors there are ignored so that
        a model without ``coef_`` still counts as trained.

        Errors raised while building or fitting the model are not caught.
        """
        method = self.chooseAlgorithmComboBox.currentText()
        datakey = self.chooseDataComboBox.currentText()
        xvars = [str(x.text()) for x in self.xVariableList.selectedItems()]
        yvars = [('comp', str(y.text()))
                 for y in self.yVariableList.selectedItems()]
        yrange = [
            self.yMinDoubleSpinBox.value(),
            self.yMaxDoubleSpinBox.value()
        ]

        # The algorithm sub-module returns its parameters and a key suffix.
        params, modelkey = self.alg[method].run()
        modelkey = "{} - {} - ({}, {}) {}".format(method, yvars[0][-1],
                                                  yrange[0], yrange[1],
                                                  modelkey)
        self.list_amend(self.modelkeys, self.curr_count, modelkey)
        self.models[modelkey] = regression.regression([method], [yrange],
                                                      [params])
        x = np.array(self.data[datakey].df[xvars])
        y = np.array(self.data[datakey].df[yvars])
        # Keep only the rows whose y lies strictly between the range limits.
        ymask = np.squeeze((y > yrange[0]) & (y < yrange[1]))
        y = y[ymask]
        x = x[ymask, :]
        self.models[modelkey].fit(x, y)
        self.model_xvars[modelkey] = xvars
        self.model_yvars[modelkey] = yvars
        try:
            coef = np.squeeze(self.models[modelkey].model.coef_)
            coef = pd.DataFrame(coef)
            coef.index = pd.MultiIndex.from_tuples(
                self.data[datakey].df[xvars].columns.values)
            coef = coef.T
            coef[('meta', 'Model')] = modelkey
            try:
                coef[('meta',
                      'Intercept')] = self.models[modelkey].model.intercept_
            except Exception:
                # Not every model exposes an intercept; skip the column then.
                pass
            try:
                self.data['Model Coefficients'] = spectral_data(
                    pd.concat([self.data['Model Coefficients'].df, coef]))
            except Exception:
                # No usable existing table: start a fresh one.
                self.data['Model Coefficients'] = spectral_data(coef)
                # Another code path may already have registered the key, so
                # guard against listing it twice.
                if 'Model Coefficients' not in self.datakeys:
                    self.datakeys.append('Model Coefficients')
        except Exception:
            # Coefficient bookkeeping is best-effort, but no longer a bare
            # ``except:`` so KeyboardInterrupt / SystemExit still propagate.
            pass

示例#26

0

显示文件

文件： test_regression.py 项目： tisaconundrum2/PySAT

def test_ARD():
    """Smoke test: build an 'ARD' regression with threshold_lambda=100000.

    Nothing is asserted; the test passes when construction does not raise.
    """
    ard_params = {
        'n_iter': 300,
        'tol': 0.001,
        'alpha_1': 0.001,
        'alpha_2': 1e-06,
        'lambda_1': 1e-06,
        'lambda_2': 1e-06,
        'compute_score': False,
        'threshold_lambda': 100000,
        'fit_intercept': True,
        'normalize': False,
        'copy_X': True,
        'verbose': False,
    }
    model = regression(method=['ARD'], yrange=[0.0, 100.0],
                       params=[ard_params])

示例#27

0

显示文件

def test_ARD():
    """Smoke test: build an 'ARD' regression with threshold_lambda=100000.

    Nothing is asserted; the test passes when construction does not raise.
    """
    ard_params = {
        'n_iter': 300,
        'tol': 0.001,
        'alpha_1': 0.001,
        'alpha_2': 1e-06,
        'lambda_1': 1e-06,
        'lambda_2': 1e-06,
        'compute_score': False,
        'threshold_lambda': 100000,
        'fit_intercept': True,
        'normalize': False,
        'copy_X': True,
        'verbose': False,
    }
    model = regression(method=['ARD'], yrange=[0.0, 100.0],
                       params=[ard_params])

示例#28

0

显示文件

文件： test_regression.py 项目： tisaconundrum2/PySAT

def test_Elastic_Net():
    """Smoke test: build an 'Elastic Net' regression with alpha=1.0, l1_ratio=0.5.

    Note that 'precompute' and 'random_state' are passed as the strings
    'False' and 'None'.  Nothing is asserted; the test passes when
    construction does not raise.
    """
    enet_params = {
        'alpha': 1.0,
        'l1_ratio': 0.5,
        'fit_intercept': True,
        'normalize': False,
        'precompute': 'False',
        'max_iter': 1000,
        'copy_X': True,
        'tol': 0.0001,
        'warm_start': False,
        'positive': False,
        'selection': 'cyclic',
        'random_state': 'None',
    }
    model = regression(method=['Elastic Net'], yrange=[0.0, 100.0],
                       params=[enet_params])

示例#29

0

显示文件

def test_OMP_CV_true():
    """Smoke test: build an 'OMP' regression with n_nonzero_coefs=615.

    A leftover merge-conflict marker inside the dict literal was removed;
    the parameters kept are the ones from the HEAD side of the conflict.
    Nothing is asserted; the test passes when construction does not raise.
    """
    regress = regression(method=['OMP'], yrange=[0.0, 100.0],
                         params=[{'fit_intercept': True,
                                  'n_nonzero_coefs': 615}])

示例#30

0

显示文件

文件： test_regression.py 项目： USGS-Astrogeology/PySAT

def test_OMP():
    """Smoke test: build an 'OMP' regression with n_nonzero_coefs=615.

    Nothing is asserted; the test passes when construction does not raise.
    """
    omp_params = {'fit_intercept': True, 'n_nonzero_coefs': 615}
    model = regression(method=['OMP'], yrange=[0.0, 100.0],
                       params=[omp_params])

示例#31

0

显示文件

文件： test_regression.py 项目： tisaconundrum2/PySAT

def test_OMP():
    """Smoke test: build an 'OMP' regression with the 'CV' parameter set to True.

    Nothing is asserted; the test passes when construction does not raise.
    """
    omp_params = {'fit_intercept': True, 'CV': True}
    model = regression(method=['OMP'], yrange=[0.0, 100.0],
                       params=[omp_params])

示例#32

0

显示文件

文件： test_regression.py 项目： tisaconundrum2/PySAT

def test_PLS():
    """Smoke test: build a 'PLS' regression with n_components=0 and scale=False.

    Nothing is asserted; the test passes when construction does not raise.
    """
    pls_params = {'n_components': 0, 'scale': False}
    model = regression(method=['PLS'], yrange=[0.0, 100.0],
                       params=[pls_params])

示例#33

0

显示文件

文件： test_regression.py 项目： cneubauerUSGS/PyHAT

def test_OLS():
    """Smoke test: build an 'OLS' regression with fit_intercept=True.

    Nothing is asserted; the test passes when construction does not raise.
    """
    ols_params = {'fit_intercept': True}
    model = regression(method=['OLS'], yrange=[0.0, 100.0],
                       params=[ols_params])

示例#34

0

显示文件

文件： cv.py 项目： tisaconundrum2/PySAT

    def do_cv(self, Train, xcols='wvl', ycol=('comp', 'SiO2'), method='PLS',
              yrange=[0, 100]):
        """Cross-validate `method` for every parameter set in self.paramgrid.

        For each entry of ``self.paramgrid`` a ``regression`` object is built,
        fitted on each training fold and used to predict the held-out fold;
        those predictions are written into a ('predict', ...) column of
        ``Train``.  The model is then refitted on all of ``Train`` and the
        calibration predictions are written into a second ('predict', ...)
        column.

        Parameters:
            Train: data frame holding the spectra, the target column and a
                ('meta', 'Folds') column used to define the folds.
            xcols: column key selecting the predictor columns.
            ycol: column key of the target.
            method: regression method name handed to ``regression``.
            yrange: two-element range handed to ``regression`` and used in
                the model key.  It is a mutable default; this function does
                not modify it.

        Returns:
            (Train, output, models, modelkeys): ``Train`` with the prediction
            columns added; ``output``, a DataFrame with the parameter grid,
            'RMSEC', 'RMSECV' and one 'Fold<i>' column per fold, all under a
            ('cv', ...) MultiIndex; ``models`` and ``modelkeys``, containing
            only the models whose full-data fit reported ``goodfit``.
        """

        try:
            cv_iterator = LeaveOneLabelOut(
            Train[('meta', 'Folds')])  # create an iterator for cross validation based on the predefined folds
        except:
            # NOTE(review): only a message is printed here, so cv_iterator
            # stays unbound and the fold loop below would raise NameError.
            print('***No folds found! Did you remember to define folds before running cross validation?***')

        rmsecv_folds = []  # per parameter set: list of per-fold RMSEs
        rmsec = []         # per parameter set: RMSE of calibration
        rmsecv = []        # per parameter set: overall RMSE of cross validation
        models = []
        modelkeys = []

        # loop through the grid of parameters, do cross validation for each permutation
        # try:
        #     self.progress.setMaximum(len(self.paramgrid))
        #     self.progress.setValue(0)
        #     self.progress.show()
        # except:
        #     pass

        for i in list(range(len(self.paramgrid))):
            print(self.paramgrid[i])
#            self.progress.setValue(i)
            model = regression([method], [yrange], [self.paramgrid[i]])
            # NOTE(review): with the default tuple ycol=('comp', 'SiO2'),
            # ycol[0][-1] is the last character of 'comp' ('p'), not 'SiO2'
            # -- confirm whether ycol[-1] was intended.
            modelkey = "{} - {} - ({}, {}) {}".format(method, ycol[0][-1], yrange[0], yrange[1], self.paramgrid[i])

            rmsecv_folds_tmp = []  # Create empty list to hold RMSECV for each fold
            for train, holdout in cv_iterator:  # Iterate through each of the folds in the training set

                cvcol = ('predict', '"'+method + '-CV-' + str(self.paramgrid[
                                                           i])+'"')  # ycol[-1]+'_cv_'+method+'_param'+str(i))  #create the name of the column in which results will be stored

                cv_train = Train.iloc[train]  # extract the data to be used to create the model
                cv_holdout = Train.iloc[holdout]  # extract the data that will be held out of the model
                model.fit(cv_train[xcols], cv_train[ycol])
                if model.goodfit:
                    y_pred_holdout = model.predict(cv_holdout[xcols])
                else:
                    # failed fit: fill the held-out predictions with NaN
                    y_pred_holdout = cv_holdout[ycol] * np.nan
                # NOTE(review): DataFrame.set_value was removed in newer
                # pandas releases -- confirm the pandas version in use.
                Train.set_value(Train.index[holdout], cvcol, y_pred_holdout)
                rmsecv_folds_tmp.append(RMSE(y_pred_holdout, cv_holdout[ycol]))

            rmsecv_folds.append(rmsecv_folds_tmp)
            # cvcol is the name assigned inside the fold loop above
            rmsecv.append(RMSE(Train[ycol], Train[cvcol]))

            # refit on the full training set for the calibration predictions
            model.fit(Train[xcols], Train[ycol])
            if model.goodfit:
                # only successfully fitted models are returned to the caller
                models.append(model)
                modelkeys.append(modelkey)
                ypred_train = model.predict(Train[xcols])

            else:
                ypred_train = Train[ycol] * np.nan
            calcol = ('predict', '"'+method + '-Cal-' + str(self.paramgrid[i])+'"')
            Train[calcol] = ypred_train
            rmsec.append(RMSE(ypred_train, Train[ycol]))

        # assemble the summary table: one row per parameter set
        output = pd.DataFrame(self.paramgrid)
        output['RMSEC'] = rmsec
        output['RMSECV'] = rmsecv
        rmsecv_folds = np.array(rmsecv_folds)
        # one 'Fold<i>' column per fold (columns of the rmsecv_folds array)
        for i in list(range(len(rmsecv_folds[0, :]))):
            label = 'Fold' + str(i)
            output[label] = rmsecv_folds[:, i]
        # put every output column under a top-level 'cv' label
        cols = output.columns.values
        cols = [('cv', i) for i in cols]
        output.columns = pd.MultiIndex.from_tuples(cols)
        return Train, output, models, modelkeys

示例#35

0

显示文件

文件： cv.py 项目： nazerat/PySAT

    def do_cv(self,
              Train,
              cv_iterator,
              xcols='wvl',
              ycol=('comp', 'SiO2'),
              method='PLS',
              yrange=[0, 100],
              calc_path=False,
              alphas=None,
              n_folds=3):
        """Cross-validate `method` for every parameter set in self.paramgrid.

        For each entry of ``self.paramgrid`` the folds yielded by
        ``cv_iterator`` are held out in turn, predicted, and the predictions
        written into ('predict', ...) columns of ``Train``.  A model is then
        obtained for the full training set and its calibration predictions
        are stored as well.  With ``calc_path`` true the work is delegated to
        ``path_calc`` and one model per path alpha is produced; otherwise a
        single ``regression`` model is fitted per parameter set.

        Parameters:
            Train: data frame with predictor and target columns.
            cv_iterator: iterable of (train, holdout) positional index pairs;
                it is duplicated with itertools.tee, once per parameter set.
            xcols: column key selecting the predictor columns.
            ycol: column key of the target.
            method: regression method name handed to ``regression`` /
                ``path_calc``.
            yrange: two-element range handed to ``regression`` and used in
                the model keys.  It is a mutable default; this function does
                not modify it.
            calc_path: when true, use ``path_calc`` instead of fitting one
                model per parameter set.
            alphas: optional sequence; when given, the per-parameter output
                template is repeated once per alpha and ``alphas`` is passed
                to ``path_calc``.
            n_folds: number of 'Fold <k>' columns created in the output
                template (numbered from 1).

        Returns:
            (Train, output, models, modelkeys, predictkeys): ``Train`` with
            prediction columns added; ``output``, the concatenated
            per-parameter summary frames under a ('cv', ...) MultiIndex;
            the kept models and their keys; and the names of the prediction
            columns that were written.
        """

        models = []
        modelkeys = []
        predictkeys = []
        cv_iterators = itertools.tee(
            cv_iterator, len(self.paramgrid)
        )  #need to duplicate the cv_iterator so it can be used for each permutation in paramgrid

        for i in list(range(len(self.paramgrid))):
            print(self.paramgrid[i])
            # create an empty output data frame to serve as template
            output_tmp = pd.DataFrame()
            # add columns for RMSEC, RMSECV, and RMSE for the folds
            output_tmp['RMSEC'] = 0
            output_tmp['RMSECV'] = 0
            # fold columns are numbered starting at 1
            for f in np.array(range(n_folds)) + 1:
                output_tmp['Fold ' + str(f)] = 0
            #fill in the output template based on the current permutation parameters
            for k in self.paramgrid[i].keys():
                output_tmp.at[0, k] = self.paramgrid[i][k]
            if alphas is not None:
                # one template row per alpha
                output_tmp = pd.concat([output_tmp] * len(alphas))
                output_tmp['alphas'] = alphas

            # NOTE(review): the next three arrays are assigned but never read
            # anywhere in this method.
            rmsecv_folds_tmp = np.empty(
                shape=(0))  # Create empty array to hold RMSECV for each fold
            alphas_out = np.empty(shape=(0))
            cvcols_all = np.empty(shape=(0))

            foldcount = 1

            for train, holdout in cv_iterators[
                    i]:  # Iterate through each of the folds in the training set

                cv_train = Train.iloc[
                    train]  # extract the data to be used to create the model
                cv_holdout = Train.iloc[
                    holdout]  # extract the data that will be held out of the model

                if calc_path:
                    # get X and y data
                    X = cv_train[xcols]
                    y = cv_train[ycol]

                    #do the path calculation
                    # NOTE(review): with the default tuple ycol, ycol[0][-1]
                    # is the last character of 'comp' ('p') -- confirm
                    # whether ycol[-1] was intended.
                    path_alphas,\
                    path_coefs,\
                    intercepts,\
                    path_n_iters,\
                    y_pred_holdouts,\
                    fold_rmses,\
                    cvcols = path_calc(X, y, cv_holdout[xcols], cv_holdout[ycol], alphas, self.paramgrid[i], yname = ycol[0][-1], method = method)

                    output_tmp['Fold ' + str(foldcount)] = fold_rmses
                    # store the held-out predictions for every alpha on the path
                    # NOTE(review): DataFrame.set_value was removed in newer
                    # pandas releases -- confirm the pandas version in use.
                    for n in list(range(len(path_alphas))):
                        Train.set_value(Train.index[holdout], cvcols[n],
                                        y_pred_holdouts[n])

                else:
                    cvcols = [('predict', '"' + method + '- CV -' +
                               str(self.paramgrid[i]) + '"')]

                    #fit the model and predict the held-out data
                    model = regression([method], [yrange], [self.paramgrid[i]])
                    model.fit(cv_train[xcols], cv_train[ycol])
                    if model.goodfit:
                        y_pred_holdout = model.predict(cv_holdout[xcols])
                    else:
                        # failed fit: fill the held-out predictions with NaN
                        y_pred_holdout = cv_holdout[ycol] * np.nan
                    #add the predictions to the appropriate column in the training data
                    Train.set_value(Train.index[holdout], cvcols[0],
                                    y_pred_holdout)
                    #store the RMSE of this fold in its output column
                    output_tmp['Fold ' + str(foldcount)] = RMSE(
                        y_pred_holdout, cv_holdout[ycol])
                    pass

                foldcount = foldcount + 1

            #now that all the folds have been held out and predicted, calculate the overall rmsecv and add it to the output
            # NOTE(review): cvcols is only bound inside the fold loop, so an
            # iterator that yields no folds would raise NameError here.
            rmsecv = []
            for col in cvcols:
                rmsecv.append(RMSE(Train[col], Train[ycol]))
                predictkeys.append(col[-1])
            output_tmp['RMSECV'] = rmsecv

            #fit the model on the full training set using the current settings
            if calc_path:
                X = Train[xcols]
                y = Train[ycol]

                path_alphas, \
                path_coefs, \
                intercepts, \
                path_n_iters, \
                ypred_train, \
                rmsec_train, \
                cols = path_calc(X, y, X, y, alphas, self.paramgrid[i], colname = 'Cal', yname = ycol[0][-1], method = method)

                for n in list(range(len(path_alphas))):
                    Train[cols[n]] = ypred_train[
                        n]  #put the training set predictions in the data frame
                    predictkeys.append(cols[n][-1])
                    #create the model and manually set its parameters based on the path results rather than training it
                    model = regression([method], [yrange], [self.paramgrid[i]])
                    model.model.set_params(alpha=path_alphas[n])
                    setattr(model.model, 'intercept_', intercepts[n])
                    setattr(model.model, 'coef_', np.squeeze(path_coefs)[:, n])
                    setattr(model.model, 'n_iter_', path_n_iters[n])

                    #add the model and its name to the list
                    models.append(model)
                    modelkey = "{} - {} - ({}, {}) Alpha: {}, {}".format(
                        method, ycol[0][-1], yrange[0], yrange[1],
                        path_alphas[n], self.paramgrid[i])
                    modelkeys.append(modelkey)

                output_tmp['RMSEC'] = rmsec_train
            else:
                model = regression([method], [yrange], [self.paramgrid[i]])
                modelkey = "{} - {} - ({}, {}) {}".format(
                    method, ycol[0][-1], yrange[0], yrange[1],
                    self.paramgrid[i])
                # the model is appended up front and dropped again below if
                # the fit turns out not to be good
                models.append(model)
                modelkeys.append(modelkey)
                # default to NaN predictions; overwritten after a good fit
                ypred_train = Train[ycol] * np.nan
                model.fit(Train[xcols], Train[ycol])
                #if the fit is good, then predict the training set
                if model.goodfit:
                    ypred_train = model.predict(Train[xcols])
                else:
                    # drop the model and key appended just above
                    models = models[:-1]
                    modelkeys = modelkeys[:-1]

                #add the calibration predictions to the appropriate column
                calcol = ('predict', '"' + method + '- Cal -' +
                          str(self.paramgrid[i]) + '"')
                predictkeys.append(calcol[-1])
                Train[calcol] = ypred_train
                #store the RMSEC for the current settings in the output
                output_tmp['RMSEC'] = RMSE(ypred_train, Train[ycol])

            # On the first iteration `output` is not bound yet, so the concat
            # raises and the except branch initialises it.
            # NOTE(review): the bare except would also hide a genuine concat
            # failure on later iterations by discarding earlier rows.
            try:
                output = pd.concat((output, output_tmp))
            except:
                output = output_tmp
            pass

        #make the columns of the output data frame multi-indexed
        # NOTE(review): `output` is unbound here if self.paramgrid is empty.
        cols = output.columns.values
        cols = [('cv', i) for i in cols]
        output.columns = pd.MultiIndex.from_tuples(cols)

        return Train, output, models, modelkeys, predictkeys

示例#36

0

显示文件

文件： cv.py 项目： USGS-Astrogeology/PySAT

    def do_cv(self, Train, cv_iterator, xcols='wvl', ycol=('comp', 'SiO2'), method='PLS',
              yrange=[0, 100], calc_path = False, alphas = None, n_folds = 3):
        """Cross-validate every parameter permutation in ``self.paramgrid``.

        For each permutation the folds produced by ``cv_iterator`` are walked:
        a model is built from the fold's training rows, the held-out rows are
        predicted, and those predictions are written into ``Train``. Afterwards
        the overall RMSECV is computed from the held-out predictions, a model
        is built on the full training set, and its RMSEC is recorded.

        Parameters
        ----------
        Train : pandas.DataFrame
            Training data. It is modified in place: CV and calibration
            prediction columns are added to it.
        cv_iterator : iterable
            Yields ``(train, holdout)`` pairs of positional indexers that are
            applied to ``Train`` with ``.iloc``.
        xcols, ycol :
            Column keys used to select the X and y data from ``Train``.
        method : str
            Regression method name, forwarded to ``regression`` / ``path_calc``.
        yrange : list
            Forwarded to ``regression``; its first two entries also appear in
            the generated model keys.
        calc_path : bool
            If true, ``path_calc`` is used (one result per alpha) instead of
            fitting a single model per fold.
        alphas :
            Forwarded to ``path_calc``. When not ``None`` the results template
            is repeated once per alpha and an ``'alphas'`` column is added.
        n_folds : int
            Number of ``'Fold <n>'`` columns created in the results template.

        Returns
        -------
        tuple
            ``(Train, output, models, modelkeys, predictkeys)`` where
            ``output`` is the results table with its columns placed under a
            top-level ``'cv'`` label.

        Raises
        ------
        ValueError
            If ``self.paramgrid`` is empty, so there is nothing to evaluate.
        """
        # NOTE(review): `ycol[0][-1]` is used below for naming. With the default
        # tuple ycol=('comp', 'SiO2') it evaluates to 'p' (last character of
        # 'comp'); presumably callers pass a list of tuples - confirm.
        # NOTE(review): self.paramgrid[i] is re-indexed at each use rather than
        # bound once, so behaviour is unchanged even if the container hands
        # back a fresh dict on every lookup.

        models = []
        modelkeys = []
        predictkeys = []
        outputs = []  # one results frame per parameter permutation
        # the fold iterator is duplicated so that each permutation in paramgrid
        # gets its own pass over the folds
        cv_iterators = itertools.tee(cv_iterator, len(self.paramgrid))

        for i, folds in enumerate(cv_iterators):
            print(self.paramgrid[i])
            # create an empty output data frame to serve as template, with
            # columns for RMSEC, RMSECV, and the RMSE of each fold
            output_tmp = pd.DataFrame()
            output_tmp['RMSEC'] = 0
            output_tmp['RMSECV'] = 0
            for f in range(1, n_folds + 1):
                output_tmp['Fold ' + str(f)] = 0
            # fill in the output template based on the current permutation parameters
            for k, v in self.paramgrid[i].items():
                output_tmp.at[0, k] = v
            if alphas is not None:
                output_tmp = pd.concat([output_tmp] * len(alphas))
                output_tmp['alphas'] = alphas

            # iterate through each of the folds in the training set
            for foldcount, (train, holdout) in enumerate(folds, start=1):
                cv_train = Train.iloc[train]  # data used to create the model
                cv_holdout = Train.iloc[holdout]  # data held out of the model
                holdout_rows = Train.index[holdout]

                if calc_path:
                    # get X and y data
                    X = cv_train[xcols]
                    y = cv_train[ycol]

                    # do the path calculation
                    path_alphas,\
                    path_coefs,\
                    intercepts,\
                    path_n_iters,\
                    y_pred_holdouts,\
                    fold_rmses,\
                    cvcols = path_calc(X, y, cv_holdout[xcols], cv_holdout[ycol], alphas, self.paramgrid[i], yname = ycol[0][-1], method = method)

                    output_tmp['Fold ' + str(foldcount)] = fold_rmses
                    # DataFrame.set_value was removed from pandas; .loc is the
                    # label-based assignment that replaces it
                    for n in range(len(path_alphas)):
                        Train.loc[holdout_rows, cvcols[n]] = y_pred_holdouts[n]

                else:
                    cvcols = [('predict', '"'+method+'- CV -' + str(self.paramgrid[i]) + '"')]

                    # fit the model and predict the held-out data
                    model = regression([method], [yrange], [self.paramgrid[i]])
                    model.fit(cv_train[xcols], cv_train[ycol])
                    if model.goodfit:
                        y_pred_holdout = model.predict(cv_holdout[xcols])
                    else:
                        # a failed fit yields all-NaN predictions for this fold
                        y_pred_holdout = cv_holdout[ycol] * np.nan
                    # add the predictions to the appropriate column in the training data
                    Train.loc[holdout_rows, cvcols[0]] = y_pred_holdout
                    # record the RMSE for this fold
                    output_tmp['Fold ' + str(foldcount)] = RMSE(y_pred_holdout, cv_holdout[ycol])

            # now that all the folds have been held out and predicted,
            # calculate the overall rmsecv and add it to the output
            rmsecv = []
            for col in cvcols:
                rmsecv.append(RMSE(Train[col], Train[ycol]))
                predictkeys.append(col[-1])
            output_tmp['RMSECV'] = rmsecv

            # fit the model on the full training set using the current settings
            if calc_path:
                X = Train[xcols]
                y = Train[ycol]

                path_alphas, \
                path_coefs, \
                intercepts, \
                path_n_iters, \
                ypred_train, \
                rmsec_train, \
                cal_cols = path_calc(X, y, X, y, alphas, self.paramgrid[i], colname = 'Cal', yname = ycol[0][-1], method = method)

                for n in range(len(path_alphas)):
                    Train[cal_cols[n]] = ypred_train[n]  # put the training set predictions in the data frame
                    predictkeys.append(cal_cols[n][-1])
                    # create the model and manually set its parameters based on
                    # the path results rather than training it
                    model = regression([method], [yrange], [self.paramgrid[i]])
                    model.model.set_params(alpha = path_alphas[n])
                    setattr(model.model, 'intercept_', intercepts[n])
                    setattr(model.model, 'coef_', np.squeeze(path_coefs)[:,n])
                    setattr(model.model, 'n_iter_', path_n_iters[n])

                    # add the model and its name to the list
                    models.append(model)
                    modelkey = "{} - {} - ({}, {}) Alpha: {}, {}".format(method, ycol[0][-1], yrange[0], yrange[1],path_alphas[n],
                                                              self.paramgrid[i])
                    modelkeys.append(modelkey)

                output_tmp['RMSEC'] = rmsec_train
            else:
                model = regression([method], [yrange], [self.paramgrid[i]])
                modelkey = "{} - {} - ({}, {}) {}".format(method, ycol[0][-1], yrange[0], yrange[1], self.paramgrid[i])
                # predictions stay NaN unless the fit succeeds
                ypred_train = Train[ycol] * np.nan
                model.fit(Train[xcols], Train[ycol])
                # only a model with a good fit is kept and used to predict the training set
                if model.goodfit:
                    ypred_train = model.predict(Train[xcols])
                    models.append(model)
                    modelkeys.append(modelkey)

                # add the calibration predictions to the appropriate column
                calcol = ('predict', '"'+method + '- Cal -' + str(self.paramgrid[i])+'"')
                predictkeys.append(calcol[-1])
                Train[calcol] = ypred_train
                # record the RMSEC for the current settings
                output_tmp['RMSEC'] = RMSE(ypred_train, Train[ycol])

            outputs.append(output_tmp)

        if not outputs:
            raise ValueError('paramgrid is empty: there are no parameter permutations to cross-validate')

        # a single concat, with no blanket except, so a real failure surfaces
        # instead of silently discarding the results gathered so far
        output = pd.concat(outputs)

        # make the columns of the output data frame multi-indexed
        output.columns = pd.MultiIndex.from_tuples([('cv', c) for c in output.columns.values])

        return Train, output, models, modelkeys, predictkeys