def test_OMP_CV_false():
    """Instantiate ``regression`` for 'OMP' with ``CV`` set to False.

    No assertions are made on the resulting object.
    """
    omp_settings = {
        'fit_intercept': True,
        'CV': False,
    }
    regress = regression(
        method=['OMP'],
        yrange=[0.0, 100.0],
        params=[omp_settings],
    )
def setup(self):
    """Register a regression model from the current widget selections without fitting it.

    Reads the algorithm name, the selected x/y variables and the y range from
    the widgets, asks the entry of ``self.alg`` stored under the algorithm
    name for ``(params, modelkey)`` via its ``run()`` method, and stores a new
    ``regression.regression`` object and its x/y variable lists under a
    generated model key. 'Model Coefficients' is added to ``self.datakeys``
    if it is not already listed.

    This is best-effort: any ``Exception`` raised while building the model
    (for example no y variable being selected, which makes ``yvars[0]`` fail)
    is logged at DEBUG level and otherwise ignored.
    """
    import logging

    method = self.chooseAlgorithmComboBox.currentText()
    xvars = [str(x.text()) for x in self.xVariableList.selectedItems()]
    yvars = [('comp', str(y.text())) for y in self.yVariableList.selectedItems()]
    yrange = [self.yMinDoubleSpinBox.value(), self.yMaxDoubleSpinBox.value()]

    try:
        params, modelkey = self.alg[method].run()
        modelkey = "{} - {} - ({}, {}) {}".format(method, yvars[0][-1], yrange[0], yrange[1], modelkey)
        self.list_amend(self.modelkeys, self.curr_count, modelkey)
        self.models[modelkey] = regression.regression([method], [yrange], [params])
        self.model_xvars[modelkey] = xvars
        self.model_yvars[modelkey] = yvars
        if 'Model Coefficients' not in self.datakeys:
            self.datakeys.append('Model Coefficients')
    except Exception:
        # Deliberately non-fatal, but no longer silent and no longer
        # swallowing KeyboardInterrupt / SystemExit.
        logging.getLogger(__name__).debug(
            "setup() could not register a regression model", exc_info=True)
def test_PLS():
    """Instantiate ``regression`` for 'PLS' with zero components and no scaling.

    No assertions are made on the resulting object.
    """
    pls_settings = {
        'n_components': 0,
        'scale': False,
    }
    regress = regression(
        method=['PLS'],
        yrange=[0.0, 100.0],
        params=[pls_settings],
    )
def test_OMP():
    """Instantiate ``regression`` for 'OMP' with a fixed ``n_nonzero_coefs``.

    No assertions are made on the resulting object.
    """
    omp_settings = {
        'fit_intercept': True,
        'n_nonzero_coefs': 615,
    }
    regress = regression(
        method=['OMP'],
        yrange=[0.0, 100.0],
        params=[omp_settings],
    )
def test_KRR():
    """Instantiate ``regression`` for 'KRR' with a linear kernel.

    No assertions are made on the resulting object.
    """
    krr_settings = {
        'alpha': 0,
        'kernel': 'linear',
        'gamma': 'None',
        'degree': 3.0,
        'coef0': 1.0,
        'kernel_params': 'None',
    }
    regress = regression(
        method=['KRR'],
        yrange=[0.0, 100.0],
        params=[krr_settings],
    )
def test_GP():
    """Instantiate ``regression`` for 'GP' with 'PCA' as ``reduce_dim``.

    No assertions are made on the resulting object.
    """
    gp_settings = {
        'reduce_dim': 'PCA',
        'n_components': 0,
        'random_start': 1,
        'theta0': 1.0,
        'thetaL': 0.1,
        'thetaU': 100.0,
    }
    regress = regression(
        method=['GP'],
        yrange=[0.0, 100.0],
        params=[gp_settings],
    )
def test_LASSO_CV_none():
    """Instantiate ``regression`` for 'LASSO' without a ``CV`` entry in the parameters.

    No assertions are made on the resulting object.
    """
    lasso_settings = {
        'alpha': 1.0,
        'fit_intercept': True,
        'max_iter': 1000,
        'tol': 0.0001,
        'positive': False,
        'selection': 'random',
    }
    regress = regression(
        method=['LASSO'],
        yrange=[0.0, 100.0],
        params=[lasso_settings],
    )
def test_KRR():
    """Instantiate ``regression`` for 'KRR' with a linear kernel.

    No assertions are made on the resulting object.
    """
    kernel_ridge_params = {
        'alpha': 0,
        'kernel': 'linear',
        'gamma': 'None',
        'degree': 3.0,
        'coef0': 1.0,
        'kernel_params': 'None',
    }
    regress = regression(
        method=['KRR'],
        yrange=[0.0, 100.0],
        params=[kernel_ridge_params],
    )
def test_OMP_CV_true():
    """Instantiate ``regression`` for 'OMP' with ``CV`` set to True.

    No assertions are made on the resulting object.
    """
    omp_cv_settings = {
        'fit_intercept': True,
        'CV': True,
        'precompute': True,
    }
    regress = regression(
        method=['OMP'],
        yrange=[0.0, 100.0],
        params=[omp_cv_settings],
    )
def test_Ridge_CV_true():
    """Instantiate ``regression`` for 'Ridge' with ``CV`` set to True.

    No assertions are made on the resulting object.
    """
    ridge_cv_settings = {
        'fit_intercept': True,
        'normalize': False,
        'CV': True,
    }
    regress = regression(
        method=['Ridge'],
        yrange=[0.0, 100.0],
        params=[ridge_cv_settings],
    )
def test_Lasso():
    """Instantiate ``regression`` for 'Lasso' with random coordinate selection.

    No assertions are made on the resulting object.
    """
    lasso_settings = {
        'alpha': 1.0,
        'fit_intercept': True,
        'max_iter': 1000,
        'tol': 0.0001,
        'positive': False,
        'selection': 'random',
    }
    regress = regression(
        method=['Lasso'],
        yrange=[0.0, 100.0],
        params=[lasso_settings],
    )
def test_GP():
    """Instantiate ``regression`` for 'GP' with 'PCA' as ``reduce_dim``.

    No assertions are made on the resulting object.
    """
    gaussian_process_params = {
        'reduce_dim': 'PCA',
        'n_components': 0,
        'random_start': 1,
        'theta0': 1.0,
        'thetaL': 0.1,
        'thetaU': 100.0,
    }
    regress = regression(
        method=['GP'],
        yrange=[0.0, 100.0],
        params=[gaussian_process_params],
    )
def test_LARS2_CV_true():
    """Instantiate ``regression`` for 'LARS' with ``CV`` set to True.

    No assertions are made on the resulting object.
    """
    lars_cv_settings = {
        'fit_intercept': True,
        'positive': False,
        'verbose': False,
        'normalize': False,
        'precompute': True,
        'copy_X': True,
        'eps': 2.220445,
        'CV': True,
    }
    regress = regression(
        method=['LARS'],
        yrange=[0.0, 100.0],
        params=[lars_cv_settings],
    )
def test_Ridge():
    """Instantiate ``regression`` for 'Ridge' with an explicit ``alpha``.

    No assertions are made on the resulting object.
    """
    ridge_settings = {
        'alpha': 1.0,
        'copy_X': True,
        'fit_intercept': True,
        'max_iter': 'None',
        'normalize': False,
        'solver': 'auto',
        'tol': 0.0,
        'random_state': '',
    }
    regress = regression(
        method=['Ridge'],
        yrange=[0.0, 100.0],
        params=[ridge_settings],
    )
def test_Ridge_CV_none():
    """Instantiate ``regression`` for 'Ridge' without a ``CV`` entry in the parameters.

    No assertions are made on the resulting object.
    """
    ridge_settings = {
        'alpha': 1.0,
        'copy_X': True,
        'fit_intercept': True,
        'max_iter': 'None',
        'normalize': False,
        'solver': 'auto',
        'tol': 0.0,
        'random_state': '',
    }
    regress = regression(
        method=['Ridge'],
        yrange=[0.0, 100.0],
        params=[ridge_settings],
    )
def test_Lasso_LARS_model_none():
    """Instantiate ``regression`` for 'Lasso LARS' with ``model`` set to None.

    No assertions are made on the resulting object.
    """
    lasso_lars_settings = {
        'fit_intercept': True,
        'positive': False,
        'verbose': False,
        'normalize': True,
        'copy_X': True,
        'precompute': 'Auto',
        'max_iter': 500,
        'model': None,
        'eps': 2.220446,
    }
    regress = regression(
        method=['Lasso LARS'],
        yrange=[0.0, 100.0],
        params=[lasso_lars_settings],
    )
def test_LARS_CV_none():
    """Instantiate ``regression`` for 'LARS' without a ``CV`` entry in the parameters.

    No assertions are made on the resulting object.
    """
    lars_settings = {
        'n_nonzero_coefs': 500,
        'fit_intercept': True,
        'positive': False,
        'verbose': False,
        'normalize': False,
        'precompute': True,
        'copy_X': True,
        'eps': 2.220445,
        'fit_path': True,
    }
    regress = regression(
        method=['LARS'],
        yrange=[0.0, 100.0],
        params=[lars_settings],
    )
def test_LARS():
    """Instantiate ``regression`` for 'LARS' with a fixed ``n_nonzero_coefs``.

    No assertions are made on the resulting object.
    """
    lars_settings = {
        'n_nonzero_coefs': 500,
        'fit_intercept': True,
        'positive': False,
        'verbose': False,
        'normalize': False,
        'precompute': True,
        'copy_X': True,
        'eps': 2.220445,
        'fit_path': True,
    }
    regress = regression(
        method=['LARS'],
        yrange=[0.0, 100.0],
        params=[lars_settings],
    )
def test_SVR():
    """Instantiate ``regression`` for 'SVR' with an 'rbf' kernel.

    No assertions are made on the resulting object.
    """
    svr_settings = {
        'C': 1.0,
        'epsilon': 0.1,
        'kernel': 'rbf',
        'degree': 0,
        'gamma': 'auto',
        'coef0': 0.0,
        'shrinking': False,
        'tol': 0.001,
        'cache_size': 200,
        'verbose': False,
        'max_iter': -1,
    }
    regress = regression(
        method=['SVR'],
        yrange=[0.0, 100.0],
        params=[svr_settings],
    )
def test_SVR():
    """Instantiate ``regression`` for 'SVR' with an 'rbf' kernel.

    No assertions are made on the resulting object.
    """
    support_vector_params = {
        'C': 1.0,
        'epsilon': 0.1,
        'kernel': 'rbf',
        'degree': 0,
        'gamma': 'auto',
        'coef0': 0.0,
        'shrinking': False,
        'tol': 0.001,
        'cache_size': 200,
        'verbose': False,
        'max_iter': -1,
    }
    regress = regression(
        method=['SVR'],
        yrange=[0.0, 100.0],
        params=[support_vector_params],
    )
def test_Lasso_LARS():
    """Instantiate ``regression`` for 'Lasso LARS' with ``model`` set to 0.

    No assertions are made on the resulting object.
    """
    lasso_lars_settings = {
        'alpha': 0.0,
        'fit_intercept': True,
        'positive': False,
        'verbose': False,
        'normalize': True,
        'copy_X': True,
        'precompute': 'Auto',
        'max_iter': 500,
        'model': 0,
        'eps': 2.220446,
        'fit_path': True,
    }
    regress = regression(
        method=['Lasso LARS'],
        yrange=[0.0, 100.0],
        params=[lasso_lars_settings],
    )
def test_Bayesian_Ridge():
    """Instantiate ``regression`` for 'Bayesian Ridge' with fixed hyper-parameters.

    No assertions are made on the resulting object.
    """
    bayes_ridge_settings = {
        'n_iter': 300,
        'tol': 0.001,
        'alpha_1': 0.001,
        'alpha_2': 1e-06,
        'lambda_1': 1e-06,
        'lambda_2': 1e-06,
        'compute_score': False,
        'fit_intercept': True,
        'normalize': False,
        'copy_X': True,
        'verbose': False,
    }
    regress = regression(
        method=['Bayesian Ridge'],
        yrange=[0.0, 100.0],
        params=[bayes_ridge_settings],
    )
def test_Bayesian_Ridge():
    """Instantiate ``regression`` for 'Bayesian Ridge' with fixed hyper-parameters.

    No assertions are made on the resulting object.
    """
    bayesian_ridge_params = {
        'n_iter': 300,
        'tol': 0.001,
        'alpha_1': 0.001,
        'alpha_2': 1e-06,
        'lambda_1': 1e-06,
        'lambda_2': 1e-06,
        'compute_score': False,
        'fit_intercept': True,
        'normalize': False,
        'copy_X': True,
        'verbose': False,
    }
    regress = regression(
        method=['Bayesian Ridge'],
        yrange=[0.0, 100.0],
        params=[bayesian_ridge_params],
    )
def test_Elastic_Net_CV_true():
    """Instantiate ``regression`` for 'Elastic Net' with ``CV`` set to True.

    No assertions are made on the resulting object.
    """
    enet_cv_settings = {
        'l1_ratio': 0.5,
        'fit_intercept': True,
        'normalize': False,
        'precompute': 'False',
        'max_iter': 1000,
        'copy_X': True,
        'tol': 0.0001,
        'positive': False,
        'selection': 'cyclic',
        'random_state': 'None',
        'CV': True,
    }
    regress = regression(
        method=['Elastic Net'],
        yrange=[0.0, 100.0],
        params=[enet_cv_settings],
    )
def run(self):
    """Build, fit and register a regression model from the current widget selections.

    The algorithm name, data set key, selected x/y variables and y range are
    read from the widgets. The entry of ``self.alg`` stored under the
    algorithm name supplies ``(params, modelkey)`` through its ``run()``
    method. Rows whose y value lies strictly between the y-range bounds are
    used to fit a new ``regression.regression`` object, which is stored in
    ``self.models`` under a generated key together with the x/y variable
    lists.

    If the fitted model exposes ``coef_``, a one-row coefficient table
    (plus ``intercept_`` when available) is appended to the
    'Model Coefficients' data set, creating that data set if needed.
    Failures in this coefficient step are logged at DEBUG level and
    otherwise ignored; failures while building or fitting the model
    propagate to the caller.
    """
    import logging

    logger = logging.getLogger(__name__)

    method = self.chooseAlgorithmComboBox.currentText()
    datakey = self.chooseDataComboBox.currentText()
    xvars = [str(x.text()) for x in self.xVariableList.selectedItems()]
    yvars = [('comp', str(y.text())) for y in self.yVariableList.selectedItems()]
    yrange = [self.yMinDoubleSpinBox.value(), self.yMaxDoubleSpinBox.value()]

    params, modelkey = self.alg[method].run()
    modelkey = "{} - {} - ({}, {}) {}".format(method, yvars[0][-1], yrange[0], yrange[1], modelkey)
    self.list_amend(self.modelkeys, self.curr_count, modelkey)
    self.models[modelkey] = regression.regression([method], [yrange], [params])

    x = np.array(self.data[datakey].df[xvars])
    y = np.array(self.data[datakey].df[yvars])
    # Keep only the rows whose y value is strictly inside the requested range.
    ymask = np.squeeze((y > yrange[0]) & (y < yrange[1]))
    y = y[ymask]
    x = x[ymask, :]
    self.models[modelkey].fit(x, y)
    self.model_xvars[modelkey] = xvars
    self.model_yvars[modelkey] = yvars

    try:
        coef = np.squeeze(self.models[modelkey].model.coef_)
        coef = pd.DataFrame(coef)
        coef.index = pd.MultiIndex.from_tuples(
            self.data[datakey].df[xvars].columns.values)
        coef = coef.T
        coef[('meta', 'Model')] = modelkey
        try:
            coef[('meta', 'Intercept')] = self.models[modelkey].model.intercept_
        except Exception:
            # The intercept is optional; leave the column out when it cannot be read.
            pass
        try:
            self.data['Model Coefficients'] = spectral_data(
                pd.concat([self.data['Model Coefficients'].df, coef]))
        except Exception:
            # No usable existing table (e.g. first model): start a new one.
            self.data['Model Coefficients'] = spectral_data(coef)
            # Guard against listing the data set twice.
            if 'Model Coefficients' not in self.datakeys:
                self.datakeys.append('Model Coefficients')
    except Exception:
        # Best-effort step: the model is already fitted and registered above.
        logger.debug("run() could not record coefficients for %r", modelkey, exc_info=True)
def test_ARD():
    """Instantiate ``regression`` for 'ARD' with fixed hyper-parameters.

    No assertions are made on the resulting object.
    """
    ard_settings = {
        'n_iter': 300,
        'tol': 0.001,
        'alpha_1': 0.001,
        'alpha_2': 1e-06,
        'lambda_1': 1e-06,
        'lambda_2': 1e-06,
        'compute_score': False,
        'threshold_lambda': 100000,
        'fit_intercept': True,
        'normalize': False,
        'copy_X': True,
        'verbose': False,
    }
    regress = regression(
        method=['ARD'],
        yrange=[0.0, 100.0],
        params=[ard_settings],
    )
def test_ARD():
    """Instantiate ``regression`` for 'ARD' with fixed hyper-parameters.

    No assertions are made on the resulting object.
    """
    ard_params = {
        'n_iter': 300,
        'tol': 0.001,
        'alpha_1': 0.001,
        'alpha_2': 1e-06,
        'lambda_1': 1e-06,
        'lambda_2': 1e-06,
        'compute_score': False,
        'threshold_lambda': 100000,
        'fit_intercept': True,
        'normalize': False,
        'copy_X': True,
        'verbose': False,
    }
    regress = regression(
        method=['ARD'],
        yrange=[0.0, 100.0],
        params=[ard_params],
    )
def test_Elastic_Net():
    """Instantiate ``regression`` for 'Elastic Net' with an explicit ``alpha``.

    No assertions are made on the resulting object.
    """
    enet_settings = {
        'alpha': 1.0,
        'l1_ratio': 0.5,
        'fit_intercept': True,
        'normalize': False,
        'precompute': 'False',
        'max_iter': 1000,
        'copy_X': True,
        'tol': 0.0001,
        'warm_start': False,
        'positive': False,
        'selection': 'cyclic',
        'random_state': 'None',
    }
    regress = regression(
        method=['Elastic Net'],
        yrange=[0.0, 100.0],
        params=[enet_settings],
    )
def test_OMP_CV_true():
    """Instantiate ``regression`` for 'OMP' with a fixed ``n_nonzero_coefs``.

    An unresolved merge-conflict marker inside the parameter dict made this
    function a syntax error; it has been removed and the visible HEAD side
    kept. No assertions are made on the resulting object.

    NOTE(review): the function name suggests a ``'CV': True`` case, but the
    HEAD side shown here passes ``n_nonzero_coefs`` instead; confirm against
    the other side of the merge.
    """
    omp_settings = {
        'fit_intercept': True,
        'n_nonzero_coefs': 615,
    }
    regress = regression(
        method=['OMP'],
        yrange=[0.0, 100.0],
        params=[omp_settings],
    )
def test_OMP():
    """Instantiate ``regression`` for 'OMP' with a fixed ``n_nonzero_coefs``.

    No assertions are made on the resulting object.
    """
    omp_params = {
        'fit_intercept': True,
        'n_nonzero_coefs': 615,
    }
    regress = regression(
        method=['OMP'],
        yrange=[0.0, 100.0],
        params=[omp_params],
    )
def test_OMP():
    """Instantiate ``regression`` for 'OMP' with ``CV`` set to True.

    No assertions are made on the resulting object.
    """
    omp_cv_params = {
        'fit_intercept': True,
        'CV': True,
    }
    regress = regression(
        method=['OMP'],
        yrange=[0.0, 100.0],
        params=[omp_cv_params],
    )
def test_PLS():
    """Instantiate ``regression`` for 'PLS' with zero components and no scaling.

    No assertions are made on the resulting object.
    """
    pls_params = {
        'n_components': 0,
        'scale': False,
    }
    regress = regression(
        method=['PLS'],
        yrange=[0.0, 100.0],
        params=[pls_params],
    )
def test_OLS():
    """Instantiate ``regression`` for 'OLS' with an intercept.

    No assertions are made on the resulting object.
    """
    ols_settings = {
        'fit_intercept': True,
    }
    regress = regression(
        method=['OLS'],
        yrange=[0.0, 100.0],
        params=[ols_settings],
    )
def do_cv(self, Train, xcols='wvl', ycol=('comp', 'SiO2'), method='PLS', yrange=[0, 100]):
    """Cross-validate ``method`` for every entry of ``self.paramgrid``.

    Folds are taken from ``Train[('meta', 'Folds')]`` via ``LeaveOneLabelOut``.
    For each parameter set a ``regression`` object is fitted on every
    training split, the held-out rows are predicted and written into a
    ``('predict', '"<method>-CV-<params>"')`` column of ``Train``, and the
    model is then refitted on all of ``Train`` with its predictions stored in
    a ``('predict', '"<method>-Cal-<params>"')`` column.

    Parameters
    ----------
    Train : data frame providing ``xcols``, ``ycol`` and ``('meta', 'Folds')``;
        it is modified in place (prediction columns are added).
    xcols : column key selecting the predictors.
    ycol : column key selecting the target.
    method : method name passed to ``regression``.
    yrange : two-element sequence passed to ``regression`` and used in the
        model key. It is never mutated here.

    Returns
    -------
    tuple
        ``(Train, output, models, modelkeys)`` where ``output`` is a
        ``DataFrame`` of the parameter grid plus 'RMSEC', 'RMSECV' and one
        'Fold<i>' column per fold (columns wrapped in a ``('cv', name)``
        MultiIndex), and ``models`` / ``modelkeys`` hold only the models
        whose full-data fit reported ``goodfit``.

    Raises
    ------
    Exception
        Whatever the fold lookup raised, after printing a hint. The
        original code swallowed it and then failed later with a NameError.

    NOTE(review): with the default tuple ``ycol``, ``ycol[0][-1]`` is the last
    character of the first element ('p' for 'comp'), not the element name.
    Left unchanged because model keys may be matched elsewhere; confirm.
    NOTE(review): ``DataFrame.set_value`` was removed in pandas 1.0; confirm
    the pandas version this runs against.
    """
    try:
        # Build the fold iterator from the predefined fold labels.
        cv_iterator = LeaveOneLabelOut(Train[('meta', 'Folds')])
    except Exception:
        print('***No folds found! Did you remember to define folds before running cross validation?***')
        # Without an iterator nothing below can work; surface the real cause.
        raise

    rmsecv_folds = []
    rmsec = []
    rmsecv = []
    models = []
    modelkeys = []

    # Loop through the grid of parameters and cross-validate each permutation.
    for i in range(len(self.paramgrid)):
        print(self.paramgrid[i])
        model = regression([method], [yrange], [self.paramgrid[i]])
        modelkey = "{} - {} - ({}, {}) {}".format(method, ycol[0][-1], yrange[0], yrange[1], self.paramgrid[i])
        # Column that receives the held-out predictions for this parameter set.
        cvcol = ('predict', '"' + method + '-CV-' + str(self.paramgrid[i]) + '"')

        rmsecv_folds_tmp = []  # RMSECV of each fold for this parameter set
        for train, holdout in cv_iterator:
            cv_train = Train.iloc[train]      # rows used to build the model
            cv_holdout = Train.iloc[holdout]  # rows held out of the model
            model.fit(cv_train[xcols], cv_train[ycol])
            if model.goodfit:
                y_pred_holdout = model.predict(cv_holdout[xcols])
            else:
                # A failed fit yields NaN predictions of matching shape.
                y_pred_holdout = cv_holdout[ycol] * np.nan
            Train.set_value(Train.index[holdout], cvcol, y_pred_holdout)
            rmsecv_folds_tmp.append(RMSE(y_pred_holdout, cv_holdout[ycol]))
        rmsecv_folds.append(rmsecv_folds_tmp)
        rmsecv.append(RMSE(Train[ycol], Train[cvcol]))

        # Refit on the full training set; keep the model only if the fit is good.
        model.fit(Train[xcols], Train[ycol])
        if model.goodfit:
            models.append(model)
            modelkeys.append(modelkey)
            ypred_train = model.predict(Train[xcols])
        else:
            ypred_train = Train[ycol] * np.nan
        calcol = ('predict', '"' + method + '-Cal-' + str(self.paramgrid[i]) + '"')
        Train[calcol] = ypred_train
        rmsec.append(RMSE(ypred_train, Train[ycol]))

    output = pd.DataFrame(self.paramgrid)
    output['RMSEC'] = rmsec
    output['RMSECV'] = rmsecv
    rmsecv_folds = np.array(rmsecv_folds)
    for i in range(len(rmsecv_folds[0, :])):
        label = 'Fold' + str(i)
        output[label] = rmsecv_folds[:, i]

    # Wrap every output column in a ('cv', name) MultiIndex.
    cols = output.columns.values
    cols = [('cv', i) for i in cols]
    output.columns = pd.MultiIndex.from_tuples(cols)
    return Train, output, models, modelkeys
def do_cv(self, Train, cv_iterator, xcols='wvl', ycol=('comp', 'SiO2'), method='PLS', yrange=[0, 100], calc_path=False, alphas=None, n_folds=3):
    """Cross-validate ``method`` for every entry of ``self.paramgrid``.

    For each parameter set the folds produced by ``cv_iterator`` are held out
    in turn, predicted, and written into 'predict' columns of ``Train``; the
    model is then fitted (or, with ``calc_path``, reconstructed from the path
    results) on all of ``Train``.

    Parameters
    ----------
    Train : data frame providing ``xcols`` and ``ycol``; modified in place
        (prediction columns are added).
    cv_iterator : iterable of ``(train, holdout)`` positional index pairs. It
        is duplicated with ``itertools.tee`` so that each parameter set can
        consume its own copy.
    xcols : column key selecting the predictors.
    ycol : column key selecting the target.
    method : method name passed to ``regression`` / ``path_calc``.
    yrange : two-element sequence passed to ``regression`` and used in the
        model key. It is never mutated here.
    calc_path : when true, ``path_calc`` is used instead of fitting one
        ``regression`` per fold, and one model per path alpha is created.
    alphas : when not None, the per-parameter output template is repeated
        ``len(alphas)`` times and gets an 'alphas' column; also forwarded to
        ``path_calc``.
    n_folds : number of 'Fold <n>' columns created in the output template.

    Returns
    -------
    tuple
        ``(Train, output, models, modelkeys, predictkeys)``; ``output`` has
        its columns wrapped in a ``('cv', name)`` MultiIndex and
        ``predictkeys`` lists the second element of every prediction column
        key that was written.

    NOTE(review): with the default tuple ``ycol``, ``ycol[0][-1]`` is the last
    character of the first element ('p' for 'comp'); left unchanged, confirm
    what callers pass.
    NOTE(review): ``DataFrame.set_value`` was removed in pandas 1.0; confirm
    the pandas version this runs against.
    """
    models = []
    modelkeys = []
    predictkeys = []
    # Explicit accumulator: the original relied on a NameError inside a bare
    # ``except`` to detect the first iteration, which also hid real concat errors.
    output = None

    # Duplicate the cv_iterator so it can be used for each permutation in paramgrid.
    cv_iterators = itertools.tee(cv_iterator, len(self.paramgrid))

    for i in range(len(self.paramgrid)):
        print(self.paramgrid[i])

        # Empty data frame serving as the output template for this permutation,
        # with columns for RMSEC, RMSECV and the RMSE of each fold.
        output_tmp = pd.DataFrame()
        output_tmp['RMSEC'] = 0
        output_tmp['RMSECV'] = 0
        for f in range(1, n_folds + 1):
            output_tmp['Fold ' + str(f)] = 0
        # Fill in the template with the current permutation's parameters.
        for k in self.paramgrid[i].keys():
            output_tmp.at[0, k] = self.paramgrid[i][k]
        if alphas is not None:
            output_tmp = pd.concat([output_tmp] * len(alphas))
            output_tmp['alphas'] = alphas

        foldcount = 1
        for train, holdout in cv_iterators[i]:
            cv_train = Train.iloc[train]      # rows used to build the model
            cv_holdout = Train.iloc[holdout]  # rows held out of the model

            if calc_path:
                X = cv_train[xcols]
                y = cv_train[ycol]
                # Do the path calculation.
                (path_alphas,
                 path_coefs,
                 intercepts,
                 path_n_iters,
                 y_pred_holdouts,
                 fold_rmses,
                 cvcols) = path_calc(X, y, cv_holdout[xcols], cv_holdout[ycol], alphas,
                                     self.paramgrid[i], yname=ycol[0][-1], method=method)
                output_tmp['Fold ' + str(foldcount)] = fold_rmses
                for n in range(len(path_alphas)):
                    Train.set_value(Train.index[holdout], cvcols[n], y_pred_holdouts[n])
            else:
                cvcols = [('predict', '"' + method + '- CV -' + str(self.paramgrid[i]) + '"')]
                # Fit the model and predict the held-out data.
                model = regression([method], [yrange], [self.paramgrid[i]])
                model.fit(cv_train[xcols], cv_train[ycol])
                if model.goodfit:
                    y_pred_holdout = model.predict(cv_holdout[xcols])
                else:
                    # A failed fit yields NaN predictions of matching shape.
                    y_pred_holdout = cv_holdout[ycol] * np.nan
                # Add the predictions to the appropriate column in the training data.
                Train.set_value(Train.index[holdout], cvcols[0], y_pred_holdout)
                # Record this fold's RMSE.
                output_tmp['Fold ' + str(foldcount)] = RMSE(y_pred_holdout, cv_holdout[ycol])
            foldcount = foldcount + 1

        # All folds have been held out and predicted: compute the overall RMSECV.
        rmsecv = []
        for col in cvcols:
            rmsecv.append(RMSE(Train[col], Train[ycol]))
            predictkeys.append(col[-1])
        output_tmp['RMSECV'] = rmsecv

        # Fit the model on the full training set using the current settings.
        if calc_path:
            X = Train[xcols]
            y = Train[ycol]
            (path_alphas,
             path_coefs,
             intercepts,
             path_n_iters,
             ypred_train,
             rmsec_train,
             cols) = path_calc(X, y, X, y, alphas, self.paramgrid[i],
                               colname='Cal', yname=ycol[0][-1], method=method)
            for n in range(len(path_alphas)):
                # Put the training set predictions in the data frame.
                Train[cols[n]] = ypred_train[n]
                predictkeys.append(cols[n][-1])
                # Create the model and set its parameters from the path
                # results rather than training it.
                model = regression([method], [yrange], [self.paramgrid[i]])
                model.model.set_params(alpha=path_alphas[n])
                setattr(model.model, 'intercept_', intercepts[n])
                setattr(model.model, 'coef_', np.squeeze(path_coefs)[:, n])
                setattr(model.model, 'n_iter_', path_n_iters[n])
                # Add the model and its name to the lists.
                models.append(model)
                modelkey = "{} - {} - ({}, {}) Alpha: {}, {}".format(
                    method, ycol[0][-1], yrange[0], yrange[1], path_alphas[n], self.paramgrid[i])
                modelkeys.append(modelkey)
            output_tmp['RMSEC'] = rmsec_train
        else:
            model = regression([method], [yrange], [self.paramgrid[i]])
            modelkey = "{} - {} - ({}, {}) {}".format(
                method, ycol[0][-1], yrange[0], yrange[1], self.paramgrid[i])
            models.append(model)
            modelkeys.append(modelkey)
            ypred_train = Train[ycol] * np.nan
            model.fit(Train[xcols], Train[ycol])
            # If the fit is good, predict the training set; otherwise drop
            # the model that was just appended.
            if model.goodfit:
                ypred_train = model.predict(Train[xcols])
            else:
                models = models[:-1]
                modelkeys = modelkeys[:-1]
            # Add the calibration predictions to the appropriate column.
            calcol = ('predict', '"' + method + '- Cal -' + str(self.paramgrid[i]) + '"')
            predictkeys.append(calcol[-1])
            Train[calcol] = ypred_train
            # Record the RMSEC for the current settings.
            output_tmp['RMSEC'] = RMSE(ypred_train, Train[ycol])

        if output is None:
            output = output_tmp
        else:
            output = pd.concat((output, output_tmp))

    # Make the columns of the output data frame multi-indexed.
    cols = output.columns.values
    cols = [('cv', i) for i in cols]
    output.columns = pd.MultiIndex.from_tuples(cols)
    return Train, output, models, modelkeys, predictkeys
def do_cv(self, Train, cv_iterator, xcols='wvl', ycol=('comp', 'SiO2'), method='PLS', yrange=[0, 100], calc_path = False, alphas = None, n_folds = 3):
    """Run cross validation of ``method`` over each entry of ``self.paramgrid``.

    Each parameter set gets its own copy of ``cv_iterator``. Every fold is
    held out once, predicted, and stored in a 'predict' column of ``Train``.
    Afterwards a model is fitted on the whole of ``Train`` (or, when
    ``calc_path`` is true, assembled from the ``path_calc`` results).

    Parameters
    ----------
    Train : data frame providing ``xcols`` and ``ycol``; prediction columns
        are added to it in place.
    cv_iterator : iterable yielding ``(train, holdout)`` positional index
        pairs; copied with ``itertools.tee``, one copy per parameter set.
    xcols : column key selecting the predictors.
    ycol : column key selecting the target.
    method : method name handed to ``regression`` / ``path_calc``.
    yrange : two-element sequence handed to ``regression`` and embedded in
        the model key. Not mutated.
    calc_path : use ``path_calc`` rather than one ``regression`` fit per
        fold; yields one model per path alpha.
    alphas : if not None, the output template is replicated ``len(alphas)``
        times with an 'alphas' column, and the value is passed to
        ``path_calc``.
    n_folds : how many 'Fold <n>' columns the output template starts with.

    Returns
    -------
    tuple
        ``(Train, output, models, modelkeys, predictkeys)``. ``output``
        carries ``('cv', name)`` MultiIndex columns; ``predictkeys`` holds
        the second element of each prediction column key written.

    NOTE(review): for the default tuple ``ycol``, ``ycol[0][-1]`` evaluates
    to the last character of the first element ('p' for 'comp'); kept as is,
    confirm what callers pass.
    NOTE(review): ``DataFrame.set_value`` no longer exists in pandas >= 1.0;
    confirm the pandas version in use.
    """
    models = []
    modelkeys = []
    predictkeys = []
    # Accumulated results; initialised explicitly instead of relying on a
    # NameError caught by a bare ``except`` on the first iteration, which
    # would also mask genuine concat failures.
    output = None

    # Need to duplicate the cv_iterator so it can be used for each permutation in paramgrid.
    cv_iterators = itertools.tee(cv_iterator, len(self.paramgrid))

    for i in range(len(self.paramgrid)):
        print(self.paramgrid[i])

        # Template frame for this permutation: RMSEC, RMSECV and per-fold RMSE columns.
        output_tmp = pd.DataFrame()
        output_tmp['RMSEC'] = 0
        output_tmp['RMSECV'] = 0
        for f in range(1, n_folds + 1):
            output_tmp['Fold ' + str(f)] = 0
        # Copy the current permutation's parameters into the template.
        for k in self.paramgrid[i].keys():
            output_tmp.at[0, k] = self.paramgrid[i][k]
        if alphas is not None:
            output_tmp = pd.concat([output_tmp] * len(alphas))
            output_tmp['alphas'] = alphas

        foldcount = 1
        for train, holdout in cv_iterators[i]:
            cv_train = Train.iloc[train]      # data used to create the model
            cv_holdout = Train.iloc[holdout]  # data held out of the model

            if calc_path:
                X = cv_train[xcols]
                y = cv_train[ycol]
                # Path calculation for this fold.
                (path_alphas,
                 path_coefs,
                 intercepts,
                 path_n_iters,
                 y_pred_holdouts,
                 fold_rmses,
                 cvcols) = path_calc(X, y, cv_holdout[xcols], cv_holdout[ycol], alphas,
                                     self.paramgrid[i], yname=ycol[0][-1], method=method)
                output_tmp['Fold ' + str(foldcount)] = fold_rmses
                for n in range(len(path_alphas)):
                    Train.set_value(Train.index[holdout], cvcols[n], y_pred_holdouts[n])
            else:
                cvcols = [('predict', '"' + method + '- CV -' + str(self.paramgrid[i]) + '"')]
                # Fit on the training split and predict the held-out rows.
                model = regression([method], [yrange], [self.paramgrid[i]])
                model.fit(cv_train[xcols], cv_train[ycol])
                if model.goodfit:
                    y_pred_holdout = model.predict(cv_holdout[xcols])
                else:
                    # Failed fit: NaN predictions with the holdout's shape.
                    y_pred_holdout = cv_holdout[ycol] * np.nan
                # Store the predictions in the training data.
                Train.set_value(Train.index[holdout], cvcols[0], y_pred_holdout)
                # Store this fold's RMSE.
                output_tmp['Fold ' + str(foldcount)] = RMSE(y_pred_holdout, cv_holdout[ycol])
            foldcount = foldcount + 1

        # Overall RMSECV once every fold has been held out and predicted.
        rmsecv = []
        for col in cvcols:
            rmsecv.append(RMSE(Train[col], Train[ycol]))
            predictkeys.append(col[-1])
        output_tmp['RMSECV'] = rmsecv

        # Full-training-set model for the current settings.
        if calc_path:
            X = Train[xcols]
            y = Train[ycol]
            (path_alphas,
             path_coefs,
             intercepts,
             path_n_iters,
             ypred_train,
             rmsec_train,
             cols) = path_calc(X, y, X, y, alphas, self.paramgrid[i],
                               colname='Cal', yname=ycol[0][-1], method=method)
            for n in range(len(path_alphas)):
                # Training set predictions go into the data frame.
                Train[cols[n]] = ypred_train[n]
                predictkeys.append(cols[n][-1])
                # Build the model and set its parameters from the path
                # results instead of training it.
                model = regression([method], [yrange], [self.paramgrid[i]])
                model.model.set_params(alpha=path_alphas[n])
                setattr(model.model, 'intercept_', intercepts[n])
                setattr(model.model, 'coef_', np.squeeze(path_coefs)[:, n])
                setattr(model.model, 'n_iter_', path_n_iters[n])
                # Register the model and its key.
                models.append(model)
                modelkey = "{} - {} - ({}, {}) Alpha: {}, {}".format(
                    method, ycol[0][-1], yrange[0], yrange[1], path_alphas[n], self.paramgrid[i])
                modelkeys.append(modelkey)
            output_tmp['RMSEC'] = rmsec_train
        else:
            model = regression([method], [yrange], [self.paramgrid[i]])
            modelkey = "{} - {} - ({}, {}) {}".format(
                method, ycol[0][-1], yrange[0], yrange[1], self.paramgrid[i])
            models.append(model)
            modelkeys.append(modelkey)
            ypred_train = Train[ycol] * np.nan
            model.fit(Train[xcols], Train[ycol])
            # Good fit: predict the training set. Bad fit: remove the model
            # and key appended just above.
            if model.goodfit:
                ypred_train = model.predict(Train[xcols])
            else:
                models = models[:-1]
                modelkeys = modelkeys[:-1]
            # Calibration predictions go into their own column.
            calcol = ('predict', '"' + method + '- Cal -' + str(self.paramgrid[i]) + '"')
            predictkeys.append(calcol[-1])
            Train[calcol] = ypred_train
            # RMSEC for the current settings.
            output_tmp['RMSEC'] = RMSE(ypred_train, Train[ycol])

        if output is None:
            output = output_tmp
        else:
            output = pd.concat((output, output_tmp))

    # Make the columns of the output data frame multi-indexed.
    cols = output.columns.values
    cols = [('cv', i) for i in cols]
    output.columns = pd.MultiIndex.from_tuples(cols)
    return Train, output, models, modelkeys, predictkeys