def fit(self, trainsets, ranges, ncs, ycol, figpath=None):
    """Fit one PLS sub-model per composition range.

    Parameters
    ----------
    trainsets : list of DataFrames, one per range, with ('wvl', ...) and
        ('meta', ...) MultiIndex columns.
    ranges : list of (low, high) composition ranges; trainsets[i]/ncs[i]
        correspond to ranges[i].
    ncs : list of int, number of PLS components per range.
    ycol : str, name of the target column under 'meta'.
    figpath : str or None; when given, a Q-residual vs. leverage
        diagnostic plot is saved per range.

    Side effects: stores ranges/ncs/ycol/submodels/mean_vects on self;
    when figpath is given, self.leverage and self.Q_res hold the values
    from the last range processed.
    """
    self.ranges = ranges
    self.ncs = ncs
    self.ycol = ycol
    submodels = []
    mean_vects = []
    for i, rangei in enumerate(ranges):
        # restrict the training data to rows whose ycol value is in range
        data_tmp = within_range.within_range(trainsets[i], rangei, ycol)
        x = data_tmp.xs('wvl', axis=1, level=0, drop_level=False)
        y = data_tmp['meta'][ycol]
        x_centered, x_mean_vect = meancenter(x)  # mean center training data
        pls = PLSRegression(n_components=ncs[i], scale=False)
        # NOTE(review): fits on the uncentered x while the residual
        # diagnostics below use x_centered — mirrors the sibling variant
        # of this method; confirm this is intentional.
        pls.fit(x, y)
        submodels.append(pls)
        mean_vects.append(x_mean_vect)
        if figpath is not None:
            # spectral residuals of the centered data vs. the PLS reconstruction
            E = x_centered - np.dot(pls.x_scores_, pls.x_loadings_.transpose())
            Q_res = np.dot(E, E.transpose()).diagonal()
            T = pls.x_scores_
            # BUG FIX: this expression was corrupted ("[email protected](...)"
            # email-obfuscation artifact). Restored leverage as the diagonal
            # of the hat matrix T (T'T)^-1 T', matching the formatted
            # variant of this method elsewhere in the file.
            leverage = np.diag(T @ np.linalg.inv(T.transpose() @ T) @ T.transpose())
            plot.figure()
            plot.scatter(leverage, Q_res, color='r', edgecolor='k')
            plot.title(ycol + ' (' + str(rangei[0]) + '-' + str(rangei[1]) + ')')
            plot.xlabel('Leverage')
            plot.ylabel('Q')
            plot.savefig(figpath + '/' + ycol + '_' + str(rangei[0]) + '-' + str(rangei[1]) + 'Qres_vs_Leverage.png', dpi=600)
            self.leverage = leverage
            self.Q_res = Q_res
    self.submodels = submodels
    self.mean_vects = mean_vects
def fit(self, X, y=None):
    """Instantiate the wrapped scikit-learn estimator from the stored
    hyperparameters and fit it on X (and y, when supplied).

    Returns self so calls can be chained.
    """
    estimator = SKLModel(**self._hyperparams)
    if y is None:
        estimator.fit(X)
    else:
        estimator.fit(X, y)
    self._sklearn_model = estimator
    return self
def __init__(self, n_components=2, scale=True, max_iter=500, tol=1e-06, copy=True):
    """Capture the PLS hyperparameters and build the wrapped operator.

    The same hyperparameter dict is handed verbatim to ``Op``.
    """
    self._hyperparams = dict(
        n_components=n_components,
        scale=scale,
        max_iter=max_iter,
        tol=tol,
        copy=copy,
    )
    self._wrapped_model = Op(**self._hyperparams)
def fit(self, trainsets, ranges, ncs, ycol, figpath=None):
    """Train one PLS sub-model for each composition range.

    trainsets, ranges and ncs are parallel sequences: trainsets[i] is the
    data used for ranges[i] with ncs[i] components. ycol names the target
    column under 'meta'. When figpath is given, a Q-residual vs. leverage
    diagnostic figure is written for every range.

    Stores ranges/ncs/ycol/submodels/mean_vects on self; with figpath,
    self.leverage and self.Q_res keep the values of the last range.
    """
    self.ranges = ranges
    self.ncs = ncs
    self.ycol = ycol
    submodels = []
    mean_vects = []
    for trainset, rangei, nc in zip(trainsets, ranges, ncs):
        # keep only samples whose ycol value lies inside this range
        subset = within_range.within_range(trainset, rangei, ycol)
        spectra = subset.xs('wvl', axis=1, level=0, drop_level=False)
        target = subset['meta'][ycol]
        # mean center training data
        spectra_centered, mean_vect = meancenter(spectra, 'wvl')
        pls = PLSRegression(n_components=nc, scale=False)
        pls.fit(spectra, target)
        submodels.append(pls)
        mean_vects.append(mean_vect)
        if figpath is not None:
            # calculate spectral residuals
            resid = spectra_centered - np.dot(pls.x_scores_, pls.x_loadings_.transpose())
            Q_res = np.dot(resid, resid.transpose()).diagonal()
            # calculate leverage (diagonal of the hat matrix of the scores)
            T = pls.x_scores_
            leverage = np.diag(T @ np.linalg.inv(T.transpose() @ T) @ T.transpose())
            range_label = str(rangei[0]) + '-' + str(rangei[1])
            plot.figure()
            plot.scatter(leverage, Q_res, color='r', edgecolor='k')
            plot.title(ycol + ' (' + range_label + ')')
            plot.xlabel('Leverage')
            plot.ylabel('Q')
            plot.ylim([0, 1.1 * np.max(Q_res)])
            plot.xlim([0, 1.1 * np.max(leverage)])
            plot.savefig(figpath + '/' + ycol + '_' + range_label + 'Qres_vs_Leverage.png', dpi=600)
            self.leverage = leverage
            self.Q_res = Q_res
    self.submodels = submodels
    self.mean_vects = mean_vects
class PLSRegressionImpl():
    """Scikit-learn-style facade over ``SKLModel``.

    Hyperparameters are captured at construction time; the underlying
    estimator is only instantiated when ``fit`` is called.
    """

    def __init__(self, n_components=2, scale=True, max_iter=500, tol=1e-06, copy=True):
        # Record constructor arguments; they are unpacked into SKLModel in fit().
        self._hyperparams = dict(
            n_components=n_components,
            scale=scale,
            max_iter=max_iter,
            tol=tol,
            copy=copy,
        )

    def fit(self, X, y=None):
        """Create the wrapped estimator and fit it; returns self for chaining."""
        estimator = SKLModel(**self._hyperparams)
        if y is None:
            estimator.fit(X)
        else:
            estimator.fit(X, y)
        self._sklearn_model = estimator
        return self

    def transform(self, X):
        """Delegate to the fitted estimator's transform."""
        return self._sklearn_model.transform(X)

    def predict(self, X):
        """Delegate to the fitted estimator's predict."""
        return self._sklearn_model.predict(X)
def __init__(self, method, yrange, params, i=0, ransacparams={}):
    """Construct the regression model named by method[i].

    Supports 'PLS' and 'GP'. For 'GP', the dimensionality-reduction
    settings are stripped from the parameter dict before building the
    Gaussian Process. ransacparams is accepted but not used here.
    """
    self.method = method
    self.outliers = None
    self.inliers = None
    self.ransac = False
    self.yrange = yrange[i]
    selected = self.method[i]
    if selected == 'PLS':
        self.model = PLSRegression(**params[i])
    if selected == 'GP':
        # Dimensionality reduction is handled outside the GP itself.
        self.reduce_dim = params[i]['reduce_dim']
        self.n_components = params[i]['n_components']
        gp_params = copy.copy(params[i])
        # GaussianProcess does not accept these two keys.
        gp_params.pop('reduce_dim')
        gp_params.pop('n_components')
        self.model = GaussianProcess(**gp_params)
def load_h5(file_name='blue.h5'):
    """Load a PLS model saved as raw arrays in an HDF5 resource file.

    Parameters
    ----------
    file_name : str
        Name of the HDF5 file inside ``get_resource_path()``.

    Returns
    -------
    PLSRegression or None
        A PLSRegression with coef_/x_mean_/y_mean_/x_std_ restored from
        the file, or None when loading fails. (Previously a failed load
        left ``model`` unbound and the final return raised
        UnboundLocalError; the open file handle also leaked.)
    """
    model = None  # ensure a defined return value on failure
    try:
        # Context manager guarantees the file is closed even if an
        # exception is raised while reading.
        with h5py.File(os.path.join(get_resource_path(), file_name), 'r') as hf:
            d1 = hf.get('coef')
            d2 = hf.get('x_mean')
            d3 = hf.get('y_mean')
            d4 = hf.get('x_std')
            model = PLSRegression(len(d1))
            model.coef_ = np.array(d1)
            model.x_mean_ = np.array(d2)
            model.y_mean_ = np.array(d3)
            model.x_std_ = np.array(d4)
    except Exception as e:
        print('Unable to load data ', file_name, ':', e)
    return model
def __init__(self, method, yrange, params, i=0):  #TODO: yrange doesn't currently do anything. Remove or do something with it!
    """Construct the regression model named by method[i].

    method : sequence of algorithm names (see algorithm_list below).
    yrange : unused (see TODO above).
    params : sequence of per-algorithm keyword dicts; params[i] is
        unpacked into the chosen estimator's constructor. Branches that
        support cross-validation read a 'CV' flag out of params[i] and
        strip it (plus any CV-incompatible keys) before construction.
    i : index selecting which method/params entry to use.

    Sets self.model to the constructed estimator. Exactly one branch is
    expected to match; if none does, self.model is never set.
    """
    # NOTE(review): algorithm_list says 'Lasso' but the branch below
    # checks 'LASSO' — confirm which casing callers actually pass.
    self.algorithm_list = [ 'PLS', 'GP', 'OLS', 'OMP', 'Lasso', 'Elastic Net', 'Ridge', 'Bayesian Ridge', 'ARD', 'LARS', 'LASSO LARS', 'SVR', 'KRR', ]
    self.method = method
    self.outliers = None
    self.ransac = False
    print(params)
    if self.method[i] == 'PLS':
        self.model = PLSRegression(**params[i])
    if self.method[i] == 'OLS':
        self.model = linear.LinearRegression(**params[i])
    if self.method[i] == 'OMP':
        # check whether to do CV or not
        self.do_cv = params[i]['CV']
        # create a temporary set of parameters
        params_temp = copy.copy(params[i])
        # Remove CV parameter
        params_temp.pop('CV')
        if self.do_cv is False:
            self.model = linear.OrthogonalMatchingPursuit(**params_temp)
        else:
            # the CV variant does not accept 'precompute'
            params_temp.pop('precompute')
            self.model = linear.OrthogonalMatchingPursuitCV(**params_temp)
    if self.method[i] == 'LASSO':
        # create a temporary set of parameters
        params_temp = copy.copy(params[i])
        # check whether to do CV or not; missing 'CV' key means no CV
        try:
            self.do_cv = params[i]['CV']
            # Remove CV parameter
            params_temp.pop('CV')
        except:
            self.do_cv = False
        if self.do_cv is False:
            self.model = linear.Lasso(**params_temp)
        else:
            # LassoCV picks alpha itself
            params_temp.pop('alpha')
            self.model = linear.LassoCV(**params_temp)
    if self.method[i] == 'Elastic Net':
        params_temp = copy.copy(params[i])
        # missing 'CV' key means no CV
        try:
            self.do_cv = params[i]['CV']
            params_temp.pop('CV')
        except:
            self.do_cv = False
        if self.do_cv is False:
            self.model = linear.ElasticNet(**params_temp)
        else:
            # sweep a fixed grid of l1_ratio values during CV
            params_temp['l1_ratio'] = [.1, .5, .7, .9, .95, .99, 1]
            self.model = linear.ElasticNetCV(**params_temp)
    if self.method[i] == 'Ridge':
        # create a temporary set of parameters
        params_temp = copy.copy(params[i])
        try:
            # check whether to do CV or not
            self.do_cv = params[i]['CV']
            # Remove CV parameter
            params_temp.pop('CV')
        except:
            self.do_cv = False
        if self.do_cv:
            self.model = linear.RidgeCV(**params_temp)
        else:
            self.model = linear.Ridge(**params_temp)
    if self.method[i] == 'BRR':
        self.model = linear.BayesianRidge(**params[i])
    if self.method[i] == 'ARD':
        self.model = linear.ARDRegression(**params[i])
    if self.method[i] == 'LARS':
        # create a temporary set of parameters
        params_temp = copy.copy(params[i])
        try:
            # check whether to do CV or not
            self.do_cv = params[i]['CV']
            # Remove CV parameter
            params_temp.pop('CV')
        except:
            self.do_cv = False
        if self.do_cv is False:
            self.model = linear.Lars(**params_temp)
        else:
            self.model = linear.LarsCV(**params_temp)
    if self.method[i] == 'LASSO LARS':
        # 'model' selects the variant: 0=LassoLars, 1=LassoLarsCV, 2=LassoLarsIC
        model = params[i]['model']
        params_temp = copy.copy(params[i])
        params_temp.pop('model')
        if model == 0:
            self.model = linear.LassoLars(**params_temp)
        elif model == 1:
            self.model = linear.LassoLarsCV(**params_temp)
        elif model == 2:
            self.model = linear.LassoLarsIC(**params_temp)
        else:
            print("Something went wrong, \'model\' should be 0, 1, or 2")
    if self.method[i] == 'SVR':
        self.model = svm.SVR(**params[i])
    if self.method[i] == 'KRR':
        self.model = kernel_ridge.KernelRidge(**params[i])
    if self.method[i] == 'GP':
        # get the method for dimensionality reduction and the number of components
        self.reduce_dim = params[i]['reduce_dim']
        self.n_components = params[i]['n_components']
        # create a temporary set of parameters
        params_temp = copy.copy(params[i])
        # Remove parameters not accepted by Gaussian Process
        params_temp.pop('reduce_dim')
        params_temp.pop('n_components')
        self.model = GaussianProcess(**params_temp)
def pls_cv(Train,Test=None,nc=20,nfolds=5,ycol='SiO2',doplot=True,outpath='.',plotfile='pls_cv.png'):
    """Sweep PLS component counts 1..nc, computing RMSECV (leave-one-fold-out
    on Train), RMSEC (Train fit) and, when Test is given, RMSEP.

    Train/Test are DataFrames with ('wvl', ...) spectral columns and
    ('meta', ...) metadata columns; ('meta','Folds') labels the CV folds.
    Prediction columns ('meta', ycol+'_..._PLS_nc<i>') are added to
    Train/Test in place. Returns a dict of RMSE arrays keyed by
    'RMSEC'/'RMSECV' (and 'RMSEP' when Test is given). Optionally saves an
    RMSE-vs-components plot to outpath/plotfile.

    NOTE(review): nfolds is unused — folds come from ('meta','Folds').
    Uses deprecated APIs (LeaveOneLabelOut, DataFrame.set_value); this
    code is pinned to an old sklearn/pandas.
    """
    #create empty arrays for the RMSE values
    pls_rmsecv=np.empty(nc)
    pls_rmsec=np.empty(nc)
    #If there is a test set provided, create the RMSEP array to hold test set errors
    if Test is not None:
        pls_rmsep=np.empty(nc)
    #loop through each number of components
    for i in range(1,nc+1):
        print('nc='+str(i))
        Train[('meta',ycol+'_cv_PLS_nc'+str(i))]=0 #create a column to hold the PLS cross validation results for this nc
        Train[('meta',ycol+'_PLS_nc'+str(i))]=0 #create a column to hold the PLS training set results for this nc
        if Test is not None:
            Test[('meta',ycol+'_PLS_nc'+str(i))]=0 #create a column to hold the PLS test set results for this nc
        #Do the cross validation
        cv_iterator=LeaveOneLabelOut(Train[('meta','Folds')]) #create the iterator for cross validation within the training data
        for train,holdout in cv_iterator: #Iterate through each of the folds in the training set
            cv_train=Train.iloc[train]
            cv_holdout=Train.iloc[holdout]
            #Do PLS for this number of components
            cv_train_centered,cv_train_mean_vect=meancenter(cv_train) #mean center training data
            cv_holdout_centered,cv_holdout_mean_vect=meancenter(cv_holdout,previous_mean=cv_train_mean_vect) #apply same mean centering to holdout data
            pls=PLSRegression(n_components=i,scale=False)
            pls.fit(cv_train_centered['wvl'],cv_train_centered['meta'][ycol])
            y_pred_holdout=pls.predict(cv_holdout_centered['wvl'])
            #store the holdout predictions back into the Train frame
            Train.set_value(Train.index[holdout],('meta',ycol+'_cv_PLS_nc'+str(i)),y_pred_holdout)
        #RMSECV over all holdout predictions for this component count
        pls_rmsecv[i-1]=np.sqrt(np.mean(np.subtract(Train[('meta',ycol)],Train[('meta',ycol+'_cv_PLS_nc'+str(i))])**2,axis=0))
        #Do train and test set PLS predictions for this number of components
        Train_centered,Train_mean_vect=meancenter(Train)
        pls=PLSRegression(n_components=i,scale=False)
        pls.fit(Train_centered['wvl'],Train_centered['meta'][ycol])
        y_pred=pls.predict(Train_centered['wvl'])
        Train.set_value(Train.index,('meta',ycol+'_PLS_nc'+str(i)),y_pred)
        pls_rmsec[i-1]=np.sqrt(np.mean(np.subtract(Train[('meta',ycol)],Train[('meta',ycol+'_PLS_nc'+str(i))])**2,axis=0))
        if Test is not None:
            #apply the training-set mean centering to the test set
            Test_centered,Train_mean_vect=meancenter(Test,previous_mean=Train_mean_vect)
            y_pred=pls.predict(Test_centered['wvl'])
            Test.set_value(Test.index,('meta',ycol+'_PLS_nc'+str(i)),y_pred)
            pls_rmsep[i-1]=np.sqrt(np.mean(np.subtract(Test[('meta',ycol)],Test[('meta',ycol+'_PLS_nc'+str(i))])**2,axis=0))
    if doplot==True:
        #plot RMSE curves vs number of components
        plot.figure()
        plot.title(ycol)
        plot.xlabel('# of components')
        plot.ylabel(ycol+' RMSE (wt.%)')
        plot.plot(range(1,nc+1),pls_rmsecv,label='RMSECV',color='r')
        plot.plot(range(1,nc+1),pls_rmsec,label='RMSEC',color='b')
        if Test is not None:
            plot.plot(range(1,nc+1),pls_rmsep,label='RMSEP',color='g')
        plot.legend(loc=0,fontsize=6)
        plot.savefig(outpath+'/'+plotfile,dpi=600)
    rmses={'RMSEC':pls_rmsec,'RMSECV':pls_rmsecv}
    if Test is not None:
        rmses['RMSEP']=pls_rmsep
    return rmses
# NOTE(review): interior fragment of a larger name -> estimator-instance
# registry dict; the opening and closing braces lie outside this chunk.
# Each entry maps a class name to a default-constructed instance.
'MultiTaskLassoCV':MultiTaskLassoCV(),
'MultinomialNB':MultinomialNB(),
'NMF':NMF(),
'NearestCentroid':NearestCentroid(),
'NearestNeighbors':NearestNeighbors(),
'Normalizer':Normalizer(),
'NuSVC':NuSVC(),
'NuSVR':NuSVR(),
'Nystroem':Nystroem(),
'OAS':OAS(),
'OneClassSVM':OneClassSVM(),
'OrthogonalMatchingPursuit':OrthogonalMatchingPursuit(),
'OrthogonalMatchingPursuitCV':OrthogonalMatchingPursuitCV(),
'PCA':PCA(),
'PLSCanonical':PLSCanonical(),
'PLSRegression':PLSRegression(),
'PLSSVD':PLSSVD(),
'PassiveAggressiveClassifier':PassiveAggressiveClassifier(),
'PassiveAggressiveRegressor':PassiveAggressiveRegressor(),
'Perceptron':Perceptron(),
# NOTE(review): ProjectedGradientNMF/RandomizedLasso/RandomizedPCA etc.
# were removed from modern scikit-learn — this registry targets an old version.
'ProjectedGradientNMF':ProjectedGradientNMF(),
'QuadraticDiscriminantAnalysis':QuadraticDiscriminantAnalysis(),
'RANSACRegressor':RANSACRegressor(),
'RBFSampler':RBFSampler(),
'RadiusNeighborsClassifier':RadiusNeighborsClassifier(),
'RadiusNeighborsRegressor':RadiusNeighborsRegressor(),
'RandomForestClassifier':RandomForestClassifier(),
'RandomForestRegressor':RandomForestRegressor(),
'RandomizedLasso':RandomizedLasso(),
'RandomizedLogisticRegression':RandomizedLogisticRegression(),
'RandomizedPCA':RandomizedPCA(),
def __init__(self, method, yrange, params, i=0):
    """Construct the regression model named by method[i].

    method : sequence of algorithm names (see algorithm_list).
    yrange : unused — see TODO below.
    params : sequence of per-algorithm keyword dicts; params[i] is
        unpacked into the chosen estimator's constructor.
    i : index selecting which method/params entry to use.

    Sets self.model; if no branch matches, self.model is never set.
    """
    # TODO: yrange doesn't currently do anything. Remove or do something with it!
    self.algorithm_list = ['PLS', 'GP', 'OLS', 'OMP', 'Lasso', 'Elastic Net',
                           'Ridge', 'Bayesian Ridge', 'ARD', 'LARS',
                           'LASSO LARS', 'SVR', 'KRR', ]
    self.method = method
    self.outliers = None
    self.ransac = False
    print(params)
    if self.method[i] == 'PLS':
        self.model = PLSRegression(**params[i])
    if self.method[i] == 'OLS':
        self.model = linear.LinearRegression(**params[i])
    if self.method[i] == 'OMP':
        # create a temporary set of parameters
        params_temp = copy.copy(params[i])
        self.model = linear.OrthogonalMatchingPursuit(**params_temp)
    if self.method[i] == 'LASSO':
        # NOTE(review): algorithm_list spells this 'Lasso' — confirm the
        # casing callers actually pass.
        # create a temporary set of parameters
        params_temp = copy.copy(params[i])
        self.model = linear.Lasso(**params_temp)
    if self.method[i] == 'Elastic Net':
        params_temp = copy.copy(params[i])
        self.model = linear.ElasticNet(**params_temp)
    if self.method[i] == 'Ridge':
        # create a temporary set of parameters
        params_temp = copy.copy(params[i])
        self.model = linear.Ridge(**params_temp)
    if self.method[i] == 'BRR':
        self.model = linear.BayesianRidge(**params[i])
    if self.method[i] == 'ARD':
        self.model = linear.ARDRegression(**params[i])
    if self.method[i] == 'LARS':
        # create a temporary set of parameters
        params_temp = copy.copy(params[i])
        self.model = linear.Lars(**params_temp)
    if self.method[i] == 'LASSO LARS':
        # BUG FIX: was linear.LassoLars(**params) — params is the per-model
        # container, not a kwargs dict; every other branch unpacks params[i].
        self.model = linear.LassoLars(**params[i])
    if self.method[i] == 'SVR':
        self.model = svm.SVR(**params[i])
    if self.method[i] == 'KRR':
        self.model = kernel_ridge.KernelRidge(**params[i])
    if self.method[i] == 'GP':
        # get the method for dimensionality reduction and the number of components
        self.reduce_dim = params[i]['reduce_dim']
        self.n_components = params[i]['n_components']
        # create a temporary set of parameters
        params_temp = copy.copy(params[i])
        # Remove parameters not accepted by Gaussian Process
        params_temp.pop('reduce_dim')
        params_temp.pop('n_components')
        self.model = GaussianProcess(**params_temp)
def __init__(self, method, yrange, params, i=0, ransacparams={}):
    """Construct the regression model named by method[i].

    method : sequence of algorithm names.
    yrange : per-model y ranges; self.yrange = yrange[i].
    params : sequence of per-algorithm keyword dicts; params[i] is
        unpacked into the chosen estimator. Branches supporting cross
        validation read a 'CV' flag (and 'IC' for Lasso LARS) from
        params[i] and strip it before construction.
    i : index selecting which method/params/yrange entry to use.
    ransacparams : unused here (mutable default kept for interface
        compatibility; it is never mutated).

    Sets self.model; if no branch matches (or Lasso LARS is asked for
    both CV and IC), self.model is never set.
    """
    self.method = method
    self.outliers = None
    self.inliers = None
    self.ransac = False
    self.yrange = yrange[i]
    if self.method[i] == 'PLS':
        self.model = PLSRegression(**params[i])
    if self.method[i] == 'OLS':
        self.model = linear.LinearRegression(**params[i])
    if self.method[i] == 'OMP':
        # check whether to do CV or not
        self.do_cv = params[i]['CV']
        # create a temporary set of parameters
        params_temp = copy.copy(params[i])
        # Remove CV parameter
        params_temp.pop('CV')
        if self.do_cv is False:
            self.model = linear.OrthogonalMatchingPursuit(**params_temp)
        else:
            # the CV variant chooses n_nonzero_coefs itself
            params_temp.pop('n_nonzero_coefs')
            self.model = linear.OrthogonalMatchingPursuitCV(**params_temp)
    if self.method[i] == 'Lasso':
        # check whether to do CV or not
        self.do_cv = params[i]['CV']
        # create a temporary set of parameters
        params_temp = copy.copy(params[i])
        # Remove CV parameter
        params_temp.pop('CV')
        if self.do_cv is False:
            self.model = linear.Lasso(**params_temp)
        else:
            # LassoCV selects alpha itself
            params_temp.pop('alpha')
            self.model = linear.LassoCV(**params_temp)
    if self.method[i] == 'Elastic Net':
        # check whether to do CV or not
        self.do_cv = params[i]['CV']
        # create a temporary set of parameters
        params_temp = copy.copy(params[i])
        # Remove CV parameter
        params_temp.pop('CV')
        if self.do_cv is False:
            self.model = linear.ElasticNet(**params_temp)
        else:
            # ElasticNetCV selects alpha itself
            params_temp.pop('alpha')
            self.model = linear.ElasticNetCV(**params_temp)
    if self.method[i] == 'Ridge':
        # check whether to do CV or not
        self.do_cv = params[i]['CV']
        # create a temporary set of parameters
        params_temp = copy.copy(params[i])
        # Remove CV parameter
        params_temp.pop('CV')
        if self.do_cv is False:
            self.model = linear.Ridge(**params_temp)
        else:
            # Ridge requires a specific set of alphas to be provided...
            # this needs more work to be implemented correctly
            self.model = linear.RidgeCV(**params_temp)
    if self.method[i] == 'Bayesian Ridge':
        self.model = linear.BayesianRidge(**params[i])
    if self.method[i] == 'ARD':
        self.model = linear.ARDRegression(**params[i])
    if self.method[i] == 'LARS':
        # check whether to do CV or not
        self.do_cv = params[i]['CV']
        # create a temporary set of parameters
        params_temp = copy.copy(params[i])
        # Remove CV parameter
        params_temp.pop('CV')
        if self.do_cv is False:
            self.model = linear.Lars(**params_temp)
        else:
            self.model = linear.LarsCV(**params_temp)
    if self.method[i] == 'Lasso LARS':
        # check whether to do CV or not
        self.do_cv = params[i]['CV']
        # check whether to do IC or not
        self.do_ic = params[i]['IC']
        # create a temporary set of parameters
        params_temp = copy.copy(params[i])
        # Remove CV and IC parameter
        params_temp.pop('CV')
        params_temp.pop('IC')
        # BUG FIX: these constructors previously received **params[i],
        # which still contains the CV/IC flags and would raise TypeError;
        # params_temp (with the flags stripped) is what was intended.
        if self.do_cv is False and self.do_ic is False:
            self.model = linear.LassoLars(**params_temp)
        if self.do_cv is True and self.do_ic is False:
            self.model = linear.LassoLarsCV(**params_temp)
        if self.do_cv is False and self.do_ic is True:
            self.model = linear.LassoLarsIC(**params_temp)
        if self.do_cv is True and self.do_ic is True:
            print(
                "Can't use both cross validation AND information criterion to optimize!"
            )
    if self.method[i] == 'SVR':
        self.model = svm.SVR(**params[i])
    if self.method[i] == 'KRR':
        self.model = kernel_ridge.KernelRidge(**params[i])
    if self.method[i] == 'GP':
        # get the method for dimensionality reduction and the number of components
        self.reduce_dim = params[i]['reduce_dim']
        self.n_components = params[i]['n_components']
        # create a temporary set of parameters
        params_temp = copy.copy(params[i])
        # Remove parameters not accepted by Gaussian Process
        params_temp.pop('reduce_dim')
        params_temp.pop('n_components')
        self.model = GaussianProcess(**params_temp)