示例#1
0
class PLSRegressionImpl:
    """Thin adapter that stores hyperparameters and drives an ``SKLModel``.

    The constructor arguments are kept verbatim and forwarded as keyword
    arguments to ``SKLModel`` every time :meth:`fit` is called.

    NOTE(review): ``SKLModel`` is not defined in this block; presumably it is
    an alias for scikit-learn's ``PLSRegression`` -- confirm against the
    file's imports.
    """

    def __init__(self, n_components=2, scale=True, max_iter=500,
                 tol=1e-06, copy=True):
        """Record the hyperparameters; no model object is created here."""
        self._hyperparams = dict(
            n_components=n_components,
            scale=scale,
            max_iter=max_iter,
            tol=tol,
            copy=copy,
        )

    def fit(self, X, y=None):
        """Build a fresh ``SKLModel`` from the stored settings and fit it.

        ``y`` is handed to the underlying ``fit`` only when it is not
        ``None``; otherwise the model is fitted on ``X`` alone.

        Returns:
            This wrapper instance, so calls can be chained.
        """
        self._sklearn_model = SKLModel(**self._hyperparams)
        fit_args = (X,) if y is None else (X, y)
        self._sklearn_model.fit(*fit_args)
        return self

    def transform(self, X):
        """Return the wrapped model's ``transform(X)``; ``fit`` must run first."""
        fitted = self._sklearn_model
        return fitted.transform(X)

    def predict(self, X):
        """Return the wrapped model's ``predict(X)``; ``fit`` must run first."""
        fitted = self._sklearn_model
        return fitted.predict(X)
示例#2
0
def pls_cv(Train,Test=None,nc=20,nfolds=5,ycol='SiO2',doplot=True,outpath='.',plotfile='pls_cv.png'):
    """Sweep PLS models with 1..nc components and collect their RMSE curves.

    For every component count the function
      * runs a leave-one-label-out cross validation over the labels stored in
        ``Train[('meta','Folds')]`` and writes the held-out predictions into
        ``Train[('meta', ycol+'_cv_PLS_nc<k>')]``,
      * refits on all of ``Train`` and writes the predictions into
        ``Train[('meta', ycol+'_PLS_nc<k>')]``,
      * when ``Test`` is given, predicts it with that full-train model and
        writes the result into ``Test[('meta', ycol+'_PLS_nc<k>')]``.

    ``Train`` and ``Test`` are modified in place (new columns are added).
    NOTE(review): both are presumably pandas DataFrames with two-level
    columns whose top levels include 'wvl' (predictors) and 'meta' -- confirm.
    NOTE(review): ``meancenter`` is defined elsewhere; it is assumed to return
    ``(centered_frame, mean_vector)`` and to reuse ``previous_mean`` when
    supplied -- confirm.

    Args:
        Train: training data; must contain ``('meta', ycol)`` and
            ``('meta', 'Folds')``.
        Test: optional test data containing ``('meta', ycol)``.
        nc: largest number of PLS components to evaluate.
        nfolds: accepted for interface compatibility; never read here.
        ycol: name of the response column under 'meta'.
        doplot: when equal to ``True``, plot the RMSE curves and save them.
        outpath: directory part of the saved plot path.
        plotfile: file name part of the saved plot path.

    Returns:
        dict with keys 'RMSEC' and 'RMSECV' (plus 'RMSEP' when ``Test`` is
        given), each an array of length ``nc``.
    """
    has_test = Test is not None
    target_key = ('meta', ycol)

    def _rmse(frame, predicted_key):
        # Root-mean-square difference between the reference column and one prediction column.
        return np.sqrt(np.mean(np.subtract(frame[target_key], frame[predicted_key])**2, axis=0))

    # One slot per component count; the test-set curve exists only when a test set does.
    rmsecv_by_nc = np.empty(nc)
    rmsec_by_nc = np.empty(nc)
    if has_test:
        rmsep_by_nc = np.empty(nc)

    for n_comp in range(1, nc+1):
        print('nc='+str(n_comp))
        slot = n_comp - 1
        cv_key = ('meta', ycol+'_cv_PLS_nc'+str(n_comp))
        fit_key = ('meta', ycol+'_PLS_nc'+str(n_comp))

        # Create the result columns before they are filled in below.
        Train[cv_key] = 0
        Train[fit_key] = 0
        if has_test:
            Test[fit_key] = 0

        # Cross validation: each fold label is held out once.
        for fit_rows, held_rows in LeaveOneLabelOut(Train[('meta','Folds')]):
            fold_fit = Train.iloc[fit_rows]
            fold_held = Train.iloc[held_rows]

            # Center the held-out rows with the mean obtained from the fitting rows.
            fold_fit_c, fold_mean = meancenter(fold_fit)
            fold_held_c, _ = meancenter(fold_held, previous_mean=fold_mean)

            fold_model = PLSRegression(n_components=n_comp, scale=False)
            fold_model.fit(fold_fit_c['wvl'], fold_fit_c['meta'][ycol])
            held_pred = fold_model.predict(fold_held_c['wvl'])
            Train.set_value(Train.index[held_rows], cv_key, held_pred)

        rmsecv_by_nc[slot] = _rmse(Train, cv_key)

        # Calibration: refit on the complete training set.
        train_c, train_mean = meancenter(Train)
        full_model = PLSRegression(n_components=n_comp, scale=False)
        full_model.fit(train_c['wvl'], train_c['meta'][ycol])

        train_pred = full_model.predict(train_c['wvl'])
        Train.set_value(Train.index, fit_key, train_pred)
        rmsec_by_nc[slot] = _rmse(Train, fit_key)

        if has_test:
            # The test set is centered with the training mean, then predicted.
            test_c, _ = meancenter(Test, previous_mean=train_mean)
            test_pred = full_model.predict(test_c['wvl'])
            Test.set_value(Test.index, fit_key, test_pred)
            rmsep_by_nc[slot] = _rmse(Test, fit_key)

    # Strict equality with True is kept on purpose to match the established behaviour.
    if doplot == True:
        curves = [
            (rmsecv_by_nc, 'RMSECV', 'r'),
            (rmsec_by_nc, 'RMSEC', 'b'),
        ]
        if has_test:
            curves.append((rmsep_by_nc, 'RMSEP', 'g'))

        plot.figure()
        plot.title(ycol)
        plot.xlabel('# of components')
        plot.ylabel(ycol+' RMSE (wt.%)')
        for values, curve_label, curve_color in curves:
            plot.plot(range(1, nc+1), values, label=curve_label, color=curve_color)
        plot.legend(loc=0, fontsize=6)
        plot.savefig(outpath+'/'+plotfile, dpi=600)

    rmses = {'RMSEC': rmsec_by_nc, 'RMSECV': rmsecv_by_nc}
    if has_test:
        rmses['RMSEP'] = rmsep_by_nc
    return rmses
示例#3
0
def pls_cv(Train,Test=None,nc=20,nfolds=5,ycol='SiO2',doplot=True,outpath='.',plotfile='pls_cv.png'):
    """Evaluate PLS regression for 1..nc components and return the RMSE curves.

    Three error curves are produced, indexed by component count minus one:
      * 'RMSECV' -- error of leave-one-label-out cross validation inside
        ``Train``, using the labels in ``Train[('meta','Folds')]``;
      * 'RMSEC'  -- error of a model fitted on, and applied to, all of ``Train``;
      * 'RMSEP'  -- error of that same model on ``Test`` (only when given).

    Side effects: prints ``nc=<k>`` for each component count, adds prediction
    columns ``('meta', ycol+'_cv_PLS_nc<k>')`` and ``('meta', ycol+'_PLS_nc<k>')``
    to ``Train`` (the latter also to ``Test``), and, when ``doplot`` equals
    ``True``, saves a plot to ``outpath + '/' + plotfile``.

    NOTE(review): ``Train``/``Test`` look like pandas DataFrames with two-level
    columns ('wvl' block as predictors, 'meta' block holding ``ycol``) --
    confirm against the caller.
    NOTE(review): ``meancenter`` is opaque here; it is assumed to return a
    centered frame and the mean vector used -- confirm.
    The ``nfolds`` argument is never read by this function.

    Returns:
        dict mapping 'RMSEC', 'RMSECV' and optionally 'RMSEP' to arrays of
        length ``nc``.
    """
    # The result dictionary is filled in place, one slot per component count.
    rmses = {'RMSEC': np.empty(nc), 'RMSECV': np.empty(nc)}
    if Test is not None:
        rmses['RMSEP'] = np.empty(nc)

    reference = ('meta', ycol)

    for position, ncomp in enumerate(range(1, nc+1)):
        print('nc='+str(ncomp))

        cv_column = ('meta', ycol+'_cv_PLS_nc'+str(ncomp))
        model_column = ('meta', ycol+'_PLS_nc'+str(ncomp))

        # Placeholder columns that receive the predictions for this component count.
        Train[cv_column] = 0
        Train[model_column] = 0
        if Test is not None:
            Test[model_column] = 0

        # ---- cross validation within the training data ----
        splitter = LeaveOneLabelOut(Train[('meta','Folds')])
        for kept, left_out in splitter:
            kept_centered, kept_mean = meancenter(Train.iloc[kept])
            # The held-out rows reuse the mean of the rows the model is fitted on.
            left_out_centered, _unused_mean = meancenter(Train.iloc[left_out], previous_mean=kept_mean)

            cv_model = PLSRegression(n_components=ncomp, scale=False)
            cv_model.fit(kept_centered['wvl'], kept_centered['meta'][ycol])
            Train.set_value(
                Train.index[left_out],
                cv_column,
                cv_model.predict(left_out_centered['wvl']),
            )

        cv_residual = np.subtract(Train[reference], Train[cv_column])
        rmses['RMSECV'][position] = np.sqrt(np.mean(cv_residual**2, axis=0))

        # ---- model on the whole training set ----
        centered_train, mean_of_train = meancenter(Train)
        model = PLSRegression(n_components=ncomp, scale=False)
        model.fit(centered_train['wvl'], centered_train['meta'][ycol])

        Train.set_value(Train.index, model_column, model.predict(centered_train['wvl']))
        train_residual = np.subtract(Train[reference], Train[model_column])
        rmses['RMSEC'][position] = np.sqrt(np.mean(train_residual**2, axis=0))

        # ---- optional test set, centered with the training mean ----
        if Test is not None:
            centered_test, mean_of_train = meancenter(Test, previous_mean=mean_of_train)
            Test.set_value(Test.index, model_column, model.predict(centered_test['wvl']))
            test_residual = np.subtract(Test[reference], Test[model_column])
            rmses['RMSEP'][position] = np.sqrt(np.mean(test_residual**2, axis=0))

    # Equality with True (not mere truthiness) is what triggers plotting.
    if doplot == True:
        plot.figure()
        plot.title(ycol)
        plot.xlabel('# of components')
        plot.ylabel(ycol+' RMSE (wt.%)')
        plot.plot(range(1, nc+1), rmses['RMSECV'], label='RMSECV', color='r')
        plot.plot(range(1, nc+1), rmses['RMSEC'], label='RMSEC', color='b')
        if Test is not None:
            plot.plot(range(1, nc+1), rmses['RMSEP'], label='RMSEP', color='g')
        plot.legend(loc=0, fontsize=6)
        plot.savefig(outpath+'/'+plotfile, dpi=600)

    return rmses