示例#1
0
 def predict(self, x):
     """Run every submodel on its own input frame and collect the outputs.

     x is a list of data frames, indexed in the same order as
     self.submodels, so each submodel can be fed data that was normalized
     differently.  For each submodel the 'wvl' column group of its frame is
     selected, passed through meancenter together with the mean vector
     stored for that submodel, and handed to the submodel's predict().

     Returns a list with one predict() result per submodel, in order.
     """
     def _predict_one(idx, submodel):
         # Keep the 'wvl' level in the column index (drop_level=False) so
         # the frame can still be addressed as frame['wvl'] afterwards.
         spectra = x[idx].xs('wvl', axis=1, level=0, drop_level=False)
         # Only the centered frame is needed; the returned mean is ignored.
         centered, _ = meancenter(spectra, 'wvl',
                                  previous_mean=self.mean_vects[idx])
         return submodel.predict(centered['wvl'])

     return [_predict_one(idx, submodel)
             for idx, submodel in enumerate(self.submodels)]
示例#2
0
 def predict(self, x):
     """Return the predictions of all submodels as a list.

     x must be a list of data frames with one entry per submodel (same
     order as self.submodels); supplying a separate frame per submodel
     lets each one use its own normalization.
     """
     outputs = []
     for position, model in enumerate(self.submodels):
         frame = x[position]
         stored_mean = None
         # Select the spectral ('wvl') columns without dropping that level.
         wvl_block = frame.xs('wvl', axis=1, level=0, drop_level=False)
         # Re-use the mean vector saved for this submodel; the mean that
         # meancenter hands back is not needed here.
         stored_mean = self.mean_vects[position]
         centered_block, _unused_mean = meancenter(
             wvl_block, 'wvl', previous_mean=stored_mean)
         outputs.append(model.predict(centered_block['wvl']))
     return outputs
示例#3
0
    def fit(self, trainsets, ranges, ncs, ycol, figpath=None):
        """Fit one PLS submodel per range and store them on the instance.

        Parameters
        ----------
        trainsets : sequence
            One training data set per entry of ``ranges``; ``trainsets[i]``
            is filtered with ``within_range.within_range`` before fitting.
        ranges : sequence
            One range per submodel.  Each entry is passed to
            ``within_range.within_range``; elements ``[0]`` and ``[1]`` are
            also used in the diagnostic plot title and file name.
        ncs : sequence of int
            ``ncs[i]`` is the number of PLS components of submodel ``i``.
        ycol : str
            Name of the target column inside the ``'meta'`` column group.
        figpath : str, optional
            Directory for the Q-residual vs. leverage plots.  When ``None``
            (default) no diagnostics are computed and no plot is written.

        Side effects
        ------------
        Sets ``self.ranges``, ``self.ncs``, ``self.ycol``,
        ``self.submodels`` and ``self.mean_vects``.  When ``figpath`` is
        given, ``self.leverage`` and ``self.Q_res`` hold the diagnostics of
        the last range processed (earlier ranges are overwritten).
        """
        self.ranges = ranges
        self.ncs = ncs
        self.ycol = ycol
        submodels = []
        mean_vects = []
        for i, rangei in enumerate(ranges):
            data_tmp = within_range.within_range(trainsets[i], rangei, ycol)
            x = data_tmp.xs('wvl', axis=1, level=0, drop_level=False)
            y = data_tmp['meta'][ycol]
            # Mean center the training data and keep the mean for predict().
            x_centered, x_mean_vect = meancenter(x, 'wvl')
            pls = PLSRegression(n_components=ncs[i], scale=False)
            # Fit on the centered spectra.  predict() feeds data that was
            # already centered with the stored mean vector, and
            # PLSRegression subtracts its own training mean again; fitting
            # on uncentered data would therefore remove the mean twice.
            pls.fit(x_centered['wvl'], y)
            submodels.append(pls)
            mean_vects.append(x_mean_vect)
            if figpath is None:
                continue

            T = pls.x_scores_
            # Spectral residuals: what the scores/loadings do not explain.
            E = x_centered - np.dot(T, pls.x_loadings_.transpose())
            # Q is the diagonal of E E^T, i.e. the row-wise sum of squares;
            # computed directly to avoid building an n x n matrix.
            Q_res = np.sum(np.square(np.asarray(E)), axis=1)
            # Leverage is the diagonal of T (T^T T)^-1 T^T, again taken
            # row-wise instead of forming the full n x n hat matrix.
            leverage = np.sum(
                (T @ np.linalg.inv(T.transpose() @ T)) * T, axis=1)

            range_label = str(rangei[0]) + '-' + str(rangei[1])
            plot.figure()
            plot.scatter(leverage, Q_res, color='r', edgecolor='k')
            plot.title(ycol + ' (' + range_label + ')')
            plot.xlabel('Leverage')
            plot.ylabel('Q')
            plot.ylim([0, 1.1 * np.max(Q_res)])
            plot.xlim([0, 1.1 * np.max(leverage)])

            plot.savefig(figpath + '/' + ycol + '_' + range_label +
                         'Qres_vs_Leverage.png',
                         dpi=600)
            self.leverage = leverage
            self.Q_res = Q_res
        # Assigned after the loop so the attributes exist (as empty lists)
        # even when ``ranges`` is empty.
        self.submodels = submodels
        self.mean_vects = mean_vects
示例#4
0
 def fit(self, trainsets, ranges, ncs, ycol, figpath=None):
     """Train one PLS submodel for each entry of ``ranges``.

     trainsets : sequence with one training data set per range;
         trainsets[i] is filtered with within_range.within_range.
     ranges : sequence of ranges; each is passed to within_range and its
         elements [0] and [1] appear in the plot title and file name.
     ncs : sequence giving the number of PLS components per submodel.
     ycol : name of the target column inside the 'meta' column group.
     figpath : directory for the Q-residual vs. leverage plots; when None
         (default) the diagnostics are skipped entirely.

     Stores ranges, ncs, ycol, submodels and mean_vects on self.  When
     figpath is given, self.leverage and self.Q_res hold the diagnostics
     of the last range processed.
     """
     self.ranges = ranges
     self.ncs = ncs
     self.ycol = ycol
     submodels = []
     mean_vects = []
     for i, rangei in enumerate(ranges):
         data_tmp = within_range.within_range(trainsets[i], rangei, ycol)
         x = data_tmp.xs('wvl', axis=1, level=0, drop_level=False)
         y = data_tmp['meta'][ycol]
         x_centered, x_mean_vect = meancenter(x, 'wvl')  # mean center training data
         pls = PLSRegression(n_components=ncs[i], scale=False)
         # predict() passes data already centered with the stored mean
         # vector, and PLSRegression removes its own training mean on top
         # of that.  Fit on the centered spectra so the mean is removed
         # only once at prediction time.
         pls.fit(x_centered['wvl'], y)
         submodels.append(pls)
         mean_vects.append(x_mean_vect)
         if figpath is not None:
             T = pls.x_scores_
             # calculate spectral residuals
             E = x_centered - np.dot(T, pls.x_loadings_.transpose())
             # diag(E E^T) equals the row-wise sum of squared residuals
             Q_res = np.sum(np.square(np.asarray(E)), axis=1)
             # calculate leverage: diag(T (T^T T)^-1 T^T), evaluated
             # row-wise so the n x n matrix is never materialized
             leverage = np.sum(
                 (T @ np.linalg.inv(T.transpose() @ T)) * T, axis=1)

             range_label = str(rangei[0]) + '-' + str(rangei[1])
             plot.figure()
             plot.scatter(leverage, Q_res, color='r', edgecolor='k')
             plot.title(ycol + ' (' + range_label + ')')
             plot.xlabel('Leverage')
             plot.ylabel('Q')
             plot.ylim([0, 1.1 * np.max(Q_res)])
             plot.xlim([0, 1.1 * np.max(leverage)])

             plot.savefig(figpath + '/' + ycol + '_' + range_label
                          + 'Qres_vs_Leverage.png', dpi=600)
             self.leverage = leverage
             self.Q_res = Q_res
     # Set once the loop is done so both attributes exist even when
     # ``ranges`` is empty.
     self.submodels = submodels
     self.mean_vects = mean_vects
示例#5
0
def _rmse(actual, predicted):
    """Return the root-mean-square difference between two columns."""
    return np.sqrt(np.mean(np.subtract(actual, predicted) ** 2, axis=0))


def pls_cv(Train, Test=None, nc=20, nfolds=5, ycol='SiO2', doplot=True,
           outpath='.', plotfile='pls_cv.png'):
    """Cross-validate PLS models with 1..nc components and report RMSEs.

    For every number of components the function
      * cross-validates inside ``Train`` using the fold labels stored in
        ``Train[('meta', 'Folds')]`` (one label held out at a time),
      * fits a model on all of ``Train`` and predicts ``Train`` itself,
      * predicts ``Test`` with that model when ``Test`` is given.

    Parameters
    ----------
    Train : data frame
        Training data; must provide the 'wvl' column group,
        ``('meta', ycol)`` and ``('meta', 'Folds')``.  Modified in place:
        the columns ``('meta', ycol + '_cv_PLS_nc<i>')`` and
        ``('meta', ycol + '_PLS_nc<i>')`` are added for each i.
    Test : data frame, optional
        Test data; modified in place with ``('meta', ycol + '_PLS_nc<i>')``.
    nc : int
        Largest number of PLS components to evaluate.
    nfolds : int
        Not used by this function (folds come from the 'Folds' column);
        kept so existing callers keep working.
    ycol : str
        Name of the target column inside 'meta'.
    doplot : bool
        When true, plot the RMSE curves and save them.
    outpath, plotfile : str
        The figure is written to ``outpath + '/' + plotfile``.

    Returns
    -------
    dict
        ``{'RMSEC': array, 'RMSECV': array}`` plus ``'RMSEP'`` when
        ``Test`` is given; each array has ``nc`` entries.

    NOTE(review): ``LeaveOneLabelOut`` comes from outside this block; newer
    scikit-learn releases appear to provide ``LeaveOneGroupOut`` instead --
    confirm against the installed version.
    """
    has_test = Test is not None
    y_key = ('meta', ycol)

    # Arrays holding the RMSE for each number of components.
    pls_rmsecv = np.empty(nc)
    pls_rmsec = np.empty(nc)
    if has_test:
        pls_rmsep = np.empty(nc)

    for i in range(1, nc + 1):
        print('nc=' + str(i))
        cv_key = ('meta', ycol + '_cv_PLS_nc' + str(i))
        fit_key = ('meta', ycol + '_PLS_nc' + str(i))
        # Create the result columns up front as floats so the predictions
        # written later do not force a dtype change.
        Train[cv_key] = 0.0
        Train[fit_key] = 0.0
        if has_test:
            Test[fit_key] = 0.0

        # Cross validation within the training data.  Hold-out predictions
        # are gathered positionally in a plain array and written back in a
        # single column assignment (DataFrame.set_value no longer exists
        # in current pandas).
        cv_pred = np.zeros(len(Train))
        cv_iterator = LeaveOneLabelOut(Train[('meta', 'Folds')])
        for train, holdout in cv_iterator:
            cv_train = Train.iloc[train]
            cv_holdout = Train.iloc[holdout]

            # Center the fold's training data, then apply that same mean
            # to the held-out rows.
            cv_train_centered, cv_train_mean_vect = meancenter(cv_train)
            cv_holdout_centered, _ = meancenter(
                cv_holdout, previous_mean=cv_train_mean_vect)
            pls = PLSRegression(n_components=i, scale=False)
            pls.fit(cv_train_centered['wvl'], cv_train_centered['meta'][ycol])
            # ravel: predict() may return an (n, 1) or an (n,) array.
            cv_pred[holdout] = np.ravel(
                pls.predict(cv_holdout_centered['wvl']))
        Train[cv_key] = cv_pred
        pls_rmsecv[i - 1] = _rmse(Train[y_key], Train[cv_key])

        # Fit on the whole training set and predict it back.
        Train_centered, Train_mean_vect = meancenter(Train)
        pls = PLSRegression(n_components=i, scale=False)
        pls.fit(Train_centered['wvl'], Train_centered['meta'][ycol])
        Train[fit_key] = np.ravel(pls.predict(Train_centered['wvl']))
        pls_rmsec[i - 1] = _rmse(Train[y_key], Train[fit_key])

        if has_test:
            # Center the test set with the training mean.
            Test_centered, _ = meancenter(Test, previous_mean=Train_mean_vect)
            Test[fit_key] = np.ravel(pls.predict(Test_centered['wvl']))
            pls_rmsep[i - 1] = _rmse(Test[y_key], Test[fit_key])

    if doplot:
        component_counts = range(1, nc + 1)
        plot.figure()
        plot.title(ycol)
        plot.xlabel('# of components')
        plot.ylabel(ycol + ' RMSE (wt.%)')
        plot.plot(component_counts, pls_rmsecv, label='RMSECV', color='r')
        plot.plot(component_counts, pls_rmsec, label='RMSEC', color='b')
        if has_test:
            plot.plot(component_counts, pls_rmsep, label='RMSEP', color='g')
        plot.legend(loc=0, fontsize=6)
        plot.savefig(outpath + '/' + plotfile, dpi=600)

    rmses = {'RMSEC': pls_rmsec, 'RMSECV': pls_rmsecv}
    if has_test:
        rmses['RMSEP'] = pls_rmsep
    return rmses