def blend_predict(data, wvl, filelist, blendranges, inrange, refpredict,
                  toblend, masterlist, name_subs, ranges, ncs, maskfile,
                  filenames, outputstr):
    """Run the four PLS submodels, blend them, and write the results to CSV.

    ``ccam.pls_predict`` is called once for each of the 'full', 'low', 'mid'
    and 'high' submodels, the four predictions are combined with
    ``ccam.submodels_blend``, and one row per entry of ``filelist`` is written
    to ``filenames['pred_csv_out'][outputstr]``.

    Parameters
    ----------
    data, wvl, maskfile
        Passed through unchanged to ``ccam.pls_predict``.
    filelist
        Sequence of file names; used for the target lookup and as the first
        column of every data row.
    blendranges, inrange, refpredict, toblend
        Passed through unchanged to ``ccam.submodels_blend``.
    masterlist, name_subs
        Passed through unchanged to ``ccam.target_lookup``.
    ranges
        Mapping keyed by 'full'/'low'/'mid'/'high'; elements 0 and 1 of each
        value are printed in the column headers.
    ncs
        Mapping keyed by 'full'/'low'/'mid'/'high' giving the number of
        components handed to ``ccam.pls_predict``.
    filenames
        Nested mapping with the keys 'loadfile', 'means_file' (each keyed by
        submodel name) and 'pred_csv_out' (keyed by ``outputstr``).
    outputstr
        Key selecting the output path in ``filenames['pred_csv_out']``.

    Returns
    -------
    None
    """
    submodels = ('full', 'low', 'mid', 'high')

    predicts = []
    norms = []
    for key in submodels:
        y_sub, norm_sub = ccam.pls_predict(
            data, ncs[key], wvl, maskfile,
            loadfile=filenames['loadfile'][key],
            mean_file=filenames['means_file'][key])
        predicts.append(y_sub)
        norms.append(norm_sub)
    y_full, y_low, y_mid, y_high = predicts

    blended = ccam.submodels_blend(predicts, blendranges, inrange, refpredict,
                                   toblend, overwrite=False, noneg=False)
    targetlist, targetdists, targetamps, _nshots = ccam.target_lookup(
        filelist, masterlist, name_subs)

    outfile = filenames['pred_csv_out'][outputstr]
    print('Writing results to ' + outfile)
    with open(outfile, 'w', newline='') as writefile:
        writer = csv.writer(writefile, delimiter=',')

        range_labels = [
            key.capitalize() + ' (' + str(ranges[key][0]) + '-'
            + str(ranges[key][1]) + ')'
            for key in submodels
        ]
        writer.writerow(['', '', '', ''] + range_labels + ['Blended'])
        writer.writerow(['', '', '', 'Norm='] + norms)
        writer.writerow(['', '', '', 'nc='] + [str(ncs[key]) for key in submodels])

        # NOTE(review): `which_elem` is neither a parameter nor assigned in
        # this function, so it is looked up in the enclosing module scope at
        # call time - confirm that a module-level `which_elem` exists.
        writer.writerow(['File', 'Target', 'Distance', 'Power'] + [which_elem] * 5)

        # One output row per prediction of the 'high' submodel.
        for i in range(len(y_high)):
            writer.writerow([filelist[i], targetlist[i], targetdists[i],
                             targetamps[i], y_full[i], y_low[i], y_mid[i],
                             y_high[i], blended[i]])
def read_single_ccs(filename, skiprows=0, shots=False, masterlist=None,
                    name_sub_file=None):
    """Read one CCS csv file and return its mean spectrum or its single shots.

    Parameters
    ----------
    filename : str
        Path of the file to read. Its last 40 characters are used as the
        short file label that is returned.
    skiprows : int
        Passed to ``ccam.read_csv``.
    shots : bool
        When ``True`` the per-shot columns are returned; any other value
        returns the last column of the file.
    masterlist, name_sub_file
        Passed to ``ccam.target_lookup``; only used when ``shots`` is True.

    Returns
    -------
    tuple
        ``(singleshots, wvl, files_singleshot, shotnums)`` when ``shots`` is
        True, where ``singleshots`` has one row per shot; otherwise
        ``(means, wvl, filetrim)``.
    """
    n_channels = 6144  # number of rows allocated for the per-shot array
    # Keep the trailing 40 characters (a slice, not a single character) so the
    # label agrees with the one built in read_ccs.
    filetrim = filename[-40:]
    single_shot = shots is True

    if single_shot:
        file_targets, file_dists, file_amps, nshots = ccam.target_lookup(
            filename, masterlist, name_sub_file)
        # Collapse the lookup result to one plain integer so it can be used
        # as an array size and a slice bound.
        sum_shots = int(numpy.sum(numpy.array(nshots, dtype='int')))

    # NOTE(review): read_ccs unpacks two values from this same call, so the
    # second return value is assumed to be the labels - confirm.
    tempdata, _templabels = ccam.read_csv(filename, skiprows, labelrow=False)
    wvl = numpy.array(tempdata[:, 0], dtype='float')

    if not single_shot:
        means = tempdata[:, -1]
        return means, wvl, filetrim

    singleshots = numpy.zeros([n_channels, sum_shots], dtype='float64')
    # Column 0 is used as the wavelength axis above and the last two columns
    # are left out; everything in between is stored as per-shot data.
    singleshots[:, 0:sum_shots] = tempdata[:, 1:-2]
    shotnums = numpy.arange(sum_shots, dtype='float64')
    files_singleshot = numpy.array([filetrim] * sum_shots, dtype=str)
    return numpy.transpose(singleshots), wvl, files_singleshot, shotnums
y_high, highnorm = ccam.pls_predict(data, nc_high, wvl, maskfile, loadfile=loadfile_high, mean_file=means_file_high) predicts = [y_full, y_low, y_mid, y_high] blended = ccam.submodels_blend(predicts, ranges, inrange, refpredict, toblend, overwrite=False) targetlist, targetdists, targetamps = ccam.target_lookup( filelist, masterlist, name_subs) y_combined = numpy.zeros_like(y_high) print 'Writing results' with open(outputfile_apxs, 'wb') as writefile: writer = csv.writer(writefile, delimiter=',') row = [ '', '', '', '', 'Full', 'Low (' + str(lowmin) + '-' + str(lowmax) + ')', 'Mid (' + str(midmin) + '-' + str(midmax) + ')', 'High (' + str(highmin) + '-' + str(highmax) + ')', 'Blended' ] writer.writerow(row) row = ['', '', '', 'Norm=', fullnorm, lownorm, midnorm, highnorm] writer.writerow(row) row = [
def read_ccs(searchdir, skiprows=15, shots=False, masterlist=None,
             name_sub_file=None):
    """Find and read every ``*CCS*csv`` file below ``searchdir``.

    File names are compared by their last 40 characters. Characters
    ``[-36:-27]`` of each path are treated as a clock identifier and the
    character at ``[-5:-4]`` as an integer version number; for each clock
    identifier only the file(s) with the highest version are kept, and
    duplicate short names are then dropped with ``numpy.unique`` (which also
    sorts them).

    Parameters
    ----------
    searchdir : str
        Directory that is walked recursively.
    skiprows : int
        Passed to ``ccam.read_csv``.
    shots : bool
        When ``True`` the per-shot columns of every file are returned; any
        other value returns the last column of each file.
    masterlist, name_sub_file
        Passed to ``ccam.target_lookup``; only used when ``shots`` is True.

    Returns
    -------
    tuple
        ``(singleshots, wvl, files_singleshot, shotnums)`` when ``shots`` is
        True, with one row of ``singleshots`` per shot; otherwise
        ``(means, wvl, files)`` with one row of ``means`` per file. ``wvl``
        is taken from the last file read. When no file matches, empty arrays
        of the same layout are returned.
    """
    searchstring = '*CCS*csv'
    n_channels = 6144  # number of values stored per spectrum
    single_shot = shots is True

    # Recursively search for CCS files in the specified directory
    found = []
    for root, _dirnames, filenames in os.walk(searchdir):
        for filename in fnmatch.filter(filenames, searchstring):
            found.append(os.path.join(root, filename))
    filelist = numpy.array(found, dtype=str)

    # Remove duplicates: keep only the highest version for each clock id.
    files = numpy.array([path[-40:] for path in found], dtype=str)
    sclocks = [path[-36:-27] for path in found]
    versions = [int(path[-5:-4]) for path in found]
    newest = {}
    for sclock, version in zip(sclocks, versions):
        newest[sclock] = max(version, newest.get(sclock, version))
    keep = numpy.array(
        [version == newest[sclock] for sclock, version in zip(sclocks, versions)],
        dtype='bool')

    files, unique_index = numpy.unique(files[keep], return_index=True)
    filelist = filelist[keep][unique_index]

    if len(filelist) == 0:
        # Nothing to read: return empty results instead of failing on the
        # never-assigned wavelength array / the missing first file name.
        print('Reading 0 files...')
        empty_spectra = numpy.zeros([0, n_channels], dtype='float64')
        empty_wvl = numpy.zeros(0, dtype='float')
        if single_shot:
            return empty_spectra, empty_wvl, files, numpy.zeros([0])
        return empty_spectra, empty_wvl, files

    if single_shot:
        file_targets, file_dists, file_amps, nshots = ccam.target_lookup(
            filelist, masterlist, name_sub_file)
        nshots = numpy.array(nshots, dtype='int')
        sum_shots = numpy.sum(nshots)

    print('Reading ' + str(len(filelist)) + ' files...')

    if single_shot:
        singleshots = numpy.zeros([n_channels, sum_shots], dtype='float64')
        # Use the dtype of the whole name array so no label gets truncated.
        files_singleshot = numpy.zeros(sum_shots, dtype=files.dtype)
        shotnums = numpy.zeros([sum_shots])
        rowcount = 0
    else:
        means = numpy.zeros([len(filelist), n_channels], dtype='float64')

    for i, path in enumerate(filelist):
        if (i + 1) % 100 == 0:
            print('Reading file #' + str(i + 1))
        tempdata, _templabels = ccam.read_csv(path, skiprows, labelrow=False)
        wvl = numpy.array(tempdata[:, 0], dtype='float')

        if single_shot:
            stop = rowcount + nshots[i]
            shotnums[rowcount:stop] = numpy.arange(nshots[i])
            files_singleshot[rowcount:stop] = files[i]
            # Column 0 is the wavelength axis and the last two columns are
            # left out; the columns in between are stored as shots.
            singleshots[:, rowcount:stop] = tempdata[:, 1:-2]
            rowcount = stop
        else:
            means[i, :] = tempdata[:, -1]

    if single_shot:
        return numpy.transpose(singleshots), wvl, files_singleshot, shotnums
    return means, wvl, files
def calc_comp(self):
    """Predict compositions for every CCS file under ``self.searchdir``.

    Each file is read, masked with ``self.maskfile`` and normalized twice
    (``normtype=1`` and ``normtype=3``); the results are stored on
    ``self.spectra_masked_norm1`` / ``self.spectra_masked_norm3`` before
    ``self.pls_submodels`` is called. The per-file submodel outputs are
    stacked, blended with ``self.pls_blend`` and handed to
    ``self.write_results``. The progress bar of ``self.myWidget`` is updated
    once per file.

    Returns
    -------
    None
    """
    # Choose whether to do single shots
    self.shots = self.myWidget.singleshots_checkbox.isChecked()
    single_shot = self.shots is True

    filelist, files = ccam.search_ccs(self.searchdir)
    self.myWidget.progressBar.setMaximum(len(filelist))
    targets, dists, amps, nshots = ccam.target_lookup(
        filelist, self.masterlist, self.name_sub_file)
    nshots = numpy.array(nshots, dtype='int')

    if len(filelist) == 0:
        # Without any file there is nothing to blend or write.
        print('No CCS files found in ' + str(self.searchdir))
        return

    # Loop through each file in the file list
    for i in range(len(filelist)):
        # `app` is not defined in this method; it is resolved from the
        # enclosing module scope.
        app.processEvents()
        self.myWidget.progressBar.setValue(i)
        print(filelist[i])

        # NOTE(review): `singlefile=True` must be accepted by ccam.read_ccs -
        # confirm against the installed ccam package.
        if single_shot:
            spectra, wvl, filename, shotnum = ccam.read_ccs(
                filelist[i], skiprows=0, shots=self.shots,
                masterlist=self.masterlist,
                name_sub_file=self.name_sub_file, singlefile=True)
        else:
            # Mean spectra contribute exactly one row per file.
            nshots[i] = 1
            spectra, wvl, filename = ccam.read_ccs(
                filelist[i], skiprows=0, shots=self.shots,
                masterlist=self.masterlist,
                name_sub_file=self.name_sub_file, singlefile=True)

        spectra_masked, wvl_masked = ccam.mask(spectra, wvl, self.maskfile)
        self.spectra_masked_norm1 = ccam.normalize(spectra_masked, wvl_masked,
                                                   normtype=1)
        self.spectra_masked_norm3 = ccam.normalize(spectra_masked, wvl_masked,
                                                   normtype=3)
        comps_temp = self.pls_submodels(nshots[i])

        if i == 0:
            comps_all = comps_temp
            filename_all = filename
            if single_shot:
                shotnum_all = shotnum
                targets_all = numpy.tile(targets[i], nshots[i])
                dists_all = numpy.tile(dists[i], nshots[i])
                amps_all = numpy.tile(amps[i], nshots[i])
        else:
            # Append this file's rows to each of the four submodel outputs.
            comps_all = [numpy.vstack([comps_all[k], comps_temp[k]])
                         for k in range(4)]
            filename_all = numpy.hstack([filename_all, filename])
            if single_shot:
                shotnum_all = numpy.hstack([shotnum_all, shotnum])
                targets_all = numpy.hstack(
                    [targets_all, numpy.tile(targets[i], nshots[i])])
                dists_all = numpy.hstack(
                    [dists_all, numpy.tile(dists[i], nshots[i])])
                amps_all = numpy.hstack(
                    [amps_all, numpy.tile(amps[i], nshots[i])])

    blended_all = self.pls_blend(comps_all)
    self.myWidget.progressBar.setValue(len(filelist))

    if not single_shot:
        # One row per file: the per-file lookup results are used directly.
        shotnum_all = 'placeholder'
        targets_all = targets
        dists_all = dists
        amps_all = amps

    self.write_results(blended_all, shotnum_all, targets_all, dists_all,
                       amps_all, filename_all)
def pls_cal(dbfile, maskfile, outpath, which_elem, testfold, nc, normtype=1,
            mincomp=0, maxcomp=100, plstype='mlpy', keepfile=None,
            removefile=None, cal_dir=None, masterlist_file=None,
            compfile=None, name_sub_file=None, foldfile=None, nfolds=7,
            seed=None, n_bag=None, skscale=False, n_boost=None,
            max_samples=0.1, n_elems=9):
    """Cross-validate and train PLS models for one element and write results.

    The spectral database is read, filtered by composition, masked and
    normalized. Samples are assigned to folds; fold ``testfold`` is held out
    as the test set and the remaining folds are used for leave-one-fold-out
    cross validation. Models with 1..``nc`` components are then trained on
    the whole training set. RMSE tables, prediction tables, regression
    coefficients, mean-centering vectors, a pickle of the trained models and
    an input summary are written to files whose names start with
    ``outpath + which_elem``.

    Parameters
    ----------
    dbfile
        Spectral database, read with ``ccam.read_db``.
    maskfile
        Passed to ``ccam.mask``.
    outpath : str
        Prefix (string-concatenated, not path-joined) for every output file.
    which_elem : str
        Label of the composition column to model; must occur in the labels
        returned by ``ccam.read_db`` after the first two entries.
    testfold
        Fold value used as the test set.
    nc : int
        Largest number of PLS components to train.
    normtype
        Passed to ``ccam.normalize``.
    mincomp, maxcomp
        Composition limits passed to ``ccam.choose_spectra``.
    plstype : str
        ``'mlpy'`` or ``'sklearn'``; any other value trains no model and
        leaves the result arrays at zero.
    keepfile, removefile
        Passed to ``ccam.choose_spectra``.
    cal_dir
        When not ``None``, calibration-target spectra are read from this
        directory with ``ccam.read_ccs`` and scored for every model.
    masterlist_file, name_sub_file
        Passed to ``ccam.target_lookup`` (calibration targets only).
    compfile
        Passed to ``ccam.target_comp_lookup`` (calibration targets only).
    foldfile
        Fold definition for ``ccam.folds``; when ``None`` random folds are
        drawn with ``ccam.random_folds(names, nfolds, seed=seed)``.
    n_bag, n_boost
        When not ``None`` (sklearn only) the PLS model is wrapped in a
        ``BaggingRegressor`` / ``AdaBoostRegressor`` with that many
        estimators.
    skscale : bool
        When true, mean-centering is skipped and ``scale=skscale`` is passed
        to ``PLSRegression`` instead.
    max_samples
        Passed to ``BaggingRegressor``.
    n_elems
        Passed to ``ccam.read_db``.

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If ``which_elem`` is not found among the composition labels.
    """
    use_bagging = n_bag is not None
    use_boosting = n_boost is not None
    have_cal = cal_dir is not None

    # Later assignments win, so '_scale' overrides '_boost' overrides '_bag'.
    plstype_string = plstype
    if use_bagging:
        plstype_string = plstype + '_bag'
    if use_boosting:
        plstype_string = plstype + '_boost'
    if skscale:
        plstype_string = plstype + '_scale'

    comp_range = str(mincomp) + '-' + str(maxcomp)
    # Common stem of almost every output file name.
    prefix = (outpath + which_elem + '_' + plstype_string + '_nc' + str(nc)
              + '_norm' + str(normtype) + '_' + comp_range)

    print('Reading database')
    sys.stdout.flush()
    spectra, comps, spect_index, names, labels, wvl = ccam.read_db(
        dbfile, compcheck=True, n_elems=n_elems)
    oxides = labels[2:]
    compindex = numpy.where(oxides == which_elem)[0]
    if len(compindex) == 0:
        raise ValueError(which_elem + ' was not found in the composition '
                         'labels of ' + str(dbfile))

    print('Choosing spectra')
    which_removed = prefix + '_removed.csv'
    spectra, names, spect_index, comps = ccam.choose_spectra(
        spectra, spect_index, names, comps, compindex, mincomp=mincomp,
        maxcomp=maxcomp, keepfile=keepfile, removefile=removefile,
        which_removed=which_removed)

    print('Masking spectra')
    spectra, wvl = ccam.mask(spectra, wvl, maskfile)

    print('Normalizing spectra')
    spectra = ccam.normalize(spectra, wvl, normtype=normtype)

    print('Assigning Folds')
    if foldfile is not None:
        # if a fold file is specified, use it
        folds = ccam.folds(foldfile, names)
    else:
        # otherwise, define random folds
        folds = ccam.random_folds(names, nfolds, seed=seed)

    # Append the samples not assigned to any fold (fold value 0) to the
    # "removed" file. csv needs a text-mode file on Python 3.
    no_fold = (folds == 0)
    with open(which_removed, 'a', newline='') as writefile:
        writer = csv.writer(writefile, delimiter=',')
        for name, index in zip(names[no_fold], spect_index[no_fold]):
            writer.writerow([name, index, 'No Fold'])

    # remove spectra that are not assigned to any fold
    in_fold = (folds != 0)
    spectra = spectra[in_fold, :]
    spect_index = spect_index[in_fold]
    names = names[in_fold]
    comps = comps[in_fold, :]
    folds = folds[in_fold]

    print('Defining Training and Test Sets')
    is_train = (folds != testfold)
    is_test = (folds == testfold)
    spectra_train = spectra[is_train]
    spect_index_train = spect_index[is_train]
    names_train = names[is_train]
    comps_train = comps[is_train, compindex]
    folds_train = folds[is_train]
    folds_train_unique = numpy.unique(folds_train)
    spectra_test = spectra[is_test]
    spect_index_test = spect_index[is_test]
    names_test = names[is_test]
    comps_test = comps[is_test, compindex]
    folds_test = folds[is_test]

    print('Do Leave One Label Out (LOLO) cross validation with all folds but the test set')
    # arrays holding the cross validation predictions and RMSEs
    train_predict_cv = numpy.zeros((len(names_train), nc))
    RMSECV = numpy.zeros(nc)
    for fold in folds_train_unique:
        print('Holding out fold #' + str(fold))
        held_in = (folds_train != fold)
        held_out = (folds_train == fold)
        if skscale:
            X_cv_in = spectra_train[held_in, :]
            X_cv_out = spectra_train[held_out, :]
            Y_cv_in = comps_train[held_in]
            Y_cv_in_mean = 0
        else:
            # mean center the spectra left in, and apply the same mean to
            # those left out
            X_cv_in, X_cv_in_mean = ccam.meancenter(spectra_train[held_in, :])
            X_cv_out = ccam.meancenter(spectra_train[held_out, :],
                                       X_mean=X_cv_in_mean)[0]
            # mean center compositions left in
            Y_cv_in, Y_cv_in_mean = ccam.meancenter(comps_train[held_in])

        # step through each number of components
        for j in range(1, nc + 1):
            print('Training Model for ' + str(j) + ' components')
            col = j - 1
            if plstype == 'mlpy':
                PLS1model = ccam.mlpy_pls.PLS(j)
                PLS1model.learn(X_cv_in, Y_cv_in)
                # predict the samples held out
                train_predict_cv[held_out, col] = (
                    PLS1model.pred(X_cv_out) + Y_cv_in_mean)
            if plstype == 'sklearn':
                PLS1model = PLSRegression(n_components=j, scale=skscale)
                if not use_bagging and not use_boosting:
                    PLS1model.fit(X_cv_in, Y_cv_in)
                    train_predict_cv[held_out, col] = numpy.squeeze(
                        PLS1model.predict(X_cv_out) + Y_cv_in_mean)
                if use_bagging:
                    PLS1bagged = ensemble.BaggingRegressor(
                        PLS1model, n_estimators=n_bag,
                        max_samples=max_samples, verbose=1)
                    PLS1bagged.fit(X_cv_in, Y_cv_in)
                    train_predict_cv[held_out, col] = numpy.squeeze(
                        PLS1bagged.predict(X_cv_out) + Y_cv_in_mean)
                if use_boosting:
                    PLS1boosted = ensemble.AdaBoostRegressor(
                        PLS1model, n_estimators=n_boost)
                    PLS1boosted.fit(X_cv_in, Y_cv_in)
                    train_predict_cv[held_out, col] = numpy.squeeze(
                        PLS1boosted.predict(X_cv_out) + Y_cv_in_mean)

    # calculate RMSECV
    for col in range(nc):
        sqerr = (train_predict_cv[:, col] - comps_train) ** 2.0
        RMSECV[col] = numpy.sqrt(numpy.mean(sqerr))

    # mean center full model
    if skscale:
        X = spectra_train
        X_test = spectra_test
        X_all = spectra
        Y = comps_train
        Y_mean = 0
    else:
        X, X_mean = ccam.meancenter(spectra_train)
        X_test = ccam.meancenter(spectra_test, X_mean=X_mean)[0]
        X_all = ccam.meancenter(spectra, X_mean=X_mean)[0]
        Y, Y_mean = ccam.meancenter(comps_train)

    # create arrays for results and RMSEs
    n_train = len(X[:, 0])
    trainset_results = numpy.zeros((len(names_train), nc))
    testset_results = numpy.zeros((len(names_test), nc))
    results = numpy.zeros((len(names), nc))
    RMSEP = numpy.zeros(nc)
    RMSEC = numpy.zeros(nc)
    beta = numpy.zeros((len(X[0, :]), nc))
    Q_res = numpy.zeros((n_train, nc))
    T2 = numpy.zeros((n_train, nc))

    if have_cal:
        print('Reading cal target data')
        cal_data, cal_wvl, cal_filelist = ccam.read_ccs(cal_dir)
        cal_data, cal_wvl = ccam.mask(cal_data, cal_wvl, maskfile)
        cal_data = ccam.normalize(cal_data, cal_wvl, normtype=normtype)
        if skscale:
            cal_data_centered = cal_data
        else:
            cal_data_centered = ccam.meancenter(cal_data, X_mean=X_mean)[0]

        # One RMSEP array per calibration target, in the order returned by
        # cal_rmses: KGAMEDS, MACUSANITE, NAU2HIS, NAU2LOS, NAU2MEDS, NORITE,
        # PICRITE, SHERGOTTITE.
        cal_target_rmsep = [numpy.zeros(nc) for _ in range(8)]
        RMSEP_cal_good = numpy.zeros(nc)
        targets, dists, amps, nshots = ccam.target_lookup(
            cal_filelist, masterlist_file, name_sub_file)
        target_comps = ccam.target_comp_lookup(targets, compfile, which_elem)
        cal_results = numpy.zeros((len(targets), nc))

        def record_cal_predictions(predicted, j):
            """Store cal-target predictions for j components and score them."""
            cal_results[:, j - 1] = predicted
            # cal_rmses is defined elsewhere in this module; its last return
            # value is stored separately from the per-target ones.
            *per_target, good = cal_rmses(
                targets, nc, target_comps, j, cal_data_centered, Y_mean,
                mincomp, maxcomp, cal_results)
            for rmsep, value in zip(cal_target_rmsep, per_target):
                rmsep[j - 1] = value
            RMSEP_cal_good[j - 1] = good

    model_list = []
    # Now step through each # of components with the full model
    for j in range(1, nc + 1):
        print('Training full model for ' + str(j) + ' components')
        col = j - 1
        if plstype == 'mlpy':
            PLS1model = ccam.mlpy_pls.PLS(j)
            PLS1model.learn(X, Y)
            beta[:, col] = PLS1model.beta()
            model_list.append([PLS1model])
            trainset_results[:, col] = PLS1model.pred(X) + Y_mean
            testset_results[:, col] = PLS1model.pred(X_test) + Y_mean
            results[:, col] = PLS1model.pred(X_all) + Y_mean
            if have_cal:
                record_cal_predictions(
                    PLS1model.pred(cal_data_centered) + Y_mean, j)

        if plstype == 'sklearn':
            PLS1model = PLSRegression(n_components=j, scale=skscale)
            if not use_bagging and not use_boosting:
                PLS1model.fit(X, Y)
                T = PLS1model.x_scores_
                # The inverse does not depend on the row, so compute it once.
                TtT_inv = numpy.linalg.inv(numpy.dot(T.transpose(), T))
                for k in range(n_train):
                    T2[k, col] = numpy.dot(T[k, :], numpy.dot(TtT_inv, T[k, :]))
                E = X - numpy.dot(PLS1model.x_scores_,
                                  PLS1model.x_loadings_.transpose())
                # Row-wise sum of squares equals the diagonal of E.E' without
                # building the full square matrix.
                Q_res[:, col] = numpy.sum(E * E, axis=1)
                trainset_results[:, col] = numpy.squeeze(
                    PLS1model.predict(X) + Y_mean)
                testset_results[:, col] = numpy.squeeze(
                    PLS1model.predict(X_test) + Y_mean)
                results[:, col] = numpy.squeeze(
                    PLS1model.predict(X_all) + Y_mean)
                # NOTE(review): `coefs` is the attribute name the original
                # code relies on - confirm it exists in the installed
                # scikit-learn version.
                beta[:, col] = numpy.squeeze(PLS1model.coefs)
                model_list.append([PLS1model])
                if have_cal:
                    record_cal_predictions(numpy.squeeze(
                        PLS1model.predict(cal_data_centered) + Y_mean), j)
            if use_bagging:
                PLS1bagged = ensemble.BaggingRegressor(
                    PLS1model, n_estimators=n_bag, max_samples=max_samples,
                    verbose=1)
                PLS1bagged.fit(X, Y)
                trainset_results[:, col] = numpy.squeeze(
                    PLS1bagged.predict(X) + Y_mean)
                testset_results[:, col] = numpy.squeeze(
                    PLS1bagged.predict(X_test) + Y_mean)
                results[:, col] = numpy.squeeze(
                    PLS1bagged.predict(X_all) + Y_mean)
                # No single coefficient vector is stored for the ensemble.
                beta[:, col] = None
                model_list.append([PLS1bagged])
                if have_cal:
                    record_cal_predictions(numpy.squeeze(
                        PLS1bagged.predict(cal_data_centered) + Y_mean), j)
            if use_boosting:
                PLS1boosted = ensemble.AdaBoostRegressor(
                    PLS1model, n_estimators=n_boost)
                PLS1boosted.fit(X, Y)
                trainset_results[:, col] = numpy.squeeze(
                    PLS1boosted.predict(X) + Y_mean)
                testset_results[:, col] = numpy.squeeze(
                    PLS1boosted.predict(X_test) + Y_mean)
                results[:, col] = numpy.squeeze(
                    PLS1boosted.predict(X_all) + Y_mean)
                beta[:, col] = None
                model_list.append([PLS1boosted])
                if have_cal:
                    record_cal_predictions(numpy.squeeze(
                        PLS1boosted.predict(cal_data_centered) + Y_mean), j)

        RMSEC[col] = numpy.sqrt(
            numpy.mean((trainset_results[:, col] - comps_train) ** 2.0))
        RMSEP[col] = numpy.sqrt(
            numpy.mean((testset_results[:, col] - comps_test) ** 2.0))

    # The pickle name carries no '_nc' part, unlike the other outputs.
    pickle_path = (outpath + which_elem + '_' + plstype_string + '_norm'
                   + str(normtype) + '_' + comp_range + '.pkl')
    with open(pickle_path, 'wb') as picklefile:
        pickle.dump(model_list, picklefile)

    plot_title = which_elem + ' RMSEs'

    # if cal_dir is specified, summarize and write the cal target RMSEs
    if have_cal:
        # Count the targets whose first-component RMSEP is non-zero.
        n_good_cal = numpy.sum(numpy.array(cal_target_rmsep)[:, 0] != 0)
        print(n_good_cal)
        RMSEP_cal = sum(cal_target_rmsep[1:], cal_target_rmsep[0]) / n_good_cal
        RMSEP_single_cals = cal_target_rmsep + [RMSEP_cal]

        _pls_cal_write_table(
            prefix + '_caltargets_predict.csv',
            ['File', 'Target', 'Laser Energy', 'True_Comp'], nc,
            zip(cal_filelist, targets, amps, target_comps), cal_results)
        _pls_cal_write_rmse(prefix + '_RMSEP_caltargets.csv',
                            'RMSEP Cal Targets (wt.%)', RMSEP_cal)
        ccam.RMSE(RMSECV, RMSEP, RMSEC, plot_title,
                  prefix + '_RMSE_plot_cal.png', RMSEP_cals=RMSEP_single_cals)
        ccam.RMSE(RMSECV, RMSEP, RMSEC, plot_title,
                  prefix + '_RMSE_plot_cal_good.png', RMSEP_good=RMSEP_cal_good)

    # plot RMSEs
    ccam.RMSE(RMSECV, RMSEP, RMSEC, plot_title, prefix + '_RMSE_plot.png')

    # Write output info to files
    def train_meta():
        """Return a fresh iterator over the training-set metadata columns."""
        return zip(names_train, spect_index_train, folds_train, comps_train)

    _pls_cal_write_table(prefix + '_Q_res.csv',
                         ["Sample", "Spectrum", "Fold", "True Comp"], nc,
                         train_meta(), Q_res)

    quartile_path = outpath + which_elem + '_' + comp_range + '_quartiles.csv'
    with open(quartile_path, 'w', newline='') as writefile:
        writer = csv.writer(writefile, delimiter=',')
        writer.writerow([which_elem])
        for label, pct in (('Min', 0), ('1st Quartile', 25), ('Median', 50),
                           ('3rd Quartile', 75), ('Max', 100)):
            writer.writerow([label, numpy.percentile(comps[:, compindex], pct)])

    _pls_cal_write_table(prefix + '_HotellingT2.csv',
                         ["Sample", "Spectrum", "Fold", "True Comp"], nc,
                         train_meta(), T2)

    _pls_cal_write_rmse(prefix + '_RMSECV.csv', 'RMSECV (wt.%)', RMSECV)
    _pls_cal_write_rmse(prefix + '_RMSEC.csv', 'RMSEC (wt.%)', RMSEC)
    _pls_cal_write_rmse(prefix + '_RMSEP.csv', 'RMSEP (wt.%)', RMSEP)

    predict_header = ['Sample', 'Spectrum', 'Fold', 'True_Comp']
    _pls_cal_write_table(prefix + '_cv_predict.csv', predict_header, nc,
                         train_meta(), train_predict_cv)
    _pls_cal_write_table(prefix + '_train_predict.csv', predict_header, nc,
                         train_meta(), trainset_results)
    _pls_cal_write_table(
        prefix + '_test_predict.csv', predict_header, nc,
        zip(names_test, spect_index_test, folds_test, comps_test),
        testset_results)
    _pls_cal_write_table(
        prefix + '_all_predict.csv', predict_header, nc,
        [(names[i], spect_index[i], folds[i], comps[i, compindex])
         for i in range(len(names))],
        results)
    _pls_cal_write_table(prefix + '_beta_coeffs.csv', ['wvl'], nc,
                         [(wvl[i],) for i in range(len(wvl))], beta)

    if not skscale:
        # X_mean / Y_mean only exist when the data were mean-centered.
        with open(prefix + '_meancenters.csv', 'w', newline='') as writefile:
            writer = csv.writer(writefile, delimiter=',')
            writer.writerow([which_elem + ' mean', Y_mean])
            for i in range(len(wvl)):
                writer.writerow([wvl[i], X_mean[i]])

    with open(prefix + '_inputinfo.csv', 'w', newline='') as writefile:
        writer = csv.writer(writefile, delimiter=',')
        writer.writerow(['Spectral database =', dbfile])
        writer.writerow(['Spectra Kept =', keepfile])
        writer.writerow(['Spectra Removed =', which_removed])
        writer.writerow(['Fold Definition =', foldfile])
        writer.writerow(['Test Fold =', testfold])
        writer.writerow(['Mask File =', maskfile])
        writer.writerow(['Algorithm =', plstype_string])
        writer.writerow(['# of components =', nc])
        writer.writerow(['Normalization Type =', normtype])
        writer.writerow(['Composition Min. =', mincomp])
        writer.writerow(['Composition Max. =', maxcomp])


def _pls_cal_write_rmse(path, label, values):
    """Write a two-column CSV: component count (from 1) and one value each."""
    with open(path, 'w', newline='') as writefile:
        writer = csv.writer(writefile, delimiter=',')
        writer.writerow(['NC', label])
        for i, value in enumerate(values):
            writer.writerow([i + 1, value])


def _pls_cal_write_table(path, header, nc, meta_rows, values):
    """Write rows of metadata columns followed by one column per component count."""
    with open(path, 'w', newline='') as writefile:
        writer = csv.writer(writefile, delimiter=',')
        writer.writerow(list(header) + list(range(1, nc + 1)))
        for meta, row_values in zip(meta_rows, values):
            writer.writerow(list(meta) + list(row_values))
midnorm = 1 highnorm = 3 low_cutoff = 6 high_cutoff = 15 data, wvl, filelist = ccam.read_ccs(searchdir) #pickle.dump(data, open( "ccamdata.pkl", "wb" )) #pickle.dump(wvl,open( "ccamwvl.pkl", "wb" )) #pickle.dump(filelist,open( "ccamfilelist.pkl", "wb" )) #pickle.load(open( r"C:\Users\rbanderson\Documents\MSL\ChemCam\Data Processing\Working\ccam\ccamdata.pkl", "rb" )) #pickle.load(open( r"C:\Users\rbanderson\Documents\MSL\ChemCam\Data Processing\Working\ccam\ccamwvl.pkl", "rb" )) #pickle.load(open( r"C:\Users\rbanderson\Documents\MSL\ChemCam\Data Processing\Working\ccam\ccamfilelist.pkl", "rb" )) targetlist, distslist, amplist = ccam.target_lookup(filelist, masterlist, name_subs) y_full = ccam.pls_unk(data_norm3, nc_full, coeff_file=coeff_file_full, means_file=means_file_full) y_low = ccam.pls_unk(data_norm3, nc_low, coeff_file=coeff_file_low, means_file=means_file_low) y_mid = ccam.pls_unk(data_norm1, nc_mid, coeff_file=coeff_file_mid, means_file=means_file_mid) y_high = ccam.pls_unk(data_norm3, nc_high,
def _pls_cal_rmse(predicted, actual):
    """Return the root-mean-square error between two equal-length arrays."""
    return numpy.sqrt(numpy.mean((predicted - actual)**2.0))


def _pls_cal_write_csv(path, rows, mode='w'):
    """Write every row in ``rows`` to ``path`` as comma-delimited CSV."""
    # Text mode with newline='' is what the csv module requires on Python 3;
    # it is also how the other CSV writers in this file open their outputs.
    with open(path, mode, newline='') as writefile:
        csv.writer(writefile, delimiter=',').writerows(rows)


def _pls_cal_table(header, id_columns, values, nc, n_rows):
    """Build rows ``header + [1..nc]`` followed by ids + per-component values."""
    rows = [list(header) + list(range(1, nc + 1))]
    for i in range(n_rows):
        rows.append([column[i] for column in id_columns] + list(values[i, :]))
    return rows


def _pls_cal_fit(plstype, n_components, X, Y):
    """Fit a PLS1 model and return ``(beta, predict)``.

    ``beta`` is the regression coefficient vector and ``predict(X_new)``
    returns predictions for mean-centered spectra (still mean-centered in Y
    as far as this function is concerned; the caller adds the Y mean back).
    """
    if plstype == 'mlpy':
        model = mlpy.pls.PLS(n_components)
        model.learn(X, Y)
        return model.beta(), model.pred

    model = PLSRegression(n_components=n_components)
    model.fit(X, Y)

    def predict(X_new):
        # Depending on the scikit-learn version predict() returns (n, 1) or
        # (n,); flatten so the result can be stored in one results column.
        return numpy.ravel(model.predict(X_new))

    # NOTE(review): coef_ is stored as the beta vector on the assumption that
    # it follows the "X_centered . coef" convention ccam.pls_unk expects;
    # confirm against the installed scikit-learn version (scale=True default).
    return numpy.ravel(model.coef_), predict


def pls_cal(dbfile,
            foldfile,
            maskfile,
            outpath,
            which_elem,
            testfold,
            nc,
            normtype=3,
            mincomp=0,
            maxcomp=100,
            plstype='mlpy',
            keepfile=None,
            removefile=None,
            cal_dir=None,
            masterlist_file=None,
            compfile=None,
            name_sub_file=None):
    """Train PLS1 models with 1..nc components and write the results to CSV.

    The spectra are read from ``dbfile``, filtered, masked, normalized and
    split into folds.  The fold equal to ``testfold`` is the test set; all
    other folds are used for leave-one-fold-out cross validation and for
    training the full model.  RMSECV, RMSEC, RMSEP, per-sample predictions,
    beta coefficients, mean-centering vectors and a summary of the inputs are
    written as CSV files, and an RMSE plot is produced.

    Parameters
    ----------
    dbfile : spectral database, passed to ``ccam.read_db``.
    foldfile : fold definition, passed to ``ccam.folds``.  Samples whose fold
        is 0 are appended to the "removed" file and dropped.
    maskfile : mask definition, passed to ``ccam.mask``.
    outpath : string prepended verbatim to every output file name (no path
        separator is inserted).
    which_elem : label of the composition column to model; it is matched
        against ``labels[2:]`` returned by ``ccam.read_db``.
    testfold : fold value held out as the test set.
    nc : largest number of PLS components; one model per 1..nc is trained.
    normtype : passed to ``ccam.normalize``.
    mincomp, maxcomp : composition limits passed to ``ccam.choose_spectra``;
        calibration targets outside the limits are zeroed before their RMSEP
        is computed.
    plstype : ``'mlpy'`` or ``'sklearn'``.
    keepfile, removefile : passed to ``ccam.choose_spectra``.
    cal_dir : if not None, read with ``ccam.read_ccs`` and used to compute
        calibration-target predictions and RMSEPs.
    masterlist_file, name_sub_file : passed to ``ccam.target_lookup``.
    compfile : passed to ``ccam.target_comp_lookup``.

    Raises
    ------
    ValueError
        If ``plstype`` is neither ``'mlpy'`` nor ``'sklearn'`` (previously
        this silently produced all-zero predictions).
    """
    if plstype not in ('mlpy', 'sklearn'):
        raise ValueError("plstype must be 'mlpy' or 'sklearn', got " +
                         repr(plstype))

    # Two naming schemes are used by the outputs; build each prefix once.
    run_tag = plstype + '_nc' + str(nc) + '_norm' + str(normtype)
    comp_range = str(mincomp) + '-' + str(maxcomp)
    out_prefix = outpath + which_elem + '_' + run_tag + '_' + comp_range
    cal_prefix = outpath + which_elem + '_' + comp_range + '_' + run_tag
    predict_header = ['Sample', 'Spectrum', 'Fold', 'True_Comp']

    print('Reading database')
    sys.stdout.flush()
    spectra, comps, spect_index, names, labels, wvl = ccam.read_db(
        dbfile, compcheck=True)
    oxides = labels[2:]
    compindex = numpy.where(oxides == which_elem)[0]

    print('Choosing spectra')
    which_removed = out_prefix + '_removed.csv'
    spectra, names, spect_index, comps = ccam.choose_spectra(
        spectra,
        spect_index,
        names,
        comps,
        compindex,
        mincomp=mincomp,
        maxcomp=maxcomp,
        keepfile=keepfile,
        removefile=removefile,
        which_removed=which_removed)

    print('Masking spectra')
    spectra, wvl = ccam.mask(spectra, wvl, maskfile)

    print('Normalizing spectra')
    spectra = ccam.normalize(spectra, wvl, normtype=normtype)

    print('Assigning Folds')
    folds = ccam.folds(foldfile, names)
    has_fold = (folds != 0)

    # append the samples not assigned to any fold to the "removed" file
    names_nofold = names[~has_fold]
    spect_index_nofold = spect_index[~has_fold]
    _pls_cal_write_csv(
        which_removed,
        [[names_nofold[i], spect_index_nofold[i], 'No Fold']
         for i in range(len(names_nofold))],
        mode='a')

    # remove spectra that are not assigned to any fold
    spectra = spectra[has_fold, :]
    spect_index = spect_index[has_fold]
    names = names[has_fold]
    comps = comps[has_fold, :]
    folds = folds[has_fold]

    print('Defining Training and Test Sets')
    in_train = (folds != testfold)
    in_test = (folds == testfold)
    spectra_train = spectra[in_train]
    spect_index_train = spect_index[in_train]
    names_train = names[in_train]
    comps_train = comps[in_train, compindex]
    folds_train = folds[in_train]
    folds_train_unique = numpy.unique(folds_train)

    spectra_test = spectra[in_test]
    spect_index_test = spect_index[in_test]
    names_test = names[in_test]
    comps_test = comps[in_test, compindex]
    folds_test = folds[in_test]

    print('Do Leave One Label Out (LOLO) cross validation with all folds '
          'but the test set')
    # arrays holding the cross validation predictions and RMSEs
    train_predict_cv = numpy.zeros((len(names_train), nc))
    RMSECV = numpy.zeros(nc)

    for fold in folds_train_unique:
        print('Holding out fold #' + str(fold))
        held_out = (folds_train == fold)
        kept_in = (folds_train != fold)
        # mean center the spectra left in, and apply that mean to those
        # left out
        X_cv_in, X_cv_in_mean = ccam.meancenter(spectra_train[kept_in, :])
        X_cv_out = ccam.meancenter(
            spectra_train[held_out, :], X_mean=X_cv_in_mean)[0]
        # mean center compositions left in
        Y_cv_in, Y_cv_in_mean = ccam.meancenter(comps_train[kept_in])

        # step through each number of components
        for j in range(1, nc + 1):
            print('Training PLS Model for ' + str(j) + ' components')
            # The model must use j components (the sklearn branch used to
            # train with nc components on every pass).
            predict = _pls_cal_fit(plstype, j, X_cv_in, Y_cv_in)[1]
            train_predict_cv[held_out, j - 1] = (
                predict(X_cv_out) + Y_cv_in_mean)

    # calculate RMSECV
    for k in range(nc):
        RMSECV[k] = _pls_cal_rmse(train_predict_cv[:, k], comps_train)

    # mean center full model
    X, X_mean = ccam.meancenter(spectra_train)
    X_test = ccam.meancenter(spectra_test, X_mean=X_mean)[0]
    Y, Y_mean = ccam.meancenter(comps_train)

    # arrays for results and RMSEs
    trainset_results = numpy.zeros((len(names_train), nc))
    testset_results = numpy.zeros((len(names_test), nc))
    RMSEP = numpy.zeros(nc)
    RMSEC = numpy.zeros(nc)
    beta = numpy.zeros((len(X_mean), nc))

    # now step through each # of components with the full model
    for j in range(1, nc + 1):
        print('Training full model for ' + str(j) + ' components')
        beta[:, j - 1], predict = _pls_cal_fit(plstype, j, X, Y)
        trainset_results[:, j - 1] = predict(X) + Y_mean
        testset_results[:, j - 1] = predict(X_test) + Y_mean
        RMSEC[j - 1] = _pls_cal_rmse(trainset_results[:, j - 1], comps_train)
        RMSEP[j - 1] = _pls_cal_rmse(testset_results[:, j - 1], comps_test)

    # if cal_dir is specified, read cal target data and calculate RMSEs
    if cal_dir is not None:
        cal_data, cal_wvl, cal_filelist = ccam.read_ccs(cal_dir)
        cal_data, cal_wvl = ccam.mask(cal_data, cal_wvl, maskfile)
        cal_data = ccam.normalize(cal_data, cal_wvl, normtype=normtype)

        # Other callers in this file unpack four values from target_lookup
        # (the fourth being the shot counts); slicing accepts either form.
        targets, _, amps = ccam.target_lookup(
            cal_filelist, masterlist_file, name_sub_file)[:3]
        target_comps = ccam.target_comp_lookup(targets, compfile, which_elem)

        # Targets whose composition is outside [mincomp, maxcomp] are zeroed
        # in both the reference values and the predictions.
        comps_copy = copy.copy(target_comps)
        out_of_range = (comps_copy < mincomp) | (comps_copy > maxcomp)
        comps_copy[out_of_range] = 0

        cal_names = [
            'KGAMEDS', 'MACUSANITE', 'NAU2HIS', 'NAU2LOS', 'NAU2MEDS',
            'NORITE', 'PICRITE', 'SHERGOTTITE'
        ]
        rmsep_by_target = [numpy.zeros(nc) for _ in cal_names]
        cal_results = numpy.zeros((len(targets), nc))
        for k in range(nc):
            cal_results[:, k] = ccam.pls_unk(
                cal_data,
                k + 1,
                beta=beta[:, k],
                X_mean=X_mean,
                Y_mean=Y_mean)
            cal_results[out_of_range, k] = 0
            for cal_name, rmsep in zip(cal_names, rmsep_by_target):
                is_target = (targets == cal_name)
                rmsep[k] = _pls_cal_rmse(cal_results[is_target, k],
                                         comps_copy[is_target])

        # NOTE(review): the "- 1" assumes comps_copy contains a zero entry
        # (at least one out-of-range target); kept as in the original.
        n_good_cal = len(numpy.unique(comps_copy)) - 1
        RMSEP_cal = sum(rmsep_by_target) / n_good_cal
        RMSEP_single_cals = rmsep_by_target + [RMSEP_cal]

        _pls_cal_write_csv(
            cal_prefix + '_caltargets_predict.csv',
            _pls_cal_table(['File', 'Target', 'Laser Energy', 'True_Comp'],
                           [cal_filelist, targets, amps, target_comps],
                           cal_results, nc, len(targets)))
        _pls_cal_write_csv(
            cal_prefix + '_RMSECP_caltargets.csv',
            [['NC', 'RMSECP Cal Targets (wt.%)']] +
            [[k + 1, RMSEP_cal[k]] for k in range(nc)])

        # NOTE(review): this uses ccam.plots.RMSE while the plot below uses
        # ccam_plots.ccam_plot_RMSE; both names are kept from the original.
        ccam.plots.RMSE(
            RMSECV,
            RMSEP,
            RMSEC,
            which_elem + ' RMSEs',
            cal_prefix + '_RMSE_plot_cal.png',
            RMSEP_cals=RMSEP_single_cals)

    # plot RMSEs
    ccam_plots.ccam_plot_RMSE(RMSECV, RMSEP, RMSEC, which_elem + 'RMSEs',
                              out_prefix + '_RMSE_plot.png')

    # write output info to files
    print(out_prefix + '_RMSECV.csv')
    for label, values in (('RMSECV', RMSECV), ('RMSEC', RMSEC),
                          ('RMSEP', RMSEP)):
        _pls_cal_write_csv(
            out_prefix + '_' + label + '.csv',
            [['NC', label + ' (wt.%)']] +
            [[k + 1, values[k]] for k in range(nc)])

    train_ids = [names_train, spect_index_train, folds_train, comps_train]
    test_ids = [names_test, spect_index_test, folds_test, comps_test]
    _pls_cal_write_csv(
        out_prefix + '_cv_predict.csv',
        _pls_cal_table(predict_header, train_ids, train_predict_cv, nc,
                       len(names_train)))
    _pls_cal_write_csv(
        out_prefix + '_train_predict.csv',
        _pls_cal_table(predict_header, train_ids, trainset_results, nc,
                       len(names_train)))
    _pls_cal_write_csv(
        out_prefix + '_test_predict.csv',
        _pls_cal_table(predict_header, test_ids, testset_results, nc,
                       len(names_test)))
    _pls_cal_write_csv(
        out_prefix + '_beta_coeffs.csv',
        _pls_cal_table(['wvl'], [wvl], beta, nc, len(wvl)))
    _pls_cal_write_csv(
        out_prefix + '_meancenters.csv',
        [[which_elem + ' mean', Y_mean]] +
        [[wvl[k], X_mean[k]] for k in range(len(wvl))])
    _pls_cal_write_csv(
        out_prefix + '_inputinfo.csv',
        [
            ['Spectral database =', dbfile],
            ['Spectra Kept =', keepfile],
            ['Spectra Removed =', which_removed],
            ['Fold Definition =', foldfile],
            # the original wrote maskfile here by mistake
            ['Test Fold =', testfold],
            ['Mask File =', maskfile],
            ['Algorithm =', plstype],
            ['# of components =', nc],
            ['Normalization Type =', normtype],
            ['Composition Min. =', mincomp],
            ['Composition Max. =', maxcomp],
        ])
def read_ccs(searchdir,
             skiprows=0,
             shots=False,
             masterlist=None,
             name_sub_file=None,
             singlefile=False):
    """Read CCS spectra files into arrays.

    Parameters
    ----------
    searchdir : directory handed to ``search_ccs``; when ``singlefile`` is
        set it is instead the path of the one file to read.
    skiprows : passed to ``ccam.read_csv``.
    shots : when true, return the individual shot columns of every file
        instead of the per-file last column.
    masterlist, name_sub_file : passed to ``ccam.target_lookup``; only used
        when ``shots`` is true, to obtain the number of shots per file.
    singlefile : when true, read only ``searchdir`` and print no progress.

    Returns
    -------
    ``(means, wvl, files)`` when ``shots`` is false, where ``means`` has one
    row per file taken from the last column of that file.
    ``(singleshots, wvl, files_singleshot, shotnums)`` when ``shots`` is
    true, where ``singleshots`` has one row per shot taken from columns
    ``1:-2`` of each file, and the other two arrays give the file label and
    the shot number within its file for every row.

    ``wvl`` is column 0 of the last file read; if no file is read it is an
    empty array (previously this case raised UnboundLocalError).
    """
    # number of spectral channels each file is expected to contain
    n_channels = 6144

    if singlefile:
        filelist = numpy.array([searchdir])
        files = [filelist[0][-40:]]
    else:
        filelist, files = search_ccs(searchdir)

    if shots:
        nshots = ccam.target_lookup(filelist, masterlist, name_sub_file)[3]
        nshots = numpy.array(nshots, dtype='int')
        # plain int so the sizes below never depend on numpy-scalar semantics
        total_shots = int(numpy.sum(nshots))

    if not singlefile:
        print('Reading ' + str(len(filelist)) + ' files...')

    if shots:
        singleshots = numpy.zeros([n_channels, total_shots], dtype='float64')
        # Size the label array from all file labels, not just the first one,
        # so longer labels are not silently truncated on assignment.
        files_singleshot = numpy.zeros(
            total_shots, dtype=numpy.asarray(files).dtype)
        shotnums = numpy.zeros([total_shots])
    else:
        means = numpy.zeros([len(filelist), n_channels], dtype='float64')

    wvl = numpy.array([], dtype='float')
    rowcount = 0
    for i in range(len(filelist)):
        if not singlefile and (i + 1) % 100 == 0:
            print('Reading file #' + str(i + 1))

        tempdata = ccam.read_csv(filelist[i], skiprows, labelrow=False)
        wvl = numpy.array(tempdata[:, 0], dtype='float')

        if shots:
            stop = rowcount + nshots[i]
            shotnums[rowcount:stop] = numpy.arange(nshots[i])
            files_singleshot[rowcount:stop] = files[i]
            # columns 1:-2 are the individual shots; the last two columns
            # are skipped (the last one is what the non-shot mode returns)
            singleshots[:, rowcount:stop] = tempdata[:, 1:-2]
            rowcount = stop
        else:
            means[i, :] = tempdata[:, -1]

    if shots:
        return numpy.transpose(singleshots), wvl, files_singleshot, shotnums
    return means, wvl, files