def target_lookup(filelist, masterlist_file, name_sub_file):
    """Look up target name, distance, laser energy and shot count per file.

    The spacecraft clock of each file is taken from characters ``[-36:-27]``
    of its name and matched against column 2 of the master list. Files whose
    clock is not present in the master list keep the zero/empty defaults
    instead of raising.

    Parameters
    ----------
    filelist : numpy array of str
        File names (or paths), one entry per spectrum; duplicates allowed.
    masterlist_file : str
        Master list, read with ``ccam.read_csv(masterlist_file, 1,
        labelrow=True)``. Columns 2, 5, 8, 11 and 17 are used as spacecraft
        clock, target, distance, number of shots and laser energy.
    name_sub_file : str
        Two-column table (old name, new name) of target-name substitutions
        applied to the result.

    Returns
    -------
    tuple
        ``(file_targets, file_dists, file_amps, file_nshots)``, each with one
        entry per element of ``filelist``.
    """
    data, labels = ccam.read_csv(masterlist_file, 1, labelrow=True)
    # 'str' is used instead of the Python-2-only alias 'string' so the
    # conversion is accepted by NumPy on both Python 2 and Python 3.
    targets = numpy.array(data[:, 5], dtype='str')
    sclocks = numpy.array(data[:, 2], dtype='str')
    dists = numpy.array(data[:, 8], dtype='str')
    amps = numpy.array(data[:, 17], dtype='str')
    nshots = numpy.array(data[:, 11])

    # NOTE: the string outputs inherit the item size of ``filelist``.
    file_sclocks = numpy.zeros_like(filelist)
    file_targets = numpy.zeros_like(filelist)
    file_amps = numpy.zeros_like(filelist)
    file_dists = numpy.zeros_like(filelist)
    file_nshots = numpy.zeros(len(filelist))

    for unique_file in numpy.unique(filelist):
        filelist_ind = filelist == unique_file
        file_sclocks[filelist_ind] = unique_file[-36:-27]
        sclock = file_sclocks[filelist_ind][0]

        # The previous test, `max(mask) is not False`, was always true
        # because a numpy.bool_ is never the `False` singleton, so an
        # unmatched file crashed on the `[0]` below.
        match = sclocks == sclock
        if numpy.any(match):
            file_targets[filelist_ind] = targets[match][0]
            file_dists[filelist_ind] = dists[match][0]
            file_amps[filelist_ind] = amps[match][0]
            file_nshots[filelist_ind] = nshots[match][0]

    # Apply the target-name substitutions.
    data = ccam.read_csv(name_sub_file, 0, labelrow=False)
    for old_name, new_name in zip(data[:, 0], data[:, 1]):
        file_targets[file_targets == old_name] = new_name

    return file_targets, file_dists, file_amps, file_nshots
def pls_unk(unk_spectra, nc, coeff_file=None, means_file=None, beta=None,
            X_mean=None, Y_mean=None):
    """Predict a composition for each unknown spectrum from PLS coefficients.

    Coefficients and mean-centering values can be passed directly (``beta``,
    ``X_mean``, ``Y_mean``) or loaded from files; values loaded from a file
    override the corresponding arguments.

    Parameters
    ----------
    unk_spectra : 2-D array
        One spectrum per row.
    nc : int
        Value matched against the integer column labels of ``coeff_file`` to
        select the coefficients.
    coeff_file : str, optional
        Coefficient table read with ``ccam.read_csv(..., labelrow=True)``;
        the first column and first label are skipped.
    means_file : str, optional
        Table whose second label is used as ``Y_mean`` and whose second
        column is used as ``X_mean``.
    beta, X_mean, Y_mean : optional
        Used as given when the corresponding file is not supplied.

    Returns
    -------
    numpy.ndarray
        One prediction per row of ``unk_spectra``.
    """
    if coeff_file is not None:
        coeff_table, coeff_labels = ccam.read_csv(coeff_file, 0, labelrow=True)
        component_ids = numpy.array(coeff_labels[1:], dtype='int')
        all_coeffs = numpy.array(coeff_table[:, 1:], dtype='float')
        beta = all_coeffs[numpy.where(component_ids == nc)]

    if means_file is not None:
        means_table, means_labels = ccam.read_csv(means_file, 0, labelrow=True)
        Y_mean = numpy.array(means_labels[1], dtype='float')
        X_mean = numpy.array(means_table[:, 1], dtype='float')

    centered = ccam.meancenter(unk_spectra, X_mean=X_mean)[0]

    n_spectra = len(unk_spectra[:, 0])
    predicts = numpy.zeros(n_spectra)
    for row in range(n_spectra):
        spectrum = centered[row, :]
        predicts[row] = numpy.dot(spectrum, beta[:].T) + Y_mean
    return predicts
def outlier_plots(filenames, norms, ncs, which_elem):
    """Plot Q residual against Hotelling T2 for each PLS sub-model.

    One plot is written per sub-model ('full', 'low', 'mid', 'high'). For
    each, row ``ncs[key] - 1`` of the Q-residual table is plotted against the
    same row of the T2 table, with axis limits from 0 to 1.1 times the row
    maximum.

    Parameters
    ----------
    filenames : dict
        Nested dict with entries ``filenames['Qres_file'][key]``,
        ``filenames['T2_file'][key]`` (input tables) and
        ``filenames['Q_T2_out'][key]`` (passed to the plotting routine) for
        each sub-model key.
    norms : dict
        Normalization setting per sub-model; used only in the plot label.
    ncs : dict
        Number of components per sub-model; selects the plotted row.
    which_elem : str
        Element name used in the plot title.
    """
    submodels = ['full', 'low', 'mid', 'high']

    def load_values(path):
        # The first four columns are not numeric data and are dropped.
        table, _labels = ccam.read_csv(path, 0, labelrow=True)
        return numpy.array(table[:, 4:], dtype='float')

    # Load every table before plotting so a missing input file is reported
    # before any output is produced.
    q_res = dict((key, load_values(filenames['Qres_file'][key]))
                 for key in submodels)
    t2_res = dict((key, load_values(filenames['T2_file'][key]))
                  for key in submodels)

    plot_title = ['Outlier check for ' + which_elem]
    for key in submodels:
        row = ncs[key] - 1
        # The 'low' plot previously used Q_res_low[-1] for its y data while
        # its y-limits used row ncs['low'] - 1; every sub-model now uses the
        # row selected by its own number of components.
        q_row = q_res[key][row]
        t2_row = t2_res[key][row]
        label = (key.capitalize() + ' Norm=' + str(norms[key]) +
                 ' NC=' + str(ncs[key]))
        ccam.plots.Plot1to1(t2_row, q_row, plot_title, label, 'r', 'o',
                            filenames['Q_T2_out'][key],
                            xminmax=[0, 1.1 * numpy.max(t2_row)],
                            yminmax=[0, 1.1 * numpy.max(q_row)],
                            ylabel='Q Residual', xlabel='Hotelling T2',
                            one_to_one=False)
def __init__(self, *args):
    """Build the main window, then load configuration and PLS settings.

    The UI is loaded from a hard-coded absolute ``.ui`` path, and
    ``pdl_tool_config.csv`` is read using a relative path. Each setting is
    the second column of the config row whose first column equals the
    setting name.

    Positional ``args`` are forwarded unchanged to
    ``QtGui.QMainWindow.__init__``.
    """
    # Direct call instead of apply(), which no longer exists in Python 3.
    QtGui.QMainWindow.__init__(self, *args)

    loader = QtUiTools.QUiLoader()
    ui_file = QtCore.QFile(
        r'C:\Users\rbanderson\Documents\MSL\ChemCam\DataProcessing\ccam_pdl_ui.ui'
    )
    ui_file.open(QtCore.QFile.ReadOnly)
    self.myWidget = loader.load(ui_file, self)
    ui_file.close()
    self.setCentralWidget(self.myWidget)
    self.myWidget.progressBar.reset()

    # Read config file
    config = ccam.read_csv('pdl_tool_config.csv', 0, labelrow=False,
                           skipsym='#')

    def setting(key):
        # First value in column 1 among the rows whose column 0 equals key.
        return config[config[:, 0] == key, 1][0]

    self.searchdir = setting('searchdir')
    self.myWidget.lineEdit.setText(self.searchdir)
    self.masterlist = setting('masterlist')
    self.name_sub_file = setting('name_sub_file')
    self.maskfile = setting('maskfile')
    self.meancenters_file = setting('meancenters_file')
    self.settings_coeffs_file = setting('settings_coeffs_file')
    self.blend_array_dir = setting('blend_array_dir')
    self.elems = [
        'SiO2', 'TiO2', 'Al2O3', 'FeOT', 'MgO', 'CaO', 'Na2O', 'K2O'
    ]

    # Read PLS settings files. In both tables the first column and first
    # label are dropped.
    self.meancenters, self.meancenter_labels = ccam.read_csv(
        self.meancenters_file, 0, labelrow=True)
    self.meancenter_labels = self.meancenter_labels[1:]
    # Row 0 is stored as ymeancenters; the remaining rows as meancenters.
    self.ymeancenters = numpy.array(self.meancenters[0, 1:], dtype='float')
    self.meancenters = numpy.array(self.meancenters[1:, 1:], dtype='float')

    self.pls_settings, self.pls_settings_labels = ccam.read_csv(
        self.settings_coeffs_file, 0, labelrow=True)
    self.pls_settings_labels = self.pls_settings_labels[1:]
    # Row 0 is stored as pls_norms, row 1 as pls_ncs, the rest as pls_coeffs.
    self.pls_norms = numpy.array(self.pls_settings[0, 1:], dtype='int')
    self.pls_ncs = numpy.array(self.pls_settings[1, 1:], dtype='int')
    self.pls_coeffs = numpy.array(self.pls_settings[2:, 1:], dtype='float')

    # Choose search directory
    self.myWidget.browse_button.clicked.connect(self.choosedir)
    # Calculate compositions
    self.myWidget.calc_button.clicked.connect(self.calc_comp)
def pls_blend(self, comps_all):
    """Blend the sub-model predictions element by element.

    For every element in ``self.elems`` the blend settings are rebuilt from
    ``<blend_array_dir>\\<element>_blend_array.csv`` and handed, together
    with that element's column from each entry of ``comps_all``, to
    ``ccam.submodels_blend``.

    Parameters
    ----------
    comps_all : sequence of 2-D arrays
        Predictions of each sub-model; column ``i`` belongs to
        ``self.elems[i]``.

    Returns
    -------
    numpy.ndarray
        Array shaped like ``comps_all[0]`` holding the blended predictions.
    """
    blended = numpy.zeros_like(comps_all[0])
    for col, elem in enumerate(self.elems):
        # Reconstruct the blend input settings from the blend array file.
        blend_path = self.blend_array_dir + '\\' + elem + '_blend_array.csv'
        raw_table, blend_labels = ccam.read_csv(blend_path, 0, labelrow=True)
        as_float = numpy.array(raw_table, dtype='float')
        blendarray = numpy.array(as_float, dtype='int')

        predict = [model_comps[:, col] for model_comps in comps_all]

        # Each row holds: range (2 values), in-range flag column, reference
        # column, then the columns describing what to blend.
        rows = range(len(blendarray[:, 0]))
        ranges = [blendarray[r, 0:2].tolist() for r in rows]
        inrange = [blendarray[r, 2].tolist() for r in rows]
        refpredict = [blendarray[r, 3].tolist() for r in rows]
        toblend = [blendarray[r, 4:].tolist() for r in rows]

        blended[:, col] = ccam.submodels_blend(predict, ranges, inrange,
                                               refpredict, toblend)
    return blended
def target_comp_lookup(targetlist, compfile, which_elem):
    """Return the composition of one element for each target name.

    Parameters
    ----------
    targetlist : sequence of str
        Target names to look up.
    compfile : str
        Composition table with a label row; the column labelled ``'Name'``
        holds target names and the column labelled ``which_elem`` holds the
        values.
    which_elem : str
        Label of the composition column to return.

    Returns
    -------
    numpy.ndarray
        One value per entry of ``targetlist``; ``nan`` (after printing a
        message) where the target is not in the table.
    """
    table, header = ccam.read_csv(compfile, 0, labelrow=True)

    elem_cols = numpy.where(header == which_elem)[0]
    comps = numpy.array(table[:, elem_cols], dtype='float32')

    name_cols = numpy.where(header == 'Name')[0]
    # Keep the first occurrence of every target name.
    comp_targets, first_seen = numpy.unique(table[:, name_cols],
                                            return_index=True)
    comps = comps[first_seen]

    complist = numpy.zeros(len(targetlist))
    for i, target in enumerate(targetlist):
        matchtarget = (comp_targets == target)
        n_matches = sum(matchtarget)
        if n_matches == 0:
            print('No match found for ' + target)
            complist[i] = numpy.nan
        elif n_matches == 1:
            complist[i] = comps[matchtarget]
    return complist
def target_lookup(filelist, masterlist_file, name_sub_file):
    """Look up target name, distance, laser energy and shot count per file.

    The master list is read with pandas (``header=1``) and matched to files
    through the spacecraft clock, taken from characters ``[-36:-27]`` of each
    file name. Files without a match keep the zero/empty defaults.

    Parameters
    ----------
    filelist : numpy array of str
        File names (or paths); duplicates allowed.
    masterlist_file : str
        CSV with columns 'Target', 'Spacecraft Clock', 'Distance (m)',
        'Laser Energy' and 'Nbr of Shots'.
    name_sub_file : str
        Two-column table (old name, new name) of target-name substitutions.

    Returns
    -------
    tuple
        ``(file_targets, file_dists, file_amps, file_nshots)``.
    """
    master = pandas.read_csv(masterlist_file, header=1)
    targets = numpy.array(master['Target'])
    sclocks = numpy.array(master['Spacecraft Clock'])
    dists = numpy.array(master['Distance (m)'])
    amps = numpy.array(master['Laser Energy'])
    nshots = numpy.array(master['Nbr of Shots'])

    file_sclocks = numpy.zeros_like(filelist)
    file_targets = numpy.zeros_like(filelist)
    file_amps = numpy.zeros_like(filelist)
    file_dists = numpy.zeros_like(filelist)
    file_nshots = numpy.zeros(len(filelist))

    for unique_file in numpy.unique(filelist):
        uses_file = filelist == unique_file
        file_sclocks[uses_file] = unique_file[-36:-27]
        sclock = file_sclocks[uses_file][0]

        # Rows of the master list carrying this spacecraft clock.
        in_master = sclocks == sclock
        if numpy.max(in_master):
            file_targets[uses_file] = targets[in_master][0]
            file_dists[uses_file] = dists[in_master][0]
            file_amps[uses_file] = amps[in_master][0]
            file_nshots[uses_file] = nshots[in_master][0]

    # Apply the target-name substitutions.
    subs, sub_labels = ccam.read_csv(name_sub_file, 0, labelrow=False)
    for old_name, new_name in zip(subs[:, 0], subs[:, 1]):
        file_targets[file_targets == old_name] = new_name

    return file_targets, file_dists, file_amps, file_nshots
def pls_unk_load(unk_spectra, nc, modelfile, means_file=None):
    """Predict compositions for unknown spectra with a pickled model.

    Parameters
    ----------
    unk_spectra : array
        Spectra handed to the model's ``predict`` method (after optional
        mean-centering).
    nc : int
        1-based index into the pickled sequence; entry ``[nc - 1][0]`` is
        used as the model.
    modelfile : str
        Path of the pickle file holding the models.
    means_file : str, optional
        Table whose second label is used as ``Y_mean`` and whose second
        column is used as ``X_mean``. When omitted the spectra are used
        uncentered and nothing is added to the prediction.

    Returns
    -------
    numpy.ndarray
        ``model.predict(...) + Y_mean`` with length-1 axes squeezed out.
    """
    # Defaults for the "no means file" case. The original `else` branch
    # evaluated `X_mean is None` (a comparison, not an assignment), which
    # raised UnboundLocalError whenever means_file was omitted.
    X_mean = None
    Y_mean = 0
    if means_file is not None:
        data, temp = ccam.read_csv(means_file, 0, labelrow=True)
        Y_mean = numpy.array(temp[1], dtype='float')
        X_mean = numpy.array(data[:, 1], dtype='float')

    # Load the model from the appropriate pkl file.
    # NOTE(security): pickle.load executes arbitrary code from the file; only
    # use model files from a trusted source.
    with open(modelfile, 'rb') as picklefile:
        model = pickle.load(picklefile)[nc - 1][0]

    if X_mean is not None:
        unk_spectra_centered = ccam.meancenter(unk_spectra, X_mean=X_mean)[0]
    else:
        unk_spectra_centered = unk_spectra

    predicts = numpy.squeeze(model.predict(unk_spectra_centered) + Y_mean)
    return predicts
def read_db(dbfile, n_elems=9, compcheck=True):
    """Read a spectral database table into separate arrays.

    Column 0 holds names, column 1 an integer spectrum index, the next
    ``n_elems`` columns compositions and the remaining columns the spectra.
    The labels of the spectral columns are converted to floats and returned
    as ``wvl``.

    Parameters
    ----------
    dbfile : str
        Table read with ``ccam.read_csv(dbfile, 0, labelrow=True)``.
    n_elems : int, optional
        Number of composition columns.
    compcheck : bool, optional
        When true, rows whose compositions sum to zero are dropped.

    Returns
    -------
    tuple
        ``(spectra, comps, spect_index, names, labels, wvl)`` where
        ``labels`` holds only the non-spectral column labels.
    """
    data, labels = ccam.read_csv(dbfile, 0, labelrow=True)
    first_spec_col = 2 + n_elems

    names = numpy.array(data[:, 0], dtype='str')
    spect_index = numpy.array(data[:, 1], dtype='int')
    comps = numpy.array(data[:, 2:first_spec_col], dtype='float32')
    n_cols = len(data[0, :])
    spectra = numpy.array(data[:, first_spec_col:n_cols], dtype='float64')

    wvl = numpy.array(labels[first_spec_col:], dtype='float32')
    labels = labels[0:first_spec_col]

    if not compcheck:
        return spectra, comps, spect_index, names, labels, wvl

    # Drop entries whose compositions sum to zero.
    has_comp = (numpy.sum(comps, axis=1) != 0)
    return (spectra[has_comp], comps[has_comp], spect_index[has_comp],
            names[has_comp], labels, wvl)
def read_single_ccs(filename, skiprows=0, shots=False, masterlist=None,
                    name_sub_file=None):
    """Read one CCS spectrum file.

    Column 0 of the file is used as the wavelength axis, the last column as
    the mean spectrum and columns ``1:-2`` as the individual shots.

    Parameters
    ----------
    filename : str
        Path of the CCS file.
    skiprows : int, optional
        Passed as the second argument to ``ccam.read_csv``.
    shots : bool, optional
        ``True`` returns the single-shot spectra; any other value returns the
        mean spectrum.
    masterlist, name_sub_file : str, optional
        Passed to ``ccam.target_lookup`` to obtain the number of shots; only
        used when ``shots`` is ``True``.

    Returns
    -------
    tuple
        ``(singleshots, wvl, files_singleshot, shotnums)`` when ``shots`` is
        ``True`` (one row per shot), otherwise ``(means, wvl, filetrim)``
        where ``means`` is the last column as returned by ``ccam.read_csv``
        and ``filetrim`` is the last 40 characters of ``filename``.
    """
    # Last 40 characters, as the multi-file readers do. The original took
    # the single character filename[-40].
    filetrim = filename[-40:]
    single_shots = shots is True

    if single_shots:
        # target_lookup works on an array of file names, so the single path
        # is wrapped and the one resulting shot count unwrapped again.
        file_targets, file_dists, file_amps, nshots = ccam.target_lookup(
            numpy.array([filename]), masterlist, name_sub_file)
        nshots = int(numpy.array(nshots, dtype='int')[0])

    tempdata = ccam.read_csv(filename, skiprows, labelrow=False)
    wvl = numpy.array(tempdata[:, 0], dtype='float')

    if not single_shots:
        means = tempdata[:, -1]
        return means, wvl, filetrim

    # 6144 rows are expected in the file (hard-coded by the original code).
    singleshots = numpy.zeros([6144, nshots], dtype='float64')
    singleshots[:, :] = tempdata[:, 1:-2]
    # Full trimmed name for every shot; the buffer was previously sized from
    # filetrim[0], i.e. a single character.
    files_singleshot = numpy.array([filetrim] * nshots)
    shotnums = numpy.arange(nshots, dtype='float')
    return numpy.transpose(singleshots), wvl, files_singleshot, shotnums
def read_ccs(searchdir, skiprows=15, shots=False, masterlist=None,
             name_sub_file=None):
    """Recursively find and read all CCS spectrum files under a directory.

    Files matching ``*CCS*csv`` are collected. For each spacecraft clock
    (characters ``[-36:-27]`` of the path) only the highest version (character
    ``[-5:-4]``) is kept, and files with identical trimmed names (last 40
    characters) are read once. In each file, column 0 is used as wavelength,
    the last column as the mean spectrum and columns ``1:-2`` as the single
    shots.

    Parameters
    ----------
    searchdir : str
        Directory to search.
    skiprows : int, optional
        Passed as the second argument to ``ccam.read_csv``.
    shots : bool, optional
        ``True`` returns single-shot spectra; any other value returns means.
    masterlist, name_sub_file : str, optional
        Passed to ``ccam.target_lookup`` to obtain the number of shots per
        file; only used when ``shots`` is ``True``.

    Returns
    -------
    tuple
        ``(singleshots, wvl, files_singleshot, shotnums)`` when ``shots`` is
        ``True`` (one row per shot), otherwise ``(means, wvl, files)`` with
        one row of ``means`` per file. ``wvl`` comes from the last file read
        and is an empty array when no file was found.
    """
    searchstring = '*CCS*csv'

    # Recursively search for CCS files in the specified directory.
    filelist = []
    for root, dirnames, filenames in os.walk(searchdir):
        for filename in fnmatch.filter(filenames, searchstring):
            filelist.append(os.path.join(root, filename))

    # Remove duplicates: keep only the highest version of each spacecraft
    # clock (one dictionary pass instead of an all-pairs comparison).
    sclocks = [path[-36:-27] for path in filelist]
    versions = [int(path[-5:-4]) for path in filelist]
    newest = {}
    for sclock, version in zip(sclocks, versions):
        newest[sclock] = max(version, newest.get(sclock, version))
    keep = numpy.array(
        [version == newest[sclock]
         for sclock, version in zip(sclocks, versions)],
        dtype='bool')
    filelist = numpy.array(filelist)[keep]

    # Collapse files that share the same trimmed name.
    files = numpy.array([path[-40:] for path in filelist])
    files, unique_index = numpy.unique(files, return_index=True)
    filelist = filelist[unique_index]

    single_shots = shots is True
    if single_shots:
        file_targets, file_dists, file_amps, nshots = ccam.target_lookup(
            filelist, masterlist, name_sub_file)
        nshots = numpy.array(nshots, dtype='int')
        sum_shots = numpy.sum(nshots)

    print('Reading ' + str(len(filelist)) + ' files...')

    # 6144 values per spectrum are expected (hard-coded by the original
    # code).
    if single_shots:
        singleshots = numpy.zeros([6144, sum_shots], dtype='float64')
        # Sized from the dtype of `files` rather than from files[0], so an
        # empty search does not raise and longer names are not truncated.
        files_singleshot = numpy.zeros(sum_shots, dtype=files.dtype)
        shotnums = numpy.zeros([sum_shots])
    else:
        means = numpy.zeros([len(filelist), 6144], dtype='float64')

    # Defined up front so that an empty search still returns a value.
    wvl = numpy.array([], dtype='float')
    rowcount = 0
    for i in range(len(filelist)):
        if (i + 1) % 100 == 0:
            print('Reading file #' + str(i + 1))
        tempdata, templabels = ccam.read_csv(filelist[i], skiprows,
                                             labelrow=False)
        wvl = numpy.array(tempdata[:, 0], dtype='float')
        if single_shots:
            stop = rowcount + nshots[i]
            shotnums[rowcount:stop] = range(nshots[i])
            files_singleshot[rowcount:stop] = files[i]
            singleshots[:, rowcount:stop] = tempdata[:, 1:-2]
            rowcount = stop
        else:
            means[i, :] = tempdata[:, -1]

    if single_shots:
        return numpy.transpose(singleshots), wvl, files_singleshot, shotnums
    return means, wvl, files
def read_ccs(searchdir, skiprows=0, shots=False, masterlist=None,
             name_sub_file=None, singlefile=False):
    """Read CCS spectrum files from a directory, or one file directly.

    In each file, column 0 is used as wavelength, the last column as the
    mean spectrum and columns ``1:-2`` as the single shots.

    Parameters
    ----------
    searchdir : str
        Directory handed to ``search_ccs``, or the path of the single file
        to read when ``singlefile`` is ``True``.
    skiprows : int, optional
        Passed as the second argument to ``ccam.read_csv``.
    shots : bool, optional
        ``True`` returns single-shot spectra; any other value returns means.
    masterlist, name_sub_file : str, optional
        Passed to ``ccam.target_lookup`` to obtain the number of shots per
        file; only used when ``shots`` is ``True``.
    singlefile : bool, optional
        ``True`` treats ``searchdir`` as one file path and suppresses the
        progress messages.

    Returns
    -------
    tuple
        ``(singleshots, wvl, files_singleshot, shotnums)`` when ``shots`` is
        ``True`` (one row per shot), otherwise ``(means, wvl, files)`` with
        one row of ``means`` per file. ``wvl`` comes from the last file read
        and is an empty array when no file was read.
    """
    if singlefile is True:
        filelist = numpy.array([searchdir])
        files = [filelist[0][-40:]]
    else:
        filelist, files = search_ccs(searchdir)
    verbose = singlefile is not True

    single_shots = shots is True
    if single_shots:
        file_targets, file_dists, file_amps, nshots = ccam.target_lookup(
            filelist, masterlist, name_sub_file)
        nshots = numpy.array(nshots, dtype='int')
        sum_shots = numpy.sum(nshots)

    if verbose:
        # Single-argument print() behaves identically on Python 2 and 3; the
        # former print statements were a SyntaxError on Python 3.
        print('Reading ' + str(len(filelist)) + ' files...')

    # 6144 values per spectrum are expected (hard-coded by the original
    # code).
    if single_shots:
        singleshots = numpy.zeros([6144, sum_shots], dtype='float64')
        # Guard files[0] so that an empty file list does not raise.
        name_template = [files[0]] * sum_shots if len(files) else []
        files_singleshot = numpy.zeros_like(name_template)
        shotnums = numpy.zeros([sum_shots])
    else:
        means = numpy.zeros([len(filelist), 6144], dtype='float64')

    # Defined up front so that an empty file list still returns a value.
    wvl = numpy.array([], dtype='float')
    rowcount = 0
    for i in range(len(filelist)):
        if verbose and (i + 1) % 100 == 0:
            print('Reading file #' + str(i + 1))
        tempdata = ccam.read_csv(filelist[i], skiprows, labelrow=False)
        wvl = numpy.array(tempdata[:, 0], dtype='float')
        if single_shots:
            stop = rowcount + nshots[i]
            shotnums[rowcount:stop] = range(nshots[i])
            files_singleshot[rowcount:stop] = files[i]
            singleshots[:, rowcount:stop] = tempdata[:, 1:-2]
            rowcount = stop
        else:
            means[i, :] = tempdata[:, -1]

    if single_shots:
        return numpy.transpose(singleshots), wvl, files_singleshot, shotnums
    return means, wvl, files