def test_undefined():
    """Check that unrecognised settings leave the transform unset.

    Two cases are exercised:

    * an unknown ``method`` name, after which ``ct_obj`` is expected to be
      ``None``;
    * the ``'LASSO DS'`` method with ``reg='foo'`` (presumably an unsupported
      value), after which ``proj_to_B`` is expected to still be ``None`` once
      ``derive_transform`` has run.
    """
    params = {'method': 'foo'}
    ct = cal_tran.cal_tran(params)
    # Identity test: ``== None`` would defer to any custom/elementwise
    # ``__eq__`` on the object, which is not what this check is about.
    assert ct.ct_obj is None

    params = {'method': 'LASSO DS', 'reg': 'foo'}
    ct = cal_tran.cal_tran(params)
    ct.derive_transform(data1['wvl'], data2['wvl'])
    # Same reasoning; an array-valued ``proj_to_B`` compared with ``==``
    # would produce an elementwise result rather than a single bool.
    assert ct.ct_obj.proj_to_B is None
def run(self):
    """Derive a calibration transfer transform from the selected data sets.

    The data set keys, match columns and method name are read from the GUI
    widgets. Data sets B and C are checked for the same number of spectral
    channels and the same final wavelength label (only the last label is
    compared). A and B are then passed through
    ``caltran_prepare_data.prepare_data`` and the transform is derived from
    the results. This method neither applies nor stores the derived
    transform.

    Raises:
        AssertionError: if B and C fail either wavelength check.
    """
    key_a = self.chooseDataA.currentText()
    key_b = self.chooseDataB.currentText()
    key_c = self.chooseDatatoTransform.currentText()
    match_col_a = self.chooseDataAMatch.currentText()
    match_col_b = self.chooseDataBMatch.currentText()

    # Look up the underlying data frames for the three selections.
    frame_a = self.data[key_a].df
    frame_b = self.data[key_b].df
    frame_c = self.data[key_c].df

    assert len(frame_b['wvl'].columns) == len(frame_c['wvl'].columns), (
        'Data sets B and C have different numbers of spectral channels!'
    )
    assert (
        frame_b['wvl'].columns.values[-1] == frame_c['wvl'].columns.values[-1]
    ), (
        "Data set B and C wavelengths are not identical. Check rounding and/or resample one data set onto the other's wavelengths"
    )

    prepared_a, prepared_b = caltran_prepare_data.prepare_data(
        frame_a, frame_b, match_col_a, match_col_b)

    method = self.chooseMethod.currentText()
    params = self.alg[method].run()
    transform = cal_tran.cal_tran(method, params)
    transform.derive_transform(prepared_a['wvl'], prepared_b['wvl'])
def run(self):
    """Derive a calibration transfer transform and apply it to data set C.

    The data set keys, match columns and method name are read from the GUI
    widgets. Data sets B and C are checked for the same number of spectral
    channels and the same final wavelength label (only the last label is
    compared). The transform is derived from the output of
    ``caltran_utils.prepare_data`` for A and B, applied to C's ``'wvl'``
    columns, and the result is written back over C's ``'wvl'`` columns.

    Raises:
        AssertionError: if B and C fail either wavelength check.
    """
    key_a = self.chooseDataA.currentText()
    key_b = self.chooseDataB.currentText()
    key_c = self.chooseDatatoTransform.currentText()
    match_col_a = self.chooseDataAMatch.currentText()
    match_col_b = self.chooseDataBMatch.currentText()

    # Look up the underlying data frames for the three selections.
    frame_a = self.data[key_a].df
    frame_b = self.data[key_b].df
    frame_c = self.data[key_c].df

    assert len(frame_b['wvl'].columns) == len(frame_c['wvl'].columns), (
        'Data sets B and C have different numbers of spectral channels!'
    )
    assert (
        frame_b['wvl'].columns.values[-1] == frame_c['wvl'].columns.values[-1]
    ), (
        "Data set B and C wavelengths are not identical. Check rounding and/or resample one data set onto the other's wavelengths"
    )

    prepared_a, prepared_b = caltran_utils.prepare_data(
        frame_a, frame_b, match_col_a, match_col_b)

    # The method name travels inside the parameter dict.
    method = self.chooseMethod.currentText()
    params = self.alg[method].run()
    params['method'] = method
    transform = cal_tran.cal_tran(params)

    derive_message = ('Deriving transform from ' + key_a + ' to ' + key_b
                      + ' using ' + method)
    print(derive_message)
    transform.derive_transform(prepared_a['wvl'], prepared_b['wvl'])

    print('Applying transform to ' + key_c)
    transformed_c = transform.apply_transform(frame_c['wvl'])
    self.data[key_c].df['wvl'] = transformed_c
def cal_tran_helper(data1, data2, params, expected, single_spect=False):
    """Derive a transform from data1 to data2 and check one output channel.

    Args:
        data1: data set whose ``'wvl'`` block is transformed.
        data2: data set whose ``'wvl'`` block is the transform target.
        params: parameter dict handed to ``cal_tran.cal_tran``.
        expected: expected value(s) at channel index 4 of the result.
        single_spect: when true, only the first row of ``data1['wvl']`` is
            transformed instead of the whole block.

    Raises:
        AssertionError: if channel 4 of the result does not match
            ``expected`` to numpy's default "almost equal" tolerance.
    """
    ct = cal_tran.cal_tran(params)
    ct.derive_transform(data1['wvl'], data2['wvl'])

    spectra = data1['wvl'].iloc[0, :] if single_spect else data1['wvl']
    result = ct.apply_transform(spectra)

    # A multi-dimensional result is compared column-wise, a 1-D one by element.
    is_multi_dim = len(result.shape) > 1
    values = np.array(result, dtype=float)
    channel = values[:, 4] if is_multi_dim else values[4]
    np.testing.assert_array_almost_equal(channel, expected)
def run(self):
    """Derive a calibration transfer transform, optionally save it, apply it.

    The data set keys, match columns and method name are read from the GUI
    widgets. The steps are:

    1. Check that data sets B and C have the same number of spectral
       channels and the same final wavelength label.
    2. Prepare A and B with ``caltran_utils.prepare_data``.
    3. If the "save inputs" box is checked, write the prepared data to
       ``<key>_caltran_averages.csv`` under ``self.outpath``.
    4. Derive the transform from the prepared A and B spectra.
    5. If the "save transform" box is checked, save the transform under
       ``self.outpath``.
    6. Apply the transform to C's ``'wvl'`` columns and write the result
       back over them.

    Raises:
        AssertionError: if B and C fail either wavelength check.
    """
    datakeyA = self.chooseDataA.currentText()
    datakeyB = self.chooseDataB.currentText()
    datakeyC = self.chooseDatatoTransform.currentText()
    dataAmatchcol = self.chooseDataAMatch.currentText()
    dataBmatchcol = self.chooseDataBMatch.currentText()

    # Get the data sets.
    A = self.data[datakeyA].df
    B = self.data[datakeyB].df
    C = self.data[datakeyC].df

    assert len(B['wvl'].columns) == len(C['wvl'].columns), (
        'Data sets B and C have different numbers of spectral channels!'
    )
    # Only the last wavelength label is compared here, not every channel.
    assert B['wvl'].columns.values[-1] == C['wvl'].columns.values[-1], (
        "Data set B and C wavelengths are not identical. Check rounding and/or resample one data set onto the other's wavelengths"
    )

    A_mean, B_mean = caltran_utils.prepare_data(A, B, dataAmatchcol, dataBmatchcol)

    if self.save_inputs_checkbox.isChecked():
        outfileA = datakeyA + '_caltran_averages.csv'
        outfileB = datakeyB + '_caltran_averages.csv'
        # Save the prepared data that actually feeds the transform (the
        # files are named "averages"), not the raw A and B frames.
        A_mean.to_csv(self.outpath + '//' + outfileA)
        B_mean.to_csv(self.outpath + '//' + outfileB)

    # The method name travels inside the parameter dict.
    method = self.chooseMethod.currentText()
    params = self.alg[method].run()
    params['method'] = method
    ct_obj = cal_tran.cal_tran(params)

    print('Deriving transform from ' + datakeyA + ' to ' + datakeyB + ' using ' + method)
    ct_obj.derive_transform(A_mean['wvl'], B_mean['wvl'])

    if self.save_transform_checkbox.isChecked():
        transform_filename = datakeyA + '_to_' + datakeyB + '_caltran_' + method + '.csv'
        ct_obj.save_transform(self.outpath + '//' + transform_filename,
                              A_mean['wvl'].columns.values)

    print('Applying transform to ' + datakeyC)
    C_transform = ct_obj.apply_transform(C['wvl'])
    self.data[datakeyC].df['wvl'] = C_transform
def run(self):
    """Cross-validate the selected calibration transfer methods.

    A parameter grid is built from every checked method, always starting
    with the ``'None'`` method. For each parameter set, each unique value of
    the A match column is held out in turn: the transform is derived from
    the remaining A/B spectra, applied to the held-out A spectrum, and
    scored against the held-out B spectrum with ``mismatch_rmse``.

    Per-spectrum RMSEs and their mean are collected in a results table whose
    columns sit under a top-level ``'cv'`` label. The table is registered in
    ``self.data`` under a unique "Caltran CV Results" key. When the "keep
    spectra" box is checked, the transformed held-out spectra for each
    parameter set are also registered as a new data set.
    """
    datakeyA = self.chooseDataA.currentText()
    datakeyB = self.chooseDataB.currentText()
    dataAmatchcol = self.chooseDataAMatch.currentText()
    dataBmatchcol = self.chooseDataBMatch.currentText()

    # (checkbox, key into self.alg) pairs, in evaluation order. A key of
    # None marks the Ratio method, which is added as one fixed entry
    # instead of being expanded from an algorithm widget.
    method_choices = [
        (self.PDScheckbox, 'PDS - Piecewise DS'),
        (self.PDSPLScheckBox, 'PDS-PLS - PDS using Partial Least Squares'),
        (self.DScheckbox, 'DS - Direct Standardization'),
        (self.LASSODScheckbox, 'LASSO DS'),
        (self.Ratiocheckbox, None),
        (self.SparseDScheckBox, 'Sparse Low Rank DS'),
        (self.RidgeDScheckBox, 'Ridge DS'),
        (self.CCAcheckBox, 'CCA - Canonical Correlation Analysis'),
        (self.NewCCAcheckBox, 'New CCA'),
        (self.ForwardBackwardcheckBox, 'Forward Backward DS'),
        (self.IPDDScheckBox, 'Incremental Proximal Descent DS'),
    ]

    paramgrid = [{'method': 'None'}]
    for checkbox, alg_key in method_choices:
        if not checkbox.isChecked():
            continue
        if alg_key is None:
            paramgrid.append({'method': 'Ratio'})
        else:
            paramgrid.extend(ParameterGrid(self.alg[alg_key][0].run()))

    # Get the data sets.
    A = self.data[datakeyA].df
    B = self.data[datakeyB].df
    A_mean, B_mean = caltran_utils.prepare_data(A, B, dataAmatchcol, dataBmatchcol)

    # Prepare for cross validation.
    a_match = A_mean[('meta', dataAmatchcol)]
    b_match = B_mean[('meta', dataBmatchcol)]
    uniquevals = np.unique(a_match)
    # Query the checkbox once so the whole run uses one consistent setting.
    keep_spectra = self.keep_spectra_checkBox.isChecked()
    cv_results = pd.DataFrame()

    # Step through all the different parameter permutations.
    for ind, params in enumerate(paramgrid):
        print(params)
        transformed_datakey = datakeyA + '-' + str(params)

        # Store the parameters in the results table.
        for key, value in params.items():
            cv_results.loc[ind, key] = value

        # Create a caltran object using the current parameters.
        ct_obj = cal_tran.cal_tran(params)

        if keep_spectra:
            # Zeroed copy of A that is filled in as spectra are held out.
            A_mean_transformed = copy.deepcopy(A_mean)
            A_mean_transformed['wvl'] = A_mean_transformed['wvl'] * 0

        rmses = []
        # Hold out each unique spectrum in turn.
        for val in uniquevals:
            print(val)
            # Validation data is the held-out spectrum; training data is
            # everything else. The same split is made for both data sets.
            val_data_A = np.squeeze(
                np.array(A_mean[a_match == val]['wvl'], dtype='float'))
            train_data_A = np.squeeze(
                np.array(A_mean[a_match != val]['wvl'], dtype='float'))
            val_data_B = np.squeeze(
                np.array(B_mean[b_match == val]['wvl'], dtype='float'))
            train_data_B = np.squeeze(
                np.array(B_mean[b_match != val]['wvl'], dtype='float'))

            # Derive the transform from the training data and apply it to
            # the held-out spectrum from A.
            ct_obj.derive_transform(train_data_A, train_data_B)
            val_data_A_transformed = ct_obj.apply_transform(val_data_A)

            if keep_spectra:
                # This step is very slow, can we speed it up?
                A_mean_transformed.loc[
                    A_mean_transformed[('meta', dataAmatchcol)] == val,
                    'wvl'] = val_data_A_transformed

            # Record the RMSE for the held-out spectrum. str() keeps the
            # column label valid when the match values are not strings.
            rmse = mismatch_rmse(val_data_A_transformed, val_data_B)
            rmses.append(rmse)
            cv_results.loc[ind, str(val) + '_RMSE'] = rmse

        cv_results.loc[ind, 'average_RMSE'] = np.mean(rmses)

        if keep_spectra:
            Modules.data_count += 1
            self.index = Modules.data_count
            self.list_amend(self.datakeys, self.index, transformed_datakey)
            self.data[transformed_datakey] = spectral_data.spectral_data(
                A_mean_transformed)

    cv_results.columns = pd.MultiIndex.from_tuples(
        [('cv', col) for col in cv_results.columns])

    # Pick an unused key. The suffix is always built from the base name so
    # repeated collisions give '... - 2', '... - 3', not '... - 2 - 3'.
    base_cvid = 'Caltran CV Results'
    cvid = base_cvid
    number = 1
    while cvid in self.datakeys:
        number += 1
        cvid = base_cvid + ' - ' + str(number)

    Modules.data_count += 1
    self.index = Modules.data_count
    self.list_amend(self.datakeys, self.index, cvid)
    # NOTE(review): the transformed spectra above are wrapped in
    # spectral_data.spectral_data, but this table is stored as a bare
    # DataFrame - confirm that consumers of self.data expect that.
    self.data[cvid] = cv_results
def test_no_transform():
    """The ``'None'`` method must return the input spectra unchanged."""
    ct = cal_tran.cal_tran({'method': 'None'})
    ct.derive_transform(data1['wvl'], data2['wvl'])
    transformed = ct.apply_transform(data1['wvl'])
    # Compare against a fresh selection of the original spectra.
    pd.testing.assert_frame_equal(data1['wvl'], transformed)