Пример #1
0
def test_undefined():
    """Check that unrecognized settings leave cal_tran without a transform.

    Two cases are asserted:
    * a 'method' value of 'foo' leaves ``ct_obj`` as None right after
      construction;
    * 'LASSO DS' with a 'reg' value of 'foo' leaves ``ct_obj.proj_to_B`` as
      None after ``derive_transform`` has been called.
    """
    params = {'method': 'foo'}
    ct = cal_tran.cal_tran(params)
    # Identity check: `== None` can be redirected by a custom or element-wise
    # __eq__, which would make the assertion pass (or blow up) for the wrong
    # reason.
    assert ct.ct_obj is None

    params = {'method': 'LASSO DS', 'reg': 'foo'}
    ct = cal_tran.cal_tran(params)
    ct.derive_transform(data1['wvl'], data2['wvl'])
    assert ct.ct_obj.proj_to_B is None
    def run(self):
        """Derive a calibration transform from the data sets chosen in the GUI.

        Reads the keys of data sets A, B and C and the match columns for A
        and B from the combo boxes, asserts that B and C have compatible
        'wvl' columns, passes A and B through
        ``caltran_prepare_data.prepare_data`` and derives a transform from
        the two returned frames using the selected method.

        The transform object is local to this call: it is neither applied to
        data set C nor stored anywhere.

        Raises:
            AssertionError: if B and C have a different number of 'wvl'
                columns, or if their last 'wvl' column labels differ.
        """
        key_a = self.chooseDataA.currentText()
        key_b = self.chooseDataB.currentText()
        key_c = self.chooseDatatoTransform.currentText()
        match_col_a = self.chooseDataAMatch.currentText()
        match_col_b = self.chooseDataBMatch.currentText()

        # Look up the data frames behind the selected keys.
        frame_a = self.data[key_a].df
        frame_b = self.data[key_b].df
        frame_c = self.data[key_c].df

        # Only the channel count and the LAST wavelength label are compared;
        # interior wavelengths are not checked here.
        assert len(frame_b['wvl'].columns) == len(frame_c['wvl'].columns), (
            'Data sets B and C have different numbers of spectral channels!')
        assert (frame_b['wvl'].columns.values[-1]
                == frame_c['wvl'].columns.values[-1]), (
            "Data set B and C wavelengths are not identical. Check rounding and/or resample one data set onto the other's wavelengths")

        prepared_a, prepared_b = caltran_prepare_data.prepare_data(
            frame_a, frame_b, match_col_a, match_col_b)

        method = self.chooseMethod.currentText()
        settings = self.alg[method].run()

        # NOTE(review): here the method name is passed as a separate first
        # argument; confirm this matches the cal_tran constructor in use.
        transform = cal_tran.cal_tran(method, settings)
        transform.derive_transform(prepared_a['wvl'], prepared_b['wvl'])
    def run(self):
        """Derive a calibration transform from A to B and apply it to C.

        Reads the keys of data sets A, B and C and the match columns for A
        and B from the combo boxes, asserts that B and C have compatible
        'wvl' columns, passes A and B through ``caltran_utils.prepare_data``
        and derives a transform from the two returned frames with the
        selected method.  The transform is then applied to the 'wvl' block of
        C and the result is written back over C's 'wvl' block.

        Raises:
            AssertionError: if B and C have a different number of 'wvl'
                columns, or if their last 'wvl' column labels differ.
        """
        key_a = self.chooseDataA.currentText()
        key_b = self.chooseDataB.currentText()
        key_c = self.chooseDatatoTransform.currentText()
        match_col_a = self.chooseDataAMatch.currentText()
        match_col_b = self.chooseDataBMatch.currentText()

        # Look up the data frames behind the selected keys.
        frame_a = self.data[key_a].df
        frame_b = self.data[key_b].df
        frame_c = self.data[key_c].df

        # Only the channel count and the LAST wavelength label are compared;
        # interior wavelengths are not checked here.
        assert len(frame_b['wvl'].columns) == len(frame_c['wvl'].columns), (
            'Data sets B and C have different numbers of spectral channels!')
        assert (frame_b['wvl'].columns.values[-1]
                == frame_c['wvl'].columns.values[-1]), (
            "Data set B and C wavelengths are not identical. Check rounding and/or resample one data set onto the other's wavelengths")

        prepared_a, prepared_b = caltran_utils.prepare_data(
            frame_a, frame_b, match_col_a, match_col_b)

        # The algorithm widget supplies the settings; the method name is
        # added to the same dict before it is handed to cal_tran.
        method = self.chooseMethod.currentText()
        settings = self.alg[method].run()
        settings['method'] = method
        transform = cal_tran.cal_tran(settings)

        derive_msg = ('Deriving transform from ' + key_a + ' to ' + key_b +
                      ' using ' + method)
        print(derive_msg)
        transform.derive_transform(prepared_a['wvl'], prepared_b['wvl'])

        print('Applying transform to ' + key_c)
        transformed_c = transform.apply_transform(frame_c['wvl'])
        # Overwrite the spectra of data set C with the transformed values.
        self.data[key_c].df['wvl'] = transformed_c
Пример #4
0
def cal_tran_helper(data1,data2,params, expected, single_spect = False):
    """Derive a transform from data1 to data2 and check one output channel.

    Args:
        data1: data whose 'wvl' block is used to derive the transform and is
            then transformed.
        data2: data whose 'wvl' block is the target when deriving.
        params: settings dict passed to ``cal_tran.cal_tran``.
        expected: expected values at positional index 4 of the transformed
            output (column 4 for 2-D output, element 4 for 1-D output).
        single_spect: if True, only the first row of data1['wvl'] is
            transformed instead of the whole block.
    """
    ct = cal_tran.cal_tran(params)
    ct.derive_transform(data1['wvl'], data2['wvl'])

    # Pick the input to transform: one row or the full block.
    to_transform = data1['wvl'].iloc[0,:] if single_spect else data1['wvl']
    transformed = ct.apply_transform(to_transform)

    # Dimensionality is decided on the raw result, before the float cast.
    is_2d = len(transformed.shape) > 1
    values = np.array(transformed, dtype=float)
    channel = values[:, 4] if is_2d else values[4]
    np.testing.assert_array_almost_equal(channel, expected)
    def run(self):
        """Derive a calibration transform from A to B and apply it to C.

        Reads the keys of data sets A, B and C and the match columns for A
        and B from the combo boxes, asserts that B and C have compatible
        'wvl' columns, passes A and B through ``caltran_utils.prepare_data``
        and derives a transform from the two returned frames with the
        selected method.  The transform is then applied to the 'wvl' block of
        C and the result is written back over C's 'wvl' block.

        Optional outputs, both written under ``self.outpath``:
        * if the save-inputs box is checked, data sets A and B are written
          to CSV;
        * if the save-transform box is checked, the derived transform is
          saved via ``save_transform``.

        Raises:
            AssertionError: if B and C have a different number of 'wvl'
                columns, or if their last 'wvl' column labels differ.
        """
        key_a = self.chooseDataA.currentText()
        key_b = self.chooseDataB.currentText()
        key_c = self.chooseDatatoTransform.currentText()
        match_col_a = self.chooseDataAMatch.currentText()
        match_col_b = self.chooseDataBMatch.currentText()

        # Look up the data frames behind the selected keys.
        frame_a = self.data[key_a].df
        frame_b = self.data[key_b].df
        frame_c = self.data[key_c].df

        # Only the channel count and the LAST wavelength label are compared;
        # interior wavelengths are not checked here.
        assert len(frame_b['wvl'].columns) == len(frame_c['wvl'].columns), (
            'Data sets B and C have different numbers of spectral channels!')
        assert (frame_b['wvl'].columns.values[-1]
                == frame_c['wvl'].columns.values[-1]), (
            "Data set B and C wavelengths are not identical. Check rounding and/or resample one data set onto the other's wavelengths")

        prepared_a, prepared_b = caltran_utils.prepare_data(
            frame_a, frame_b, match_col_a, match_col_b)

        if self.save_inputs_checkbox.isChecked():
            csv_name_a = key_a + '_caltran_averages.csv'
            csv_name_b = key_b + '_caltran_averages.csv'
            # NOTE(review): the file names say "averages" but the frames
            # written are the original A and B, not the frames returned by
            # prepare_data -- confirm which is intended.
            frame_a.to_csv(self.outpath + '//' + csv_name_a)
            frame_b.to_csv(self.outpath + '//' + csv_name_b)

        # The algorithm widget supplies the settings; the method name is
        # added to the same dict before it is handed to cal_tran.
        method = self.chooseMethod.currentText()
        settings = self.alg[method].run()
        settings['method'] = method
        transform = cal_tran.cal_tran(settings)

        derive_msg = ('Deriving transform from ' + key_a + ' to ' + key_b +
                      ' using ' + method)
        print(derive_msg)
        transform.derive_transform(prepared_a['wvl'], prepared_b['wvl'])

        if self.save_transform_checkbox.isChecked():
            transform_csv = (key_a + '_to_' + key_b + '_caltran_' + method +
                             '.csv')
            # The 'wvl' column labels of the prepared A frame are passed
            # along with the output path.
            transform.save_transform(self.outpath + '//' + transform_csv,
                                     prepared_a['wvl'].columns.values)

        print('Applying transform to ' + key_c)
        transformed_c = transform.apply_transform(frame_c['wvl'])
        # Overwrite the spectra of data set C with the transformed values.
        self.data[key_c].df['wvl'] = transformed_c
Пример #6
0
    def run(self):
        """Cross-validate calibration transfer settings between A and B.

        Builds a list of parameter sets (always starting with
        ``{'method': 'None'}``) from the checked method boxes, then, for each
        parameter set, holds out each unique match value of data set A in
        turn: the transform is derived from the remaining spectra of A and B,
        applied to the held-out spectrum of A, and scored against the
        matching spectrum of B with ``mismatch_rmse``.

        Results are collected in one row per parameter set (the parameters,
        one '<value>_RMSE' column per held-out value, and 'average_RMSE'),
        with columns placed under a top-level 'cv' label, and stored in
        ``self.data`` under a new 'Caltran CV Results' key.  If the
        keep-spectra box is checked, the transformed held-out spectra for
        each parameter set are also stored as a new data set.
        """
        datakeyA = self.chooseDataA.currentText()
        datakeyB = self.chooseDataB.currentText()
        dataAmatchcol = self.chooseDataAMatch.currentText()
        dataBmatchcol = self.chooseDataBMatch.currentText()

        # (checkbox, key into self.alg), listed in the order in which the
        # parameter sets are evaluated and reported.  A key of None marks the
        # 'Ratio' method, which contributes a single fixed parameter set
        # instead of a grid read from an algorithm widget.
        method_choices = [
            (self.PDScheckbox, 'PDS - Piecewise DS'),
            (self.PDSPLScheckBox, 'PDS-PLS - PDS using Partial Least Squares'),
            (self.DScheckbox, 'DS - Direct Standardization'),
            (self.LASSODScheckbox, 'LASSO DS'),
            (self.Ratiocheckbox, None),
            (self.SparseDScheckBox, 'Sparse Low Rank DS'),
            (self.RidgeDScheckBox, 'Ridge DS'),
            (self.CCAcheckBox, 'CCA - Canonical Correlation Analysis'),
            (self.NewCCAcheckBox, 'New CCA'),
            (self.ForwardBackwardcheckBox, 'Forward Backward DS'),
            (self.IPDDScheckBox, 'Incremental Proximal Descent DS'),
        ]
        paramgrid = [{'method': 'None'}]
        for checkbox, alg_key in method_choices:
            if not checkbox.isChecked():
                continue
            if alg_key is None:
                paramgrid.append({'method': 'Ratio'})
            else:
                paramgrid.extend(ParameterGrid(self.alg[alg_key][0].run()))

        #get the data sets
        A = self.data[datakeyA].df
        B = self.data[datakeyB].df
        A_mean, B_mean = caltran_utils.prepare_data(A, B, dataAmatchcol,
                                                    dataBmatchcol)

        #prepare for cross validation
        # A_mean / B_mean are never modified below, so the match columns can
        # be looked up once instead of on every fold.
        match_A = A_mean[('meta', dataAmatchcol)]
        match_B = B_mean[('meta', dataBmatchcol)]
        uniquevals = np.unique(match_A)
        cv_results = pd.DataFrame()
        # Read the checkbox once so one run behaves consistently throughout.
        keep_spectra = self.keep_spectra_checkBox.isChecked()

        #step through all the different permutations
        for ind, params in enumerate(paramgrid):
            print(params)
            transformed_datakey = datakeyA + '-' + str(params)
            for key, value in params.items():
                # store parameters in the results file
                cv_results.loc[ind, key] = value
            #create a caltran object using the current parameters
            ct_obj = cal_tran.cal_tran(params)
            if keep_spectra:
                # Zeroed copy that is filled in with the transformed
                # held-out spectra; only needed when spectra are kept.
                A_mean_transformed = copy.deepcopy(A_mean)
                A_mean_transformed['wvl'] = A_mean_transformed['wvl'] * 0
            rmses = []
            for val in uniquevals:  #hold out each unique spectrum in turn
                print(val)
                # define the validation data (the held out spectrum)
                # and the training data (the spectra that are not held out)
                # for both data sets
                held_out_A = match_A == val
                held_out_B = match_B == val
                val_data_A = np.squeeze(
                    np.array(A_mean[held_out_A]['wvl'], dtype='float'))
                train_data_A = np.squeeze(
                    np.array(A_mean[~held_out_A]['wvl'], dtype='float'))
                val_data_B = np.squeeze(
                    np.array(B_mean[held_out_B]['wvl'], dtype='float'))
                train_data_B = np.squeeze(
                    np.array(B_mean[~held_out_B]['wvl'], dtype='float'))

                #derive the transform based on the training data
                ct_obj.derive_transform(train_data_A, train_data_B)
                #apply the transform to the held out spectrum from A
                val_data_A_transformed = ct_obj.apply_transform(val_data_A)

                if keep_spectra:
                    #this step is very slow, can we speed it up?
                    A_mean_transformed.loc[
                        held_out_A, 'wvl'] = val_data_A_transformed
                rmse = mismatch_rmse(val_data_A_transformed, val_data_B)
                rmses.append(rmse)
                #record the RMSE for the held out spectrum
                # str() keeps this working when match values are not strings.
                cv_results.loc[ind, str(val) + '_RMSE'] = rmse
            cv_results.loc[ind, 'average_RMSE'] = np.mean(rmses)
            if keep_spectra:
                Modules.data_count += 1
                self.index = Modules.data_count
                self.list_amend(self.datakeys, self.index, transformed_datakey)
                self.data[transformed_datakey] = spectral_data.spectral_data(
                    A_mean_transformed)
        cv_results.columns = pd.MultiIndex.from_tuples([
            ('cv', col) for col in cv_results.columns
        ])

        # Find an unused key.  The suffix is rebuilt from the base name each
        # time so repeated runs give '... - 2', '... - 3', ... rather than
        # the accumulating '... - 2 - 3'.
        base_cvid = 'Caltran CV Results'
        cvid = base_cvid
        number = 1
        while cvid in self.datakeys:
            number += 1
            cvid = base_cvid + ' - ' + str(number)

        Modules.data_count += 1
        self.index = Modules.data_count
        self.list_amend(self.datakeys, self.index, cvid)
        # NOTE(review): the kept spectra above are wrapped in
        # spectral_data.spectral_data, while the results table is stored as a
        # bare DataFrame -- confirm downstream code expects that.
        self.data[cvid] = cv_results
Пример #7
0
def test_no_transform():
    """Check that the 'None' method returns the input spectra unchanged."""
    ct = cal_tran.cal_tran({'method': 'None'})
    ct.derive_transform(data1['wvl'], data2['wvl'])
    transformed = ct.apply_transform(data1['wvl'])
    # The output must equal the untouched 'wvl' block of data1.
    pd.testing.assert_frame_equal(data1['wvl'], transformed)