Exemplo n.º 1
0
 def test_read_open_ms(self):
     """Check that ``ca.read_ms`` loads an OpenMS csv table.

     The table in ``self.openms_csv`` is loaded three times: without
     normalization (checking the MZ/RT feature metadata), with
     ``normalize=10000`` (checking the row sums of the data) and with
     ``sparse=True`` (checking the ``sparse`` flag of the result).
     """
     exp = ca.read_ms(self.openms_csv, normalize=None, data_file_type='openms')
     # test we get the MZ and RT correct
     self.assertIn('MZ', exp.feature_metadata)
     self.assertIn('RT', exp.feature_metadata)
     # MZ/RT are non-integer floats: compare approximately rather than
     # relying on bit-exact float equality
     self.assertAlmostEqual(exp.feature_metadata['MZ'].iloc[1], 118.0869)
     self.assertAlmostEqual(exp.feature_metadata['RT'].iloc[1], 23.9214)
     # test normalizing: every row of the data must sum to 10000
     exp = ca.read_ms(self.openms_csv, normalize=10000, data_file_type='openms')
     assert_array_almost_equal(exp.data.sum(axis=1), np.ones(exp.shape[0]) * 10000)
     # test load sparse
     exp = ca.read_ms(self.openms_csv, sparse=True, normalize=None, data_file_type='openms')
     self.assertTrue(exp.sparse)
Exemplo n.º 2
0
 def test_read_gnps_ms(self):
     """Load the GNPS MS2 table three ways: raw, normalized and sparse."""
     # keyword arguments shared by every load below
     load_args = dict(sample_metadata_file=self.gnps_map,
                      data_file_type='gnps-ms2')

     # raw load: spot-check a single value of the data matrix
     raw = ca.read_ms(self.gnps_table, normalize=None, **load_args)
     self.assertEqual(raw.data[2, 3], 139692)

     # normalized load: every row of the data must sum to 10000
     normalized = ca.read_ms(self.gnps_table, normalize=10000, **load_args)
     expected_sums = np.ones(normalized.shape[0]) * 10000
     assert_array_almost_equal(normalized.data.sum(axis=1), expected_sums)

     # sparse load: the result must report itself as sparse
     sparse_exp = ca.read_ms(self.gnps_table, normalize=None, sparse=True,
                             **load_args)
     self.assertEqual(sparse_exp.sparse, True)
Exemplo n.º 3
0
    def test_get_spurious_duplicates(self):
        """Check the feature count of ``get_spurious_duplicates`` results.

        The mzmine2 table is loaded once and queried with increasingly
        permissive tolerance / correlation settings; only the shape of
        each result is verified.
        """
        exp = ca.read_ms(self.mzmine2_csv, sample_metadata_file=self.gnps_map,
                         data_file_type='mzmine2', use_gnps_id_from_AllFiles=False,
                         normalize=None)
        # drop the all-zero metabolite (avoids the std=0 warning)
        exp = exp.filter_sum_abundance(0.1)

        # default parameters: no samples are filtered away and no
        # suspicious features are reported
        default_res = exp.get_spurious_duplicates()
        self.assertEqual(default_res.shape[0], 6)
        self.assertEqual(default_res.shape[1], 0)

        # (keyword arguments, expected size of the second axis)
        cases = [
            ({'mz_tolerance': 100, 'rt_tolerance': 0.5}, 0),
            ({'rt_tolerance': 1}, 0),
            ({'mz_tolerance': 100, 'rt_tolerance': 1}, 2),
            ({'mz_tolerance': 100, 'rt_tolerance': 1, 'corr_thresh': 0.2}, 4),
        ]
        for kwargs, expected_count in cases:
            res = exp.get_spurious_duplicates(**kwargs)
            self.assertEqual(res.shape[1], expected_count)
Exemplo n.º 4
0
 def test_read_open_ms_samples_rows(self):
     """Load an OpenMS table with ``sample_in_row=True`` and check MZ/RT."""
     exp = ca.read_ms(self.openms_samples_rows_csv,
                      data_file_type='openms',
                      sample_in_row=True,
                      normalize=None)
     feature_md = exp.feature_metadata
     # both MZ and RT must be present in the feature metadata
     for column in ('MZ', 'RT'):
         self.assertIn(column, feature_md)
     # spot-check the values of the second feature
     self.assertAlmostEqual(feature_md['MZ'].iloc[1], 118.0869)
     self.assertAlmostEqual(feature_md['RT'].iloc[1], 23.9214)
Exemplo n.º 5
0
 def test_read_biom_ms(self):
     """Check that ``ca.read_ms`` loads a biom table and exposes MZ/RT.

     The table is read with ``data_file_type='biom'`` together with the
     sample metadata file ``self.gnps_map``; the MZ and RT columns of the
     resulting feature metadata are then spot-checked.
     """
     # load a biom table with MZ/RT in featureID, and the associated
     # sample metadata file
     exp = ca.read_ms(self.ms_biom_table, sample_metadata_file=self.gnps_map,
                      data_file_type='biom', use_gnps_id_from_AllFiles=False, normalize=None)
     self.assertIn('MZ', exp.feature_metadata)
     self.assertIn('RT', exp.feature_metadata)
     # 899.53 is a non-integer float: compare approximately rather than
     # relying on bit-exact float equality
     self.assertAlmostEqual(exp.feature_metadata['MZ'].iloc[1], 899.53)
     self.assertEqual(exp.feature_metadata['RT'].iloc[0], 314)
Exemplo n.º 6
0
 def test_read_mzmine2_ms_with_idstr(self):
     """Check loading an mzmine2 table whose sample ids carry extra text.

     The table is read with ``cut_sample_id_sep='_'`` and the sample
     metadata file ``self.gnps_map``; feature metadata, sample metadata
     and the data matrix are then spot-checked.
     """
     # load an mzmine2 metabolomics table with the sampleids inflated with additional info
     exp = ca.read_ms(self.mzmine2_with_idstr_csv, sample_metadata_file=self.gnps_map,
                      use_gnps_id_from_AllFiles=False, cut_sample_id_sep='_', normalize=None)
     self.assertEqual(exp.feature_metadata['MZ'].iloc[1], 200)
     self.assertEqual(exp.feature_metadata['RT'].iloc[0], 1)
     # use explicit positional access: plain ``series[0]`` on a
     # non-integer index relies on a deprecated pandas fallback
     self.assertEqual(exp.sample_metadata['field2'].iloc[0], 'f')
     self.assertEqual(exp.data[2, 1], 35900)
     self.assertEqual(exp.data.shape, (6, 6))
Exemplo n.º 7
0
 def test_read_mzmine2_ms(self):
     """Load the mzmine2 table and spot-check feature metadata and data."""
     exp = ca.read_ms(self.mzmine2_csv,
                      sample_metadata_file=self.gnps_map,
                      data_file_type='mzmine2',
                      use_gnps_id_from_AllFiles=False,
                      normalize=None)
     feature_md = exp.feature_metadata
     # both MZ and RT must be present in the feature metadata
     for column in ('MZ', 'RT'):
         self.assertIn(column, feature_md)
     self.assertEqual(feature_md['MZ'].iloc[1], 200)
     self.assertEqual(feature_md['RT'].iloc[0], 1)
     # one value of the data matrix
     self.assertEqual(exp.data[2, 1], 35900)
Exemplo n.º 8
0
    def test_merge_similar_features(self):
        """Check ``merge_similar_features`` at three tolerance settings.

        Verifies the number of features left after merging and, where
        merging is requested, the recorded ``_calour_merge_ids`` entry.
        """
        exp = ca.read_ms(self.mzmine2_csv, sample_metadata_file=self.gnps_map,
                         data_file_type='mzmine2', use_gnps_id_from_AllFiles=False,
                         normalize=None)
        merge_col = '_calour_merge_ids'

        # default tolerances: features are far apart, nothing is merged
        unmerged = exp.merge_similar_features()
        self.assertEqual(unmerged.shape[1], 6)

        # moderate tolerances: a little merging
        some_merged = exp.merge_similar_features(mz_tolerance=100, rt_tolerance=1)
        self.assertEqual(some_merged.shape[1], 3)
        self.assertEqual(some_merged.feature_metadata.at[85022, merge_col],
                         '85022;93277')

        # wide tolerances: a lot of merging
        most_merged = exp.merge_similar_features(mz_tolerance=400, rt_tolerance=6)
        self.assertEqual(most_merged.shape[1], 2)
        self.assertEqual(most_merged.feature_metadata.at[121550, merge_col],
                         '121550')
Exemplo n.º 9
0
    def test_filter_mz_rt(self):
        """Check ``filter_mz_rt`` for MZ-only, RT-only and combined filters.

        Each call is verified through the number of features kept and,
        where stated, the MZ or RT values of those features.
        """
        # load an mzmine2 metabolomics table, and the associated sample metadata file
        exp = ca.read_ms(self.mzmine2_csv,
                         sample_metadata_file=self.gnps_map,
                         data_file_type='mzmine2',
                         use_gnps_id_from_AllFiles=False,
                         normalize=None)

        # mz filtering
        res = exp.filter_mz_rt(100)
        self.assertEqual(len(res.feature_metadata), 1)
        # compare arrays with the numpy helper: assertEqual on an ndarray
        # only works through the truth value of a one-element array
        npt.assert_array_equal(res.feature_metadata['MZ'].values, [100])

        res = exp.filter_mz_rt([100, 201])
        self.assertEqual(len(res.feature_metadata), 1)
        npt.assert_array_equal(res.feature_metadata['MZ'].values, [100])

        res = exp.filter_mz_rt([100, 201], mz_tolerance=1)
        self.assertEqual(len(res.feature_metadata), 2)
        npt.assert_array_equal(res.feature_metadata['MZ'].values, [100, 200])

        res = exp.filter_mz_rt([100, 201], negate=True)
        self.assertEqual(len(res.feature_metadata), 5)

        # rt filtering
        res = exp.filter_mz_rt(rt=[1, 2.5])
        self.assertEqual(len(res.feature_metadata), 1)
        npt.assert_array_equal(res.feature_metadata['RT'].values, [1])

        res = exp.filter_mz_rt(rt=[1, 2.5], rt_tolerance=0.5)
        self.assertEqual(len(res.feature_metadata), 3)
        npt.assert_array_equal(res.feature_metadata['RT'].values, [1, 2, 3])

        # complex - both mz and rt
        res = exp.filter_mz_rt([101, 200, 400, 505], [1, 3, 4, 5],
                               mz_tolerance=2)
        self.assertEqual(res.shape[1], 2)