Exemplo n.º 1
0
    def test_to_sirius(self):
        """
        Check the SIRIUS export (``to_sirius_format``) of single and merged spectra.

        For each single record the test verifies that the export contains an "<accession>.ms" and an
        "<accession>.tsv" entry (the latter being None), that the expected ">profile ..." text occurs in the
        ".ms" entry, and that the lines following ">ms2merged" reproduce the peaks returned by
        ``get_peaks()``.

        For the merged spectrum it verifies that merging the peak lists produces a ">ms2merged" section,
        whereas keeping the peak lists separate produces one ">ms2peaks" section per input spectrum.
        """
        record_dir = os.path.join(os.path.dirname(__file__), "example_massbank_records")

        # Single spectra: (accession, expected profile text) -------------------
        single_cases = [
            ("FIO00665", ">profile qtof"),
            ("EQ308406", ">profile orbitrap"),
        ]
        for accession, profile in single_cases:
            with self.subTest(accession=accession):
                spec = MBSpectrum(os.path.join(record_dir, accession + ".txt"))
                out = spec.to_sirius_format()

                ms_fn = accession + ".ms"
                tsv_fn = accession + ".tsv"
                self.assertIn(ms_fn, out)
                self.assertIn(tsv_fn, out)
                self.assertIsNone(out[tsv_fn])
                self.assertIn(profile, out[ms_fn])

                # check fragmentation peaks: they are expected on the lines directly after ">ms2merged",
                # one "<mz> <intensity>" pair per line and in the order of get_peaks()
                ms_lines = out[ms_fn].split("\n")
                first_peak_line = ms_lines.index(">ms2merged") + 1
                for idx, _peak in enumerate(spec.get_peaks(), start=first_peak_line):
                    _mz, _int = ms_lines[idx].split(" ")
                    self.assertEqual(_peak, (float(_mz), float(_int)))

        # Merged spectrum --------------------
        # The input order is fixed explicitly (no globbing).
        # NOTE(review): the hard-coded key "EA33002987.ms" below presumably depends on these accessions
        # (and possibly on their order) - confirm before changing the list.
        original_accessions = [
            'EA000412', 'EA000414', 'EA000401', 'EA000413', 'EA000408', 'EA000409', 'EA000405', 'EA000402', 'EA000406',
            'EA000404', 'EA000411', 'EA000403', 'EA000407', 'EA000410'
        ]
        spectra = [MBSpectrum(os.path.join(record_dir, "%s.txt" % oacc)) for oacc in original_accessions]

        self.assertIn(
            ">ms2merged",
            MBSpectrum.merge_spectra(spectra, merge_peak_lists=True).to_sirius_format()["EA33002987.ms"]
        )

        # Without peak-list merging every input spectrum must show up as its own ">ms2peaks" section
        self.assertEqual(
            len(spectra),
            MBSpectrum.merge_spectra(spectra, merge_peak_lists=False).to_sirius_format()["EA33002987.ms"].count(">ms2peaks")
        )
Exemplo n.º 2
0
    def test_spectra_merging__EAX000402(self):
        """
        We compare our spectra merging with the strategy applied in [1] and originally proposed in [2].

        The records matching "EA0004[56][0-9].txt" are merged and the resulting m/z values and intensities
        are compared against a reference peak list (reference intensities are divided by 999 before the
        comparison).

        References:
            [1] "MetFrag relaunched: incorporating strategies beyond in silico fragmentation" by Ruttkies et al. (2016)
            [2] "Alignment of high resolution mass spectra: development of a heuristic approach for metabolomics" by
                Kazmi et al. (2006)
        """
        # NOTE(review): a disabled skip used to sit here with the reason
        # "Intensities are wired [sic, presumably 'weird'] in the original file: EAX000402".

        # Load the list of spectra to merge: EA0004[56][0-9].txt --> EAX000402.txt
        record_glob = os.path.join(os.path.dirname(__file__), "example_massbank_records", "EA0004[56][0-9].txt")
        # glob yields matches in a filesystem-dependent order; sort them so the merge input is reproducible
        record_files = sorted(glob.iglob(record_glob))
        self.assertTrue(record_files, "No MassBank records matched %s" % record_glob)
        spectra = [MBSpectrum(mb_fn) for mb_fn in record_files]

        # Run the spectra merging using hierarchical clustering
        merged_spectrum = MBSpectrum.merge_spectra(spectra)  # type: MBSpectrum

        # Merged spectrum as used by [1]: (m/z, intensity) pairs
        peaks_ref = [
            (117.0347000, 999),
            (186.0677308, 999)
        ]

        # Split the reference into its m/z and intensity columns
        mzs_ref, ints_ref = zip(*peaks_ref)

        np.testing.assert_almost_equal(mzs_ref, merged_spectrum.get_mz())
        np.testing.assert_almost_equal(np.array(ints_ref) / 999, merged_spectrum.get_int())
Exemplo n.º 3
0
    def test_metainformation_merging__FIO00665(self):
        """
        Merge a list holding only the FIO00665 record and compare the merged meta-information against the
        values read from the un-merged spectrum.
        """
        record_path = os.path.join(os.path.dirname(__file__), "example_massbank_records", "FIO00665.txt")
        single = MBSpectrum(record_path)

        # Reference values: the accession is taken from the file name, the rest from the loaded record
        expected_accessions = [os.path.basename(record_path).partition(".")[0]]
        expected_rt = single.get("retention_time")
        expected_precursor_mz = single.get("precursor_mz")
        expected_title = single.get("record_title")
        expected_ce = single.get("collision_energy")

        # -------------------
        # WITH RT AGGREGATION
        # -------------------
        merged = MBSpectrum.merge_spectra([single], rt_agg_fun=np.min)  # type: MBSpectrum

        self.assertEqual("FBZONXHGGPHHIY-UHFFFAOYSA-N", merged.get("inchikey"))
        self.assertEqual(expected_accessions, merged.get("original_accessions"))

        # The merged accession keeps the "FIO" prefix and equals the id generated from the original accessions
        merged_accession = merged.get("accession")
        self.assertEqual("FIO", merged_accession[:3])
        self.assertEqual(MBSpectrum._get_new_accession_id(merged.get("original_accessions")), merged_accession)

        # Remaining fields; note that the collision energy is expected as a one-element list
        remaining_checks = (
            ("retention_time", expected_rt),
            ("precursor_mz", expected_precursor_mz),
            ("record_title", expected_title),
            ("collision_energy", [expected_ce]),
        )
        for key, expected in remaining_checks:
            self.assertEqual(expected, merged.get(key))
Exemplo n.º 4
0
    def test_spectra_merging__EAX281502(self):
        """
        We compare our spectra merging with the strategy applied in [1] and originally proposed in [2].

        The records matching "EA2815[56][0-9].txt" are merged and the resulting m/z values and intensities
        are compared against a reference peak list (reference intensities are divided by 999 and compared
        with three decimals).

        References:
            [1] "MetFrag relaunched: incorporating strategies beyond in silico fragmentation" by Ruttkies et al. (2016)
            [2] "Alignment of high resolution mass spectra: development of a heuristic approach for metabolomics" by
                Kazmi et al. (2006)
        """
        # Load the list of spectra to merge: EA2815[56][0-9].txt --> EAX281502.txt
        record_glob = os.path.join(os.path.dirname(__file__), "example_massbank_records", "EA2815[56][0-9].txt")
        # glob yields matches in a filesystem-dependent order; sort them so the merge input is reproducible
        record_files = sorted(glob.iglob(record_glob))
        self.assertTrue(record_files, "No MassBank records matched %s" % record_glob)
        spectra = [MBSpectrum(mb_fn) for mb_fn in record_files]

        # Run the spectra merging using hierarchical clustering
        merged_spectrum = MBSpectrum.merge_spectra(spectra)  # type: MBSpectrum

        # Merged spectrum as used by [1]: (m/z, intensity) pairs
        peaks_ref = [
            (81.0220667, 257.0000000),
            (96.0095000, 226.0000000),
            (109.0170500, 259.0000000),
            (111.0198667, 290.0000000),
            (116.0504625, 559.0000000),
            (118.0663111, 999.0000000),
            (126.1288286, 348.0000000),
            (156.0821000, 84.0000000),
            (170.1188000, 2.0000000),
            (172.0770500, 54.0000000),
            (174.0561667, 114.0000000),
            (182.1189250, 95.0000000),
            (197.1296571, 173.0000000),
            (200.0717900, 999.0000000),
            (227.1402000, 67.0000000),
            (230.1551000, 290.0000000),
            (244.0616500, 15.0000000),
            (257.2023400, 218.0000000),
            (273.1971500, 176.0000000),
            (276.0872500, 3.0000000),
            (283.1819500, 70.0000000),
            (285.1611800, 381.0000000),
            (301.1921875, 459.0000000),
            (317.1871200, 142.0000000),
            (327.1710000, 128.0000000),
            (331.2034500, 3.0000000),
            (345.1821167, 999.0000000),
            (359.1974333, 177.0000000),
            (377.2083750, 999.0000000)
        ]

        # Split the reference into its m/z and intensity columns
        mzs_ref, ints_ref = zip(*peaks_ref)

        np.testing.assert_almost_equal(mzs_ref, merged_spectrum.get_mz())
        np.testing.assert_almost_equal(np.array(ints_ref) / 999, merged_spectrum.get_int(), decimal=3)
Exemplo n.º 5
0
    def test_metainformation_merging__EAX000401(self):
        """
        Merge the records matching "EA0004[01][0-9].txt" and check the merged meta-information.

        The reference values are collected from the individual spectra in the same order in which the
        spectra are handed to ``merge_spectra``. With ``rt_agg_fun=np.mean`` the merged retention time must
        equal the mean of the individual retention times; with ``rt_agg_fun=None`` it must equal the list of
        the individual retention times.
        """
        # Load the list of spectra to merge: EA0004[01][0-9].txt --> EAX000401.txt
        record_glob = os.path.join(os.path.dirname(__file__), "example_massbank_records", "EA0004[01][0-9].txt")
        # glob yields matches in a filesystem-dependent order; sort them so the merge input is reproducible
        record_files = sorted(glob.iglob(record_glob))
        self.assertTrue(record_files, "No MassBank records matched %s" % record_glob)
        spectra = [MBSpectrum(mb_fn) for mb_fn in record_files]

        # collect some reference meta-information (accessions come from the file names)
        acc_ref = [os.path.basename(mb_fn).split(".")[0] for mb_fn in record_files]
        rt_ref = [spec.get("retention_time") for spec in spectra]
        precmz_ref = [spec.get("precursor_mz") for spec in spectra]
        recordtitle_ref = [spec.get("record_title") for spec in spectra]
        ce_ref = [spec.get("collision_energy") for spec in spectra]

        # -------------------
        # WITH RT AGGREGATION
        # -------------------
        merged_spectrum = MBSpectrum.merge_spectra(spectra, rt_agg_fun=np.mean)  # type: MBSpectrum

        self.assertEqual("OUSYWCQYMPDAEO-UHFFFAOYSA-N", merged_spectrum.get("inchikey"))
        self.assertEqual(acc_ref, merged_spectrum.get("original_accessions"))
        self.assertEqual("EA", merged_spectrum.get("accession")[:2])
        self.assertEqual(MBSpectrum._get_new_accession_id(merged_spectrum.get("original_accessions")),
                         merged_spectrum.get("accession"))
        self.assertEqual("min", merged_spectrum.get("retention_time_unit"))
        self.assertEqual(np.mean(rt_ref), merged_spectrum.get("retention_time"))
        # Only the first spectrum's precursor m/z is compared against the merged value
        self.assertEqual(precmz_ref[0], merged_spectrum.get("precursor_mz"))
        self.assertEqual(recordtitle_ref, merged_spectrum.get("record_title"))
        self.assertEqual(ce_ref, merged_spectrum.get("collision_energy"))

        # ----------------------
        # WITHOUT RT AGGREGATION
        # ----------------------
        merged_spectrum = MBSpectrum.merge_spectra(spectra, rt_agg_fun=None)  # type: MBSpectrum

        self.assertEqual("min", merged_spectrum.get("retention_time_unit"))
        self.assertEqual(rt_ref, merged_spectrum.get("retention_time"))
Exemplo n.º 6
0
    def test_spectra_merging__EAX000401(self):
        """
        We compare our spectra merging with the strategy applied in [1] and originally proposed in [2].

        The records matching "EA0004[01][0-9].txt" are merged and the resulting m/z values and intensities
        are compared against a reference peak list (reference intensities are divided by 999 and compared
        with three decimals).

        References:
            [1] "MetFrag relaunched: incorporating strategies beyond in silico fragmentation" by Ruttkies et al. (2016)
            [2] "Alignment of high resolution mass spectra: development of a heuristic approach for metabolomics" by
                Kazmi et al. (2006)
        """
        # Load the list of spectra to merge: EA0004[01][0-9].txt --> EAX000401.txt
        record_glob = os.path.join(os.path.dirname(__file__), "example_massbank_records", "EA0004[01][0-9].txt")
        # glob yields matches in a filesystem-dependent order; sort them so the merge input is reproducible
        record_files = sorted(glob.iglob(record_glob))
        self.assertTrue(record_files, "No MassBank records matched %s" % record_glob)
        spectra = [MBSpectrum(mb_fn) for mb_fn in record_files]

        # Run the spectra merging using hierarchical clustering
        merged_spectrum = MBSpectrum.merge_spectra(spectra)  # type: MBSpectrum

        # Merged spectrum as used by [1]: (m/z, intensity) pairs
        peaks_ref = [
            (53.03852, 55),
            (57.0447285714286, 93),
            (65.0386, 116),
            (77.0386, 884),
            (81.03345, 12),
            (85.0396545454545, 17),
            (91.0542714285714, 123),
            (92.0494875, 377),
            (95.04925, 112),
            (103.041733333333, 15),
            (104.049541666667, 999),
            (105.044757142857, 271),
            (105.069975, 12),
            (110.060033333333, 7),
            (119.060475, 631),
            (130.04005, 11),
            (130.0652, 49),
            (131.07295, 24),
            (142.0652, 9),
            (147.0554, 3),
            (160.087069230769, 999),
            (188.082038461538, 999)
        ]

        # Split the reference into its m/z and intensity columns
        mzs_ref, ints_ref = zip(*peaks_ref)

        np.testing.assert_almost_equal(mzs_ref, merged_spectrum.get_mz())
        np.testing.assert_almost_equal(np.array(ints_ref) / 999, merged_spectrum.get_int(), decimal=3)
Exemplo n.º 7
0
    def test_to_metfrag(self):
        """
        Check the MetFrag export (``to_metfrag_format``) of single and merged spectra.

        For each single record the test verifies that the export contains an "<accession>.peaks" and an
        "<accession>.conf" entry and that the ".conf" entry contains the expected peak-list path, score
        weights, score types and ion-mode lines. For the merged spectrum the file names are derived from the
        merged accession and the same kind of configuration lines are checked.
        """
        record_dir = os.path.join(os.path.dirname(__file__), "example_massbank_records")

        # Settings shared by every export below; only the score configuration differs between the cases
        common_settings = {
            "LocalDatabasePath": "/path/to/db",
            "ResultsPath": "/path/to/results",
            "NumberThreads": 4,
            "PeakListPath": "/path/to/peaks",
        }

        # Single spectra: (accession, score weights, score types, lines expected in the config) ------------
        single_cases = [
            (
                "FIO00665",
                [0.8, 0.2],
                ["FragmenterScore", "PubChemNumberPatents"],
                [
                    "MetFragScoreWeights=0.8,0.2\n",
                    "MetFragScoreTypes=FragmenterScore,PubChemNumberPatents\n",
                    "PrecursorIonMode=-1\n",
                    "IsPositiveIonMode=False\n",
                ],
            ),
            (
                "EQ308406",
                [1.0],
                ["FragmenterScore"],
                [
                    "MetFragScoreWeights=1.0\n",
                    "MetFragScoreTypes=FragmenterScore\n",
                    "PrecursorIonMode=1\n",
                    "IsPositiveIonMode=True\n",
                ],
            ),
        ]
        for accession, weights, score_types, expected_lines in single_cases:
            with self.subTest(accession=accession):
                out = MBSpectrum(os.path.join(record_dir, accession + ".txt")).to_metfrag_format(
                    MetFragScoreWeights=weights, MetFragScoreTypes=score_types, **common_settings
                )

                peaks_fn = accession + ".peaks"
                config_fn = accession + ".conf"
                self.assertIn(peaks_fn, out)
                self.assertIn(config_fn, out)
                self.assertIn("PeakListPath=/path/to/peaks/" + peaks_fn, out[config_fn])
                for line in expected_lines:
                    self.assertIn(line, out[config_fn])

        # Merged spectrum --------------------
        record_glob = os.path.join(record_dir, "EA0004[01][0-9].txt")
        # glob yields matches in a filesystem-dependent order; sort them so the merge input is reproducible
        record_files = sorted(glob.iglob(record_glob))
        self.assertTrue(record_files, "No MassBank records matched %s" % record_glob)
        spectra = [MBSpectrum(mb_fn) for mb_fn in record_files]

        merged_spectrum = MBSpectrum.merge_spectra(spectra)
        out = merged_spectrum.to_metfrag_format(
            MetFragScoreWeights=[1.0], MetFragScoreTypes=["FragmenterScore"], **common_settings
        )

        # The exported file names are built from the accession of the merged spectrum
        peaks_fn = merged_spectrum.get("accession") + ".peaks"
        config_fn = merged_spectrum.get("accession") + ".conf"

        self.assertIn("PeakListPath=/path/to/peaks/" + peaks_fn, out[config_fn])
        self.assertIn("MetFragScoreWeights=1.0\n", out[config_fn])
        self.assertIn("MetFragScoreTypes=FragmenterScore\n", out[config_fn])
        self.assertIn("PrecursorIonMode=1\n", out[config_fn])
        self.assertIn("IsPositiveIonMode=True\n", out[config_fn])