示例#1
0
def test_idzip():
    """Pipe a gzip file and a plain file through ``idzip_compression`` on stdin.

    For each input, the command's stdout is decompressed and compared with
    the (decompressed) content of the input file. The gzip input must also
    produce the "Detected gzip input file" notice on stderr, while the plain
    input must not.
    """
    runner = CliRunner(mix_stderr=False)

    # --- gzip-compressed input -------------------------------------------
    path = datafile("20150710_3um_AGP_001_29_30.mzML.gz")
    # Read the fixture through a context manager so the handle is released.
    with open(path, 'rb') as fh:
        stdin_data = io.BytesIO(fh.read())
    result = runner.invoke(
        indexing.idzip_compression,
        ['-'],
        input=stdin_data)
    assert b"Detected gzip input file" in result.stderr_bytes
    outbuff = io.BytesIO(result.stdout_bytes)
    outstream = _compression.GzipFile(fileobj=outbuff, mode='rb')
    instream = _compression.GzipFile(path, mode='rb')
    try:
        in_data = instream.read()
    finally:
        # Explicit close: the reader wraps a real file on disk.
        instream.close()
    out_data = outstream.read()
    assert in_data == out_data

    # --- uncompressed input ----------------------------------------------
    path = datafile("small.mzML")
    with open(path, 'rb') as fh:
        stdin_data = io.BytesIO(fh.read())
    result = runner.invoke(
        indexing.idzip_compression,
        ['-'],
        input=stdin_data)
    assert b"Detected gzip input file" not in result.stderr_bytes
    outbuff = io.BytesIO(result.stdout_bytes)
    outstream = _compression.GzipFile(fileobj=outbuff, mode='rb')
    with io.open(path, mode='rb') as instream:
        in_data = instream.read()
    out_data = outstream.read()
    assert in_data == out_data
示例#2
0
def test_ms_deisotope():
    """Run the ``deisotope`` command and compare its output to a reference file.

    The command output is written to a temporary path, read back with
    ``ProcessedMzMLDeserializer`` and compared bunch-by-bunch against the
    stored reference: precursor ids, product ids and deconvoluted peak sets
    must all match.
    """
    runner = CliRunner(mix_stderr=False)
    path = datafile("20150710_3um_AGP_001_29_30.mzML.gz")
    reference = datafile("20150710_3um_AGP_001_29_30.preprocessed.mzML.gz")
    outpath = tempfile.mktemp()
    result = runner.invoke(deisotoper.deisotope, [
        "-b", 0, "-t", 20, "-tn", 10, "-m", 3, "-mn", 1, path, outpath
    ])
    result_reader = ProcessedMzMLDeserializer(outpath)
    reference_reader = ProcessedMzMLDeserializer(_compression.get_opener(reference))
    assert len(result_reader) == len(reference_reader)
    for a_bunch, b_bunch in zip(result_reader, reference_reader):
        assert len(a_bunch.products) == len(b_bunch.products)
        aprec = a_bunch.precursor
        bprec = b_bunch.precursor
        assert aprec.id == bprec.id
        # The diff is computed up front so it can be embedded in the
        # assertion messages below.
        diffa, diffb = diff_deconvoluted_peak_set(
            aprec.deconvoluted_peak_set, bprec.deconvoluted_peak_set)
        assert len(aprec.deconvoluted_peak_set) == len(
            bprec.deconvoluted_peak_set), "Peak Counts Diff On %r, (%r, %r)" % (aprec.id, diffa, diffb)
        assert aprec.deconvoluted_peak_set == bprec.deconvoluted_peak_set, "Peaks Diff On %r, (%r, %r)" % (
            aprec.id, diffa, diffb)

        for aprod, bprod in zip(a_bunch.products, b_bunch.products):
            assert aprod.id == bprod.id
            diffa, diffb = diff_deconvoluted_peak_set(aprod.deconvoluted_peak_set, bprod.deconvoluted_peak_set)
            assert len(aprod.deconvoluted_peak_set) == len(
                bprod.deconvoluted_peak_set), "Peak Counts Diff On %r, (%r, %r)" % (aprod.id, diffa, diffb)
            # The message needs three placeholders for its three arguments;
            # with a single %r a failing comparison raised TypeError instead
            # of reporting the peak difference.
            assert aprod.deconvoluted_peak_set == bprod.deconvoluted_peak_set, "Peaks Diff On %r, (%r, %r)" % (
                aprod.id, diffa, diffb)

    result_reader.close()
    reference_reader.close()
    os.remove(outpath)
示例#3
0
def run_ms_deisotope():
    """Regenerate the compressed reference file used by ``test_ms_deisotope``.

    Runs the ``deisotope`` command to write the uncompressed reference, then
    runs ``idzip_compression`` to write ``<reference>.gz``, printing the exit
    code of each step, and finally deletes the uncompressed intermediate.
    """
    runner = CliRunner(mix_stderr=False)
    path = datafile("20150710_3um_AGP_001_29_30.mzML.gz")
    reference = datafile("20150710_3um_AGP_001_29_30.preprocessed.mzML")
    result = runner.invoke(
        deisotoper.deisotope,
        ["-b", 0, "-t", 20, "-tn", 10, "-m", 3, "-mn", 1, path, reference])
    # The exit code lives on the invocation result, not on the runner.
    print(result.exit_code)
    print(result.stdout)
    # CLI arguments must be passed as one list; passed positionally, "-o"
    # and the output path would land in invoke()'s ``input`` and ``env``
    # parameters instead.
    result = runner.invoke(
        indexing.idzip_compression,
        [reference, "-o", reference + '.gz'])
    # Report the compression step's own exit code.
    print(result.exit_code)
    os.remove(reference)
class TestFileMetadata(unittest.TestCase):
    """Checks on file-level metadata for the ``three_test_scans.mzML`` fixture."""

    path = datafile("three_test_scans.mzML")

    @property
    def reader(self):
        """Open a new loader over the fixture on every access."""
        return infer_type.MSFileLoader(self.path)

    def test_file_information(self):
        """The file description lists MS1 spectra and the expected id format."""
        loader = self.reader
        description = loader.file_description()
        assert "MS1 spectrum" in description
        assert loader.id_format == "no nativeID format"

    def test_source_file(self):
        """Format guessing, checksum handling and copying of ``SourceFile``."""
        source_file_type = file_information.SourceFile

        id_fmt, fmt = source_file_type.guess_format(self.path)
        assert fmt == "mzML format"
        assert id_fmt == "no nativeID format"

        source_file = source_file_type.from_path(self.path)
        assert not source_file.has_checksum()
        source_file.add_checksum('sha1')
        # After adding, both the specific and the generic query must succeed.
        assert source_file.has_checksum('sha1')
        assert source_file.has_checksum()
        assert source_file.parameters["SHA-1"] == source_file.checksum("sha1")
        assert source_file.validate_checksum()

        duplicate = source_file.copy()
        assert source_file == duplicate
示例#5
0
class TestMzXMLLoaderScanBehavior(unittest.TestCase):
    """Scan-level checks against the ``microscans.mzXML`` fixture."""

    path = datafile("microscans.mzXML")

    @property
    def reader(self):
        """Open a new loader over the fixture on every access."""
        return infer_type.MSFileLoader(self.path)

    @property
    def first_scan(self):
        """Return the precursor of the first item yielded by a new loader."""
        # Use the builtin next(), matching test_id, rather than calling a
        # Python-2-style ``.next()`` method on the loader.
        return next(self.reader).precursor

    def test_id(self):
        """The first precursor has id "210" and can be fetched by that id."""
        loader = self.reader
        scan = next(loader).precursor
        self.assertEqual(scan.id, "210")
        scan = loader.get_scan_by_id("210")
        self.assertEqual(scan.id, "210")

    def test_polarity(self):
        """The first scan reports polarity 1."""
        self.assertEqual(self.first_scan.polarity, 1)

    def test_index(self):
        # NOTE(review): this asserts polarity, exactly like test_polarity,
        # and never inspects ``index`` - looks like a copy-paste slip;
        # confirm the intended expected index before changing the assertion.
        self.assertEqual(self.first_scan.polarity, 1)

    def test_arrays(self):
        """The first scan exposes two arrays."""
        self.assertEqual(len(self.first_scan.arrays), 2)

    def test_precursor_info(self):
        """The first scan carries no precursor information."""
        self.assertEqual(self.first_scan.precursor_information, None)
class TestScanTraits(unittest.TestCase):
    """Checks on acquisition and isolation-window traits of the first bunch."""

    path = datafile("three_test_scans.mzML")

    @property
    def reader(self):
        """Open a new loader over the fixture on every access."""
        return infer_type.MSFileLoader(self.path)

    def test_traits(self):
        """Inspect scan windows on the precursor and the isolation window on a product."""
        bunch = next(self.reader)

        # Precursor: one scan event holding one scan window of 350-1500.
        precursor = bunch.precursor
        acquisition = precursor.acquisition_information
        assert len(acquisition) == 1
        event = acquisition[0]
        assert not event.has_ion_mobility()
        assert len(event) == 1
        window = event[0]
        assert window.lower == 350
        assert window.upper == 1500
        assert not window.is_empty()
        assert scan_traits.ScanWindow(0, 0).is_empty()
        assert scan_traits.ScanWindow(350, 1500) == window
        assert window == event.total_scan_window()
        # Self-comparisons exercise the equality implementations.
        assert event == event
        assert acquisition == acquisition

        # First product: its isolation window is populated.
        product = bunch.products[0]
        isolation = product.isolation_window
        assert not isolation.is_empty()
        assert scan_traits.IsolationWindow(None, 200, None).is_empty()
        assert scan_traits.IsolationWindow(0, 200, 0).is_empty()
        assert isolation.spans(isolation.target)
        assert isolation == isolation
示例#7
0
def test_mgf():
    """Convert ``small.mzML`` to MGF on stdout, plain and gzip-compressed.

    Both runs must emit 34 lines containing "BEGIN", and no "-idx.json"
    file may exist in the working directory before or after either run.
    """
    orphan_index = "-idx.json"
    runner = CliRunner(mix_stderr=False)
    if os.path.exists(orphan_index):
        raise IOError("Orphan index file exists before running test")
    path = datafile("small.mzML")

    # Plain text output.
    result = runner.invoke(conversion.mgf, [path, '-'], catch_exceptions=False)
    begin_count = sum(
        1 for text_line in result.output.splitlines() if "BEGIN" in text_line)
    assert begin_count == 34
    if os.path.exists(orphan_index):
        raise IOError("Orphan index file exists after running uncompressed test")

    # Gzip-compressed output, requested with -z.
    result = runner.invoke(conversion.mgf, [path, '-z', '-'])
    assert _compression.starts_with_gz_magic(result.stdout_bytes)
    compressed = io.BytesIO(result.stdout_bytes)
    reader = _compression.GzipFile(fileobj=compressed, mode='rb')
    begin_count = sum(1 for raw_line in reader if b"BEGIN" in raw_line)
    assert begin_count == 34
    if os.path.exists(orphan_index):
        raise IOError("Orphan index file exists after running compressed test")
示例#8
0
    def test_extraction_quick_charge(self):
        """Run one deconvolution step with ``use_quick_charge=True`` and compare
        the averagine state and the cluster for the target peak to stored values.
        """
        target_peak, deconvoluter = self.build_deconvoluter(
            self.scan, peptide, use_quick_charge=True)
        deconvoluter._deconvolution_step(
            0, truncate_after=0.8, charge_range=(1, 4))

        reference_path = datafile("extraction_quick_charge_averagine.pkl")
        with open(reference_path, 'rb') as fh:
            reference_averagine = pickle.load(fh)

        # No backend entry may be missing from the reference.
        unexpected = set(deconvoluter.averagine.backend) - set(
            reference_averagine.backend)
        assert len(unexpected) == 0
        assert len(deconvoluter.averagine.backend) == 5134
        assert reference_averagine == deconvoluter.averagine

        network = deconvoluter.peak_dependency_network
        cluster = network.find_cluster_for(target_peak)
        spanning_fits = cluster.fits_using_mz(target_peak.mz)
        assert len(cluster) == 1
        assert len(spanning_fits) == 1
        best = cluster.best_fit
        assert np.isclose(best.monoisotopic_peak.mz, 138.53090)
        assert cluster.best_fit.charge == 4
示例#9
0
class TestMzMLbLoaderScanBehavior(unittest.TestCase):
    """Compare ``MzMLbLoader`` against a reference loader on the same run."""

    path = datafile("20150710_3um_AGP_001_29_30.mzMLb")
    # NOTE(review): ref_path points at the same .mzMLb file as ``path``, so
    # the "reference" comparisons below compare the file with itself -
    # confirm whether a different reference format was intended.
    ref_path = datafile("20150710_3um_AGP_001_29_30.mzMLb")
    reader = None
    reference_reader = None

    @classmethod
    def setUpClass(cls):
        """Open both loaders once for the whole test class."""
        cls.reader = MzMLbLoader(cls.path)
        cls.reference_reader = infer_type.MSFileLoader(cls.ref_path)
        super(TestMzMLbLoaderScanBehavior, cls).setUpClass()

    @classmethod
    def tearDownClass(cls):
        """Close both shared loaders."""
        cls.reader.close()
        cls.reference_reader.close()
        super(TestMzMLbLoaderScanBehavior, cls).tearDownClass()

    def test_infer(self):
        """Type inference on the fixture path yields an ``MzMLbLoader``."""
        inferred = infer_type.MSFileLoader(self.path)
        assert isinstance(inferred, MzMLbLoader)

    def test_get_by_id_equiv(self):
        """Both loaders return equal scans for the same scan id."""
        scan_id = "scanId=1740226"
        from_reader = self.reader.get_scan_by_id(scan_id)
        from_reference = self.reference_reader.get_scan_by_id(scan_id)
        assert from_reader == from_reference

    def test_start_from_equiv(self):
        """Iteration restarted at the midpoint scan's time yields equal items."""
        reader = self.reader
        reference_reader = self.reference_reader

        midpoint_time = reader[len(reader) // 2].scan_time
        reader.start_from_scan(rt=midpoint_time)
        reference_reader.start_from_scan(rt=midpoint_time)
        # Only the first six pairs are compared.
        for seen, (ours, theirs) in enumerate(zip(reader, reference_reader), 1):
            assert ours == theirs
            if seen > 5:
                break
示例#10
0
def test_describe():
    """The ``describe`` command prints the expected first four lines for ``small.mzML``."""
    runner = CliRunner(mix_stderr=False)

    path = datafile("small.mzML")
    report = runner.invoke(indexing.describe, [path]).output.splitlines()
    assert "small.mzML" in report[0]
    # Lines 1-3 must match exactly.
    expected_lines = (
        "File Format: mzML format",
        "ID Format: Thermo nativeID format",
        "Format Supports Random Access: True",
    )
    for position, expected in enumerate(expected_lines, 1):
        assert report[position] == expected
示例#11
0
def test_task():
    """Run ``describe`` on ``small.mzML``, echo its output, and check the first four lines."""
    runner = CliRunner()

    path = datafile("small.mzML")
    outcome = runner.invoke(indexing.describe, [path])
    # Echo the raw output before asserting on it.
    print(outcome.output)
    report = outcome.output.splitlines()
    assert "small.mzML" in report[0]
    expected_lines = (
        "File Format: mzML format",
        "ID Format: Thermo nativeID format",
        "Format Supports Random Access: True",
    )
    for position, expected in enumerate(expected_lines, 1):
        assert report[position] == expected
示例#12
0
class TestFancyIterator(unittest.TestCase):
    """Checks on the iterator wrappers from ``query`` over a compressed mzML fixture."""

    complex_compressed_mzml = datafile("20150710_3um_AGP_001_29_30.mzML.gz")

    def _get_reader(self):
        """Open a new loader over the compressed fixture."""
        return ms_deisotope.MSFileLoader(self.complex_compressed_mzml)

    def test_time_interval_iterator(self):
        """Iterating the 29.5-31 time interval yields 24 items starting near 29.5."""
        interval = query.TimeIntervalIterator(self._get_reader(), 29.5, 31)
        assert interval.has_ms1_scans()
        assert interval.has_msn_scans()
        seen = 0
        for seen, (precursor, products) in enumerate(interval, 1):
            if seen == 1:
                # The first precursor must sit right at the interval start.
                assert abs(precursor.scan_time - 29.5) < 1e-2
            else:
                assert precursor.scan_time >= 29.5
        assert seen == 24

    def test_index_interval_iterator(self):
        """Iterating up to index 31 starts at 0 and yields 6 items."""
        interval = query.IndexIntervalIterator(self._get_reader(), end=31)
        assert interval.has_ms1_scans()
        assert interval.has_msn_scans()
        assert interval.start == 0

        seen = 0
        for seen, (precursor, products) in enumerate(interval, 1):
            assert precursor.index <= 31
        assert seen == 6

    def test_ms_level_filter(self):
        """Filtering to MS level 2 yields 52 batches, none with a precursor."""
        level_filter = query.MSLevelFilter(self._get_reader(), 2)
        seen = 0
        for seen, batch in enumerate(level_filter, 1):
            assert batch.precursor is None
        assert seen == 52

    def test_ms1_merger(self):
        """The MS1 merging transformer yields 10 batches with 260 products in total."""
        merger = query.MS1MergingTransformer(self._get_reader())
        batch_total = 0
        product_total = 0
        for batch in merger:
            batch_total += 1
            product_total += len(batch.products)
        assert batch_total == 10
        assert product_total == 260
示例#13
0
class TestScanProcessor(unittest.TestCase):
    """Smoke test for ``processor.ScanProcessor`` over a small mzML fixture."""

    mzml_path = datafile("three_test_scans.mzML")

    def test_processor(self):
        """Every bunch produced has a non-None precursor and products."""
        ms1_args = {
            "averagine": glycopeptide,
            "scorer": PenalizedMSDeconVFitter(5., 2.),
        }
        proc = processor.ScanProcessor(
            self.mzml_path, ms1_deconvolution_args=ms1_args)
        for bunch in iter(proc):
            for item in (bunch, bunch.precursor, bunch.products):
                self.assertIsNotNone(item)
示例#14
0
class TestMzXMLLoaderScanBehavior(unittest.TestCase):
    """Scan-level and file-level checks against the ``microscans.mzXML`` fixture."""

    path = datafile("microscans.mzXML")

    @property
    def reader(self):
        """Open a new loader over the fixture on every access."""
        return infer_type.MSFileLoader(self.path)

    @property
    def first_scan(self):
        """Return the precursor of the first item yielded by a new loader."""
        # Use the builtin next(), as the other tests in this class do,
        # rather than calling a Python-2-style ``.next()`` method.
        return next(self.reader).precursor

    def test_id(self):
        """The first precursor has id "210" and can be fetched by that id."""
        loader = self.reader
        scan = next(loader).precursor
        self.assertEqual(scan.id, "210")
        scan = loader.get_scan_by_id("210")
        self.assertEqual(scan.id, "210")

    def test_start_from_scan(self):
        """Starting by retention time and by index land on the same precursor."""
        loader = self.reader
        time = 0.4856916666666667
        bunch = next(loader.start_from_scan(rt=time))
        self.assertAlmostEqual(bunch.precursor.scan_time, time, 3)
        ix = bunch.precursor.index
        assert next(
            loader.start_from_scan(index=ix)).precursor == bunch.precursor

    def test_polarity(self):
        """The first scan reports polarity 1."""
        self.assertEqual(self.first_scan.polarity, 1)

    def test_index(self):
        # NOTE(review): this asserts polarity, exactly like test_polarity,
        # and never inspects ``index`` - looks like a copy-paste slip;
        # confirm the intended expected index before changing the assertion.
        self.assertEqual(self.first_scan.polarity, 1)

    def test_arrays(self):
        """The first scan exposes two arrays."""
        self.assertEqual(len(self.first_scan.arrays), 2)

    def test_precursor_info(self):
        """The first scan carries no precursor information."""
        self.assertEqual(self.first_scan.precursor_information, None)

    def test_file_description(self):
        """The first source file has the expected name and no "location" parameter."""
        file_info = self.reader.file_description()
        source_file = file_info.source_files[0]
        assert source_file.name == "AGP_tryptic_300ng_3microscans_glycoproteomics_nCE_27-35.raw"
        assert "location" not in source_file.parameters

    def test_data_processing(self):
        """The file declares two data-processing entries."""
        proc_info = self.reader.data_processing()
        assert len(proc_info) == 2
        assert len(proc_info) == 2
class TestMemoryScanSource(unittest.TestCase):
    """Iteration check for ``memory.MemoryScanLoader`` built from an mzML loader."""

    path = datafile("three_test_scans.mzML")

    @property
    def source_reader(self):
        """Open a new file-backed loader over the fixture."""
        return infer_type.MSFileLoader(self.path)

    @property
    def prepare_source(self):
        """Build a ``MemoryScanLoader`` from a new file-backed loader."""
        return memory.MemoryScanLoader.build(self.source_reader)

    def test_iteration(self):
        """The first bunch's precursor and products follow the order of ``scan_ids``."""
        expected_ids = iter(scan_ids)
        first_bunch = next(self.prepare_source)
        assert first_bunch.precursor.id == next(expected_ids)
        for child in first_bunch.products:
            assert child.id == next(expected_ids)
示例#16
0
class TestMGFLoaderScanBehavior(unittest.TestCase):
    """Index and scan-interface checks against the ``small.mgf`` fixture."""

    path = datafile("small.mgf")

    @property
    def reader(self):
        """Open a new loader over the fixture on every access."""
        return infer_type.MSFileLoader(self.path)

    def test_index(self):
        """The index holds 34 entries and lookup by id returns the matching scan."""
        loader = self.reader
        assert len(loader.index) == 34
        wanted = 'small.10.10'
        found = loader.get_scan_by_id(wanted)
        assert found.id == wanted

    def test_scan_interface(self):
        """The first item is a centroided ``Scan`` with no precursor scan id."""
        first = next(self.reader)
        assert isinstance(first, Scan)
        assert not first.is_profile
        assert first.precursor_information.precursor_scan_id is None
示例#17
0
def test_mzml():
    """Convert ``small.mzML`` to mzML on stdout, plain and gzip-compressed.

    Both outputs must load with ``MzMLLoader`` and report 48 entries, and no
    "-idx.json" file may exist in the working directory before or after
    either run.
    """
    orphan_index = "-idx.json"
    runner = CliRunner(mix_stderr=False)
    if os.path.exists(orphan_index):
        raise IOError("Orphan index file exists before running test")
    path = datafile("small.mzML")

    # Plain output: re-encode the captured text and parse it back.
    plain = runner.invoke(conversion.mzml, ['-p', '-c', path, '-'])
    plain_reader = MzMLLoader(io.BytesIO(plain.output.encode("utf-8")))
    assert len(plain_reader) == 48
    if os.path.exists(orphan_index):
        raise IOError(
            "Orphan index file exists after running uncompressed test")

    # Compressed output, requested with -z: parse the raw stdout bytes.
    packed = runner.invoke(
        conversion.mzml, ['-p', '-z', '-c', path, '-'], catch_exceptions=False)
    packed_stream = io.BytesIO(packed.stdout_bytes)
    packed_reader = MzMLLoader(_compression.get_opener(packed_stream))
    assert len(packed_reader) == 48
    if os.path.exists(orphan_index):
        raise IOError("Orphan index file exists after running compressed test")
示例#18
0
    def test_extraction(self):
        """Run one deconvolution step with the plain ``peptide`` averagine and
        compare the averagine state and the target peak's cluster to stored values.
        """
        target_peak, deconvoluter = self.build_deconvoluter(self.scan, peptide)
        deconvoluter._deconvolution_step(
            0, truncate_after=0.8, charge_range=(1, 4))

        reference_path = datafile("extraction_base_averagine.pkl")
        with open(reference_path, 'rb') as fh:
            reference_averagine = pickle.load(fh)

        # No backend entry may be missing from the reference.
        unexpected = set(deconvoluter.averagine.backend) - set(
            reference_averagine.backend)
        assert len(unexpected) == 0
        assert len(deconvoluter.averagine.backend) == 8865
        assert reference_averagine == deconvoluter.averagine

        network = deconvoluter.peak_dependency_network
        cluster = network.find_cluster_for(target_peak)
        spanning_fits = cluster.fits_using_mz(target_peak.mz)
        assert len(cluster) == 4
        assert len(spanning_fits) == 2
        best = cluster.best_fit
        assert np.isclose(best.monoisotopic_peak.mz, 138.19520)
        assert cluster.best_fit.charge == 3
示例#19
0
class TestMGFLoaderScanBehavior(unittest.TestCase):
    """Index, lookup and scan-interface checks against the ``small.mgf`` fixture."""

    path = datafile("small.mgf")

    @property
    def reader(self):
        """Open a new loader over the fixture on every access."""
        return infer_type.MSFileLoader(self.path)

    def test_source_file_name(self):
        """The loader reports a source file name ending in "small.mgf"."""
        assert self.reader.source_file_name.endswith("small.mgf")

    def test_index(self):
        """Lookup by id and by position both return the matching scan."""
        loader = self.reader
        assert len(loader.index) == 34
        wanted = 'small.10.10'
        by_id = loader.get_scan_by_id(wanted)
        assert by_id.id == wanted
        by_position = loader[10]
        assert by_position.index == 10

    def test_get_time(self):
        """Time 0.3 resolves to scan 'small.31.31' by lookup and by restart."""
        loader = self.reader
        expected_id = 'small.31.31'
        looked_up = loader.get_scan_by_time(0.3)
        assert looked_up.id == expected_id
        restarted = next(loader.start_from_scan(rt=0.3, grouped=False))
        assert restarted.id == expected_id

    def test_annotations(self):
        """The scan at position 10 has an empty annotations mapping."""
        tenth = self.reader[10]
        assert tenth.annotations == {}

    def test_scan_interface(self):
        """The first item is a centroided ``Scan`` with no precursor scan id."""
        first = next(self.reader)
        assert isinstance(first, Scan)
        assert not first.is_profile
        assert first.precursor_information.precursor_scan_id is None
示例#20
0
    def test_extraction_cached_averagine(self):
        """Run one deconvolution step with a pre-populated ``AveragineCache`` and
        compare the averagine state and the target peak's cluster to stored values.
        """
        scan = self.scan
        averagine_cache = AveragineCache(peptide)
        averagine_cache.populate(truncate_after=0.8)
        target_peak, deconvoluter = self.build_deconvoluter(
            scan, averagine_cache)
        deconvoluter._deconvolution_step(
            0, truncate_after=0.8, charge_range=(1, 4))

        reference_path = datafile("extraction_cached_averagine.pkl")
        with open(reference_path, 'rb') as fh:
            reference_averagine = pickle.load(fh)

        # No backend entry may be missing from the reference.
        unexpected = set(deconvoluter.averagine.backend) - set(
            reference_averagine.backend)
        assert len(unexpected) == 0
        assert len(deconvoluter.averagine.backend) == 23960
        assert reference_averagine == deconvoluter.averagine

        network = deconvoluter.peak_dependency_network
        cluster = network.find_cluster_for(target_peak)
        spanning_fits = cluster.fits_using_mz(target_peak.mz)
        assert len(cluster) == 4
        assert len(spanning_fits) == 2
        best = cluster.best_fit
        assert np.isclose(best.monoisotopic_peak.mz, 138.19520)
        assert cluster.best_fit.charge == 3
示例#21
0
class TestScanClustering(unittest.TestCase):
    """Cluster-count check for ``scan_clustering.cluster_scans`` on a preprocessed file."""

    path = datafile(
        "AGP_tryptic_300ng_2microscans_glycoproteomics_nCE_27-30.preprocessed.mzML.gz"
    )

    @property
    def reader(self):
        """Open a new ``ProcessedMzMLDeserializer`` over the fixture."""
        return ProcessedMzMLDeserializer(get_opener(self.path))

    def load_msms_scans(self, reader):
        """Fetch every scan whose id is a key of the extended index's ``msn_ids``."""
        msn_ids = reader.extended_index.msn_ids.keys()
        return [reader.get_scan_by_id(scan_id) for scan_id in msn_ids]

    def cluster_scans(self, scans):
        """Delegate to ``scan_clustering.cluster_scans``."""
        return scan_clustering.cluster_scans(scans)

    def test_cluster_scans(self):
        """Clustering all loaded scans yields 1124 clusters."""
        msn_scans = self.load_msms_scans(self.reader)
        assert len(self.cluster_scans(msn_scans)) == 1124
 def get_scan(self):
     """Build a ``common.Scan`` from the ``test_scan.pkl.gz`` fixture."""
     raw_scan = gzload(datafile("test_scan.pkl.gz"))
     return common.Scan(raw_scan, mzml.MzMLDataInterface())
 def make_scan(self):
     """Return the first item yielded by a loader over the compressed mzML fixture."""
     fixture_path = datafile("20150710_3um_AGP_001_29_30.mzML.gz")
     loader = MSFileLoader(fixture_path)
     return next(loader)
 def make_scan(self):
     """Open the compressed mzML fixture and hand back the first item it yields."""
     source = MSFileLoader(datafile("20150710_3um_AGP_001_29_30.mzML.gz"))
     first_item = next(source)
     return first_item
import unittest

import ms_deisotope
from ms_deisotope.feature_map import feature_map
from ms_deisotope.test.common import datafile

# Path of the gzip-compressed mzML fixture that LCMSFeatureMapTest.setUpClass
# opens to build its feature forest.
complex_compressed_mzml = datafile("20150710_3um_AGP_001_29_30.mzML.gz")


class LCMSFeatureMapTest(unittest.TestCase):
    """Checks on an ``LCMSFeatureForest`` built once from the compressed mzML fixture."""

    features = None

    @classmethod
    def setUpClass(cls):
        """Build the feature forest once and share it across the tests."""
        source = ms_deisotope.MSFileLoader(complex_compressed_mzml)
        cls.features = feature_map.LCMSFeatureForest.from_reader(source)

    @classmethod
    def tearDownClass(cls):
        """Drop the shared forest reference."""
        cls.features = None

    def test_forest(self):
        """The forest holds 4151 features and a search at 1161.50875 finds one."""
        forest = self.features
        assert len(forest) == 4151
        hit = forest.search(1161.50875)
        assert hit is not None

    def test_search(self):
        # NOTE(review): this test only reads the shared forest and asserts
        # nothing - it looks unfinished.
        forest = self.features
class TestMzMLSerializer(unittest.TestCase):
    """Round-trip test: write one processed bunch with ``MzMLSerializer`` and read it back."""

    source_data_path = datafile("three_test_scans.mzML")

    def test_writer(self):
        """Serialize the first bunch plus file metadata, then verify the written file.

        The written file is reopened with ``ProcessedMzMLDeserializer`` and
        compared with the source loader: instrument analyzers, scan ids,
        acquisition information, product precursor masses, file contents,
        the source file checksum and a precursor-mass lookup in the
        extended index.
        """
        source_reader = MzMLLoader(self.source_data_path)
        fd, name = tempfile.mkstemp()
        # mkstemp returns an already-open OS-level descriptor; the file is
        # reopened by name below, so release the descriptor right away
        # instead of leaking it for the rest of the test run.
        os.close(fd)
        with open(name, 'wb') as fh:
            writer = MzMLSerializer(fh,
                                    n_spectra=len(source_reader.index),
                                    deconvoluted=True)
            description = source_reader.file_description()
            writer.add_file_information(description)
            # Add then remove "profile spectrum" so that only "centroid
            # spectrum" remains, which is asserted after reading back.
            writer.add_file_contents("profile spectrum")
            writer.add_file_contents("centroid spectrum")
            writer.remove_file_contents("profile spectrum")

            instrument_configs = source_reader.instrument_configuration()
            for config in instrument_configs:
                writer.add_instrument_configuration(config)

            software_list = source_reader.software_list()
            for software in software_list:
                writer.add_software(software)

            data_processing_list = source_reader.data_processing()
            for dp in data_processing_list:
                writer.add_data_processing(dp)

            processing = writer.build_processing_method()
            writer.add_data_processing(processing)
            bunch = next(source_reader)
            bunch.precursor.pick_peaks()
            bunch.precursor.deconvolute()
            for product in bunch.products:
                product.pick_peaks()
                product.deconvolute()
            writer.save(bunch)
            writer.complete()
            fh.flush()
            writer.format()
        source_reader.reset()
        processed_reader = ProcessedMzMLDeserializer(
            _compression.get_opener(writer.handle.name))

        for a, b in zip(source_reader.instrument_configuration(),
                        processed_reader.instrument_configuration()):
            assert a.analyzers == b.analyzers
        for a, b in zip(source_reader, processed_reader):
            assert a.precursor.id == b.precursor.id
            assert (a.precursor.acquisition_information ==
                    b.precursor.acquisition_information)
            for an, bn in zip(a.products, b.products):
                assert an.id == bn.id
                assert abs(an.precursor_information.neutral_mass -
                           bn.precursor_information.neutral_mass) < 1e-6
        processed_reader.reset()
        description = processed_reader.file_description()
        assert "profile spectrum" not in description.contents
        assert "centroid spectrum" in description.contents
        sf = description.source_files[0]
        assert 'location' not in sf.parameters
        assert sf.parameters[
            'SHA-1'] == 'a2a091b82f27676da87a6c7d17cc90d2d90b8fbf'
        index = processed_reader.extended_index
        pinfo = index.find_msms_by_precursor_mass(
            ms_deisotope.neutral_mass(562.7397, 2))
        assert len(pinfo) > 0

        processed_reader.close()
        # Best-effort cleanup. Each file is removed in its own try block so
        # that failing to delete the first does not skip the second.
        try:
            os.remove(name)
        except OSError:
            pass
        try:
            os.remove(processed_reader._index_file_name)
        except OSError:
            pass
示例#27
0
class TestMzMLLoaderScanBehavior(unittest.TestCase):
    """Scan-level behaviour checks against the ``three_test_scans.mzML`` fixture."""

    path = datafile("three_test_scans.mzML")

    @property
    def reader(self):
        """Open a new loader over the fixture on every access."""
        return infer_type.MSFileLoader(self.path)

    def test_iteration(self):
        """Grouped iteration yields three scans in the first bunch; ungrouped starts at index 0."""
        loader = self.reader
        first_bunch = next(loader)
        scan_total = (1 if first_bunch.precursor else 0) + len(
            first_bunch.products)
        self.assertEqual(scan_total, 3)

        # Switch to ungrouped iteration and re-read from the beginning.
        loader.reset()
        loader.make_iterator(grouped=False)

        first_scan = next(loader)
        first_scan._load()
        self.assertEqual(first_scan.index, 0)

        loader.close()

    def test_index(self):
        """The precursor has index 0 and the products count up from 1."""
        loader = self.reader
        first_bunch = next(loader)
        self.assertEqual(first_bunch.precursor.index, 0)
        for expected_index, product in enumerate(first_bunch.products, 1):
            self.assertEqual(product.index, expected_index)
        loader.close()

    def test_ms_level(self):
        """The precursor is MS level 1 and every product is MS level 2."""
        loader = self.reader
        first_bunch = next(loader)
        self.assertEqual(first_bunch.precursor.ms_level, 1)
        for product in first_bunch.products:
            self.assertEqual(product.ms_level, 2)
        loader.close()

    def test_polarity(self):
        """The precursor reports polarity 1."""
        loader = self.reader
        first_bunch = next(loader)
        self.assertEqual(first_bunch.precursor.polarity, 1)
        loader.close()

    def test_activation(self):
        """The precursor has no activation; every product names the expected method."""
        loader = self.reader
        first_bunch = next(loader)
        self.assertEqual(first_bunch.precursor.activation, None)
        expected_method = "beam-type collision-induced dissociation"
        for product in first_bunch.products:
            self.assertNotEqual(product.activation, None)
            self.assertEqual(product.activation.method, expected_method)
        loader.close()

    def test_precursor(self):
        """Products point back at the bunch precursor; the precursor has none of its own."""
        loader = self.reader
        first_bunch = next(loader)
        parent = first_bunch.precursor
        self.assertEqual(parent.precursor_information, None)
        for product in first_bunch.products:
            self.assertNotEqual(product.precursor_information, None)
            self.assertEqual(product.precursor_information.precursor,
                             first_bunch.precursor)
        loader.close()

    def test_pick_peaks(self):
        """Peak picking on the precursor produces 2107 peaks."""
        loader = self.reader
        first_bunch = next(loader)
        picked = first_bunch.precursor.pick_peaks()
        self.assertEqual(len(picked.peak_set), 2107)
        loader.close()

    def test_pack(self):
        """Packing a peak-picked precursor preserves its title."""
        loader = self.reader
        first_bunch = next(loader)
        first_bunch.precursor.pick_peaks()
        packed = first_bunch.precursor.pack()
        self.assertEqual(packed.title, first_bunch.precursor.title)
        loader.close()

    def test_get_scan_by_id(self):
        """Lookup by id returns the right scans and the same precursor object on repeat."""
        loader = self.reader
        parent_id = scan_ids[0]
        child_id = scan_ids[2]

        parent = loader.get_scan_by_id(parent_id)
        self.assertEqual(parent.id, scan_ids[0])
        self.assertEqual(parent.index, 0)

        child = loader.get_scan_by_id(child_id)
        self.assertEqual(child.id, scan_ids[2])
        self.assertEqual(child.index, 2)

        self.assertEqual(child.precursor_information.precursor_scan_id,
                         scan_ids[0])
        # A repeated lookup must hand back the identical object.
        self.assertIs(parent, loader.get_scan_by_id(scan_ids[0]))
        loader.close()

    def test_get_scan_by_index(self):
        """Lookup by index 0 returns the first scan id."""
        loader = self.reader

        parent = loader.get_scan_by_index(0)
        self.assertEqual(parent.index, 0)
        self.assertEqual(parent.id, scan_ids[0])
        loader.close()

    def test_get_scan_by_time(self):
        """Lookup by time resolves to the precursor and to the product at index 1."""
        loader = self.reader
        parent = loader.get_scan_by_time(22.12829)
        self.assertEqual(parent.id, scan_ids[0])

        child = loader.get_scan_by_time(22.132753)
        self.assertEqual(child.index, 1)
        loader.close()
 def get_scan(self):
     """Wrap the data loaded from ``test_scan.pkl.gz`` in a ``common.Scan``."""
     payload = gzload(datafile("test_scan.pkl.gz"))
     interface = mzml.MzMLDataInterface()
     return common.Scan(payload, interface)
 def get_reference(self):
     """Load and return the object stored in ``test_scan_results.pkl.gz``."""
     return gzload(datafile("test_scan_results.pkl.gz"))
示例#30
0
class TestMzMLLoaderScanBehavior(unittest.TestCase):
    """Exercise scan access and metadata parsing of the mzML loader.

    Most tests read the three-scan fixture at ``path``; the tests that
    inspect file-level metadata of a second file use ``only_ms2_path``.
    Every test that opens a loader closes it again once its assertions
    have run.
    """
    path = datafile("three_test_scans.mzML")
    only_ms2_path = datafile("only_ms2_mzml.mzML")

    @property
    def reader(self):
        # A new loader is created on every access, and its index is
        # sanity-checked before it is handed to the test: three entries,
        # the first one matching ``scan_ids[0]``, and the index sequence
        # already ordered by the second element of each entry.
        reader = infer_type.MSFileLoader(self.path)
        assert len(reader.index) == 3
        assert reader.index.from_index(0) == scan_ids[0]
        assert list(reader.index.index_sequence) == sorted(
            reader.index.index_sequence, key=lambda x: x[1])
        return reader

    def test_index_building(self):
        # Build the byte offset file, then check that a parser picks it up
        # and exposes the spectrum ids in the expected order.
        MzMLLoader.prebuild_byte_offset_file(self.path)
        parser = MzMLLoader._parser_cls(self.path)
        offset_file_name = parser._byte_offset_filename
        try:
            assert parser._check_has_byte_offset_file()
            index = parser.index
            offsets = index['spectrum']
            key_list = list(offsets.keys())
            assert key_list == scan_ids
        finally:
            # Remove the generated offset file even when an assertion
            # fails, so it cannot influence other tests. Removal stays
            # best-effort: a missing file is not an error.
            try:
                os.remove(offset_file_name)
            except OSError:
                pass

    def test_index_integrity(self):
        # In ungrouped iteration each scan's index equals its position.
        reader = self.reader
        reader.make_iterator(grouped=False)
        for i, scan in enumerate(reader):
            assert i == scan.index
        reader.close()

    def test_iteration(self):
        # Grouped iteration: the first bunch holds all three scans.
        reader = self.reader
        i = 0
        bunch = next(reader)
        if bunch.precursor:
            i += 1
        i += len(bunch.products)
        self.assertEqual(i, 3)

        # Ungrouped iteration after a reset starts again at index 0.
        reader.reset()
        reader.make_iterator(grouped=False)

        scan = next(reader)
        scan._load()
        self.assertEqual(scan.index, 0)

        reader.close()

    def test_index(self):
        # The precursor has index 0; products follow as 1, 2, ...
        reader = self.reader
        bunch = next(reader)
        self.assertEqual(bunch.precursor.index, 0)
        for i, scan in enumerate(bunch.products, 1):
            self.assertEqual(scan.index, i)
        reader.close()

    def test_ms_level(self):
        # The precursor is MS level 1, every product MS level 2.
        reader = self.reader
        bunch = next(reader)
        self.assertEqual(bunch.precursor.ms_level, 1)
        for scan in bunch.products:
            self.assertEqual(scan.ms_level, 2)
        reader.close()

    def test_polarity(self):
        reader = self.reader
        bunch = next(reader)
        self.assertEqual(bunch.precursor.polarity, 1)
        reader.close()

    def test_activation(self):
        # Only product scans carry activation information.
        reader = self.reader
        bunch = next(reader)
        self.assertEqual(bunch.precursor.activation, None)
        for product in bunch.products:
            self.assertNotEqual(product.activation, None)
            self.assertEqual(product.activation.method, "beam-type collision-induced dissociation")
        reader.close()

    def test_precursor(self):
        # Products point back at the bunch's precursor; the precursor
        # itself has no precursor information.
        reader = self.reader
        bunch = next(reader)
        self.assertEqual(bunch.precursor.precursor_information, None)
        for product in bunch.products:
            self.assertNotEqual(product.precursor_information, None)
            self.assertEqual(product.precursor_information.precursor, bunch.precursor)
        reader.close()

    def test_pick_peaks(self):
        reader = self.reader
        bunch = next(reader)
        scan = bunch.precursor.pick_peaks()
        self.assertEqual(len(scan.peak_set), 2108)
        reader.close()

    def test_pack(self):
        # Packing a peak-picked scan keeps its title.
        reader = self.reader
        bunch = next(reader)
        bunch.precursor.pick_peaks()
        self.assertEqual(bunch.precursor.pack().title, bunch.precursor.title)
        reader.close()

    def test_get_scan_by_id(self):
        reader = self.reader
        precursor = reader.get_scan_by_id(scan_ids[0])
        self.assertEqual(precursor.id, scan_ids[0])
        self.assertEqual(precursor.index, 0)

        product = reader.get_scan_by_id(scan_ids[2])
        self.assertEqual(product.id, scan_ids[2])
        self.assertEqual(product.index, 2)

        self.assertEqual(product.precursor_information.precursor_scan_id, scan_ids[0])
        # A repeated lookup must return the identical object.
        self.assertIs(precursor, reader.get_scan_by_id(scan_ids[0]))
        reader.close()

    def test_get_scan_by_index(self):
        reader = self.reader

        precursor = reader.get_scan_by_index(0)
        self.assertEqual(precursor.index, 0)
        self.assertEqual(precursor.id, scan_ids[0])
        reader.close()

    def test_get_scan_by_time(self):
        reader = self.reader
        precursor = reader.get_scan_by_time(22.12829)
        self.assertEqual(precursor.id, scan_ids[0])

        product = reader.get_scan_by_time(22.132753)
        self.assertEqual(product.index, 1)
        # An infinite time must resolve to the last scan.
        scan = reader.get_scan_by_time(float('inf'))
        assert scan == reader[-1]
        reader.close()

    def test_instrument_configuration(self):
        reader = self.reader
        bunch = next(reader)
        precursor = bunch.precursor
        config = precursor.instrument_configuration
        self.assertEqual(config.id, "IC1")
        assert "orbitrap" in config.analyzers
        reader.close()

    def test_file_description(self):
        reader = self.reader
        file_info = reader.file_description()
        assert "MS1 spectrum" in file_info.contents
        assert "MSn spectrum" in file_info.contents
        source_file = file_info.source_files[0]
        assert source_file.name == "three_test_scans.mzML"
        assert "location" not in source_file.parameters
        reader.close()

    def test_acquisition_information(self):
        reader = self.reader
        bunch = next(reader)
        precursor = bunch.precursor
        acquisition = precursor.acquisition_information
        self.assertTrue(
            abs(acquisition[0].start_time - precursor.scan_time) < 1e-3)
        self.assertEqual(len(acquisition), 1)
        window = acquisition[0].total_scan_window()
        # The containment check only runs when the window is truthy.
        if window:
            self.assertIn(precursor.arrays.mz[len(precursor.arrays.mz) // 2], window)
        reader.close()

    def test_annotations(self):
        reader = self.reader
        bunch = next(reader)
        precursor = bunch.precursor
        assert len(precursor.annotations) > 0
        reader.close()

    def test_iteration_mode_detection(self):
        reader = infer_type.MSFileLoader(self.only_ms2_path)
        assert reader.iteration_mode == 'single'
        reader.close()

    def test_source_file_parsing(self):
        reader = self.reader
        finfo = reader.file_description()
        sf = finfo.source_files[0]
        assert sf.name == 'three_test_scans.mzML'
        assert isinstance(sf.path, str)
        reader.close()

        reader = infer_type.MSFileLoader(self.only_ms2_path)
        finfo = reader.file_description()
        sf = finfo.source_files[0]
        assert sf.name == 'analysis.baf'
        assert isinstance(sf.path, str)
        reader.close()

    def test_data_processing_parsing(self):
        reader = infer_type.MSFileLoader(self.only_ms2_path)
        assert len(reader.data_processing()[0]) == 3
        reader.close()

    def test_software_list(self):
        reader = infer_type.MSFileLoader(self.path)
        assert len(reader.software_list()) == 2
        reader.close()
示例#31
0
import ms_deisotope

from ms_deisotope.feature_map import quick_index
from ms_deisotope.test.common import datafile

# Path of the mzML fixture, resolved through the shared ``datafile`` helper.
mzml_path = datafile("small.mzML")


def test_quick_index():
    # Index the fixture and check how many MS1 and MSn scan ids it reports.
    loader = ms_deisotope.MSFileLoader(mzml_path)
    built_index, _interval_tree = quick_index.index(loader)
    ms1_count, msn_count = len(built_index.ms1_ids), len(built_index.msn_ids)
    assert ms1_count == 14
    assert msn_count == 34
示例#32
0
 def make_scan():
     # Open the gzipped mzML fixture and fetch one scan by its id.
     fixture_path = datafile("20150710_3um_AGP_001_29_30.mzML.gz")
     loader = MSFileLoader(fixture_path)
     return loader.get_scan_by_id("scanId=1740086")
 def get_reference(self):
     # Return the stored reference result loaded from its gzipped fixture.
     return gzload(datafile("test_scan_results.pkl.gz"))
示例#34
0
class TestThermoRawLoaderScanBehavior(unittest.TestCase):
    path = datafile("small.RAW")

    reference_mzml = datafile("small.mzML")
    reference_mgf = datafile("small.mgf")

    @property
    def reader(self):
        # Each access opens a new loader on the file at ``self.path``.
        loader = infer_type.MSFileLoader(self.path)
        return loader

    def test_iteration(self):
        # Starting from scan=10 must yield the bunch whose precursor is
        # scan=9, followed by the bunch whose precursor is scan=15.
        loader = self.reader
        loader.start_from_scan('controllerType=0 controllerNumber=1 scan=10')
        first_group = next(loader)
        assert first_group.precursor.id == 'controllerType=0 controllerNumber=1 scan=9'
        second_group = next(loader)
        assert second_group.precursor.id == 'controllerType=0 controllerNumber=1 scan=15'

        # Starting from a retention time must land on a precursor
        # acquired at that time.
        loader.start_from_scan(rt=0.077788333333)
        timed_group = next(loader)
        observed_time = timed_group.precursor.scan_time
        assert np.isclose(observed_time, 0.077788333333)

    def test_file_level_metadata(self):
        # The file description must list both spectrum types, and the
        # first instrument configuration must lead with an orbitrap.
        loader = self.reader
        description = loader.file_description()
        assert description.has_content("MS1 spectrum")
        assert description.has_content("MSn spectrum")

        configurations = loader.instrument_configuration()
        first_analyzer = configurations[0].analyzers[0]
        assert first_analyzer == 'orbitrap'

    def test_scan_level_data(self):
        # Check scan-level values of the bunch reached by starting from
        # scan=10: precursor timing, picked peak count and scan window,
        # then the first product's level, index, activation, precursor
        # m/z, annotations and isolation window.
        reader = self.reader
        reader.start_from_scan('controllerType=0 controllerNumber=1 scan=10')
        bunch = next(reader)
        assert np.isclose(bunch.precursor.scan_time, 0.077788333333)
        assert len(
            bunch.precursor.pick_peaks(
                signal_to_noise_threshold=1.5).peak_set) == 3110
        scan_window = bunch.precursor.acquisition_information.scan_list[0][0]
        # Separate asserts so a failure names the bound that is wrong.
        assert scan_window.lower == 200.0
        assert scan_window.upper == 2000.0
        product = bunch.products[0]
        assert product.ms_level == 2
        assert product.index == 9
        assert product.activation.energy == 35.0
        assert np.isclose(product.precursor_information.mz, 810.7528)
        # NOTE(review): the filter string previously read
        # '... Full ms2 [email protected] [210.00-1635.00]', i.e. the
        # '<m/z>@<activation><energy>' token had been replaced by an
        # e-mail obfuscation placeholder and could never match. It is
        # reconstructed here from the isolation target (810.752807) and
        # activation energy (35.0) asserted in this test; the 'cid'
        # method token is inferred -- confirm against the fixture.
        annotations = {
            '[Thermo Trailer Extra]Micro Scan Count':
            3.0,
            '[Thermo Trailer Extra]Scan Event':
            3.0,
            '[Thermo Trailer Extra]Scan Segment':
            1.0,
            'filter_string':
            'ITMS + c ESI d Full ms2 810.75@cid35.00 [210.00-1635.00]'
        }
        assert product.annotations == annotations
        assert np.isclose(product.isolation_window.target, 810.752807)
        assert product.isolation_window.lower == 1.0
        assert not product.is_profile

    def test_size(self):
        # The file holds 48 scans, and an infinite time must resolve to
        # the same scan as index -1.
        loader = self.reader
        scan_count = len(loader)
        assert scan_count == 48
        last_by_index = loader[-1]
        last_by_time = loader.get_scan_by_time(float('inf'))
        assert last_by_index == last_by_time

    def test_compat(self):
        # Cross-check the RAW loader against loaders opened on the
        # reference mzML and MGF files.
        raw_reader = self.reader
        mzml_reader = infer_type.MSFileLoader(self.reference_mzml)
        mgf_reader = infer_type.MSFileLoader(self.reference_mgf)

        # First scan yielded by the MGF reader versus the scan at index 2
        # of the mzML and RAW readers.
        # NOTE(review): presumably these are the same spectrum -- confirm.
        mgf_scan = next(mgf_reader)
        mzml_scan = mzml_reader[2]
        raw_scan = raw_reader[2]

        self.assertEqual(mzml_scan, raw_scan)

        # Peak sets are compared after picking peaks on both scans.
        mgf_scan.pick_peaks()
        raw_scan.pick_peaks()
        self.assertEqual(raw_scan.peak_set, mgf_scan.peak_set)

        self.assertEqual(raw_scan.precursor_information.precursor,
                         mzml_scan.precursor_information.precursor)