Пример #1
0
    def test_TreeDiaController_percentile(self, simple_dataset):
        """Run a DIA controller with a 'tree' design and 'percentile' windows.

        The simulated run must contain at least one non-empty MS2 scan, and
        the run is then handed to ``check_mzML`` with the output file name.
        """
        logger.info('Testing TreeDiaController percentile')

        # retention-time and m/z ranges of the simulated acquisition
        rt_start, rt_end = 0, 400
        mz_low, mz_high = 100, 1000

        # scan durations keyed by 1 and 2 (presumably the MS level)
        durations = {1: 0.12, 2: 0.06}
        spectrometer = IndependentMassSpectrometer(POSITIVE, simple_dataset,
                                                   scan_duration=durations)

        # 64 percentile windows in a tree design, without scan overlap
        dia = DiaController(mz_low, mz_high, 'percentile', 'tree', 64,
                            scan_overlap=0)

        simulation = Environment(spectrometer, dia, rt_start, rt_end,
                                 progress_bar=True)
        set_log_level_warning()
        simulation.run()

        # at least one MS2 scan has to be non-empty
        check_non_empty_MS2(dia)

        # validate the simulated output written as mzML
        check_mzML(simulation, OUT_DIR, 'tree_dia_percentile.mzml')
Пример #2
0
    def test_multiple_adducts(self):
        """Check that no MS1 scan holds more peaks than chemicals x adducts."""
        adduct_priors = {POSITIVE: {'M+H': 100, 'M+Na': 100, 'M+K': 100}}
        mixture = ChemicalMixtureCreator(
            DatabaseFormulaSampler(HMDB),
            rt_and_intensity_sampler=UniformRTAndIntensitySampler(min_rt=100,
                                                                  max_rt=101),
            chromatogram_sampler=ConstantChromatogramSampler(),
            adduct_prior_dict=adduct_priors,
            adduct_proportion_cutoff=0.0)

        adduct_count = len(adduct_priors[POSITIVE])
        chem_count = 5
        chems = mixture.sample(chem_count, 2)

        # keep a single "Mono" isotope entry per chemical
        for chem in chems:
            chem.isotopes = [(chem.mass, 1, "Mono")]

        # upper bound on peaks per scan: 5 chemicals x 3 adducts = 15
        peak_limit = chem_count * adduct_count

        ms1_only = SimpleMs1Controller()
        spectrometer = IndependentMassSpectrometer(POSITIVE, chems)
        simulation = Environment(spectrometer, ms1_only, 102, 110,
                                 progress_bar=True)
        set_log_level_warning()
        simulation.run()

        for ms1_scan in ms1_only.scans[1]:
            assert len(ms1_scan.mzs) <= peak_limit
Пример #3
0
    def test_swath(self, ten_chems):
        """Run SWATH on ``ten_chems`` with spike noise and validate the output."""
        # SWATH over m/z 100-1000 using a width of 100 and a scan overlap of 10
        swath = SWATH(100, 1000, 100, scan_overlap=10)

        durations = {1: 0.124, 2: 0.124}
        noise = UniformSpikeNoise(0.1, 1)
        spectrometer = IndependentMassSpectrometer(POSITIVE, ten_chems,
                                                   spike_noise=noise,
                                                   scan_duration=durations)

        simulation = Environment(spectrometer, swath, 200, 300,
                                 progress_bar=True)
        set_log_level_warning()
        simulation.run()

        # at least one MS2 scan has to be non-empty
        check_non_empty_MS2(swath)

        check_mzML(simulation, OUT_DIR, 'SWATH_ten_chems.mzML')
Пример #4
0
    def test_mean_scan_time_from_mzml(self):
        """Run Top-N with scan durations sampled (``use_mean=True``) from an mzML file."""
        polarity = POSITIVE

        # Top-10, isolation width 0.7, m/z tolerance 0.01, RT tolerance 15,
        # minimum MS1 intensity 10
        topn = TopNController(polarity, 10, 0.7, 0.01, 15, 10)

        # chemicals are extracted from the mzML file
        mixture = ChemicalMixtureFromMZML(
            MZML_FILE, roi_params=RoiParams(min_intensity=10, min_length=5))
        extracted_chems = mixture.sample(None, 2)

        # scan timings come from the same mzML file, using the mean
        timing = MzMLScanTimeSampler(MZML_FILE, use_mean=True)
        spectrometer = IndependentMassSpectrometer(polarity,
                                                   extracted_chems,
                                                   scan_duration=timing)

        # simulate and validate the resulting mzML
        simulation = Environment(spectrometer, topn, 500, 600,
                                 progress_bar=True)
        set_log_level_warning()
        simulation.run()
        check_mzML(simulation, OUT_DIR, 'test_scan_time_mean_from_mzml.mzML')
Пример #5
0
    def test_negative_fixed(self):
        """Check MS1 and MS2 integer m/z values of fixed chemicals in negative mode."""
        formula_sampler = EvenMZFormulaSampler()
        fragment_sampler = FixedMS2Sampler()
        rt_sampler = UniformRTAndIntensitySampler(min_rt=100, max_rt=101)
        chrom_sampler = ConstantChromatogramSampler()
        mixture = ChemicalMixtureCreator(formula_sampler,
                                         ms2_sampler=fragment_sampler,
                                         rt_and_intensity_sampler=rt_sampler,
                                         chromatogram_sampler=chrom_sampler)
        chems = mixture.sample(3, 2)

        spectrometer = IndependentMassSpectrometer(NEGATIVE, chems)
        # Top-10, isolation width 0.7, m/z tolerance 10, RT tolerance 15
        topn = TopNController(NEGATIVE, 10, 0.7, 10, 15, MIN_MS1_INTENSITY)
        simulation = Environment(spectrometer, topn, 102, 110,
                                 progress_bar=True)
        set_log_level_warning()
        simulation.run()

        # integer m/z values of the very first MS1 scan
        first_ms1 = [int(mz) for mz in topn.scans[1][0].mzs]
        expected_precursors = [98, 198, 298]
        for position, observed in enumerate(first_ms1):
            assert observed == expected_precursors[position]

        # every MS2 peak must be one of the expected fragment m/z values
        allowed_fragments = {88, 78, 188, 178, 288, 278}
        for ms2_scan in topn.scans[2]:
            for mz in ms2_scan.mzs:
                assert int(mz) in allowed_fragments
Пример #6
0
    def test_aif_with_fixed_chems(self):
        """Check that AIF alternates between MS1 peaks and fragment peaks."""
        formula_sampler = EvenMZFormulaSampler()
        fragment_sampler = FixedMS2Sampler(n_frags=2)
        chrom_sampler = ConstantChromatogramSampler()
        rt_sampler = UniformRTAndIntensitySampler(min_rt=0, max_rt=1)
        mixture = ChemicalMixtureCreator(formula_sampler,
                                         ms2_sampler=fragment_sampler,
                                         chromatogram_sampler=chrom_sampler,
                                         rt_and_intensity_sampler=rt_sampler)
        chems = mixture.sample(1, 2)

        # AIF with an MS1 source CID energy of 30
        aif = AIF(30)
        spectrometer = IndependentMassSpectrometer(POSITIVE, chems)
        simulation = Environment(spectrometer, aif, 10, 20, progress_bar=True)

        set_log_level_warning()
        simulation.run()

        for position, scan in enumerate(aif.scans[1]):
            peaks = [int(mz) for mz in scan.mzs]
            if position % 2 == 0:
                # even scan, MS1 - first peak has the integer value 101
                assert peaks[0] == 101
            else:
                # odd scan, AIF - the two lowest peaks are at 81 and 91
                peaks.sort()
                assert peaks[0] == 81
                assert peaks[1] == 91
Пример #7
0
 def test_fullscan_from_mzml(self, chems_from_mzml):
     """Run an MS1-only simulation on ``chems_from_mzml`` and validate the mzML."""
     ms1_only = SimpleMs1Controller()
     spectrometer = IndependentMassSpectrometer(POSITIVE, chems_from_mzml)
     simulation = Environment(spectrometer, ms1_only, 500, 600,
                              progress_bar=True)
     set_log_level_warning()
     simulation.run()
     check_mzML(simulation, OUT_DIR, 'fullscan_from_mzml.mzML')
Пример #8
0
def run_environment(env):
    """
    Run a simulation environment with reduced log verbosity.

    The log level is switched to WARNING before the run so that not too many
    messages are shown, and it is switched back to DEBUG afterwards. The
    switch back happens in a ``finally`` clause, so a failing ``env.run()``
    no longer leaves the log level stuck at WARNING.

    :param env: the environment to run; only its ``run()`` method is used here
    :return: None
    """
    set_log_level_warning()
    try:
        # run the simulation
        logger.info('Running simulation')
        env.run()
        logger.info('Done')
    finally:
        # always set the log level back to DEBUG, even if the run raised
        set_log_level_debug()
Пример #9
0
    def test_acquisition(self, two_fixed_chems):
        """Check a fixed schedule whose last MS2 scan targets two precursors at once.

        The third MS2 scan targets both m/z values together and is expected
        to contain as many peaks as the two single-target scans combined.
        """
        targets = [chem.mass + 1.0 for chem in two_fixed_chems]
        polarity = POSITIVE

        def make_ms2(target_mz, intensity):
            # DDA scan parameters with mz_tol 0.1 and rt_tol 15
            return get_dda_scan_param(target_mz,
                                      intensity,
                                      None,
                                      DEFAULT_ISOLATION_WIDTH,
                                      0.1,
                                      15,
                                      polarity=polarity)

        fixed = FixedScansController()
        spectrometer = IndependentMassSpectrometer(polarity, two_fixed_chems)
        simulation = Environment(spectrometer, fixed, 110, 112)

        # one MS1 scan, one MS2 scan per target, then one MS2 scan for both
        tasks = [
            get_default_scan_params(polarity=polarity),
            make_ms2(targets[0], 0.0),
            make_ms2(targets[1], 0.0),
            make_ms2(targets, [0.0, 0.0]),
        ]
        fixed.set_tasks(tasks)
        set_log_level_warning()
        simulation.run()
        assert len(fixed.scans[2]) == 3

        peak_counts = [ms2_scan.num_peaks for ms2_scan in fixed.scans[2]]

        assert peak_counts[0] > 0
        assert peak_counts[1] > 0
        assert peak_counts[2] == peak_counts[0] + peak_counts[1]
        simulation.write_mzML(OUT_DIR, 'multi_windows.mzML')
Пример #10
0
    def test_swath_more(self, even_chems):
        """
        Tests SWATH on evenly spaced chemicals with three window widths:
        width 100 puts each chemical in its own window, width 200 puts two
        chemicals in each window, and width 400 puts all of them in one window
        """

        def build(window_width):
            # fresh controller, mass spec and environment for one window width
            swath = SWATH(50, 460, window_width, scan_overlap=0)
            spectrometer = IndependentMassSpectrometer(
                POSITIVE, even_chems, scan_duration={1: 0.124, 2: 0.124})
            simulation = Environment(spectrometer, swath, 200, 300,
                                     progress_bar=True)
            return swath, simulation

        def n_children(index):
            return len(even_chems[index].children)

        # width 100: the first four MS2 scans each match a single chemical
        single, simulation = build(100)
        set_log_level_warning()
        simulation.run()

        single_scans = single.scans[2]
        for index in range(4):
            assert len(single_scans[index].mzs) == n_children(index)

        # width 200: the first two MS2 scans each match a pair of chemicals
        paired, simulation = build(200)
        simulation.run()

        paired_scans = paired.scans[2]

        assert len(paired_scans[0].mzs) == n_children(0) + n_children(1)
        assert len(paired_scans[1].mzs) == n_children(2) + n_children(3)

        # width 400: the first MS2 scan matches every chemical
        combined, simulation = build(400)
        simulation.run()

        combined_scans = combined.scans[2]
        assert len(combined_scans[0].mzs) == sum([len(chem.children) for chem in even_chems])
        assert len(combined_scans[0].mzs) == sum([len(scan.mzs) for scan in paired_scans[:2]])
Пример #11
0
 def test_topn_from_mzml(self, chems_from_mzml):
     """Run Top-N on ``chems_from_mzml``; expect non-empty MS2 and a valid mzML."""
     polarity = POSITIVE
     # Top-10, isolation width 0.7, m/z tolerance 0.01, RT tolerance 15,
     # minimum MS1 intensity 10
     topn = TopNController(polarity, 10, 0.7, 0.01, 15, 10)
     spectrometer = IndependentMassSpectrometer(polarity, chems_from_mzml)
     simulation = Environment(spectrometer, topn, 500, 600,
                              progress_bar=True)
     set_log_level_warning()
     simulation.run()
     check_non_empty_MS2(topn)
     check_mzML(simulation, OUT_DIR, 'topn_from_mzml.mzML')
Пример #12
0
def run_experiment(param):
    '''
    Runs a Top-N experiment, unless both of its output files already exist
    :param param: the experimental parameters, a dictionary that is read by key
    :return: the analysis name once the experiment has been run, or None
    when the experiment is skipped because both outputs are already present
    '''
    analysis_name = param['analysis_name']
    mzml_out = param['mzml_out']
    pickle_out = param['pickle_out']
    N = param['N']
    rt_tol = param['rt_tol']

    # nothing to do when both the mzML and the pickle are already on disk
    outputs_exist = os.path.isfile(mzml_out) and os.path.isfile(pickle_out)
    if outputs_exist:
        logger.debug('Skipping %s' % (analysis_name))
        return None

    logger.debug('Processing %s' % (analysis_name))
    peak_sampler = param['peak_sampler']
    if peak_sampler is None:
        # extract density from the fragmentation file selected by (N, rt_tol)
        # NOTE(review): the resulting sampler is not used further below
        mzml_path = param['mzml_path']
        fragfile = param['fragfiles'][(N, rt_tol)]
        peak_sampler = get_peak_sampler(mzml_path, fragfile,
                                        param['min_rt'], param['max_rt'])

    mass_spec = IndependentMassSpectrometer(param['ionisation_mode'],
                                            param['data'])
    controller = TopNController(param['ionisation_mode'],
                                param['N'],
                                param['isolation_width'],
                                param['mz_tol'],
                                param['rt_tol'],
                                param['min_ms1_intensity'])

    # the environment runs both the mass spec and the controller
    env = Environment(mass_spec, controller, param['min_rt'],
                      param['max_rt'], progress_bar=param['pbar'])
    set_log_level_warning()
    env.run()
    set_log_level_debug()

    # persist the simulated run and the controller
    env.write_mzML(None, mzml_out)
    save_obj(controller, pickle_out)
    return analysis_name
Пример #13
0
    def test_FixedScansController(self, two_fixed_chems):
        """Run a fixed schedule of one MS1 scan and three MS2 scans on the first target."""
        logger.info('Testing FixedScansController')
        targets = [chem.mass + 1.0 for chem in two_fixed_chems]
        polarity = POSITIVE

        fixed = FixedScansController(schedule=None)
        spectrometer = IndependentMassSpectrometer(polarity, two_fixed_chems)
        simulation = Environment(spectrometer, fixed, 110, 112)

        # the MS1 scan comes first, followed by three MS2 scans that all
        # target the first precursor (mz_tol 0.1, rt_tol 15)
        tasks = [get_default_scan_params(polarity=polarity)]
        tasks.extend(
            get_dda_scan_param(targets[0],
                               0.0,
                               None,
                               DEFAULT_ISOLATION_WIDTH,
                               0.1,
                               15,
                               polarity=polarity)
            for _ in range(3))
        fixed.set_tasks(tasks)
        set_log_level_warning()
        simulation.run()

        assert len(fixed.scans[1]) == 1
        assert len(fixed.scans[2]) == 3
        for ms2_scan in fixed.scans[2]:
            assert ms2_scan.num_peaks > 0
        simulation.write_mzML(OUT_DIR, 'fixedScansController.mzML')
Пример #14
0
    def test_targeted(self):
        """Check that every target is fragmented ``n_replicates`` times per collision energy."""
        formula_sampler = EvenMZFormulaSampler()
        rt_sampler = UniformRTAndIntensitySampler(min_rt=0, max_rt=10)
        chrom_sampler = ConstantChromatogramSampler()
        fragment_sampler = FixedMS2Sampler()
        mixture = ChemicalMixtureCreator(formula_sampler,
                                         rt_and_intensity_sampler=rt_sampler,
                                         chromatogram_sampler=chrom_sampler,
                                         ms2_sampler=fragment_sampler)
        # two chemicals, with m/z = 100 and 200
        chems = mixture.sample(2, 2)

        targets = [
            Target(101, 100, 102, 10, 20, adduct='M+H'),
            Target(201, 200, 202, 10, 20, metadata={'a': 1}),
        ]
        ce_values = [10, 20, 30]
        n_replicates = 4
        controller = TargetedController(targets,
                                        ce_values,
                                        n_replicates=n_replicates,
                                        limit_acquisition=True)
        spectrometer = IndependentMassSpectrometer(POSITIVE, chems)
        simulation = Environment(spectrometer, controller, 5, 25,
                                 progress_bar=True)
        set_log_level_warning()
        simulation.run()

        # every MS level that was recorded must hold at least one scan
        for level in controller.scans:
            assert len(controller.scans[level]) > 0
        set_log_level_debug()

        # counters per target and per collision energy, all starting at zero
        hits = {target: dict.fromkeys(ce_values, 0) for target in targets}

        for scan in controller.scans[2]:
            scan_params = scan.scan_params
            precursor_mz = scan_params.get(
                ScanParameters.PRECURSOR_MZ)[0].precursor_mz
            # exactly one target may cover this scan in both RT and m/z
            matching = [
                t for t in targets
                if (t.from_rt <= scan.rt <= t.to_rt) and
                (t.from_mz <= precursor_mz <= t.to_mz)
            ]
            assert len(matching) == 1
            energy = scan_params.get(ScanParameters.COLLISION_ENERGY)
            hits[matching[0]][energy] += 1

        for per_energy in hits.values():
            for count in per_energy.values():
                assert count == n_replicates
Пример #15
0
    def test_default_scan_time(self, chems_from_mzml):
        """Run Top-N with ``DEFAULT_SCAN_TIME_DICT`` as scan durations and validate the mzML."""
        polarity = POSITIVE
        # Top-10, isolation width 0.7, m/z tolerance 0.01, RT tolerance 15,
        # minimum MS1 intensity 10
        topn = TopNController(polarity, 10, 0.7, 0.01, 15, 10)

        # the mass spec uses the default scan times
        spectrometer = IndependentMassSpectrometer(
            polarity, chems_from_mzml, scan_duration=DEFAULT_SCAN_TIME_DICT)
        simulation = Environment(spectrometer, topn, 500, 600,
                                 progress_bar=True)
        set_log_level_warning()
        simulation.run()
        check_mzML(simulation, OUT_DIR, 'test_scan_time_default.mzML')
Пример #16
0
    def test_multiple_isolation(self):
        """Check that multi-precursor MS2 scans are unions of the single-precursor scans."""
        formula_sampler = EvenMZFormulaSampler()
        rt_sampler = UniformRTAndIntensitySampler(min_rt=0, max_rt=10)
        chrom_sampler = ConstantChromatogramSampler()
        fragment_sampler = FixedMS2Sampler()
        mixture = ChemicalMixtureCreator(formula_sampler,
                                         rt_and_intensity_sampler=rt_sampler,
                                         chromatogram_sampler=chrom_sampler,
                                         ms2_sampler=fragment_sampler)
        # three chemicals are sampled for this test
        chems = mixture.sample(3, 2)

        controller = MultiIsolationController(3)
        spectrometer = IndependentMassSpectrometer(POSITIVE, chems)
        simulation = Environment(spectrometer, controller, 10, 20,
                                 progress_bar=True)
        set_log_level_warning()
        simulation.run()

        assert len(controller.scans[1]) > 0
        assert len(controller.scans[2]) > 0

        # the first block of MS2 scans, in order: the three individual
        # precursors, then the three pairs, then all three together
        combos = [(0, ), (1, ), (2, ), (0, 1), (0, 2), (1, 2), (0, 1, 2)]
        scan_by_combo = {
            combo: controller.scans[2][position]
            for position, combo in enumerate(combos)
        }

        # each scan must hold exactly the peaks of its member precursors
        for combo, scan in scan_by_combo.items():
            expected_mzs = {
                mz
                for member in combo
                for mz in scan_by_combo[(member, )].mzs
            }
            assert expected_mzs == set(scan.mzs)
Пример #17
0
    def test_AIF_controller_with_beer_chems(self):
        """Run the AIF controller on the QC beer chemicals and validate the mzML output.

        The log message used to announce a Top-N controller although this test
        drives ``AIF``; it now names the controller that is actually tested.
        """
        logger.info('Testing AIF controller with QC beer chemicals')

        ionisation_mode = POSITIVE
        min_mz = 100
        max_mz = 500

        # create a simulated mass spec (no noise arguments are passed) and
        # an AIF controller
        scan_time_dict = {1: 0.124, 2: 0.124}
        mass_spec = IndependentMassSpectrometer(ionisation_mode, BEER_CHEMS,
                                                scan_duration=scan_time_dict)
        params = AdvancedParams(default_ms1_scan_window=[min_mz, max_mz])
        ms1_source_cid_energy = 30
        controller = AIF(ms1_source_cid_energy, params=params)

        # create an environment to run both the mass spec and controller
        env = Environment(mass_spec, controller, BEER_MIN_BOUND, BEER_MAX_BOUND,
                          progress_bar=True)

        # set the log level to WARNING so we don't see too many messages
        # when environment is running
        set_log_level_warning()

        # run the simulation
        env.run()

        # set the log level back to DEBUG
        set_log_level_debug()

        # write simulated output to mzML file
        filename = 'AIF_qcbeer_chems_no_noise.mzML'
        check_mzML(env, OUT_DIR, filename)
Пример #18
0
def run_WeightedDEW(chems, scan_duration, params, out_dir):
    """
    Simulate the WeightedDEW controller over a grid of parameter values

    One parameter dictionary is built for every combination of
    ``params['t0_values']`` and ``params['rt_tol_values']`` and passed to
    ``run_single_WeightedDEW``. The runs are dispatched to an ipyparallel
    cluster when possible; if ipyparallel cannot be imported, the cluster is
    not available or the request times out, they are run serially instead.

    :param chems: a list of UnknownChemicals present in the injection
    :param scan_duration: stored under 'scan_duration' in every per-run
    parameter dictionary
    :param params: a dictionary of parameters, must contain 't0_values' and
    'rt_tol_values'
    :param out_dir: output directory
    :return: None
    """
    logger.info('Running WeightedDEW simulation')
    logger.info(params)
    warn_handler_id = set_log_level_warning()

    try:
        t0_values = params['t0_values']
        rt_tol_values = params['rt_tol_values']
        params_list = []
        for t0 in t0_values:
            for r in rt_tol_values:
                # copy params and add additional attributes we need
                copy_params = dict(params)
                copy_params['t0'] = t0
                copy_params['r'] = r
                copy_params['chems'] = chems
                copy_params['scan_duration'] = scan_duration
                copy_params['out_dir'] = out_dir
                params_list.append(copy_params)

        # Try to run the controllers in parallel. If fails, then run it serially
        logger.warning('Running controllers in parallel, please wait ...')

        # must be initialised here: a successful parallel run never sets it,
        # which previously made the check below fail with UnboundLocalError
        run_serial = False
        try:
            import ipyparallel as ipp
        except ImportError:
            # ipyparallel is not installed; handled separately because `ipp`
            # would be unbound in the except clause further down
            run_serial = True
        else:
            try:
                rc = ipp.Client()
                dview = rc[:]  # use all engines
                with dview.sync_imports():
                    pass
                dview.map_sync(run_single_WeightedDEW, params_list)
            except OSError:  # cluster has not been started
                run_serial = True
            except ipp.error.TimeoutError:  # takes too long to run
                run_serial = True

        if run_serial:  # the parallel attempt failed, run it serially
            logger.warning(
                'IPython cluster not found, running controllers in serial mode')
            for copy_params in params_list:
                run_single_WeightedDEW(copy_params)
    finally:
        # restore the log level even if a simulation raised
        set_log_level_debug(remove_id=warn_handler_id)
Пример #19
0
def run_vimms(no_injections, rt_box_size, mz_box_size):
    """
    Simulate repeated injections with a NonOverlapController and collect ROI boxes

    :param no_injections: how many injections (simulation runs) to perform
    :param rt_box_size: passed to LocatorGrid together with the RT range
    :param mz_box_size: passed to LocatorGrid together with the m/z range 0-3000
    :return: a list with one entry per injection, each a list of the boxes
    obtained by calling ``to_box(0.01, 0.01)`` on the controller's ROIs
    """
    min_rt, max_rt = 0, 1440
    ionisation_mode = POSITIVE
    isolation_width = 1
    N = 10
    rt_tol = 15
    mz_tol = 10
    min_ms1_intensity = 5000
    min_roi_intensity = 500
    min_roi_length = 3
    min_roi_length_for_fragmentation = 3

    # a single grid estimator is shared by the controllers of all injections
    locator = LocatorGrid(min_rt, max_rt, rt_box_size, 0, 3000, mz_box_size)
    grid = GridEstimator(locator, IdentityDrift())

    # sample 2000 chemicals from the HMDB fixture, relative to the working dir
    working_dir = os.path.abspath(os.getcwd())
    hmdbpath = os.path.join(working_dir, "..", "..", "tests", "fixtures",
                            "hmdb_compounds.p")
    formula_sampler = DatabaseFormulaSampler(load_obj(hmdbpath),
                                             min_mz=100,
                                             max_mz=1000)
    mixture = ChemicalMixtureCreator(
        formula_sampler, adduct_prior_dict={POSITIVE: {"M+H": 1}})
    chemicals = mixture.sample(2000, 1)

    boxes = []
    for _ in range(no_injections):
        mass_spec = IndependentMassSpectrometer(
            POSITIVE, chemicals, mz_noise=GaussianPeakNoise(0.1))
        controller = NonOverlapController(
            ionisation_mode,
            isolation_width,
            mz_tol,
            min_ms1_intensity,
            min_roi_intensity,
            min_roi_length,
            N,
            grid,
            rt_tol=rt_tol,
            min_roi_length_for_fragmentation=min_roi_length_for_fragmentation)
        env = Environment(mass_spec, controller, min_rt, max_rt,
                          progress_bar=True)
        set_log_level_warning()
        env.run()

        # turn every ROI found in this injection into a box
        injection_boxes = [
            roi.to_box(0.01, 0.01)
            for roi in controller.roi_builder.get_rois()
        ]
        boxes.append(injection_boxes)
    return boxes
Пример #20
0
    def test_TopNDEW_agent(self):
        """Reuse one TopNDEWAgent across three consecutive controllers.

        All three runs must produce non-empty MS1 scans; non-empty MS2 scans
        are only required for the first two runs.
        """
        set_log_level_debug()
        formula_sampler = UniformMZFormulaSampler()
        rt_sampler = UniformRTAndIntensitySampler(min_rt=0, max_rt=80)
        chrom_sampler = GaussianChromatogramSampler(sigma=1)
        fragment_sampler = FixedMS2Sampler()
        mixture = ChemicalMixtureCreator(formula_sampler,
                                         rt_and_intensity_sampler=rt_sampler,
                                         chromatogram_sampler=chrom_sampler,
                                         ms2_sampler=fragment_sampler)
        chems = mixture.sample(500, 2)
        polarity = POSITIVE

        # Example shows how the same Agent object can be used in consecutive
        # controllers; the spike noise object is shared by all runs as well
        agent = TopNDEWAgent(polarity, 10, 0.7, 10, 15, 1500)
        noise = UniformSpikeNoise(0.1, 1000)

        # (output file, whether non-empty MS2 scans are required); the MS2
        # scans have been exhausted by the time the third run happens
        rounds = (
            ('shell.mzML', True),
            ('shell2.mzML', True),
            ('shell3.mzML', False),
        )
        for filename, expect_ms2 in rounds:
            controller = AgentBasedController(agent)
            spectrometer = IndependentMassSpectrometer(polarity,
                                                       chems,
                                                       spike_noise=noise)
            simulation = Environment(spectrometer, controller, 0, 100,
                                     progress_bar=True)
            set_log_level_warning()
            simulation.run()

            check_non_empty_MS1(controller)
            if expect_ms2:
                check_non_empty_MS2(controller)
            check_mzML(simulation, OUT_DIR, filename)
Пример #21
0
def main():
    """Match query MS2 spectra against cached spectral libraries; write hits to CSV.

    Command-line arguments (parsed from ``sys.argv`` with argparse):

    * ``input_file_names`` -- one path, or several separated by commas. All
      files are assumed to share the extension of the first one, which must
      be ``.mzML`` or ``.mgf`` (compared case-insensitively).
    * ``library_cache`` -- folder holding one ``<library>.p`` file per library.
    * ``libraries`` -- one or more library names to load from that folder.
    * ``--score_thresh``, ``--ms1_tol``, ``--ms2_tol``, ``--min_matched_peaks``
      -- forwarded to each library's ``spectral_match``.
    * ``--output_csv_file`` -- destination of the hit table (default
      ``hits.csv``).
    * ``--log_level`` -- ``warning`` (default) or ``debug``; any other value
      leaves the log level untouched.
    * ``--mgf_id_field`` -- passed to ``load_mgf`` as ``id_field``.

    Each hit becomes one CSV row ``spec_id, library, hit_id, score, inchikey``.
    After writing, the number of distinct query spectra and distinct
    structures (first ``-``-separated part of the inchikey) is logged. When
    there are no hits, nothing is written.

    Raises:
        SystemExit: with status 1 when the input format is unknown, no library
            folder is supplied, or a requested library file does not exist.
    """
    # Kept so that the module-level name ``file_spectra`` is still (re)bound
    # by this function exactly as before.
    global file_spectra

    parser = argparse.ArgumentParser(description='Limited dataset creation')
    parser.add_argument('input_file_names', type=str)
    parser.add_argument('library_cache', type=str)
    parser.add_argument('libraries', type=str, nargs='+')
    parser.add_argument('--score_thresh',
                        dest='score_thresh',
                        type=float,
                        default=0.7)
    parser.add_argument('--ms1_tol', dest='ms1_tol', type=float, default=1.)
    parser.add_argument('--ms2_tol', dest='ms2_tol', type=float, default=0.2)
    parser.add_argument('--min_matched_peaks',
                        dest='min_matched_peaks',
                        type=int,
                        default=1)
    parser.add_argument('--output_csv_file',
                        dest='output_csv_file',
                        type=str,
                        default='hits.csv')
    parser.add_argument('--log_level',
                        dest='log_level',
                        type=str,
                        default='warning')
    parser.add_argument('--mgf_id_field',
                        dest='mgf_id_field',
                        type=str,
                        default='SCANS')
    args = parser.parse_args()

    # str.split yields a one-element list when there is no comma, so single
    # and multiple inputs are handled by the same call.
    input_file_names = args.input_file_names.split(',')

    # assume all the files have the same extension as the first one
    ext = os.path.splitext(input_file_names[0])[1].lower()
    if ext not in ('.mzml', '.mgf'):
        logger.warning("Unknown input file format -- should be .mzML or .mgf")
        # non-zero status so calling scripts can detect the failure
        sys.exit(1)

    # load the ms2 scans of every input file, keyed by file name
    query_spectra = {}
    for input_file_name in input_file_names:
        if ext == '.mzml':
            file_spectra = load_scans_from_mzml(input_file_name)
        else:
            # a fresh dict is passed for every file, as in the original call
            file_spectra = load_mgf(input_file_name,
                                    id_field=args.mgf_id_field,
                                    spectra={})
        logger.warning("Loaded {} MS2 spectra from {}".format(
            len(file_spectra), input_file_name))
        query_spectra[input_file_name] = file_spectra

    # the requested log level only takes effect from this point onwards
    if args.log_level == 'warning':
        set_log_level_warning()
    elif args.log_level == 'debug':
        set_log_level_debug()

    if args.library_cache is None:
        logger.warning("You must supply a library folder")
        sys.exit(1)

    # NOTE(review): the '.p' suffix suggests load_obj unpickles the file --
    # if so, only point library_cache at trusted files. Confirm.
    spec_libraries = {}
    for library in args.libraries:
        lib_file = os.path.join(args.library_cache, library + '.p')
        if not os.path.isfile(lib_file):
            logger.warning("Could not find {}".format(lib_file))
            sys.exit(1)
        logger.warning("Loading {}".format(lib_file))
        spec_libraries[library] = load_obj(lib_file)
        logger.warning("Loaded {}".format(lib_file))

    # match every query spectrum against every loaded library
    all_hits = []
    for input_file_name, file_spectra in query_spectra.items():
        logger.warning('Processing {}'.format(input_file_name))
        for spec_id in tqdm(file_spectra.keys()):
            for library, spec_library in spec_libraries.items():
                hits = spec_library.spectral_match(
                    file_spectra[spec_id],
                    score_thresh=args.score_thresh,
                    ms2_tol=args.ms2_tol,
                    ms1_tol=args.ms1_tol,
                    min_match_peaks=args.min_matched_peaks)
                for hit in hits:
                    all_hits.append([
                        spec_id, library, hit[0], hit[1],
                        hit[2].metadata['inchikey']
                    ])

    if not all_hits:
        logger.warning("No hits found!")
        return

    logger.warning('Writing output to {}'.format(args.output_csv_file))
    with open(args.output_csv_file, 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(['spec_id', 'library', 'hit_id', 'score', 'inchikey'])
        writer.writerows(all_hits)

    # summary
    hit_spec_ids = {hit[0] for hit in all_hits}
    hit_structures = {
        hit[4].split('-')[0] for hit in all_hits if hit[4] is not None
    }
    logger.warning("{} unique spectra got hits".format(len(hit_spec_ids)))
    logger.warning("{} unique structures were hit".format(
        len(hit_structures)))
Пример #22
0
    def test_ms2_matching(self):
        """Simulate four Top-N runs with chained exclusion lists, then check ms2 matching.

        A 300-chemical dataset is sampled and expanded into one chemical list
        per entry of ``group_list``. Each list is run through a
        ``TopNController`` whose initial exclusion list is the set of items
        excluded by the previous run, and the result is written as
        ``<index>.mzML``. Finally ``ms2_main`` is called once on the first
        file and once on the whole output folder: the second returned value
        must be equal in both calls and the first must be larger for the
        folder.
        """
        rt_intensity_sampler = UniformRTAndIntensitySampler(min_rt=10,
                                                            max_rt=20)
        formula_sampler = UniformMZFormulaSampler()
        mixture_creator = ChemicalMixtureCreator(
            formula_sampler,
            rt_and_intensity_sampler=rt_intensity_sampler,
            adduct_prior_dict={POSITIVE: {'M+H': 1}})
        dataset = mixture_creator.sample(300, 2)

        # two control samples and two case samples
        group_list = ['control', 'control', 'case', 'case']
        group_dict = {
            'control': {
                'missing_probability': 0.0,
                'changing_probability': 0.0,
            },
            'case': {
                'missing_probability': 0.0,
                'changing_probability': 1.0,
            },
        }
        multi_mixture = MultipleMixtureCreator(dataset, group_list, group_dict)
        chemical_lists = multi_mixture.generate_chemical_lists()

        # Top-N controller settings
        top_n = 10
        isolation_width = 0.7
        mz_tol = 0.001
        rt_tol = 30
        min_ms1_intensity = 0

        set_log_level_warning()

        output_folder = os.path.join(OUT_DIR, 'ms2_matching')
        write_msp(dataset, 'mmm.msp', out_dir=output_folder)

        # items excluded so far; each run starts from what the previous one left
        carried_exclusions = []
        for run_index, chem_list in enumerate(chemical_lists):
            controller = TopNController(
                POSITIVE,
                top_n,
                isolation_width,
                mz_tol,
                rt_tol,
                min_ms1_intensity,
                initial_exclusion_list=carried_exclusions)
            mass_spec = IndependentMassSpectrometer(POSITIVE, chem_list)
            env = Environment(mass_spec, controller, 10, 30, progress_bar=True)
            env.run()
            env.write_mzML(output_folder, '{}.mzML'.format(run_index))

            # the m/z and RT box collections must hold the same number of
            # distinct items
            boxes = controller.exclusion.exclusion_list
            mz_items = {box.data for box in boxes.boxes_mz.items()}
            rt_items = {box.data for box in boxes.boxes_rt.items()}
            assert len(mz_items) == len(rt_items)

            carried_exclusions = list(mz_items)
            logger.warning(len(carried_exclusions))

        set_log_level_debug()
        msp_file = os.path.join(output_folder, 'mmm.msp')
        # check with just the first file
        single_first, single_second = ms2_main(
            os.path.join(output_folder, '0.mzML'), msp_file, 1, 0.7)
        # check with all
        folder_first, folder_second = ms2_main(output_folder, msp_file, 1, 0.7)

        assert single_second == folder_second
        assert folder_first > single_first
Пример #23
0
    # Fragment of a larger function: `args` and `dataset` are defined outside
    # the visible lines.

    # The same spike-noise object is shared by both simulated runs below.
    spike_noise = UniformSpikeNoise(0.01, args.spike_max)

    ms = IndependentMassSpectrometer(POSITIVE_IONISATION_MODE,
                                     dataset,
                                     spike_noise=spike_noise)

    # First run: Top-N controller.
    controller = TopNController(POSITIVE_IONISATION_MODE, 10, 0.7, 0.01, 15,
                                1e3)

    # The simulated time range extends 50 beyond the requested
    # [min_rt, max_rt] on each side.
    env = Environment(ms,
                      controller,
                      min_time=args.min_rt - 50,
                      max_time=args.max_rt + 50)

    set_log_level_warning()
    env.run()

    # NOTE(review): elsewhere in this file the first argument of write_mzML is
    # an output folder; None presumably means the file name is used as given
    # -- confirm.
    env.write_mzML(None, args.output_mzml_file)

    # Optional second run: the same dataset and noise with a SWATH controller,
    # on a fresh mass spectrometer and environment, written to its own file.
    if args.output_swath_file is not None:
        sw = SWATH(args.min_mz, args.max_mz, 100, 0.0)
        ms = IndependentMassSpectrometer(POSITIVE_IONISATION_MODE,
                                         dataset,
                                         spike_noise=spike_noise)
        env = Environment(ms,
                          sw,
                          min_time=args.min_rt - 50,
                          max_time=args.max_rt + 50)
        env.run()
        env.write_mzML(None, args.output_swath_file)
Пример #24
0
    def test_AIF_controller_with_simulated_chems(self, fragscan_dataset):
        """Run the AIF controller on simulated chemicals, without and with peak noise.

        Two simulations are executed over ``fragscan_dataset``: one with a
        noise-free mass spectrometer and one with Gaussian m/z and intensity
        noise. Each result is written to its own mzML file and validated with
        ``check_mzML``.

        Args:
            fragscan_dataset: fixture holding the simulated chemicals; it must
                contain exactly ``N_CHEMS`` items.
        """
        logger.info('Testing AIF controller with simulated chemicals')

        # the fixture must provide the expected number of chemicals
        assert len(fragscan_dataset) == N_CHEMS

        ionisation_mode = POSITIVE
        min_mz = 100
        max_mz = 500
        scan_time_dict = {1: 0.12, 2: 0.06}
        ms1_source_cid_energy = 30

        def run_and_check(filename, **noise_kwargs):
            """Simulate one AIF run and validate the mzML written to ``filename``.

            ``noise_kwargs`` are passed straight to IndependentMassSpectrometer.
            """
            mass_spec = IndependentMassSpectrometer(ionisation_mode,
                                                    fragscan_dataset,
                                                    scan_duration=scan_time_dict,
                                                    **noise_kwargs)
            params = AdvancedParams(default_ms1_scan_window=[min_mz, max_mz])
            controller = AIF(ms1_source_cid_energy, params=params)

            # create an environment to run both the mass spec and controller;
            # the RT bounds are derived from the dataset instead of being fixed
            min_bound, max_bound = get_rt_bounds(fragscan_dataset, CENTRE_RANGE)
            env = Environment(mass_spec, controller, min_bound, max_bound,
                              progress_bar=True)

            # set the log level to WARNING so we don't see too many messages
            # while the environment is running, and always set it back to
            # DEBUG afterwards, even if the simulation fails
            set_log_level_warning()
            try:
                env.run()
            finally:
                set_log_level_debug()

            # write simulated output to mzML file
            check_mzML(env, OUT_DIR, filename)

        # simulated mass spec without noise and AIF controller
        logger.info('Without noise')
        run_and_check('AIF_simulated_chems_no_noise.mzML')

        # simulated mass spec with noise and AIF controller
        logger.info('With noise')
        # NOTE(review): the dict key 2 presumably selects the MS level the
        # noise applies to -- confirm against GaussianPeakNoiseLevelSpecific.
        mz_noise = GaussianPeakNoiseLevelSpecific({2: 0.01})
        intensity_noise = GaussianPeakNoiseLevelSpecific({2: 1000.})
        run_and_check('AIF_simulated_chems_with_noise.mzML',
                      mz_noise=mz_noise,
                      intensity_noise=intensity_noise)