Exemplo n.º 1
0
def top_n_roi_experiment(datasets,
                         base_chemicals,
                         rt_range,
                         isolation_width,
                         mz_tol,
                         min_ms1_intensity,
                         min_roi_intensity,
                         min_roi_length,
                         N,
                         rt_tol,
                         ionisation_mode=POSITIVE):
    """Simulate a Top-N ROI run for each dataset and evaluate them together.

    :param datasets: sequence of inputs; each item gets its own
        ``IndependentMassSpectrometer``
    :param base_chemicals: passed to ``evaluate_multiple_simulated_env``
    :param rt_range: indexable; items 0 and 1 are handed to ``Environment``
        as the two positional values following the controller
    :param isolation_width: forwarded to ``TopN_RoiController``
    :param mz_tol: forwarded to ``TopN_RoiController``
    :param min_ms1_intensity: forwarded to ``TopN_RoiController``
    :param min_roi_intensity: forwarded to ``TopN_RoiController``
    :param min_roi_length: forwarded to ``TopN_RoiController``
    :param N: forwarded to ``TopN_RoiController`` as ``N``
    :param rt_tol: forwarded to ``TopN_RoiController`` as ``rt_tol``
    :param ionisation_mode: used for both the mass spec and the controller
    :return: ``(env_list, final_evaluation)``; ``env_list`` holds one
        finished ``Environment`` per dataset, in input order
    """

    def simulate(dataset):
        # A fresh mass spec / controller pair is built for every dataset.
        spectrometer = IndependentMassSpectrometer(ionisation_mode, dataset)
        roi_controller = TopN_RoiController(ionisation_mode,
                                            isolation_width,
                                            mz_tol,
                                            min_ms1_intensity,
                                            min_roi_intensity,
                                            min_roi_length,
                                            N=N,
                                            rt_tol=rt_tol)
        environment = Environment(spectrometer,
                                  roi_controller,
                                  rt_range[0],
                                  rt_range[1],
                                  progress_bar=True)
        environment.run()
        return environment

    env_list = [simulate(dataset) for dataset in datasets]
    final_evaluation = evaluate_multiple_simulated_env(
        env_list, base_chemicals=base_chemicals)
    return env_list, final_evaluation
Exemplo n.º 2
0
    def test_negative_fixed(self):
        """Check Top-N output in negative mode against hard-coded m/z values.

        Three chemicals are sampled with the even-m/z formula sampler and the
        fixed MS2 sampler, then a Top-N simulation is run in ``NEGATIVE``
        mode. The integer m/z values of the first MS1 scan and of every MS2
        scan are compared with the expected numbers below.
        """
        mixture_creator = ChemicalMixtureCreator(
            EvenMZFormulaSampler(),
            ms2_sampler=FixedMS2Sampler(),
            rt_and_intensity_sampler=UniformRTAndIntensitySampler(min_rt=100,
                                                                  max_rt=101),
            chromatogram_sampler=ConstantChromatogramSampler())
        dataset = mixture_creator.sample(3, 2)

        # Top-N controller settings
        N = 10
        isolation_width = 0.7
        mz_tol = 10
        rt_tol = 15

        mass_spec = IndependentMassSpectrometer(NEGATIVE, dataset)
        controller = TopNController(NEGATIVE, N, isolation_width, mz_tol,
                                    rt_tol, MIN_MS1_INTENSITY)
        env = Environment(mass_spec, controller, 102, 110, progress_bar=True)
        set_log_level_warning()
        env.run()

        # integer parts of the m/z values in the first MS1 scan
        first_ms1_scan = controller.scans[1][0]
        ms1_mz_vals = [int(mz) for mz in first_ms1_scan.mzs]
        expected_vals = [98, 198, 298]
        for position, observed in enumerate(ms1_mz_vals):
            assert observed == expected_vals[position]

        # every MS2 peak must fall on one of the known fragment values
        expected_frags = {88, 78, 188, 178, 288, 278}
        for ms2_scan in controller.scans[2]:
            for mz in ms2_scan.mzs:
                assert int(mz) in expected_frags
Exemplo n.º 3
0
def run_single_WeightedDEW(params):
    """Run one WeightedDEW simulation described by ``params``.

    The output file name is derived from ``params['sample_name']``,
    ``params['t0']`` and ``params['r']``. Nothing is simulated when that
    file already exists in ``params['out_dir']`` or when ``t0`` is greater
    than ``r``.

    :param params: dictionary holding the keys read below (sample name,
        controller settings, chemicals, RT bounds and output directory)
    :return: None
    """
    sample_name = params['sample_name']
    t0 = params['t0']
    r = params['r']
    out_file = 'WeightedDEW_{}_{}_{}.mzml'.format(sample_name, t0, r)
    logger.warning('Generating %s' % out_file)

    # Guard clauses: skip finished outputs and invalid parameter pairs.
    target_path = os.path.join(params['out_dir'], out_file)
    if os.path.isfile(target_path):
        logger.warning('Already done')
        return None
    if t0 > r:
        logger.warning('Impossible combination')
        return None

    ionisation_mode = params['ionisation_mode']
    controller = WeightedDEWController(ionisation_mode,
                                       params['N'],
                                       params['isolation_width'],
                                       params['mz_tol'],
                                       r,
                                       params['min_ms1_intensity'],
                                       exclusion_t_0=t0,
                                       log_intensity=True)
    mass_spec = IndependentMassSpectrometer(
        ionisation_mode,
        params['chems'],
        scan_duration=params['scan_duration'])

    # The environment is given the output location at construction time.
    env = Environment(mass_spec,
                      controller,
                      params['min_rt'],
                      params['max_rt'],
                      progress_bar=True,
                      out_dir=params['out_dir'],
                      out_file=out_file)
    env.run()
Exemplo n.º 4
0
def run_TopN(chems, scan_duration, params, out_dir):
    """
    Simulate a TopN controller run.

    The output file is named ``<controller_name>_<sample_name>.mzML`` using
    the corresponding entries of ``params``.

    :param chems: the chemicals given to the simulated mass spectrometer
    :param scan_duration: forwarded to ``IndependentMassSpectrometer`` as
    ``scan_duration``
    :param params: a dictionary of parameters (controller settings, RT
    bounds, controller and sample names)
    :param out_dir: output directory, forwarded to ``Environment``
    :return: None
    """
    logger.info('Running TopN simulation')
    logger.info(params)

    ionisation_mode = params['ionisation_mode']
    name_parts = (params['controller_name'], params['sample_name'])
    out_file = '%s_%s.mzML' % name_parts

    controller = TopNController(ionisation_mode, params['N'],
                                params['isolation_width'], params['mz_tol'],
                                params['rt_tol'], params['min_ms1_intensity'])
    mass_spec = IndependentMassSpectrometer(ionisation_mode,
                                            chems,
                                            scan_duration=scan_duration)
    env = Environment(mass_spec,
                      controller,
                      params['min_rt'],
                      params['max_rt'],
                      progress_bar=True,
                      out_dir=out_dir,
                      out_file=out_file)

    logger.info('Generating %s' % out_file)
    env.run()
Exemplo n.º 5
0
    def generate_mzmls(self, output_dir, params):
        """Simulate one MS1-only run per scan-duration dict and write mzML.

        ``self.time_gen(params)`` supplies the scan-duration dicts. When
        ``self.filenames`` is unset, default names of the form
        ``time_exp_data_NNNN.mzML`` inside ``output_dir`` are generated.

        :param output_dir: directory the mzML files are written to
        :param params: passed to ``self.time_gen``; its length must equal
            the number of filenames
        :raises ValueError: if ``params`` and ``self.filenames`` differ in
            length (``self.filenames`` and ``self.file_counter`` have
            already been updated at that point)
        """
        scan_duration_dicts = self.time_gen(params)
        if self.filenames is None:
            self.filenames = [
                os.path.join(output_dir,
                             "time_exp_data_{:04d}.mzML".format(index))
                for index, _ in enumerate(scan_duration_dicts)
            ]
        self.file_counter += len(scan_duration_dicts)

        n_params, n_files = len(params), len(self.filenames)
        if n_params != n_files:
            raise ValueError(
                "Parameter and filename list not the same length!")

        for mzml_path, duration_dict in zip(self.filenames,
                                            scan_duration_dicts):
            mass_spec = IndependentMassSpectrometer(
                POSITIVE,
                self.chems,
                None,
                scan_duration_dict=duration_dict)
            controller = SimpleMs1Controller()
            env = Environment(mass_spec,
                              controller,
                              self.min_rt,
                              self.max_rt,
                              progress_bar=True)

            set_log_level_warning()
            env.run()

            # Only the base name is used; the file always lands in output_dir.
            set_log_level_warning()
            env.write_mzML(output_dir, os.path.basename(mzml_path))
Exemplo n.º 6
0
 def run_experiment(self, idx):
     """Run the scheduled experiment ``idx`` unless its mzML already exists.

     The sample ID at ``idx`` names the output. If ``<sample id>.mzML`` is
     already present in ``self.base_dir`` the run is skipped; otherwise the
     parent implementation supplies the controller and MS parameters, the
     dataset is loaded, simulated, and written out.

     :param idx: index into ``self.controller_schedule``
     :return: ``(mzml_file, controller_name)``; the path is returned
         whether or not the file was actually produced
     """
     controller_name = self.controller_schedule['Sample ID'][idx]
     mzml_name = controller_name + '.mzML'
     existing_names = {
         os.path.basename(path)
         for path in glob.glob(os.path.join(self.base_dir, '*.mzML'))
     }

     if mzml_name in existing_names:
         logger.info('Experiment already completed. Skipping...')
     else:
         controller, ms_params = super().run_experiment(idx)
         # load data and set up MS
         logger.info(self.controller_schedule.iloc[[idx]].to_dict())
         method = self.controller_schedule['Controller Method'][idx]
         dataset_ref = self.controller_schedule['Dataset'][idx]
         if not (method is None or dataset_ref is None):
             dataset = load_obj(dataset_ref)
             mass_spec = IndependentMassSpectrometer(
                 ms_params['ionisation_mode'], dataset)
             # Run sample
             rt_start, rt_end = self.rt_range[0][0], self.rt_range[0][1]
             env = Environment(mass_spec,
                               controller,
                               rt_start,
                               rt_end,
                               progress_bar=self.progress_bar)
             env.run()
             env.write_mzML(self.base_dir, mzml_name)
             if self.write_env:
                 # optionally persist the controller next to the mzML
                 pickle_path = os.path.join(self.base_dir,
                                            controller_name + '.p')
                 save_obj(controller, pickle_path)

     mzml_file = os.path.join(self.base_dir, mzml_name)
     return mzml_file, controller_name
Exemplo n.º 7
0
    def test_multiple_adducts(self):
        """MS1 scans never hold more peaks than chemicals times adducts.

        Five chemicals are sampled from HMDB formulas with three equally
        weighted positive adducts, each chemical is reduced to a single
        "Mono" isotope entry, and a plain MS1 simulation is run.
        """
        adduct_prior_dict = {POSITIVE: {'M+H': 100, 'M+Na': 100, 'M+K': 100}}
        mixture_creator = ChemicalMixtureCreator(
            DatabaseFormulaSampler(HMDB),
            rt_and_intensity_sampler=UniformRTAndIntensitySampler(min_rt=100,
                                                                  max_rt=101),
            chromatogram_sampler=ConstantChromatogramSampler(),
            adduct_prior_dict=adduct_prior_dict,
            adduct_proportion_cutoff=0.0)

        n_adducts = len(adduct_prior_dict[POSITIVE])
        n_chems = 5
        dataset = mixture_creator.sample(n_chems, 2)

        # keep one isotope entry per chemical
        for chem in dataset:
            chem.isotopes = [(chem.mass, 1, "Mono")]

        # Upper bound is n_chems * n_adducts (15 here); a scan may contain
        # fewer peaks when some adducts are not sampled.
        max_peaks = n_chems * n_adducts
        controller = SimpleMs1Controller()
        mass_spec = IndependentMassSpectrometer(POSITIVE, dataset)
        env = Environment(mass_spec, controller, 102, 110, progress_bar=True)
        set_log_level_warning()
        env.run()

        for ms1_scan in controller.scans[1]:
            assert len(ms1_scan.mzs) <= max_peaks
Exemplo n.º 8
0
    def test_mass_spec(self):
        """Run Top-N on the QC beer chemicals with a task-manager-backed MS.

        The mass spectrometer is built with a ``TaskManager`` of buffer size
        3. After the run, the result must contain a non-empty MS2 scan and
        be writable as mzML.
        """
        logger.info(
            'Testing mass spec using the Top-N controller and QC beer chemicals'
        )

        ionisation_mode = POSITIVE

        # Top-N controller settings
        N = 10
        isolation_width = 1
        mz_tol = 10
        rt_tol = 15

        mass_spec = IndependentMassSpectrometer(
            ionisation_mode,
            BEER_CHEMS,
            task_manager=TaskManager(buffer_size=3))
        controller = TopNController(ionisation_mode, N, isolation_width,
                                    mz_tol, rt_tol, MIN_MS1_INTENSITY)

        # the environment drives both the mass spec and the controller
        env = Environment(mass_spec,
                          controller,
                          BEER_MIN_BOUND,
                          BEER_MAX_BOUND,
                          progress_bar=True)
        env.run()

        # at least one MS2 scan must be non-empty
        check_non_empty_MS2(controller)

        # the simulated output must be writable as mzML
        check_mzML(env, OUT_DIR, 'test_mass_spec.mzML')
Exemplo n.º 9
0
    def test_roi_controller_with_beer_chems(self):
        """Run the ROI controller ("Top N" mode) on the QC beer chemicals.

        A noisy simulated mass spectrometer is combined with a
        ``RoiController``; the run must produce an mzML file in ``out_dir``.
        """
        logger.info('Testing ROI controller with QC beer chemicals')

        isolation_width = 1  # the isolation window in Dalton around a selected precursor ion
        N = 10
        rt_tol = 15
        mz_tol = 10
        min_roi_intensity = 5000
        min_roi_length = 10
        ionisation_mode = POSITIVE

        # create a simulated mass spec with noise and ROI controller
        mass_spec = IndependentMassSpectrometer(ionisation_mode, beer_chems, self.ps, add_noise=True)
        controller = RoiController(ionisation_mode, isolation_width, mz_tol, min_ms1_intensity,
                                   min_roi_intensity, min_roi_length, "Top N", N, rt_tol)

        # create an environment to run both the mass spec and controller
        env = Environment(mass_spec, controller, min_rt, max_rt, progress_bar=True)

        # set the log level to WARNING so we don't see too many messages when environment is running
        set_log_level_warning()
        try:
            # run the simulation
            env.run()
        finally:
            # set the log level back to DEBUG even if the run raised, so a
            # failure here does not silence logging for whatever runs next
            set_log_level_debug()

        # write simulated output to mzML file
        filename = 'roi_controller_qcbeer_chems.mzML'
        out_file = os.path.join(out_dir, filename)
        env.write_mzML(out_dir, filename)
        self.assertTrue(os.path.exists(out_file))
        print()
Exemplo n.º 10
0
    def test_ms1_controller_with_qcbeer_chems(self):
        """Run the MS1-only controller on the QC beer chemicals.

        The run must produce an mzML file in ``out_dir``.
        """
        logger.info('Testing MS1 controller with QC beer chemicals')

        # create a simulated mass spec and MS1 controller
        mass_spec = IndependentMassSpectrometer(POSITIVE, beer_chems, self.ps)
        controller = SimpleMs1Controller()

        # create an environment to run both the mass spec and controller
        env = Environment(mass_spec, controller, min_rt, max_rt, progress_bar=True)

        # set the log level to WARNING so we don't see too many messages when environment is running
        set_log_level_warning()
        try:
            # run the simulation
            env.run()
        finally:
            # set the log level back to DEBUG even if the run raised, so a
            # failure here does not silence logging for whatever runs next
            set_log_level_debug()

        # write simulated output to mzML file
        filename = 'ms1_controller_qcbeer_chems.mzML'
        out_file = os.path.join(out_dir, filename)
        env.write_mzML(out_dir, filename)
        self.assertTrue(os.path.exists(out_file))
        print()
Exemplo n.º 11
0
    def test_ms1_controller_with_simulated_chems(self):
        """Run the MS1-only controller on freshly sampled chemicals.

        ``n_chems`` chemicals are sampled with ``ChemicalCreator`` and the
        sample size is checked; the run must then produce an mzML file in
        ``out_dir``.
        """
        logger.info('Testing MS1 controller with simulated chemicals')

        # create some chemical objects
        chems = ChemicalCreator(self.ps, ROI_Sources, hmdb)
        dataset = chems.sample(mz_range, rt_range, min_ms1_intensity, n_chems, self.ms_level)
        self.assertEqual(len(dataset), n_chems)

        # create a simulated mass spec and MS1 controller
        mass_spec = IndependentMassSpectrometer(POSITIVE, dataset, self.ps)
        controller = SimpleMs1Controller()

        # create an environment to run both the mass spec and controller
        env = Environment(mass_spec, controller, min_rt, max_rt, progress_bar=True)

        # set the log level to WARNING so we don't see too many messages when environment is running
        set_log_level_warning()
        try:
            # run the simulation
            env.run()
        finally:
            # set the log level back to DEBUG even if the run raised, so a
            # failure here does not silence logging for whatever runs next
            set_log_level_debug()

        # write simulated output to mzML file
        filename = 'ms1_controller_simulated_chems.mzML'
        out_file = os.path.join(out_dir, filename)
        env.write_mzML(out_dir, filename)
        self.assertTrue(os.path.exists(out_file))
        print()
Exemplo n.º 12
0
    def test_mean_scan_time_from_mzml(self):
        """Run Top-N with scan durations sampled from an existing mzML file.

        Both the chemicals and the scan-time sampler (``use_mean=True``) are
        derived from ``MZML_FILE``; the simulated output must be writable as
        mzML.
        """
        ionisation_mode = POSITIVE

        # Top-N controller settings
        N = 10
        isolation_width = 0.7
        mz_tol = 0.01
        rt_tol = 15
        min_ms1_intensity = 10
        controller = TopNController(ionisation_mode, N, isolation_width,
                                    mz_tol, rt_tol, min_ms1_intensity)

        # chemicals are extracted from the mzML file
        mixture = ChemicalMixtureFromMZML(
            MZML_FILE, roi_params=RoiParams(min_intensity=10, min_length=5))
        chems = mixture.sample(None, 2)

        # scan durations come from the same file, built with use_mean=True
        scan_time_sampler = MzMLScanTimeSampler(MZML_FILE, use_mean=True)
        mass_spec = IndependentMassSpectrometer(
            ionisation_mode, chems, scan_duration=scan_time_sampler)

        # run simulation
        env = Environment(mass_spec, controller, 500, 600, progress_bar=True)
        set_log_level_warning()
        env.run()
        check_mzML(env, OUT_DIR, 'test_scan_time_mean_from_mzml.mzML')
Exemplo n.º 13
0
    def test_TreeDiaController_percentile(self, simple_dataset):
        """Run ``DiaController`` with the 'tree' design and percentile windows.

        The run must yield at least one non-empty MS2 scan and be writable
        as mzML.
        """
        logger.info('Testing TreeDiaController percentile')

        # acquisition bounds
        min_rt, max_rt = 0, 400
        min_mz, max_mz = 100, 1000

        # DIA settings
        window_type = 'percentile'
        kaufmann_design = 'tree'
        num_windows = 64
        scan_overlap = 0

        # mass spec settings
        ionisation_mode = POSITIVE
        scan_time_dict = {1: 0.12, 2: 0.06}

        # run controller
        mass_spec = IndependentMassSpectrometer(
            ionisation_mode, simple_dataset, scan_duration=scan_time_dict)
        controller = DiaController(min_mz,
                                   max_mz,
                                   window_type,
                                   kaufmann_design,
                                   num_windows,
                                   scan_overlap=scan_overlap)
        env = Environment(mass_spec, controller, min_rt, max_rt,
                          progress_bar=True)
        set_log_level_warning()
        env.run()

        # at least one MS2 scan must be non-empty
        check_non_empty_MS2(controller)

        # the simulated output must be writable as mzML
        check_mzML(env, OUT_DIR, 'tree_dia_percentile.mzml')
Exemplo n.º 14
0
    def test_swath(self, ten_chems):
        """Run the SWATH controller on ten chemicals with spike noise.

        The run must yield at least one non-empty MS2 scan and be writable
        as mzML.
        """
        ionisation_mode = POSITIVE

        # SWATH windows: m/z 100-1000, width 100, overlap 10
        min_mz, max_mz = 100, 1000
        width = 100
        scan_overlap = 10
        controller = SWATH(min_mz, max_mz, width, scan_overlap=scan_overlap)

        # same scan duration for both MS levels
        scan_time_dict = {1: 0.124, 2: 0.124}
        spike_noise = UniformSpikeNoise(0.1, 1)
        mass_spec = IndependentMassSpectrometer(ionisation_mode,
                                                ten_chems,
                                                spike_noise=spike_noise,
                                                scan_duration=scan_time_dict)

        env = Environment(mass_spec, controller, 200, 300, progress_bar=True)
        set_log_level_warning()
        env.run()

        check_non_empty_MS2(controller)
        check_mzML(env, OUT_DIR, 'SWATH_ten_chems.mzML')
Exemplo n.º 15
0
    def test_aif_with_fixed_chems(self):
        """AIF on one fixed chemical alternates between known peak patterns.

        Scans stored under level 1 are checked by position: even positions
        must start with integer m/z 101, odd positions must have 81 and 91
        as their two lowest integer m/z values.
        """
        formula_sampler = EvenMZFormulaSampler()
        ms2_sampler = FixedMS2Sampler(n_frags=2)
        chromatogram_sampler = ConstantChromatogramSampler()
        rt_intensity_sampler = UniformRTAndIntensitySampler(min_rt=0,
                                                            max_rt=1)
        mixture_creator = ChemicalMixtureCreator(
            formula_sampler,
            ms2_sampler=ms2_sampler,
            chromatogram_sampler=chromatogram_sampler,
            rt_and_intensity_sampler=rt_intensity_sampler)
        chems = mixture_creator.sample(1, 2)

        ms1_source_cid_energy = 30
        controller = AIF(ms1_source_cid_energy)
        mass_spec = IndependentMassSpectrometer(POSITIVE, chems)
        env = Environment(mass_spec, controller, 10, 20, progress_bar=True)

        set_log_level_warning()
        env.run()

        for position, scan in enumerate(controller.scans[1]):
            integer_mzs = [int(mz) for mz in scan.mzs]
            if position % 2 == 0:
                # even scan, MS1 - first peak at integer value of 101
                assert integer_mzs[0] == 101
            else:
                # odd scan, AIF - two lowest peaks at 81 and 91
                integer_mzs.sort()
                assert integer_mzs[0] == 81
                assert integer_mzs[1] == 91
Exemplo n.º 16
0
    def test_TopN_controller_with_beer_chems(self):
        """Run the Top-N controller on the QC beer chemicals without noise.

        The run must produce an mzML file in ``out_dir``.
        """
        logger.info('Testing Top-N controller with QC beer chemicals')

        isolation_width = 1
        N = 10
        rt_tol = 15
        mz_tol = 10
        ionisation_mode = POSITIVE

        # create a simulated mass spec without noise and Top-N controller
        mass_spec = IndependentMassSpectrometer(ionisation_mode, beer_chems, self.ps, add_noise=False)
        controller = TopNController(ionisation_mode, N, isolation_width, mz_tol, rt_tol, min_ms1_intensity)

        # create an environment to run both the mass spec and controller
        env = Environment(mass_spec, controller, min_rt, max_rt, progress_bar=True)

        # set the log level to WARNING so we don't see too many messages when environment is running
        set_log_level_warning()
        try:
            # run the simulation
            env.run()
        finally:
            # set the log level back to DEBUG even if the run raised, so a
            # failure here does not silence logging for whatever runs next
            set_log_level_debug()

        # write simulated output to mzML file
        filename = 'topN_controller_qcbeer_chems_no_noise.mzML'
        out_file = os.path.join(out_dir, filename)
        env.write_mzML(out_dir, filename)
        self.assertTrue(os.path.exists(out_file))
        print()
Exemplo n.º 17
0
def simple_ms1_processor():
    """Sample chemicals, run an MS1-only simulation and write the mzML.

    Loads the pickled peak sampler and HMDB compounds from the example data
    directory, samples ``n_chems`` chemicals, saves them as ``dataset.p``,
    runs ``SimpleMs1Controller`` over the full RT range and writes
    ``ms1_controller.mzML`` under ``results/MS1_single``.

    :return: the output mzML path as a string, built from the relative
        ``mypath`` (not the absolute ``base_dir``) with a literal ``'/'``
        before the file name
    """
    # The message is one literal split across lines; a backslash
    # continuation inside the quotes would embed the indentation in it.
    print(
        '#' * 10,
        'Load previously trained spectral feature database and the list of '
        'extracted metabolites, created in 01. Download Data')
    #-----------------
    mypath = 'documents/simple_ms1/example_data'
    #-----------------
    base_dir = os.path.abspath(mypath)
    ps = load_obj(Path(base_dir, 'peak_sampler_mz_rt_int_19_beers_fullscan.p'))
    hmdb = load_obj(Path(base_dir, 'hmdb_compounds.p'))

    out_dir = Path(base_dir, 'results', 'MS1_single')
    # the list of ROI sources created in the previous notebook '01. Download Data.ipynb'
    ROI_Sources = [
        str(Path(base_dir, 'DsDA', 'DsDA_Beer', 'beer_t10_simulator_files'))
    ]

    # minimum MS1 intensity of chemicals
    min_ms1_intensity = 1.75E5

    # m/z and RT range of chemicals
    rt_range = [(0, 1440)]
    mz_range = [(0, 1050)]

    # the number of chemicals in the sample
    n_chems = 6500

    # maximum MS level (we do not generate fragmentation peaks when this value is 1)
    ms_level = 1

    chems = ChemicalCreator(ps, ROI_Sources, hmdb)
    dataset = chems.sample(mz_range, rt_range, min_ms1_intensity, n_chems,
                           ms_level)
    save_obj(dataset, Path(out_dir, 'dataset.p'))

    # show the first few sampled chemicals
    for chem in dataset[0:10]:
        print(chem)
    print('#' * 10,
          'Run MS1 controller on the samples and generate .mzML files')
    min_rt = rt_range[0][0]
    max_rt = rt_range[0][1]

    mass_spec = IndependentMassSpectrometer(POSITIVE, dataset, ps)
    controller = SimpleMs1Controller()

    # create an environment to run both the mass spec and controller
    env = Environment(mass_spec, controller, min_rt, max_rt, progress_bar=True)

    # set the log level to WARNING so we don't see too many messages when environment is running
    set_log_level_warning()
    try:
        # run the simulation
        env.run()
    finally:
        # restore DEBUG even if the run raised, so callers keep their logging
        set_log_level_debug()

    mzml_filename = 'ms1_controller.mzML'
    env.write_mzML(out_dir, mzml_filename)
    return str(Path(mypath, 'results', 'MS1_single')) + '/' + mzml_filename
Exemplo n.º 18
0
 def test_fullscan_from_mzml(self, chems_from_mzml):
     """Run an MS1-only simulation on chemicals taken from an mzML file.

     The simulated output must be writable as mzML.
     """
     mass_spec = IndependentMassSpectrometer(POSITIVE, chems_from_mzml)
     env = Environment(mass_spec,
                       SimpleMs1Controller(),
                       500,
                       600,
                       progress_bar=True)
     set_log_level_warning()
     env.run()
     check_mzML(env, OUT_DIR, 'fullscan_from_mzml.mzML')
Exemplo n.º 19
0
def top_n_roi_experiment_evaluation(datasets,
                                    min_rt,
                                    max_rt,
                                    N,
                                    isolation_window,
                                    mz_tol,
                                    rt_tol,
                                    min_ms1_intensity,
                                    min_roi_intensity,
                                    min_roi_length,
                                    base_chemicals=None,
                                    mzmine_files=None,
                                    rt_tolerance=100,
                                    experiment_dir=None,
                                    progress_bar=False):
    """Run Top-N ROI simulations in positive mode and evaluate them.

    Evaluation uses ``base_chemicals`` when given; otherwise each run is
    written as ``sample_<i>.mzml`` into ``experiment_dir`` and aligned with
    the matching entry of ``mzmine_files`` through a ``RoiAligner``.

    :param datasets: sequence of datasets, one simulation per item
    :param min_rt: passed to ``Environment`` together with ``max_rt``
    :param max_rt: passed to ``Environment`` together with ``min_rt``
    :param N: forwarded to ``TopN_RoiController`` as ``N``
    :param isolation_window: forwarded to ``TopN_RoiController``
    :param mz_tol: forwarded to ``TopN_RoiController``
    :param rt_tol: forwarded to ``TopN_RoiController`` as ``rt_tol``
    :param min_ms1_intensity: forwarded to ``TopN_RoiController``
    :param min_roi_intensity: forwarded to ``TopN_RoiController``
    :param min_roi_length: forwarded to ``TopN_RoiController``
    :param base_chemicals: if not None, selects the evaluation against
        these chemicals; no mzML is written in that case
    :param mzmine_files: indexable by dataset position; used only when
        ``base_chemicals`` is None
    :param rt_tolerance: passed to ``RoiAligner``
    :param experiment_dir: directory for the mzML files; it is joined into
        a path whenever ``base_chemicals`` is None
    :param progress_bar: passed to ``Environment``; when it is exactly
        ``False`` a line is printed after each dataset instead
    :return: ``(env_list, evaluation)``, or ``(None, None)`` when both
        ``base_chemicals`` and ``mzmine_files`` are None
    """
    # Nothing to evaluate against: bail out before simulating anything.
    if base_chemicals is None and mzmine_files is None:
        return None, None

    use_base_chemicals = base_chemicals is not None
    source_files = ['sample_' + str(i) for i in range(len(datasets))]
    env_list = []
    mzml_files = []

    for i, dataset in enumerate(datasets):
        mass_spec = IndependentMassSpectrometer(POSITIVE, dataset)
        controller = TopN_RoiController(POSITIVE,
                                        isolation_window,
                                        mz_tol,
                                        min_ms1_intensity,
                                        min_roi_intensity,
                                        min_roi_length,
                                        N=N,
                                        rt_tol=rt_tol)
        env = Environment(mass_spec,
                          controller,
                          min_rt,
                          max_rt,
                          progress_bar=progress_bar)
        env.run()
        if progress_bar is False:
            print('Processed dataset ' + str(i))
        env_list.append(env)

        if not use_base_chemicals:
            # the aligner needs the simulated data on disk
            mzml_name = source_files[i] + '.mzml'
            mzml_files.append(os.path.join(experiment_dir, mzml_name))
            env.write_mzML(experiment_dir, mzml_name)

    if use_base_chemicals:
        evaluation = evaluate_multiple_simulated_env(
            env_list, base_chemicals=base_chemicals)
    else:
        roi_aligner = RoiAligner(rt_tolerance=rt_tolerance)
        for i, mzml_file in enumerate(mzml_files):
            roi_aligner.add_picked_peaks(mzml_file, mzmine_files[i],
                                         source_files[i], 'mzmine')
        evaluation = evaluate_multi_peak_roi_aligner(roi_aligner,
                                                     source_files)
    return env_list, evaluation
Exemplo n.º 20
0
    def test_acquisition(self, two_fixed_chems):
        """A multi-target MS2 scan holds the peaks of both single-target scans.

        The schedule is one MS1 scan, one MS2 scan per chemical, and a
        third MS2 scan given both targets at once. The third scan must
        contain as many peaks as the first two together.
        """
        ionisation_mode = POSITIVE
        isolation_width = DEFAULT_ISOLATION_WIDTH
        mz_tol = 0.1
        rt_tol = 15
        min_rt, max_rt = 110, 112
        mz_to_target = [chem.mass + 1.0 for chem in two_fixed_chems]

        controller = FixedScansController()
        mass_spec = IndependentMassSpectrometer(ionisation_mode,
                                                two_fixed_chems)
        env = Environment(mass_spec, controller, min_rt, max_rt)

        def make_ms2(precursor_mz, precursor_intensity):
            # every MS2 scan shares the same window and tolerance settings
            return get_dda_scan_param(precursor_mz,
                                      precursor_intensity,
                                      None,
                                      isolation_width,
                                      mz_tol,
                                      rt_tol,
                                      polarity=ionisation_mode)

        ms1_scan = get_default_scan_params(polarity=ionisation_mode)
        ms2_scan_1 = make_ms2(mz_to_target[0], 0.0)
        ms2_scan_2 = make_ms2(mz_to_target[1], 0.0)
        # third scan: both targets in a single scan
        ms2_scan_3 = make_ms2(mz_to_target, [0.0, 0.0])

        controller.set_tasks([ms1_scan, ms2_scan_1, ms2_scan_2, ms2_scan_3])
        set_log_level_warning()
        env.run()
        assert len(controller.scans[2]) == 3

        n_peaks = [scan.num_peaks for scan in controller.scans[2]]
        assert n_peaks[0] > 0
        assert n_peaks[1] > 0
        assert n_peaks[2] == n_peaks[0] + n_peaks[1]
        env.write_mzML(OUT_DIR, 'multi_windows.mzML')
Exemplo n.º 21
0
def run_env(mass_spec, controller, min_rt, max_rt, mzml_file):
    """Run a simulation, write its mzML and report chemical coverage.

    :param mass_spec: mass spectrometer handed to ``Environment``
    :param controller: controller handed to ``Environment``
    :param min_rt: passed to ``Environment`` together with ``max_rt``
    :param max_rt: passed to ``Environment`` together with ``min_rt``
    :param mzml_file: output file given to ``write_mzML`` (with ``None``
        as the directory argument)
    :return: number of distinct chemicals (compared by their ``repr``)
        among fragmentation events above MS level 1, divided by the number
        of chemicals in the mass spec
    """
    env = Environment(mass_spec, controller, min_rt, max_rt)
    env.run()
    env.write_mzML(None, mzml_file)

    fragmented = {
        repr(event.chem)
        for event in env.mass_spec.fragmentation_events
        if event.ms_level > 1
    }
    n_chemicals = len(env.mass_spec.chemicals)
    return len(fragmented) / n_chemicals
Exemplo n.º 22
0
    def test_hybrid_controller_with_beer_chems(self):
        """Run the hybrid controller on the QC beer chemicals.

        The mass spectrometer is built with noise and a gaussian isolation
        transition window; the controller is created with purity scans
        enabled. The run must produce an mzML file in ``out_dir``.
        """
        logger.info('Testing hybrid controller with QC beer chemicals')

        isolation_window = [1]  # the isolation window in Dalton around a selected precursor ion
        N = [5]
        rt_tol = [10]
        mz_tol = [10]
        min_ms1_intensity = 1.75E5
        scan_param_changepoints = None
        rt_range = [(0, 400)]
        min_rt = rt_range[0][0]
        max_rt = rt_range[0][1]
        n_purity_scans = N[0]
        purity_shift = 0.2
        purity_threshold = 1

        # these settings change the Mass Spec type. They arent necessary to run the Top-N ROI Controller
        isolation_transition_window = 'gaussian'
        isolation_transition_window_params = [0.5]

        purity_add_ms1 = True  # this seems to be the broken bit
        purity_randomise = True

        mass_spec = IndependentMassSpectrometer(POSITIVE, beer_chems, self.ps, add_noise=True,
                                                isolation_transition_window=isolation_transition_window,
                                                isolation_transition_window_params=isolation_transition_window_params)
        controller = HybridController(mass_spec, N, scan_param_changepoints, isolation_window, mz_tol, rt_tol,
                                      min_ms1_intensity, n_purity_scans, purity_shift, purity_threshold,
                                      purity_add_ms1=purity_add_ms1, purity_randomise=purity_randomise)

        # create an environment to run both the mass spec and controller
        env = Environment(mass_spec, controller, min_rt, max_rt, progress_bar=True)

        # set the log level to WARNING so we don't see too many messages when environment is running
        set_log_level_warning()
        try:
            # run the simulation
            env.run()
        finally:
            # set the log level back to DEBUG even if the run raised, so a
            # failure here does not silence logging for whatever runs next
            set_log_level_debug()

        # write simulated output to mzML file
        filename = 'hybrid_controller_qcbeer_chems.mzML'
        out_file = os.path.join(out_dir, filename)
        env.write_mzML(out_dir, filename)
        self.assertTrue(os.path.exists(out_file))
        print()
Exemplo n.º 23
0
 def test_topn_from_mzml(self, chems_from_mzml):
     """Run Top-N on chemicals taken from an mzML file.

     The run must yield at least one non-empty MS2 scan and be writable
     as mzML.
     """
     ionisation_mode = POSITIVE

     # Top-N controller settings
     N = 10
     isolation_width = 0.7
     mz_tol = 0.01
     rt_tol = 15
     min_ms1_intensity = 10

     controller = TopNController(ionisation_mode, N, isolation_width,
                                 mz_tol, rt_tol, min_ms1_intensity)
     mass_spec = IndependentMassSpectrometer(ionisation_mode,
                                             chems_from_mzml)
     env = Environment(mass_spec, controller, 500, 600, progress_bar=True)
     set_log_level_warning()
     env.run()

     check_non_empty_MS2(controller)
     check_mzML(env, OUT_DIR, 'topn_from_mzml.mzML')
Exemplo n.º 24
0
    def test_FixedScansController(self, two_fixed_chems):
        """A fixed schedule of one MS1 and three MS2 scans runs as given.

        All three MS2 scans use the first chemical's target m/z. After the
        run there must be exactly one MS1 scan and three MS2 scans, each
        MS2 scan holding at least one peak.
        """
        logger.info('Testing FixedScansController')
        ionisation_mode = POSITIVE
        isolation_width = DEFAULT_ISOLATION_WIDTH
        mz_tol = 0.1
        rt_tol = 15
        min_rt, max_rt = 110, 112
        mz_to_target = [chem.mass + 1.0 for chem in two_fixed_chems]

        controller = FixedScansController(schedule=None)
        mass_spec = IndependentMassSpectrometer(ionisation_mode,
                                                two_fixed_chems)
        env = Environment(mass_spec, controller, min_rt, max_rt)

        # one MS1 scan followed by three identical MS2 scans
        ms1_scan = get_default_scan_params(polarity=ionisation_mode)
        ms2_scans = [
            get_dda_scan_param(mz_to_target[0],
                               0.0,
                               None,
                               isolation_width,
                               mz_tol,
                               rt_tol,
                               polarity=ionisation_mode)
            for _ in range(3)
        ]
        controller.set_tasks([ms1_scan] + ms2_scans)
        set_log_level_warning()
        env.run()

        assert len(controller.scans[1]) == 1
        assert len(controller.scans[2]) == 3
        for ms2_result in controller.scans[2]:
            assert ms2_result.num_peaks > 0
        env.write_mzML(OUT_DIR, 'fixedScansController.mzML')
Exemplo n.º 25
0
def run_experiment(param):
    """
    Run a single Top-N simulation and write its outputs to disk.

    If both output files already exist, nothing is simulated: a debug
    message is logged and the function returns ``None``.

    :param param: dict of experimental parameters. Keys read here are
        ``analysis_name``, ``mzml_out``, ``pickle_out``, ``N``, ``rt_tol``,
        ``peak_sampler``, ``ionisation_mode``, ``data``,
        ``isolation_width``, ``mz_tol``, ``min_ms1_intensity``, ``min_rt``,
        ``max_rt`` and ``pbar``; ``mzml_path`` and ``fragfiles`` are also
        read when ``peak_sampler`` is None.
    :return: the analysis name when the simulation was run, else None
    """
    analysis_name = param['analysis_name']
    mzml_out = param['mzml_out']
    pickle_out = param['pickle_out']
    N = param['N']
    rt_tol = param['rt_tol']

    # guard clause: both outputs are already on disk, so there is nothing to do
    if os.path.isfile(mzml_out) and os.path.isfile(pickle_out):
        logger.debug('Skipping %s' % (analysis_name))
        return None

    logger.debug('Processing %s' % (analysis_name))

    peak_sampler = param['peak_sampler']
    if peak_sampler is None:
        # no sampler was supplied: derive one from the fragmentation file
        # registered under the (N, rt_tol) key
        mzml_path = param['mzml_path']
        fragfile = param['fragfiles'][(N, rt_tol)]
        peak_sampler = get_peak_sampler(mzml_path, fragfile,
                                        param['min_rt'], param['max_rt'])
    # NOTE(review): peak_sampler is not passed to anything below in this
    # function -- confirm whether the mass spectrometer should receive it.

    ionisation_mode = param['ionisation_mode']
    mass_spec = IndependentMassSpectrometer(ionisation_mode, param['data'])
    controller = TopNController(ionisation_mode,
                                N,
                                param['isolation_width'],
                                param['mz_tol'],
                                rt_tol,
                                param['min_ms1_intensity'])

    # the environment drives the mass spec and the controller together
    env = Environment(mass_spec,
                      controller,
                      param['min_rt'],
                      param['max_rt'],
                      progress_bar=param['pbar'])

    # log level is lowered to warning for the run, then set back to debug
    set_log_level_warning()
    env.run()
    set_log_level_debug()

    env.write_mzML(None, mzml_out)
    save_obj(controller, pickle_out)
    return analysis_name
Exemplo n.º 26
0
    def test_targeted(self):
        """
        Run a TargetedController simulation and check that every target is
        fragmented ``n_replicates`` times at each collision energy.
        """
        # build a small synthetic chemical mixture
        formula_sampler = EvenMZFormulaSampler()
        rt_intensity_sampler = UniformRTAndIntensitySampler(min_rt=0,
                                                            max_rt=10)
        chromatogram_sampler = ConstantChromatogramSampler()
        ms2_sampler = FixedMS2Sampler()
        mixture_creator = ChemicalMixtureCreator(
            formula_sampler,
            rt_and_intensity_sampler=rt_intensity_sampler,
            chromatogram_sampler=chromatogram_sampler,
            ms2_sampler=ms2_sampler)
        # original author's note: sample chems with m/z = 100 and 200
        chems = mixture_creator.sample(2, 2)

        targets = [
            Target(101, 100, 102, 10, 20, adduct='M+H'),
            Target(201, 200, 202, 10, 20, metadata={'a': 1}),
        ]
        ce_values = [10, 20, 30]
        n_replicates = 4
        controller = TargetedController(targets,
                                        ce_values,
                                        n_replicates=n_replicates,
                                        limit_acquisition=True)
        mass_spec = IndependentMassSpectrometer(POSITIVE, chems)
        env = Environment(mass_spec, controller, 5, 25, progress_bar=True)
        set_log_level_warning()
        env.run()

        # every MS level recorded by the controller must hold some scans
        for ms_level in controller.scans:
            assert len(controller.scans[ms_level]) > 0
        set_log_level_debug()

        # tally of MS2 scans per target and per collision energy
        target_counts = {}
        for target in targets:
            target_counts[target] = dict.fromkeys(ce_values, 0)

        for scan in controller.scans[2]:
            scan_params = scan.scan_params
            precursor = scan_params.get(ScanParameters.PRECURSOR_MZ)[0]
            precursor_mz = precursor.precursor_mz
            # each MS2 scan must fall inside exactly one target's
            # RT window and m/z window
            matches = [
                t for t in targets
                if (t.from_rt <= scan.rt <= t.to_rt)
                and (t.from_mz <= precursor_mz <= t.to_mz)
            ]
            assert len(matches) == 1
            collision_energy = scan_params.get(
                ScanParameters.COLLISION_ENERGY)
            target_counts[matches[0]][collision_energy] += 1

        for per_ce_counts in target_counts.values():
            for count in per_ce_counts.values():
                assert count == n_replicates
Exemplo n.º 27
0
def top_n_box_experiment(datasets,
                         base_chemicals,
                         rt_range,
                         boxes_params,
                         dataset_group_list,
                         isolation_width,
                         mz_tol,
                         min_ms1_intensity,
                         min_roi_intensity,
                         min_roi_length,
                         N,
                         rt_tol,
                         ionisation_mode=POSITIVE):
    """
    Simulate each dataset in turn with a TopNBoxRoiController, feeding the
    boxes aligned from all earlier runs into the next controller.

    :param datasets: sequence of datasets, one simulated run per entry
    :param base_chemicals: passed to ``evaluate_multiple_simulated_env``
    :param rt_range: indexable whose items 0 and 1 are handed to the
        environment as its two RT arguments
    :param boxes_params: forwarded to the controller as ``boxes_params``
    :param dataset_group_list: per-dataset group label given to the aligner
    :param isolation_width: forwarded to the controller
    :param mz_tol: forwarded to the controller
    :param min_ms1_intensity: forwarded to the controller
    :param min_roi_intensity: forwarded to the controller
    :param min_roi_length: forwarded to the controller
    :param N: forwarded to the controller as ``N``
    :param rt_tol: forwarded to the controller as ``rt_tol``
    :param ionisation_mode: used for both mass spec and controller
    :return: tuple of (list of environments that were run, evaluation)
    """
    aligner = RoiAligner()
    env_list = []
    # the first run has no previous information to draw boxes from
    boxes, boxes_intensity = None, None

    for i, dataset in enumerate(datasets):
        mass_spec = IndependentMassSpectrometer(ionisation_mode, dataset)
        controller = TopNBoxRoiController(ionisation_mode,
                                          isolation_width,
                                          mz_tol,
                                          min_ms1_intensity,
                                          min_roi_intensity,
                                          min_roi_length,
                                          boxes_params=boxes_params,
                                          boxes=boxes,
                                          boxes_intensity=boxes_intensity,
                                          N=N,
                                          rt_tol=rt_tol)
        env = Environment(mass_spec,
                          controller,
                          rt_range[0],
                          rt_range[1],
                          progress_bar=True)
        env.run()
        env_list.append(env)

        # register this run's ROIs (live and dead) with the aligner, then
        # refresh the boxes that the next iteration's controller receives
        rois = env.controller.live_roi + env.controller.dead_roi
        aligner.add_sample(rois, 'sample_' + str(i), dataset_group_list[i])
        boxes = aligner.get_boxes()
        boxes_intensity = aligner.get_max_frag_intensities()

    final_evaluation = evaluate_multiple_simulated_env(
        env_list, base_chemicals=base_chemicals)
    return env_list, final_evaluation
Exemplo n.º 28
0
    def test_default_scan_time(self, chems_from_mzml):
        """
        Run a Top-N simulation whose mass spec uses DEFAULT_SCAN_TIME_DICT
        as scan durations, then check the resulting mzML file.
        """
        polarity = POSITIVE
        top_n = 10
        precursor_isolation_width = 0.7
        mz_tolerance = 0.01
        rt_tolerance = 15
        ms1_intensity_threshold = 10
        controller = TopNController(polarity, top_n,
                                    precursor_isolation_width, mz_tolerance,
                                    rt_tolerance, ms1_intensity_threshold)

        # simulate with the default scan durations
        mass_spec = IndependentMassSpectrometer(
            polarity,
            chems_from_mzml,
            scan_duration=DEFAULT_SCAN_TIME_DICT)
        env = Environment(mass_spec, controller, 500, 600, progress_bar=True)
        set_log_level_warning()
        env.run()

        check_mzML(env, OUT_DIR, 'test_scan_time_default.mzML')
Exemplo n.º 29
0
    def test_multiple_isolation(self):
        """
        Run a MultiIsolationController simulation and check that, within the
        first block of MS2 scans, every multi-precursor scan contains exactly
        the union of the m/z values of its single-precursor scans.
        """
        N = 3
        formula_sampler = EvenMZFormulaSampler()
        rt_intensity_sampler = UniformRTAndIntensitySampler(min_rt=0,
                                                            max_rt=10)
        chromatogram_sampler = ConstantChromatogramSampler()
        ms2_sampler = FixedMS2Sampler()
        mixture_creator = ChemicalMixtureCreator(
            formula_sampler,
            rt_and_intensity_sampler=rt_intensity_sampler,
            chromatogram_sampler=chromatogram_sampler,
            ms2_sampler=ms2_sampler)
        chems = mixture_creator.sample(3, 2)

        controller = MultiIsolationController(N)
        mass_spec = IndependentMassSpectrometer(POSITIVE, chems)
        env = Environment(mass_spec, controller, 10, 20, progress_bar=True)
        set_log_level_warning()
        env.run()

        assert len(controller.scans[1]) > 0
        assert len(controller.scans[2]) > 0

        # Expected layout of the first block of MS2 scans, in order:
        # three single-precursor scans, then the three pairs, then the
        # scan that isolates all three precursors at once.
        precursor_combos = [
            (0, ), (1, ), (2, ),
            (0, 1), (0, 2), (1, 2),
            (0, 1, 2),
        ]
        scan_by_combo = {
            combo: controller.scans[2][position]
            for position, combo in enumerate(precursor_combos)
        }

        # each scan must be the super-position of its individual precursors
        for combo, scan in scan_by_combo.items():
            actual_mz_vals = set(scan.mzs)
            expected_mz_vals = {
                mz
                for precursor_idx in combo
                for mz in scan_by_combo[(precursor_idx, )].mzs
            }
            assert expected_mz_vals == actual_mz_vals
Exemplo n.º 30
0
def non_overlap_experiment(datasets,
                           base_chemicals,
                           rt_range,
                           isolation_width,
                           mz_tol,
                           min_ms1_intensity,
                           min_roi_intensity,
                           min_roi_length,
                           N,
                           rt_tol,
                           min_roi_length_for_fragmentation,
                           rt_box_size,
                           mz_box_size,
                           ionisation_mode=POSITIVE):
    """
    Simulate each dataset in turn with a NonOverlapController; all
    controllers are given the same GridEstimator instance.

    :param datasets: sequence of datasets, one simulated run per entry
    :param base_chemicals: passed to ``evaluate_multiple_simulated_env``
    :param rt_range: indexable whose items 0 and 1 are handed to both the
        LocatorGrid and the environment
    :param isolation_width: forwarded to the controller
    :param mz_tol: forwarded to the controller
    :param min_ms1_intensity: forwarded to the controller
    :param min_roi_intensity: forwarded to the controller
    :param min_roi_length: forwarded to the controller
    :param N: forwarded to the controller
    :param rt_tol: forwarded to the controller as ``rt_tol``
    :param min_roi_length_for_fragmentation: forwarded to the controller
    :param rt_box_size: third argument of the LocatorGrid
    :param mz_box_size: last argument of the LocatorGrid
    :param ionisation_mode: used for both mass spec and controller
    :return: tuple of (list of environments that were run, evaluation)
    """
    env_list = []

    # one grid, created once here and handed to every controller below
    locator = LocatorGrid(rt_range[0], rt_range[1], rt_box_size, 0, 3000,
                          mz_box_size)
    grid = GridEstimator(locator, IdentityDrift())

    for dataset in datasets:
        mass_spec = IndependentMassSpectrometer(ionisation_mode, dataset)
        controller = NonOverlapController(
            ionisation_mode,
            isolation_width,
            mz_tol,
            min_ms1_intensity,
            min_roi_intensity,
            min_roi_length,
            N,
            grid,
            rt_tol=rt_tol,
            min_roi_length_for_fragmentation=min_roi_length_for_fragmentation)
        env = Environment(mass_spec,
                          controller,
                          rt_range[0],
                          rt_range[1],
                          progress_bar=True)
        env.run()
        env_list.append(env)

    final_evaluation = evaluate_multiple_simulated_env(
        env_list, base_chemicals=base_chemicals)
    return env_list, final_evaluation