Exemplo n.º 1
0
def run_TopN(chems, scan_duration, params, out_dir):
    """
    Run a Top-N simulation.

    The output file name is built as '<controller_name>_<sample_name>.mzML'
    from `params`; it is passed to the Environment together with `out_dir`.

    :param chems: the chemicals passed to the simulated mass spectrometer
    :param scan_duration: scan duration setting forwarded to the mass
    spectrometer
    :param params: a dictionary of parameters. Keys read here are
    'controller_name', 'sample_name', 'ionisation_mode', 'N',
    'isolation_width', 'mz_tol', 'rt_tol', 'min_ms1_intensity', 'min_rt'
    and 'max_rt'
    :param out_dir: output directory
    :return: None
    """
    logger.info('Running TopN simulation')
    logger.info(params)

    name_parts = (params['controller_name'], params['sample_name'])
    out_file = '%s_%s.mzML' % name_parts

    # the controller is built before the mass spec, as in the original flow
    top_n_controller = TopNController(
        params['ionisation_mode'],
        params['N'],
        params['isolation_width'],
        params['mz_tol'],
        params['rt_tol'],
        params['min_ms1_intensity'],
    )
    spectrometer = IndependentMassSpectrometer(
        params['ionisation_mode'], chems, scan_duration=scan_duration)

    environment = Environment(
        spectrometer,
        top_n_controller,
        params['min_rt'],
        params['max_rt'],
        progress_bar=True,
        out_dir=out_dir,
        out_file=out_file,
    )
    logger.info('Generating %s' % out_file)
    environment.run()
Exemplo n.º 2
0
    def test_TopN_controller_with_beer_chems_and_scan_duration_dict(self):
        """
        Run the Top-N controller on the QC beer chemicals with the scan
        durations supplied as a dictionary, then check the MS2 scans and the
        mzML output.
        """
        logger.info('Testing Top-N controller with QC beer chemicals '
                    'passing in the scan durations')

        # controller settings
        mode = POSITIVE
        top_n = 10
        width = 1
        mz_tolerance = 10
        rt_tolerance = 15

        # scan durations handed to the mass spec
        # (presumably keyed by MS level -- confirm against the mass spec)
        durations = {1: 0.2, 2: 0.1}

        # simulated mass spec without noise that receives the scan duration
        # dict, plus the Top-N controller
        spectrometer = IndependentMassSpectrometer(mode,
                                                   BEER_CHEMS,
                                                   scan_duration=durations)
        top_n_controller = TopNController(mode, top_n, width, mz_tolerance,
                                          rt_tolerance, MIN_MS1_INTENSITY)

        # the environment runs both the mass spec and the controller
        environment = Environment(spectrometer, top_n_controller,
                                  BEER_MIN_BOUND, BEER_MAX_BOUND,
                                  progress_bar=True)
        run_environment(environment)

        # there must be at least one non-empty MS2 scan
        check_non_empty_MS2(top_n_controller)

        # write the simulated output to an mzML file via check_mzML
        check_mzML(
            environment, OUT_DIR,
            'topN_controller_qcbeer_chems_no_noise_with_scan_duration.mzML')
Exemplo n.º 3
0
    def test_mean_scan_time_from_mzml(self):
        """
        Run a Top-N simulation whose scan durations come from an
        MzMLScanTimeSampler created with use_mean=True, then check the mzML
        output.
        """
        mode = POSITIVE
        controller_settings = {
            'N': 10,
            'isolation_width': 0.7,
            'mz_tol': 0.01,
            'rt_tol': 15,
            'min_ms1_intensity': 10,
        }
        top_n_controller = TopNController(
            mode,
            controller_settings['N'],
            controller_settings['isolation_width'],
            controller_settings['mz_tol'],
            controller_settings['rt_tol'],
            controller_settings['min_ms1_intensity'],
        )

        # chemicals are extracted from the mzML file
        mixture = ChemicalMixtureFromMZML(
            MZML_FILE, roi_params=RoiParams(min_intensity=10, min_length=5))
        chemicals = mixture.sample(None, 2)

        # mean timing per scan level, also extracted from the mzML file
        scan_times = MzMLScanTimeSampler(MZML_FILE, use_mean=True)
        spectrometer = IndependentMassSpectrometer(mode, chemicals,
                                                   scan_duration=scan_times)

        # run the simulation between RT 500 and 600
        environment = Environment(spectrometer, top_n_controller, 500, 600,
                                  progress_bar=True)
        set_log_level_warning()
        environment.run()
        check_mzML(environment, OUT_DIR, 'test_scan_time_mean_from_mzml.mzML')
Exemplo n.º 4
0
    def test_neg(self, even_chems):
        """
        Run Top-N in negative mode and check the polarity recorded on every
        scan, the integer m/z values of the first MS1 scan, and the negative
        scan accession in the written mzML file.
        """
        N = 10
        spectrometer = IndependentMassSpectrometer(NEGATIVE, even_chems)
        top_n_controller = TopNController(NEGATIVE, N, 0.7, 10, 15, 0,
                                          force_N=True)
        environment = Environment(spectrometer, top_n_controller, 200, 300,
                                  progress_bar=True)
        run_environment(environment)

        # every scan at every level must carry negative polarity
        for ms_level in top_n_controller.scans:
            for current_scan in top_n_controller.scans[ms_level]:
                polarity = current_scan.scan_params.get(
                    ScanParameters.POLARITY)
                assert polarity == NEGATIVE

        # integer m/z values expected in the first MS1 scan
        first_ms1 = top_n_controller.scans[1][0]
        ms1_peaks = sorted(int(mz) for mz in first_ms1.mzs)
        for expected_mz in (98, 198, 298, 398):
            assert expected_mz in ms1_peaks

        filename = 'topn_negative.mzML'
        check_mzML(environment, OUT_DIR, filename)

        # load the file and check polarity in the mzml
        mzml_path = os.path.join(OUT_DIR, filename)
        for spectrum in pymzml.run.Reader(mzml_path):
            # this is the negative scan accession
            assert spectrum.get('MS:1000129')
Exemplo n.º 5
0
def top_n_experiment(datasets,
                     base_chemicals,
                     rt_range,
                     N,
                     isolation_width,
                     mz_tol,
                     rt_tol,
                     min_ms1_intensity,
                     ionisation_mode=POSITIVE):
    """
    Run one Top-N simulation per dataset and evaluate all of them together.

    :param datasets: indexable collection; each entry is passed to a fresh
    IndependentMassSpectrometer
    :param base_chemicals: forwarded to evaluate_multiple_simulated_env
    :param rt_range: indexable whose items 0 and 1 are passed to the
    Environment as the start and end of the run
    :param N: Top-N controller parameter
    :param isolation_width: Top-N controller parameter
    :param mz_tol: Top-N controller parameter
    :param rt_tol: Top-N controller parameter
    :param min_ms1_intensity: Top-N controller parameter
    :param ionisation_mode: ionisation mode used for both the mass
    spectrometer and the controller
    :return: a tuple (list of environments that were run, evaluation result)
    """
    # fixed controller options used for every dataset
    fixed_options = dict(ms1_shift=0,
                         initial_exclusion_list=None,
                         force_N=False)

    env_list = []
    for idx in range(len(datasets)):
        spectrometer = IndependentMassSpectrometer(ionisation_mode,
                                                   datasets[idx])
        top_n_controller = TopNController(ionisation_mode, N,
                                          isolation_width, mz_tol, rt_tol,
                                          min_ms1_intensity, **fixed_options)
        rt_start, rt_end = rt_range[0], rt_range[1]
        environment = Environment(spectrometer, top_n_controller, rt_start,
                                  rt_end, progress_bar=True)
        environment.run()
        env_list.append(environment)

    return env_list, evaluate_multiple_simulated_env(
        env_list, base_chemicals=base_chemicals)
Exemplo n.º 6
0
    def test_negative_fixed(self):
        """
        Sample three chemicals with fixed samplers, run Top-N in negative
        mode, and check the integer m/z values of the first MS1 scan and of
        every MS2 scan.
        """
        formula_sampler = EvenMZFormulaSampler()
        ms2_sampler = FixedMS2Sampler()
        rt_intensity_sampler = UniformRTAndIntensitySampler(min_rt=100,
                                                            max_rt=101)
        chromatogram_sampler = ConstantChromatogramSampler()
        mixture_creator = ChemicalMixtureCreator(
            formula_sampler,
            ms2_sampler=ms2_sampler,
            rt_and_intensity_sampler=rt_intensity_sampler,
            chromatogram_sampler=chromatogram_sampler)
        dataset = mixture_creator.sample(3, 2)

        # controller settings
        N, isolation_width, mz_tol, rt_tol = 10, 0.7, 10, 15

        spectrometer = IndependentMassSpectrometer(NEGATIVE, dataset)
        top_n_controller = TopNController(NEGATIVE, N, isolation_width,
                                          mz_tol, rt_tol, MIN_MS1_INTENSITY)
        environment = Environment(spectrometer, top_n_controller, 102, 110,
                                  progress_bar=True)
        set_log_level_warning()
        environment.run()

        # compare the first MS1 scan position by position
        first_ms1 = top_n_controller.scans[1][0]
        ms1_mz_vals = [int(mz) for mz in first_ms1.mzs]
        expected_vals = [98, 198, 298]
        for position, observed in enumerate(ms1_mz_vals):
            assert observed == expected_vals[position]

        # every MS2 peak must be one of the expected fragments
        expected_frags = {88, 78, 188, 178, 288, 278}
        for ms2_scan in top_n_controller.scans[2]:
            for mz in ms2_scan.mzs:
                assert int(mz) in expected_frags
Exemplo n.º 7
0
    def test_TopN_controller_with_simulated_chems(self, fragscan_dataset):
        """
        Run the Top-N controller on the simulated chemicals fixture and check
        the MS2 scans and the mzML output.
        """
        logger.info(
            'Testing Top-N controller with simulated chemicals -- no noise')
        assert len(fragscan_dataset) == N_CHEMS

        # controller settings
        mode = POSITIVE
        top_n = 10
        width = 1
        mz_tolerance = 10
        rt_tolerance = 15

        # simulated mass spec without noise and the Top-N controller
        spectrometer = IndependentMassSpectrometer(mode, fragscan_dataset)
        top_n_controller = TopNController(mode, top_n, width, mz_tolerance,
                                          rt_tolerance, MIN_MS1_INTENSITY)
        rt_bounds = get_rt_bounds(fragscan_dataset, CENTRE_RANGE)
        lower_rt, upper_rt = rt_bounds

        # the environment runs both the mass spec and the controller
        environment = Environment(spectrometer, top_n_controller, lower_rt,
                                  upper_rt, progress_bar=True)
        run_environment(environment)

        # there must be at least one non-empty MS2 scan
        check_non_empty_MS2(top_n_controller)

        check_mzML(environment, OUT_DIR,
                   'topN_controller_simulated_chems_no_noise.mzML')
Exemplo n.º 8
0
    def test_TopN_controller_with_beer_chems(self):
        """
        Run the Top-N controller on the QC beer chemicals with noise disabled
        and assert that the mzML output file exists afterwards.
        """
        logger.info('Testing Top-N controller with QC beer chemicals')

        # controller settings
        mode = POSITIVE
        top_n = 10
        width = 1
        mz_tolerance = 10
        rt_tolerance = 15

        # simulated mass spec without noise and the Top-N controller
        spectrometer = IndependentMassSpectrometer(
            mode, beer_chems, self.ps, add_noise=False)
        top_n_controller = TopNController(
            mode, top_n, width, mz_tolerance, rt_tolerance, min_ms1_intensity)

        # the environment runs both the mass spec and the controller
        environment = Environment(
            spectrometer, top_n_controller, min_rt, max_rt, progress_bar=True)

        # log at WARNING while the environment runs to limit the output,
        # then switch back to DEBUG
        set_log_level_warning()
        environment.run()
        set_log_level_debug()

        # write the simulated output to an mzML file and check it exists
        filename = 'topN_controller_qcbeer_chems_no_noise.mzML'
        expected_path = os.path.join(out_dir, filename)
        environment.write_mzML(out_dir, filename)
        self.assertTrue(os.path.exists(expected_path))
        print()
Exemplo n.º 9
0
    def test_mass_spec(self):
        """
        Run the Top-N controller on the QC beer chemicals using a mass spec
        that is given a TaskManager with buffer_size=3, then check the MS2
        scans and the mzML output.
        """
        logger.info(
            'Testing mass spec using the Top-N controller and QC beer chemicals'
        )

        # controller settings
        mode = POSITIVE
        top_n = 10
        width = 1
        mz_tolerance = 10
        rt_tolerance = 15

        spectrometer = IndependentMassSpectrometer(
            mode, BEER_CHEMS, task_manager=TaskManager(buffer_size=3))
        top_n_controller = TopNController(mode, top_n, width, mz_tolerance,
                                          rt_tolerance, MIN_MS1_INTENSITY)

        # the environment runs both the mass spec and the controller;
        # it is run directly rather than through run_environment
        environment = Environment(spectrometer, top_n_controller,
                                  BEER_MIN_BOUND, BEER_MAX_BOUND,
                                  progress_bar=True)
        environment.run()

        # there must be at least one non-empty MS2 scan
        check_non_empty_MS2(top_n_controller)

        # write the simulated output to an mzML file via check_mzML
        check_mzML(environment, OUT_DIR, 'test_mass_spec.mzML')
Exemplo n.º 10
0
def top_n_experiment_evaluation(datasets,
                                min_rt,
                                max_rt,
                                N,
                                isolation_window,
                                mz_tol,
                                rt_tol,
                                min_ms1_intensity,
                                base_chemicals=None,
                                mzmine_files=None,
                                rt_tolerance=100,
                                experiment_dir=None,
                                progress_bar=False):
    """
    Run a positive-mode Top-N simulation on each dataset and evaluate them.

    When `base_chemicals` is given, the runs are evaluated with
    evaluate_multiple_simulated_env. Otherwise each run is written as
    'sample_<i>.mzml' into `experiment_dir` and evaluated against
    `mzmine_files` through a RoiAligner.

    :param datasets: indexable collection; each entry is passed to a fresh
    IndependentMassSpectrometer
    :param min_rt: start of the run, passed to the Environment
    :param max_rt: end of the run, passed to the Environment
    :param N: Top-N controller parameter
    :param isolation_window: Top-N controller parameter
    :param mz_tol: Top-N controller parameter
    :param rt_tol: Top-N controller parameter
    :param min_ms1_intensity: Top-N controller parameter
    :param base_chemicals: if not None, used for the simulated evaluation
    :param mzmine_files: picked-peak files, indexed in step with the
    datasets; only used when base_chemicals is None
    :param rt_tolerance: passed to the RoiAligner
    :param experiment_dir: directory the mzML files are written to; only
    used when base_chemicals is None
    :param progress_bar: passed to the Environment; when it is False a
    message is printed after each dataset instead
    :return: (env_list, evaluation), or (None, None) when both
    base_chemicals and mzmine_files are None
    """
    # nothing to evaluate against
    if base_chemicals is None and mzmine_files is None:
        return None, None

    n_datasets = len(datasets)
    source_files = ['sample_' + str(idx) for idx in range(n_datasets)]
    env_list = []
    mzml_files = []
    for idx in range(n_datasets):
        spectrometer = IndependentMassSpectrometer(POSITIVE, datasets[idx])
        top_n_controller = TopNController(POSITIVE,
                                          N,
                                          isolation_window,
                                          mz_tol,
                                          rt_tol,
                                          min_ms1_intensity,
                                          ms1_shift=0,
                                          initial_exclusion_list=None,
                                          force_N=False)
        environment = Environment(spectrometer, top_n_controller, min_rt,
                                  max_rt, progress_bar=progress_bar)
        environment.run()
        if progress_bar is False:
            print('Processed dataset ' + str(idx))
        env_list.append(environment)

        # mzML files are only needed for the peak-picking based evaluation
        if base_chemicals is None:
            mzml_name = source_files[idx] + '.mzml'
            mzml_files.append(os.path.join(experiment_dir, mzml_name))
            environment.write_mzML(experiment_dir, mzml_name)

    if base_chemicals is not None:
        evaluation = evaluate_multiple_simulated_env(
            env_list, base_chemicals=base_chemicals)
        return env_list, evaluation

    roi_aligner = RoiAligner(rt_tolerance=rt_tolerance)
    for idx in range(len(mzml_files)):
        roi_aligner.add_picked_peaks(mzml_files[idx], mzmine_files[idx],
                                     source_files[idx], 'mzmine')
    evaluation = evaluate_multi_peak_roi_aligner(roi_aligner, source_files)
    return env_list, evaluation
Exemplo n.º 11
0
 def test_topn_from_mzml(self, chems_from_mzml):
     """
     Run a Top-N simulation on the chemicals fixture extracted from mzML
     and check the MS2 scans and the mzML output.
     """
     mode = POSITIVE
     controller_settings = {
         'N': 10,
         'isolation_width': 0.7,
         'mz_tol': 0.01,
         'rt_tol': 15,
         'min_ms1_intensity': 10,
     }
     top_n_controller = TopNController(
         mode,
         controller_settings['N'],
         controller_settings['isolation_width'],
         controller_settings['mz_tol'],
         controller_settings['rt_tol'],
         controller_settings['min_ms1_intensity'],
     )
     spectrometer = IndependentMassSpectrometer(mode, chems_from_mzml)

     # run the simulation between RT 500 and 600
     environment = Environment(spectrometer, top_n_controller, 500, 600,
                               progress_bar=True)
     set_log_level_warning()
     environment.run()

     check_non_empty_MS2(top_n_controller)
     check_mzML(environment, OUT_DIR, 'topn_from_mzml.mzML')
Exemplo n.º 12
0
    def test_exclusion_simple_data(self):
        """
        Run Top-N three times on the same three chemicals, feeding the
        exclusion items of each run into the next one.

        The first run is expected to produce one MS2 scan per chemical; the
        second and third runs are expected to produce none.
        """
        formula_sampler = EvenMZFormulaSampler()
        chromatogram_sampler = ConstantChromatogramSampler()
        rt_intensity_sampler = UniformRTAndIntensitySampler(min_rt=0,
                                                            max_rt=5)
        mixture_creator = ChemicalMixtureCreator(
            formula_sampler,
            chromatogram_sampler=chromatogram_sampler,
            rt_and_intensity_sampler=rt_intensity_sampler)
        n_chems = 3
        dataset = mixture_creator.sample(n_chems, 2)

        # controller settings
        ionisation_mode = POSITIVE
        N = 10
        isolation_width = 1
        mz_tol = 10
        rt_tol = 30
        min_ms1_intensity = 0

        # exclusion items carried over from one run to the next
        carried_exclusion = []
        all_controllers = []
        for _ in range(3):
            spectrometer = IndependentMassSpectrometer(ionisation_mode,
                                                       dataset)
            top_n_controller = TopNController(
                ionisation_mode, N, isolation_width, mz_tol, rt_tol,
                min_ms1_intensity,
                initial_exclusion_list=carried_exclusion)
            environment = Environment(spectrometer, top_n_controller, 0, 20,
                                      progress_bar=True)
            run_environment(environment)

            # collect the distinct items stored in the m/z and RT boxes
            exclusion_list = top_n_controller.exclusion.exclusion_list
            mz_intervals = list(exclusion_list.boxes_mz.items())
            rt_intervals = list(top_n_controller.exclusion.exclusion_list
                                .boxes_rt.items())
            unique_items_mz = {interval.data for interval in mz_intervals}
            unique_items_rt = {interval.data for interval in rt_intervals}
            assert len(unique_items_mz) == len(unique_items_rt)

            carried_exclusion = list(unique_items_mz)
            all_controllers.append(top_n_controller)

        first_run, second_run, third_run = (all_controllers[0],
                                            all_controllers[1],
                                            all_controllers[2])
        assert len(first_run.scans[2]) == n_chems
        assert len(second_run.scans[2]) == 0
        assert len(third_run.scans[2]) == 0
Exemplo n.º 13
0
def run_experiment(param):
    '''
    Runs a Top-N experiment, unless both of its output files already exist.

    :param param: the experimental parameters. Keys read here are
    'analysis_name', 'mzml_out', 'pickle_out', 'N', 'rt_tol',
    'peak_sampler', 'ionisation_mode', 'data', 'isolation_width', 'mz_tol',
    'min_ms1_intensity', 'min_rt', 'max_rt' and 'pbar'; 'mzml_path' and
    'fragfiles' are also read when 'peak_sampler' is None
    :return: the analysis name once the experiment has been run, or None
    when the experiment is skipped because its outputs already exist
    '''
    analysis_name = param['analysis_name']
    mzml_out = param['mzml_out']
    pickle_out = param['pickle_out']
    N = param['N']
    rt_tol = param['rt_tol']

    outputs_exist = os.path.isfile(mzml_out) and os.path.isfile(pickle_out)
    if outputs_exist:
        logger.debug('Skipping %s' % (analysis_name))
        return None

    logger.debug('Processing %s' % (analysis_name))
    peak_sampler = param['peak_sampler']
    if peak_sampler is None:
        # extract density from the fragmentation file
        # NOTE(review): the resulting sampler is not used further below
        mzml_path = param['mzml_path']
        fragfile = param['fragfiles'][(N, rt_tol)]
        peak_sampler = get_peak_sampler(mzml_path, fragfile,
                                        param['min_rt'], param['max_rt'])

    ionisation_mode = param['ionisation_mode']
    spectrometer = IndependentMassSpectrometer(ionisation_mode,
                                               param['data'])
    top_n_controller = TopNController(ionisation_mode,
                                      param['N'],
                                      param['isolation_width'],
                                      param['mz_tol'],
                                      param['rt_tol'],
                                      param['min_ms1_intensity'])

    # the environment runs both the mass spec and the controller
    environment = Environment(spectrometer, top_n_controller,
                              param['min_rt'], param['max_rt'],
                              progress_bar=param['pbar'])

    # log at WARNING for the duration of the run only
    set_log_level_warning()
    environment.run()
    set_log_level_debug()

    environment.write_mzML(None, mzml_out)
    save_obj(top_n_controller, pickle_out)
    return analysis_name
Exemplo n.º 14
0
    def test_TopN_forceN(self, ten_chems):
        """
        With force_N=True, check that consecutive MS1 scans are always
        separated by exactly N other scans when all scans are ordered by RT.
        """
        N = 20
        spectrometer = IndependentMassSpectrometer(POSITIVE, ten_chems)
        top_n_controller = TopNController(POSITIVE, N, 0.7, 10, 15, 0,
                                          force_N=True)
        environment = Environment(spectrometer, top_n_controller, 200, 300,
                                  progress_bar=True)
        run_environment(environment)

        # MS1 and MS2 scans together, ordered by retention time
        scans_by_rt = sorted(
            top_n_controller.scans[1] + top_n_controller.scans[2],
            key=lambda scan: scan.rt)

        # positions of the MS1 scans within that ordering
        ms1_pos = [position for position, scan in enumerate(scans_by_rt)
                   if scan.ms_level == 1]

        # number of scans strictly between each pair of neighbouring MS1s
        for current, following in zip(ms1_pos, ms1_pos[1:]):
            assert following - (current + 1) == N
Exemplo n.º 15
0
    def test_TopN_controller_with_beer_chems_and_initial_exclusion_list(self):
        """
        Run Top-N three times on the QC beer chemicals, feeding the exclusion
        items of each run into the next one as the initial exclusion list,
        and check the MS2 scans and mzML output of every run.
        """
        logger.info('Testing Top-N controller with QC beer chemicals and '
                    'an initial exclusion list')

        # controller settings
        mode = POSITIVE
        top_n = 10
        width = 1
        mz_tolerance = 10
        rt_tolerance = 15

        # exclusion items carried over from one run to the next
        carried_exclusion = []
        for run_index in range(3):
            spectrometer = IndependentMassSpectrometer(mode, BEER_CHEMS)
            top_n_controller = TopNController(
                mode, top_n, width, mz_tolerance, rt_tolerance,
                MIN_MS1_INTENSITY,
                initial_exclusion_list=carried_exclusion)
            environment = Environment(spectrometer, top_n_controller,
                                      BEER_MIN_BOUND, BEER_MAX_BOUND,
                                      progress_bar=True)
            run_environment(environment)

            # collect the distinct items stored in the m/z and RT boxes
            mz_intervals = list(
                top_n_controller.exclusion.exclusion_list.boxes_mz.items())
            rt_intervals = list(
                top_n_controller.exclusion.exclusion_list.boxes_rt.items())
            unique_items_mz = {interval.data for interval in mz_intervals}
            unique_items_rt = {interval.data for interval in rt_intervals}
            assert len(unique_items_mz) == len(unique_items_rt)

            carried_exclusion = list(unique_items_mz)

            # there must be at least one non-empty MS2 scan
            check_non_empty_MS2(top_n_controller)

            # write the simulated output to an mzML file via check_mzML
            check_mzML(environment, OUT_DIR,
                       'topN_controller_qcbeer_exclusion_%d.mzML' % run_index)
Exemplo n.º 16
0
    def test_default_scan_time(self, chems_from_mzml):
        """
        Run a Top-N simulation with DEFAULT_SCAN_TIME_DICT as the scan
        duration setting and check the mzML output.
        """
        mode = POSITIVE
        controller_settings = {
            'N': 10,
            'isolation_width': 0.7,
            'mz_tol': 0.01,
            'rt_tol': 15,
            'min_ms1_intensity': 10,
        }
        top_n_controller = TopNController(
            mode,
            controller_settings['N'],
            controller_settings['isolation_width'],
            controller_settings['mz_tol'],
            controller_settings['rt_tol'],
            controller_settings['min_ms1_intensity'],
        )

        # run the simulation using the default scan times
        spectrometer = IndependentMassSpectrometer(
            mode, chems_from_mzml, scan_duration=DEFAULT_SCAN_TIME_DICT)
        environment = Environment(spectrometer, top_n_controller, 500, 600,
                                  progress_bar=True)
        set_log_level_warning()
        environment.run()
        check_mzML(environment, OUT_DIR, 'test_scan_time_default.mzML')
Exemplo n.º 17
0
def create_controller(controller_method, param_dict):
    """
    Build the controller selected by `controller_method` from `param_dict`.

    Recognised values of `controller_method` are 'TopN_RoiController',
    'TopN_SmartRoiController', 'TopNController' and 'WeightedDewController'.
    All constructor arguments are read from `param_dict` and passed
    positionally.

    :param controller_method: name selecting which controller to create
    :param param_dict: dictionary holding the constructor arguments of the
    selected controller; a missing key raises KeyError
    :return: the constructed controller, or None (after a warning has been
    logged) when `controller_method` is not recognised
    """
    # Default result for an unrecognised method. Without it, the final
    # `return` would raise UnboundLocalError right after the warning.
    controller = None

    # All branches form a single if/elif chain so that the final `else`
    # (the warning) is only reached when no method matched.
    if controller_method == 'TopN_RoiController':
        controller = TopN_RoiController(
            param_dict['ionisation_mode'], param_dict['isolation_width'],
            param_dict['mz_tol'], param_dict['min_ms1_intensity'],
            param_dict['min_roi_intensity'], param_dict['min_roi_length'],
            param_dict['N'], param_dict['rt_tol'],
            param_dict['min_roi_length_for_fragmentation'],
            param_dict['length_units'], param_dict['ms1_shift'],
            param_dict['params'])

    elif controller_method == 'TopN_SmartRoiController':
        controller = TopN_SmartRoiController(
            param_dict['ionisation_mode'], param_dict['isolation_width'],
            param_dict['mz_tol'], param_dict['min_ms1_intensity'],
            param_dict['min_roi_intensity'], param_dict['min_roi_length'],
            param_dict['N'], param_dict['rt_tol'],
            param_dict['min_roi_length_for_fragmentation'],
            param_dict['reset_length_seconds'],
            param_dict['intensity_increase_factor'],
            param_dict['length_units'], param_dict['drop_perc'],
            param_dict['ms1_shift'], param_dict['params'])

    elif controller_method == 'TopNController':
        controller = TopNController(
            param_dict['ionisation_mode'], param_dict['N'],
            param_dict['isolation_width'], param_dict['mz_tol'],
            param_dict['rt_tol'], param_dict['min_ms1_intensity'],
            param_dict['ms1_shift'], param_dict['initial_exclusion_list'],
            param_dict['params'])

    elif controller_method == 'WeightedDewController':
        controller = WeightedDEWController(
            param_dict['ionisation_mode'], param_dict['N'],
            param_dict['isolation_width'], param_dict['mz_tol'],
            param_dict['rt_tol'], param_dict['min_ms1_intensity'],
            param_dict['ms1_shift'], param_dict['exclusion_t_0'],
            param_dict['log_intensity'], param_dict['params'])
    else:
        logger.warning('Invalid controller_method')
    return controller
Exemplo n.º 18
0
def top_n_evaluation(param_dict):
    """
    Run a Top-N controller on a stored mass spec and report its coverage.

    The mass spec and the controller `params` are loaded with load_obj from
    the paths in `param_dict`. Both coverage figures are printed.

    :param param_dict: dictionary of settings. Keys read here are
    'mass_spec_file', 'params_file', 'ionisation_mode', 'N',
    'isolation_width', 'mz_tol', 'rt_tol', 'min_ms1_intensity', 'min_rt',
    'max_rt', 'save_file_name', 'box_file', 'half_isolation_window' and
    'coverage_type'
    :return: the result of run_coverage_evaluation when
    param_dict['coverage_type'] is 'coverage', otherwise the result of
    run_env
    """
    mass_spec = load_obj(param_dict['mass_spec_file'])
    params = load_obj(param_dict['params_file'])

    # positional controller arguments, in constructor order
    positional_keys = ('ionisation_mode', 'N', 'isolation_width', 'mz_tol',
                       'rt_tol', 'min_ms1_intensity')
    controller_args = [param_dict[key] for key in positional_keys]
    topn = TopNController(*controller_args, params=params)

    save_file_name = param_dict['save_file_name']
    chemical_coverage = run_env(mass_spec, topn, param_dict['min_rt'],
                                param_dict['max_rt'], save_file_name)
    coverage = run_coverage_evaluation(param_dict['box_file'],
                                       save_file_name,
                                       param_dict['half_isolation_window'])

    print('coverage', coverage)
    print('chemical_coverage', chemical_coverage)

    wants_coverage = param_dict['coverage_type'] == 'coverage'
    return coverage if wants_coverage else chemical_coverage
Exemplo n.º 19
0
def dsda_experiment_evaluation(datasets,
                               base_dir,
                               min_rt,
                               max_rt,
                               N,
                               isolation_window,
                               mz_tol,
                               rt_tol,
                               min_ms1_intensity,
                               mzmine_files=None,
                               rt_tolerance=100,
                               progress_bar=False):
    """
    Simulate a DsDA experiment over all datasets and evaluate the result.

    A DsDA schedule is first created from a mass spec built on the first
    dataset. The first dataset is then run with a TopNController; each later
    dataset is run with a FixedScansController whose schedule is fetched
    with get_schedule from <base_dir>/settings. Every run is written as
    'sample_<iii>.mzml' into <base_dir>/Data. Once ALL datasets have been
    processed, the runs are evaluated together.

    :param datasets: non-empty indexable collection; each entry is passed
    to a fresh IndependentMassSpectrometer
    :param base_dir: experiment directory containing the 'Data' and
    'settings' sub-directories and 'DsDA_Timing_schedule.csv'
    :param min_rt: start of the run, passed to the Environment
    :param max_rt: end of the run, passed to the Environment
    :param N: Top-N parameter, also used to create the DsDA schedule
    :param isolation_window: passed to the Top-N controller and to
    dsda_get_scan_params
    :param mz_tol: passed to the Top-N controller and to
    dsda_get_scan_params
    :param rt_tol: passed to the Top-N controller and to
    dsda_get_scan_params
    :param min_ms1_intensity: Top-N controller parameter
    :param mzmine_files: picked-peak files, indexed in step with the
    datasets; when None the simulated environments are evaluated directly
    :param rt_tolerance: passed to the RoiAligner
    :param progress_bar: passed to the Environment; when it is False a
    message is printed after each dataset instead
    :return: a tuple (env_list, evaluation)
    """
    data_dir = os.path.join(base_dir, 'Data')
    schedule_dir = os.path.join(base_dir, 'settings')
    mass_spec = IndependentMassSpectrometer(
        POSITIVE, datasets[0])  # necessary to get timings for schedule
    create_dsda_schedule(mass_spec, N, min_rt, max_rt, base_dir)
    print('Please open and run R script now')
    time.sleep(1)
    template_file = os.path.join(base_dir, 'DsDA_Timing_schedule.csv')
    env_list = []
    mzml_files = []
    source_files = ['sample_' + "%03d" % i for i in range(len(datasets))]
    for i in range(len(datasets)):
        mass_spec = IndependentMassSpectrometer(POSITIVE, datasets[i])
        if i == 0:
            # the first sample has no schedule yet, so run plain Top-N
            controller = TopNController(POSITIVE,
                                        N,
                                        isolation_window,
                                        mz_tol,
                                        rt_tol,
                                        min_ms1_intensity,
                                        ms1_shift=0,
                                        initial_exclusion_list=None,
                                        force_N=False)
        else:
            # later samples follow the schedule found in schedule_dir
            print('Looking for next schedule')
            new_schedule = get_schedule(i, schedule_dir)
            print('Found next schedule')
            time.sleep(1)
            schedule_param_list = dsda_get_scan_params(new_schedule,
                                                       template_file,
                                                       isolation_window,
                                                       mz_tol, rt_tol)
            controller = FixedScansController(schedule=schedule_param_list)
        env = Environment(mass_spec,
                          controller,
                          min_rt,
                          max_rt,
                          progress_bar=progress_bar)
        env.run()
        if progress_bar is False:
            print('Processed dataset ' + str(i))
        env_list.append(env)
        file_link = os.path.join(data_dir, source_files[i] + '.mzml')
        mzml_files.append(file_link)
        print("Processed ", i + 1, " files")
        env.write_mzML(data_dir, source_files[i] + '.mzml')
        print("Waiting for R to process .mzML files")

    # The evaluation runs once, after the loop. It used to sit inside the
    # loop body, which made the function return after the first dataset,
    # and the trailing `else` was attached to the `for` statement.
    if mzmine_files is None:
        evaluation = evaluate_multiple_simulated_env(env_list)
    else:
        roi_aligner = RoiAligner(rt_tolerance=rt_tolerance)
        for j in range(len(mzml_files)):
            roi_aligner.add_picked_peaks(mzml_files[j], mzmine_files[j],
                                         source_files[j], 'mzmine')
        evaluation = evaluate_multi_peak_roi_aligner(
            roi_aligner, source_files)
    return env_list, evaluation
Exemplo n.º 20
0
    def test_TopN_controller_advanced_params(self):
        """
        Pass non-default AdvancedParams to the Top-N controller and check
        that the first MS1 scan and the first MS2 scan report those values
        in their scan parameters.
        """
        # set some values that are not the defaults, so we know they're
        # passed correctly
        advanced_settings = dict(
            default_ms1_scan_window=(10.0, 2000.0),
            ms1_agc_target=100000,
            ms1_max_it=500,
            ms1_collision_energy=200,
            ms1_orbitrap_resolution=100000,
            ms1_activation_type='CID',
            ms1_mass_analyser='IonTrap',
            ms1_isolation_mode='IonTrap',
            ms1_source_cid_energy=10,
            ms2_agc_target=50000,
            ms2_max_it=250,
            ms2_collision_energy=300,
            ms2_orbitrap_resolution=100000,
            ms2_activation_type='CID',
            ms2_mass_analyser='IonTrap',
            ms2_isolation_mode='IonTrap',
            ms2_source_cid_energy=20,
        )
        params = AdvancedParams(**advanced_settings)

        # controller settings
        mode = POSITIVE
        top_n = 10
        width = 1
        mz_tolerance = 10
        rt_tolerance = 15

        # simulated mass spec without noise and the Top-N controller
        spectrometer = IndependentMassSpectrometer(mode, BEER_CHEMS)
        top_n_controller = TopNController(mode, top_n, width, mz_tolerance,
                                          rt_tolerance, MIN_MS1_INTENSITY,
                                          params=params)

        # the environment runs both the mass spec and the controller
        environment = Environment(spectrometer, top_n_controller,
                                  BEER_MIN_BOUND, BEER_MAX_BOUND,
                                  progress_bar=True)
        run_environment(environment)

        # ms1 check: (scan parameter key, expected value) pairs
        ms1_scan_params = top_n_controller.scans[1][0].scan_params
        ms1_expectations = [
            (ScanParameters.FIRST_MASS, params.default_ms1_scan_window[0]),
            (ScanParameters.LAST_MASS, params.default_ms1_scan_window[1]),
            (ScanParameters.AGC_TARGET, params.ms1_agc_target),
            (ScanParameters.MAX_IT, params.ms1_max_it),
            (ScanParameters.COLLISION_ENERGY, params.ms1_collision_energy),
            (ScanParameters.ORBITRAP_RESOLUTION,
             params.ms1_orbitrap_resolution),
            (ScanParameters.ACTIVATION_TYPE, params.ms1_activation_type),
            (ScanParameters.MASS_ANALYSER, params.ms1_mass_analyser),
            (ScanParameters.ISOLATION_MODE, params.ms1_isolation_mode),
            (ScanParameters.SOURCE_CID_ENERGY, params.ms1_source_cid_energy),
        ]
        for key, expected in ms1_expectations:
            assert ms1_scan_params.get(key) == expected

        # ms2 check: same idea for the first MS2 scan
        ms2_scan_params = top_n_controller.scans[2][0].scan_params
        ms2_expectations = [
            (ScanParameters.AGC_TARGET, params.ms2_agc_target),
            (ScanParameters.MAX_IT, params.ms2_max_it),
            (ScanParameters.COLLISION_ENERGY, params.ms2_collision_energy),
            (ScanParameters.ORBITRAP_RESOLUTION,
             params.ms2_orbitrap_resolution),
            (ScanParameters.ACTIVATION_TYPE, params.ms2_activation_type),
            (ScanParameters.MASS_ANALYSER, params.ms2_mass_analyser),
            (ScanParameters.ISOLATION_MODE, params.ms2_isolation_mode),
            (ScanParameters.SOURCE_CID_ENERGY, params.ms2_source_cid_energy),
        ]
        for key, expected in ms2_expectations:
            assert ms2_scan_params.get(key) == expected
Exemplo n.º 21
0
    def test_ms2_matching(self):
        """
        Run Top-N over several chemical mixtures and check MS2 matching.

        Each mixture is simulated in turn, with the exclusion items collected
        from one run passed as the initial exclusion list of the next. The
        written mzML files are then matched against the MSP file, first
        using only the first mzML file and then using the whole folder.
        """
        rt_intensity_sampler = UniformRTAndIntensitySampler(min_rt=10,
                                                            max_rt=20)
        formula_sampler = UniformMZFormulaSampler()

        mixture_creator = ChemicalMixtureCreator(
            formula_sampler,
            rt_and_intensity_sampler=rt_intensity_sampler,
            adduct_prior_dict={POSITIVE: {'M+H': 1}})
        base_chems = mixture_creator.sample(300, 2)

        # two 'control' and two 'case' samples; only 'case' chemicals change
        groups = ['control', 'control', 'case', 'case']
        group_settings = {
            'control': {
                'missing_probability': 0.0,
                'changing_probability': 0.0
            },
            'case': {
                'missing_probability': 0.0,
                'changing_probability': 1.0
            },
        }
        mixtures = MultipleMixtureCreator(base_chems, groups, group_settings)
        chem_lists = mixtures.generate_chemical_lists()

        # Top-N controller settings shared by every run
        top_n = 10
        isolation_width = 0.7
        mz_tol = 0.001
        rt_tol = 30
        min_ms1_intensity = 0

        set_log_level_warning()

        output_folder = os.path.join(OUT_DIR, 'ms2_matching')
        write_msp(base_chems, 'mmm.msp', out_dir=output_folder)

        carried_exclusion = []
        for file_idx, chems in enumerate(chem_lists):
            controller = TopNController(
                POSITIVE,
                top_n,
                isolation_width,
                mz_tol,
                rt_tol,
                min_ms1_intensity,
                initial_exclusion_list=carried_exclusion)
            mass_spec = IndependentMassSpectrometer(POSITIVE, chems)
            env = Environment(mass_spec, controller, 10, 30,
                              progress_bar=True)
            env.run()
            env.write_mzML(output_folder, '{}.mzML'.format(file_idx))

            # the m/z and RT interval stores should hold the same number of
            # distinct exclusion items
            exclusion_boxes = controller.exclusion.exclusion_list
            mz_items = {
                interval.data for interval in exclusion_boxes.boxes_mz.items()
            }
            rt_items = {
                interval.data for interval in exclusion_boxes.boxes_rt.items()
            }
            assert len(mz_items) == len(rt_items)

            # hand the accumulated exclusion items on to the next run
            carried_exclusion = list(mz_items)
            logger.warning(len(carried_exclusion))

        set_log_level_debug()
        msp_file = os.path.join(output_folder, 'mmm.msp')

        # match using just the first file ...
        single_first, single_second = ms2_main(
            os.path.join(output_folder, '0.mzML'), msp_file, 1, 0.7)
        # ... and using every file in the folder
        folder_first, folder_second = ms2_main(output_folder, msp_file, 1,
                                               0.7)

        # the second returned value must agree, while the first must be
        # larger when all files are used
        assert single_second == folder_second
        assert folder_first > single_first
Exemplo n.º 22
0
    if args.print_chems:
        logger.debug("Sampled chems")
        for chem in dataset:
            logger.debug(chem)

    if args.output_msp_file is not None:
        write_msp(dataset, args.output_msp_file)

    spike_noise = UniformSpikeNoise(0.01, args.spike_max)

    ms = IndependentMassSpectrometer(POSITIVE_IONISATION_MODE,
                                     dataset,
                                     spike_noise=spike_noise)

    controller = TopNController(POSITIVE_IONISATION_MODE, 10, 0.7, 0.01, 15,
                                1e3)

    env = Environment(ms,
                      controller,
                      min_time=args.min_rt - 50,
                      max_time=args.max_rt + 50)

    set_log_level_warning()
    env.run()

    env.write_mzML(None, args.output_mzml_file)

    if args.output_swath_file is not None:
        sw = SWATH(args.min_mz, args.max_mz, 100, 0.0)
        ms = IndependentMassSpectrometer(POSITIVE_IONISATION_MODE,
                                         dataset,
Exemplo n.º 23
0
def topn_processor():
    """
    Simulate a Top-N run from an existing mzML file and compare the result
    against that file.

    The steps are:

    1. load the input mzML and build the spectral feature database,
    2. extract ROIs from the mzML file and keep those with at least one
       intensity above ``min_ms1_intensity``,
    3. convert the kept ROIs to chemicals and save them as ``dataset.p``,
    4. run the chemicals through a Top-N controller and write the simulated
       mzML file,
    5. compare real and simulated data (scan counts and peak matching at
       several RT tolerances) and produce the plots.

    All input/output locations and parameters are hard-coded in the body.

    :return: a list of the output paths recorded by this function, in order:
    the simulated mzML file, then the num-scans, matched-intensities and
    matched-precursors PNG paths
    """
    pathlist = []
    base_dir = 'documents/simple_ms1/example_data'
    mzml_path = os.path.join(base_dir, 'beers', 'fragmentation', 'mzML')
    file_name = 'Beer_multibeers_1_T10_POS.mzML'

    experiment_name = 'mzml_compare'
    results_dir = os.path.join(base_dir, 'results')
    experiment_out_dir = os.path.join(results_dir, experiment_name)
    min_rt = 0
    max_rt = 1441
    kde_min_ms1_intensity = 0  # min intensity to be selected for kdes
    kde_min_ms2_intensity = 0

    roi_mz_tol = 10
    roi_min_length = 1
    roi_min_intensity = 0
    roi_start_rt = min_rt
    roi_stop_rt = max_rt

    # the (full) isolation width in Dalton around a selected precursor m/z
    isolation_width = 1
    ionisation_mode = POSITIVE
    N = 10
    rt_tol = 15
    mz_tol = 10
    min_ms1_intensity = 1.75E5  # minimum ms1 intensity to fragment

    mzml_filename = 'simulated.mzML'
    mzml_out = os.path.join(experiment_out_dir, mzml_filename)
    pathlist.append(mzml_out)

    print('#' * 10, 'Train densities')
    ds = DataSource()
    ds.load_data(mzml_path, file_name=file_name)
    bandwidth_mz_intensity_rt = 1.0
    bandwidth_n_peaks = 1.0
    ps = get_spectral_feature_database(ds, file_name, kde_min_ms1_intensity,
                                       kde_min_ms2_intensity, min_rt, max_rt,
                                       bandwidth_mz_intensity_rt,
                                       bandwidth_n_peaks)

    print('#' * 10, 'Extract all ROIs')
    mzml_file = os.path.join(mzml_path, file_name)
    good_roi, junk = make_roi(mzml_file,
                              mz_tol=roi_mz_tol,
                              mz_units='ppm',
                              min_length=roi_min_length,
                              min_intensity=roi_min_intensity,
                              start_rt=roi_start_rt,
                              stop_rt=roi_stop_rt)
    all_roi = good_roi + junk
    print('#' * 10, len(all_roi))

    # keep only ROIs with at least one point above the fragmentation threshold
    all_roi = [
        roi for roi in all_roi
        if np.any(np.array(roi.intensity_list) > min_ms1_intensity)
    ]

    set_log_level_debug()
    rtcc = RoiToChemicalCreator(ps, all_roi)
    data = rtcc.chemicals
    save_obj(data, os.path.join(experiment_out_dir, 'dataset.p'))

    # set the log level to WARNING so we don't see too many messages while
    # the simulation objects are created and the environment is running
    set_log_level_warning()
    mass_spec = IndependentMassSpectrometer(ionisation_mode, data, ps)
    controller = TopNController(ionisation_mode, N, isolation_width, mz_tol,
                                rt_tol, min_ms1_intensity)
    # create an environment to run both the mass spec and controller
    env = Environment(mass_spec, controller, min_rt, max_rt, progress_bar=True)

    # run the simulation, then restore DEBUG logging before writing output
    env.run()
    set_log_level_debug()
    env.write_mzML(experiment_out_dir, mzml_filename)

    print('#' * 10, 'Compare Results')
    matplotlib.use('agg')
    (simulated_mzs, simulated_rts, simulated_intensities,
     simulated_cumsum_ms1, simulated_cumsum_ms2) = count_stuff(
         mzml_out, min_rt, max_rt)
    (real_mzs, real_rts, real_intensities,
     real_cumsum_ms1, real_cumsum_ms2) = count_stuff(
         mzml_file, min_rt, max_rt)

    plt.rcParams.update({'font.size': 14})
    out_file = os.path.join(results_dir, 'topN_num_scans.png')
    pathlist.append(out_file)
    plot_num_scans(real_cumsum_ms1, real_cumsum_ms2, simulated_cumsum_ms1,
                   simulated_cumsum_ms2, out_file)

    # Matching tolerances are kept separate from the controller's mz_tol and
    # rt_tol above.
    # m/z tolerance in ppm. if None, then 2 decimal places is used for
    # matching the m/z
    match_mz_tol = None
    matches = None
    # RT tolerances in seconds; the matches from the last (widest) tolerance
    # are the ones used for the plots below
    for match_rt_tol in (5, 10, 15):
        matches = match_peaklist(real_mzs, real_rts, real_intensities,
                                 simulated_mzs, simulated_rts,
                                 simulated_intensities, match_mz_tol,
                                 match_rt_tol)
        check_found_matches(matches, 'Real', 'Simulated')

    # split the intensities (third element of each key) by whether the peak
    # found a match
    unmatched_intensities = []
    matched_intensities = []
    for key, value in matches.items():
        intensity = key[2]
        if value is None:
            unmatched_intensities.append(intensity)
        else:
            matched_intensities.append(intensity)
    plt.rcParams.update({'font.size': 18})

    out_file = os.path.join(results_dir, 'topN_matched_intensities.png')
    plot_matched_intensities(matched_intensities, unmatched_intensities,
                             out_file)
    pathlist.append(out_file)

    out_file = os.path.join(results_dir, 'topN_matched_precursors.png')
    # NOTE(review): the four numbers look like plot bounds for m/z and RT -
    # confirm against plot_matched_precursors
    plot_matched_precursors(matches, 50, 1000, 180, 1260, out_file)
    pathlist.append(out_file)
    return pathlist
Exemplo n.º 24
0
    def test_TopN_controller_with_simulated_chems(self):
        """
        Run the Top-N controller on sampled chemicals, once with the mass
        spec created with add_noise=False and once with add_noise=True, and
        check that an mzML file is written for each run.
        """
        logger.info('Testing Top-N controller with simulated chemicals')

        # sample the chemicals that both runs will share
        chemical_creator = ChemicalCreator(self.ps, ROI_Sources, hmdb)
        dataset = chemical_creator.sample(
            mz_range, rt_range, min_ms1_intensity, n_chems, self.ms_level,
            get_children_method=GET_MS2_BY_PEAKS)
        self.assertEqual(len(dataset), n_chems)

        # Top-N controller settings
        isolation_width = 1
        N = 10
        rt_tol = 15
        mz_tol = 10
        ionisation_mode = POSITIVE

        # (log label, add_noise flag, output mzML filename) for each run
        runs = (
            ('Without noise', False,
             'topN_controller_simulated_chems_no_noise.mzML'),
            ('With noise', True,
             'topN_controller_simulated_chems_with_noise.mzML'),
        )
        for label, noisy, filename in runs:
            # create a simulated mass spec and a fresh Top-N controller
            logger.info(label)
            mass_spec = IndependentMassSpectrometer(
                ionisation_mode, dataset, self.ps, add_noise=noisy)
            controller = TopNController(
                ionisation_mode, N, isolation_width, mz_tol, rt_tol,
                min_ms1_intensity)

            # the environment drives both the mass spec and the controller
            env = Environment(
                mass_spec, controller, min_rt, max_rt, progress_bar=True)

            # only log at WARNING level while the simulation runs, then go
            # back to DEBUG
            set_log_level_warning()
            env.run()
            set_log_level_debug()

            # write simulated output to mzML file and check it exists
            out_file = os.path.join(out_dir, filename)
            env.write_mzML(out_dir, filename)
            self.assertTrue(os.path.exists(out_file))
        print()