Exemplo n.º 1
0
    def test_roi_controller_with_beer_chems(self):
        """Run the ROI controller on the QC beer chemicals and check that an mzML file is written."""
        logger.info('Testing ROI controller with QC beer chemicals')

        mode = POSITIVE
        top_n = 10
        window_width = 1  # isolation window (Dalton) around a selected precursor ion
        mz_tolerance = 10
        rt_tolerance = 15
        roi_min_intensity = 5000
        roi_min_length = 10

        # simulated instrument with noise enabled, driven by the ROI controller
        spectrometer = IndependentMassSpectrometer(mode, beer_chems, self.ps, add_noise=True)
        roi_controller = RoiController(
            mode, window_width, mz_tolerance, min_ms1_intensity,
            roi_min_intensity, roi_min_length, "Top N", top_n, rt_tolerance)

        # the environment couples the instrument and the controller
        simulation = Environment(spectrometer, roi_controller, min_rt, max_rt, progress_bar=True)

        # keep logging at WARNING while the simulation runs, then go back to DEBUG
        set_log_level_warning()
        simulation.run()
        set_log_level_debug()

        # the simulated run must end up as an mzML file on disk
        mzml_name = 'roi_controller_qcbeer_chems.mzML'
        expected_path = os.path.join(out_dir, mzml_name)
        simulation.write_mzML(out_dir, mzml_name)
        self.assertTrue(os.path.exists(expected_path))
        print()
Exemplo n.º 2
0
    def test_TopN_controller_with_beer_chems(self):
        """Run the Top-N controller on the QC beer chemicals (noise off) and check that an mzML file is written."""
        logger.info('Testing Top-N controller with QC beer chemicals')

        mode = POSITIVE
        top_n = 10
        window_width = 1
        mz_tolerance = 10
        rt_tolerance = 15

        # simulated instrument with noise disabled, driven by the Top-N controller
        spectrometer = IndependentMassSpectrometer(mode, beer_chems, self.ps, add_noise=False)
        topn_controller = TopNController(
            mode, top_n, window_width, mz_tolerance, rt_tolerance, min_ms1_intensity)

        # the environment couples the instrument and the controller
        simulation = Environment(spectrometer, topn_controller, min_rt, max_rt, progress_bar=True)

        # keep logging at WARNING while the simulation runs, then go back to DEBUG
        set_log_level_warning()
        simulation.run()
        set_log_level_debug()

        # the simulated run must end up as an mzML file on disk
        mzml_name = 'topN_controller_qcbeer_chems_no_noise.mzML'
        expected_path = os.path.join(out_dir, mzml_name)
        simulation.write_mzML(out_dir, mzml_name)
        self.assertTrue(os.path.exists(expected_path))
        print()
Exemplo n.º 3
0
    def generate_mzmls(self, output_dir, params):
        """Simulate one MS1-only run per scan-duration dict and write each one as mzML.

        ``self.time_gen(params)`` supplies the scan-duration dicts. When
        ``self.filenames`` is unset, names of the form
        ``time_exp_data_NNNN.mzML`` under ``output_dir`` are generated, one per
        dict. ``self.file_counter`` is increased by the number of dicts.

        :param output_dir: directory the mzML files are written to
        :param params: forwarded to ``self.time_gen``; must have the same
            length as ``self.filenames``
        :raises ValueError: if ``params`` and ``self.filenames`` differ in length
        """
        durations = self.time_gen(params)
        if self.filenames is None:
            self.filenames = [
                os.path.join(output_dir, "time_exp_data_{:04d}.mzML".format(idx))
                for idx, _ in enumerate(durations)
            ]
        self.file_counter += len(durations)
        if len(self.filenames) != len(params):
            raise ValueError(
                "Parameter and filename list not the same length!")

        for target_path, duration_dict in zip(self.filenames, durations):
            spectrometer = IndependentMassSpectrometer(
                POSITIVE, self.chems, None, scan_duration_dict=duration_dict)
            ms1_controller = SimpleMs1Controller()
            simulation = Environment(
                spectrometer, ms1_controller, self.min_rt, self.max_rt,
                progress_bar=True)

            set_log_level_warning()
            simulation.run()

            # NOTE(review): the level is set to WARNING a second time here
            # rather than being restored - confirm this is intended.
            set_log_level_warning()
            # only the base name is used; the file always lands in output_dir
            simulation.write_mzML(output_dir, os.path.basename(target_path))
Exemplo n.º 4
0
    def test_ms1_controller_with_simulated_chems(self):
        """Sample simulated chemicals, run the MS1 controller on them and check that an mzML file is written."""
        logger.info('Testing MS1 controller with simulated chemicals')

        # sample the chemical objects and make sure we got as many as requested
        creator = ChemicalCreator(self.ps, ROI_Sources, hmdb)
        sampled = creator.sample(mz_range, rt_range, min_ms1_intensity, n_chems, self.ms_level)
        self.assertEqual(len(sampled), n_chems)

        # simulated instrument driven by the MS1-only controller
        spectrometer = IndependentMassSpectrometer(POSITIVE, sampled, self.ps)
        ms1_controller = SimpleMs1Controller()

        # the environment couples the instrument and the controller
        simulation = Environment(spectrometer, ms1_controller, min_rt, max_rt, progress_bar=True)

        # keep logging at WARNING while the simulation runs, then go back to DEBUG
        set_log_level_warning()
        simulation.run()
        set_log_level_debug()

        # the simulated run must end up as an mzML file on disk
        mzml_name = 'ms1_controller_simulated_chems.mzML'
        expected_path = os.path.join(out_dir, mzml_name)
        simulation.write_mzML(out_dir, mzml_name)
        self.assertTrue(os.path.exists(expected_path))
        print()
Exemplo n.º 5
0
 def run_experiment(self, idx):
     """Run the scheduled experiment at row ``idx`` unless its mzML file already exists.

     The output is named after the row's 'Sample ID'. If ``<Sample ID>.mzML``
     is already present in ``self.base_dir`` the run is skipped. Otherwise the
     parent class sets up the controller and, when both the controller method
     and the dataset entry are set, the dataset is loaded, simulated and
     written to mzML (plus a saved controller if ``self.write_env`` is true).

     :param idx: row index into ``self.controller_schedule``
     :return: tuple ``(mzml_file, controller_name)``; the path is returned
         whether or not the file was actually produced
     """
     controller_name = self.controller_schedule['Sample ID'][idx]
     existing_names = [
         os.path.basename(found)
         for found in glob.glob(os.path.join(self.base_dir, '*.mzML'))
     ]
     if controller_name + '.mzML' in existing_names:
         logger.info('Experiment already completed. Skipping...')
     else:
         controller, ms_params = super().run_experiment(idx)
         # load data and set up MS
         logger.info(self.controller_schedule.iloc[[idx]].to_dict())
         method = self.controller_schedule['Controller Method'][idx]
         dataset_entry = self.controller_schedule['Dataset'][idx]
         if method is not None and dataset_entry is not None:
             chemicals = load_obj(self.controller_schedule['Dataset'][idx])
             spectrometer = IndependentMassSpectrometer(
                 ms_params['ionisation_mode'], chemicals)
             # run the sample over the first configured RT range
             rt_start = self.rt_range[0][0]
             rt_end = self.rt_range[0][1]
             simulation = Environment(spectrometer,
                                      controller,
                                      rt_start,
                                      rt_end,
                                      progress_bar=self.progress_bar)
             simulation.run()
             simulation.write_mzML(self.base_dir, controller_name + '.mzML')
             if self.write_env:
                 pickle_path = os.path.join(self.base_dir,
                                            controller_name + '.p')
                 save_obj(controller, pickle_path)
     return os.path.join(self.base_dir, controller_name + '.mzML'), controller_name
Exemplo n.º 6
0
    def test_ms1_controller_with_qcbeer_chems(self):
        """Run the MS1 controller on the QC beer chemicals and check that an mzML file is written."""
        logger.info('Testing MS1 controller with QC beer chemicals')

        # simulated instrument driven by the MS1-only controller
        spectrometer = IndependentMassSpectrometer(POSITIVE, beer_chems, self.ps)
        ms1_controller = SimpleMs1Controller()

        # the environment couples the instrument and the controller
        simulation = Environment(spectrometer, ms1_controller, min_rt, max_rt, progress_bar=True)

        # keep logging at WARNING while the simulation runs, then go back to DEBUG
        set_log_level_warning()
        simulation.run()
        set_log_level_debug()

        # the simulated run must end up as an mzML file on disk
        mzml_name = 'ms1_controller_qcbeer_chems.mzML'
        expected_path = os.path.join(out_dir, mzml_name)
        simulation.write_mzML(out_dir, mzml_name)
        self.assertTrue(os.path.exists(expected_path))
        print()
Exemplo n.º 7
0
def simple_ms1_processor():
    """Sample chemicals, simulate a full-scan (MS1) run and write it to mzML.

    Loads the peak sampler and HMDB compound objects from
    ``documents/simple_ms1/example_data``, samples ``n_chems`` chemicals with
    ``ChemicalCreator``, saves them as ``dataset.p`` in the results directory
    and runs them through an ``IndependentMassSpectrometer`` driven by a
    ``SimpleMs1Controller``. The first ten sampled chemicals are printed.

    :return: location of the written mzML file as a string, built from the
        relative ``mypath`` (not from the absolute ``base_dir``)
    """
    # Adjacent string literals are used instead of a backslash continuation
    # inside the literal, which would embed the source indentation in the
    # printed message.
    print(
        '#' * 10,
        'Load previously trained spectral feature database and the list of '
        'extracted metabolites, created in 01. Download Data')
    #-----------------
    mypath = 'documents/simple_ms1/example_data'
    #-----------------
    base_dir = os.path.abspath(mypath)
    ps = load_obj(Path(base_dir, 'peak_sampler_mz_rt_int_19_beers_fullscan.p'))
    hmdb = load_obj(Path(base_dir, 'hmdb_compounds.p'))

    out_dir = Path(base_dir, 'results', 'MS1_single')
    # the list of ROI sources created in the previous notebook '01. Download Data.ipynb'
    ROI_Sources = [
        str(Path(base_dir, 'DsDA', 'DsDA_Beer', 'beer_t10_simulator_files'))
    ]

    # minimum MS1 intensity of chemicals
    min_ms1_intensity = 1.75E5

    # m/z and RT range of chemicals
    rt_range = [(0, 1440)]
    mz_range = [(0, 1050)]

    # the number of chemicals in the sample
    n_chems = 6500

    # maximum MS level (we do not generate fragmentation peaks when this value is 1)
    ms_level = 1

    chems = ChemicalCreator(ps, ROI_Sources, hmdb)
    dataset = chems.sample(mz_range, rt_range, min_ms1_intensity, n_chems,
                           ms_level)
    save_obj(dataset, Path(out_dir, 'dataset.p'))

    # show a small preview of what was sampled
    for chem in dataset[0:10]:
        print(chem)
    print('#' * 10,
          'Run MS1 controller on the samples and generate .mzML files')
    min_rt = rt_range[0][0]
    max_rt = rt_range[0][1]

    mass_spec = IndependentMassSpectrometer(POSITIVE, dataset, ps)
    controller = SimpleMs1Controller()

    # create an environment to run both the mass spec and controller
    env = Environment(mass_spec, controller, min_rt, max_rt, progress_bar=True)

    # set the log level to WARNING so we don't see too many messages when environment is running
    set_log_level_warning()

    # run the simulation, then restore the DEBUG log level
    env.run()
    set_log_level_debug()
    mzml_filename = 'ms1_controller.mzML'
    env.write_mzML(out_dir, mzml_filename)
    return str(Path(mypath, 'results', 'MS1_single')) + '/' + mzml_filename
Exemplo n.º 8
0
def top_n_roi_experiment_evaluation(datasets,
                                    min_rt,
                                    max_rt,
                                    N,
                                    isolation_window,
                                    mz_tol,
                                    rt_tol,
                                    min_ms1_intensity,
                                    min_roi_intensity,
                                    min_roi_length,
                                    base_chemicals=None,
                                    mzmine_files=None,
                                    rt_tolerance=100,
                                    experiment_dir=None,
                                    progress_bar=False):
    """Simulate a Top-N ROI run for every dataset and evaluate the results.

    One ``TopN_RoiController`` simulation is run per dataset. If
    ``base_chemicals`` is given, the environments are evaluated against it.
    Otherwise each run is written to ``experiment_dir`` as
    ``sample_<i>.mzml`` and evaluated through a ``RoiAligner`` fed with the
    matching entry of ``mzmine_files``.

    :return: ``(env_list, evaluation)``, or ``(None, None)`` when neither
        ``base_chemicals`` nor ``mzmine_files`` is provided
    """
    # nothing to evaluate against: bail out before running any simulation
    if base_chemicals is None and mzmine_files is None:
        return None, None

    sample_names = ['sample_' + str(idx) for idx in range(len(datasets))]
    environments = []
    written_mzmls = []
    for idx, chemicals in enumerate(datasets):
        spectrometer = IndependentMassSpectrometer(POSITIVE, chemicals)
        roi_controller = TopN_RoiController(POSITIVE,
                                            isolation_window,
                                            mz_tol,
                                            min_ms1_intensity,
                                            min_roi_intensity,
                                            min_roi_length,
                                            N=N,
                                            rt_tol=rt_tol)
        simulation = Environment(spectrometer,
                                 roi_controller,
                                 min_rt,
                                 max_rt,
                                 progress_bar=progress_bar)
        simulation.run()
        if progress_bar is False:
            print('Processed dataset ' + str(idx))
        environments.append(simulation)
        # mzML files are only needed for the peak-picking based evaluation
        if base_chemicals is None:
            mzml_name = sample_names[idx] + '.mzml'
            written_mzmls.append(os.path.join(experiment_dir, mzml_name))
            simulation.write_mzML(experiment_dir, mzml_name)

    if base_chemicals is not None:
        evaluation = evaluate_multiple_simulated_env(
            environments, base_chemicals=base_chemicals)
        return environments, evaluation

    aligner = RoiAligner(rt_tolerance=rt_tolerance)
    for idx, mzml_path in enumerate(written_mzmls):
        aligner.add_picked_peaks(mzml_path, mzmine_files[idx],
                                 sample_names[idx], 'mzmine')
    evaluation = evaluate_multi_peak_roi_aligner(aligner, sample_names)
    return environments, evaluation
Exemplo n.º 9
0
def run_env(mass_spec, controller, min_rt, max_rt, mzml_file):
    """Run a simulation, write it to ``mzml_file`` and return the chemical coverage.

    Coverage is the number of distinct chemicals (compared by their ``repr``)
    that appear in a fragmentation event with ``ms_level > 1``, divided by the
    number of chemicals held by the environment's mass spectrometer.

    :return: the coverage ratio
    """
    env = Environment(mass_spec, controller, min_rt, max_rt)
    env.run()
    env.write_mzML(None, mzml_file)

    fragmented = []
    for event in env.mass_spec.fragmentation_events:
        if event.ms_level > 1:
            fragmented.append(repr(event.chem))

    n_distinct = len(np.unique(np.array(fragmented)))
    n_total = len(env.mass_spec.chemicals)
    return n_distinct / n_total
Exemplo n.º 10
0
    def test_acquisition(self, two_fixed_chems):
        """Check that an MS2 scan targeting two m/z values holds the peaks of both single-target scans.

        The schedule is one MS1 scan, one MS2 scan per chemical, and one MS2
        scan targeting both chemicals at once. The combined scan is expected
        to contain as many peaks as the two single-target scans together.
        """
        targets = [chem.mass + 1.0 for chem in two_fixed_chems]
        width = DEFAULT_ISOLATION_WIDTH
        mz_tolerance = 0.1
        rt_tolerance = 15
        start_rt = 110
        end_rt = 112
        mode = POSITIVE

        fixed_controller = FixedScansController()
        spectrometer = IndependentMassSpectrometer(mode, two_fixed_chems)
        simulation = Environment(spectrometer, fixed_controller, start_rt, end_rt)

        survey_scan = get_default_scan_params(polarity=mode)
        first_only = get_dda_scan_param(
            targets[0], 0.0, None, width, mz_tolerance, rt_tolerance,
            polarity=mode)
        second_only = get_dda_scan_param(
            targets[1], 0.0, None, width, mz_tolerance, rt_tolerance,
            polarity=mode)
        # a single scan that targets both m/z values at once
        both_targets = get_dda_scan_param(
            targets, [0.0, 0.0], None, width, mz_tolerance, rt_tolerance,
            polarity=mode)

        fixed_controller.set_tasks(
            [survey_scan, first_only, second_only, both_targets])
        set_log_level_warning()
        simulation.run()
        assert len(fixed_controller.scans[2]) == 3

        peak_counts = [scan.num_peaks for scan in fixed_controller.scans[2]]

        assert peak_counts[0] > 0
        assert peak_counts[1] > 0
        assert peak_counts[2] == peak_counts[0] + peak_counts[1]
        simulation.write_mzML(OUT_DIR, 'multi_windows.mzML')
Exemplo n.º 11
0
    def test_hybrid_controller_with_beer_chems(self):
        """Run the hybrid controller with purity scans on the QC beer chemicals and check that an mzML file is written."""
        logger.info('Testing hybrid controller with QC beer chemicals')

        top_n = [5]
        window_widths = [1]  # isolation window (Dalton) around a selected precursor ion
        mz_tolerances = [10]
        rt_tolerances = [10]
        ms1_threshold = 1.75E5
        changepoints = None
        start_rt, end_rt = (0, 400)

        # purity-scan settings
        purity_scan_count = top_n[0]
        shift = 0.2
        threshold = 1
        add_ms1 = True  # note kept from the original author: this seems to be the broken bit
        randomise = True

        # these two settings are passed to the mass spec rather than the controller
        transition_window = 'gaussian'
        transition_window_params = [0.5]

        spectrometer = IndependentMassSpectrometer(
            POSITIVE, beer_chems, self.ps, add_noise=True,
            isolation_transition_window=transition_window,
            isolation_transition_window_params=transition_window_params)
        hybrid_controller = HybridController(
            spectrometer, top_n, changepoints, window_widths, mz_tolerances, rt_tolerances,
            ms1_threshold, purity_scan_count, shift, threshold,
            purity_add_ms1=add_ms1, purity_randomise=randomise)

        # the environment couples the instrument and the controller
        simulation = Environment(spectrometer, hybrid_controller, start_rt, end_rt, progress_bar=True)

        # keep logging at WARNING while the simulation runs, then go back to DEBUG
        set_log_level_warning()
        simulation.run()
        set_log_level_debug()

        # the simulated run must end up as an mzML file on disk
        mzml_name = 'hybrid_controller_qcbeer_chems.mzML'
        expected_path = os.path.join(out_dir, mzml_name)
        simulation.write_mzML(out_dir, mzml_name)
        self.assertTrue(os.path.exists(expected_path))
        print()
Exemplo n.º 12
0
def run_experiment(param):
    """Run one Top-N experiment described by ``param``.

    The run is skipped when both ``param['mzml_out']`` and
    ``param['pickle_out']`` already exist as files. Otherwise a Top-N
    simulation is run, written to ``mzml_out``, and the controller is saved
    to ``pickle_out``.

    :param param: dict of experimental parameters
    :return: the analysis name when the experiment was run, ``None`` when it
        was skipped
    """
    analysis_name = param['analysis_name']
    mzml_out = param['mzml_out']
    pickle_out = param['pickle_out']
    N = param['N']
    rt_tol = param['rt_tol']

    already_done = os.path.isfile(mzml_out) and os.path.isfile(pickle_out)
    if already_done:
        logger.debug('Skipping %s' % analysis_name)
        return None

    logger.debug('Processing %s' % analysis_name)
    peak_sampler = param['peak_sampler']
    if peak_sampler is None:
        # extract density from the fragmentation file
        # NOTE(review): the resulting peak_sampler is not used further down
        # in this function - confirm whether this step is still needed.
        mzml_path = param['mzml_path']
        fragfile = param['fragfiles'][(N, rt_tol)]
        min_rt = param['min_rt']
        max_rt = param['max_rt']
        peak_sampler = get_peak_sampler(mzml_path, fragfile, min_rt, max_rt)

    mode = param['ionisation_mode']
    mass_spec = IndependentMassSpectrometer(mode, param['data'])
    controller = TopNController(param['ionisation_mode'],
                                param['N'],
                                param['isolation_width'],
                                param['mz_tol'],
                                param['rt_tol'],
                                param['min_ms1_intensity'])

    # the environment couples the instrument and the controller
    env = Environment(mass_spec, controller, param['min_rt'],
                      param['max_rt'], progress_bar=param['pbar'])

    # keep logging at WARNING while the simulation runs, then go back to DEBUG
    set_log_level_warning()
    env.run()
    set_log_level_debug()

    env.write_mzML(None, mzml_out)
    save_obj(controller, pickle_out)
    return analysis_name
Exemplo n.º 13
0
    def test_FixedScansController(self, two_fixed_chems):
        """Run a fixed schedule of one MS1 and three MS2 scans and check the scans the controller collected.

        All three MS2 scans target the first chemical's m/z; each of them is
        expected to contain at least one peak.
        """
        logger.info('Testing FixedScansController')
        targets = [chem.mass + 1.0 for chem in two_fixed_chems]
        width = DEFAULT_ISOLATION_WIDTH
        mz_tolerance = 0.1
        rt_tolerance = 15
        start_rt = 110
        end_rt = 112
        mode = POSITIVE

        fixed_controller = FixedScansController(schedule=None)
        spectrometer = IndependentMassSpectrometer(mode, two_fixed_chems)
        simulation = Environment(spectrometer, fixed_controller, start_rt, end_rt)

        survey_scan = get_default_scan_params(polarity=mode)
        # three separate, identically-parameterised MS2 scans on the first target
        fragment_scans = [
            get_dda_scan_param(targets[0], 0.0, None, width, mz_tolerance,
                               rt_tolerance, polarity=mode)
            for _ in range(3)
        ]
        fixed_controller.set_tasks([survey_scan] + fragment_scans)
        set_log_level_warning()
        simulation.run()

        assert len(fixed_controller.scans[1]) == 1
        assert len(fixed_controller.scans[2]) == 3
        for ms2_scan in fixed_controller.scans[2]:
            assert ms2_scan.num_peaks > 0
        simulation.write_mzML(OUT_DIR, 'fixedScansController.mzML')
Exemplo n.º 14
0
    def test_roi_controller_with_simulated_chems(self):
        """Sample simulated chemicals, run the ROI controller on them and check that an mzML file is written."""
        logger.info('Testing ROI controller with simulated chemicals')

        # sample the chemical objects and make sure we got as many as requested
        creator = ChemicalCreator(self.ps, ROI_Sources, hmdb)
        sampled = creator.sample(
            mz_range, rt_range, min_ms1_intensity, n_chems, self.ms_level,
            get_children_method=GET_MS2_BY_SPECTRA)
        self.assertEqual(len(sampled), n_chems)

        mode = POSITIVE
        top_n = 10
        window_width = 1  # isolation window (Dalton) around a selected precursor ion
        mz_tolerance = 10
        rt_tolerance = 15
        roi_min_intensity = 5000
        roi_min_length = 10

        # simulated instrument with noise enabled, driven by the ROI controller
        spectrometer = IndependentMassSpectrometer(mode, sampled, self.ps, add_noise=True)
        roi_controller = RoiController(
            mode, window_width, mz_tolerance, min_ms1_intensity,
            roi_min_intensity, roi_min_length, "Top N", top_n, rt_tolerance)

        # the environment couples the instrument and the controller
        simulation = Environment(spectrometer, roi_controller, min_rt, max_rt, progress_bar=True)

        # keep logging at WARNING while the simulation runs, then go back to DEBUG
        set_log_level_warning()
        simulation.run()
        set_log_level_debug()

        # the simulated run must end up as an mzML file on disk
        mzml_name = 'roi_controller_simulated_chems.mzML'
        expected_path = os.path.join(out_dir, mzml_name)
        simulation.write_mzML(out_dir, mzml_name)
        self.assertTrue(os.path.exists(expected_path))
        print()
Exemplo n.º 15
0
    def test_hybrid_controller_with_simulated_chems(self):
        """Sample simulated chemicals, run the hybrid controller with changing parameters and check that an mzML file is written."""
        logger.info('Testing hybrid controller with simulated chemicals')

        # sample the chemical objects and make sure we got as many as requested
        creator = ChemicalCreator(self.ps, ROI_Sources, hmdb)
        sampled = creator.sample(
            mz_range, rt_range, min_ms1_intensity, n_chems, self.ms_level,
            get_children_method=GET_MS2_BY_PEAKS)
        self.assertEqual(len(sampled), n_chems)

        # four settings per parameter, switched at the three changepoints below
        mode = POSITIVE
        top_n = [5, 10, 15, 20]
        window_widths = [1, 1, 1, 1]
        mz_tolerances = [10, 5, 15, 20]
        rt_tolerances = [15, 30, 60, 120]
        changepoints = [300, 600, 900]  # timepoints at which the four parameters above change

        # simulated instrument with noise enabled, driven by the hybrid controller
        spectrometer = IndependentMassSpectrometer(mode, sampled, self.ps, add_noise=True)
        hybrid_controller = HybridController(
            mode, top_n, changepoints, window_widths, mz_tolerances, rt_tolerances,
            min_ms1_intensity)

        # the environment couples the instrument and the controller
        simulation = Environment(spectrometer, hybrid_controller, min_rt, max_rt, progress_bar=True)

        # keep logging at WARNING while the simulation runs, then go back to DEBUG
        set_log_level_warning()
        simulation.run()
        set_log_level_debug()

        # the simulated run must end up as an mzML file on disk
        mzml_name = 'hybrid_controller_simulated_chems.mzML'
        expected_path = os.path.join(out_dir, mzml_name)
        simulation.write_mzML(out_dir, mzml_name)
        self.assertTrue(os.path.exists(expected_path))
        print()
Exemplo n.º 16
0
def case_control_non_overlap_experiment_evaluation(
        datasets,
        min_rt,
        max_rt,
        N,
        isolation_window,
        mz_tol,
        rt_tol,
        min_ms1_intensity,
        min_roi_intensity,
        min_roi_length,
        rt_box_size,
        mz_box_size,
        min_roi_length_for_fragmentation,
        scoring_params=None,
        base_chemicals=None,
        mzmine_files=None,
        rt_tolerance=100,
        experiment_dir=None,
        box_method='mean',
        roi_type=ROI_TYPE_NORMAL,
        reset_length_seconds=1e6,
        intensity_increase_factor=10,
        drop_perc=0.1 / 100,
        exclusion_method=ROI_EXCLUSION_DEW,
        exclusion_t_0=None,
        progress_bar=False):
    """Simulate a case-control non-overlap run for every dataset and evaluate the results.

    A single ``CaseControlGridEstimator`` is created up front and handed to
    the ``FlexibleNonOverlapController`` of every dataset. If
    ``base_chemicals`` is given, the environments are evaluated against it.
    Otherwise each run is written to ``experiment_dir`` as
    ``sample_<i>.mzml`` and evaluated through a ``FrequentistRoiAligner`` fed
    with the matching entry of ``mzmine_files``.

    :return: ``(env_list, evaluation)``, or ``(None, None)`` when neither
        ``base_chemicals`` nor ``mzmine_files`` is provided
    """
    # nothing to evaluate against: bail out before running any simulation
    if base_chemicals is None and mzmine_files is None:
        return None, None

    environments = []
    box_grid = AllOverlapGrid(min_rt, max_rt, rt_box_size, 0, 3000,
                              mz_box_size)
    grid = CaseControlGridEstimator(box_grid,
                                    IdentityDrift(),
                                    rt_tolerance=rt_tolerance,
                                    box_method=box_method)
    written_mzmls = []
    sample_names = ['sample_' + str(idx) for idx in range(len(datasets))]
    for idx, chemicals in enumerate(datasets):
        spectrometer = IndependentMassSpectrometer(POSITIVE, chemicals)
        non_overlap_controller = FlexibleNonOverlapController(
            POSITIVE,
            isolation_window,
            mz_tol,
            min_ms1_intensity,
            min_roi_intensity,
            min_roi_length,
            N,
            grid,
            rt_tol=rt_tol,
            min_roi_length_for_fragmentation=min_roi_length_for_fragmentation,
            scoring_params=scoring_params,
            roi_type=roi_type,
            reset_length_seconds=reset_length_seconds,
            intensity_increase_factor=intensity_increase_factor,
            drop_perc=drop_perc,
            exclusion_method=exclusion_method,
            exclusion_t_0=exclusion_t_0)
        simulation = Environment(spectrometer,
                                 non_overlap_controller,
                                 min_rt,
                                 max_rt,
                                 progress_bar=progress_bar)
        simulation.run()
        if progress_bar is False:
            print('Processed dataset ' + str(idx))
        environments.append(simulation)
        # mzML files are only needed for the peak-picking based evaluation
        if base_chemicals is None:
            mzml_name = sample_names[idx] + '.mzml'
            written_mzmls.append(os.path.join(experiment_dir, mzml_name))
            simulation.write_mzML(experiment_dir, mzml_name)

    if base_chemicals is not None:
        evaluation = evaluate_multiple_simulated_env(
            environments, base_chemicals=base_chemicals)
        return environments, evaluation

    aligner = FrequentistRoiAligner(rt_tolerance=rt_tolerance)
    for idx, mzml_path in enumerate(written_mzmls):
        aligner.add_picked_peaks(mzml_path, mzmine_files[idx],
                                 sample_names[idx], 'mzmine')
    evaluation = evaluate_multi_peak_roi_aligner(aligner, sample_names, True)
    return environments, evaluation
Exemplo n.º 17
0
def dsda_experiment_evaluation(datasets,
                               base_dir,
                               min_rt,
                               max_rt,
                               N,
                               isolation_window,
                               mz_tol,
                               rt_tol,
                               min_ms1_intensity,
                               mzmine_files=None,
                               rt_tolerance=100,
                               progress_bar=False):
    """Simulate a DsDA experiment over ``datasets`` and evaluate it.

    The first dataset is run with a ``TopNController``. Every later dataset
    is run with a ``FixedScansController`` whose schedule is obtained through
    ``get_schedule`` / ``dsda_get_scan_params`` (the printed messages indicate
    that an external R script is expected to produce those schedules).
    Each run is written to ``<base_dir>/Data/sample_XXX.mzml``.

    Once *all* datasets have been simulated, the environments are evaluated
    with ``evaluate_multiple_simulated_env`` when ``mzmine_files`` is None,
    otherwise the picked peaks are aligned with a ``RoiAligner`` and
    evaluated with ``evaluate_multi_peak_roi_aligner``.

    Args:
        datasets: indexable collection of datasets, one per simulated
            injection.
        base_dir: experiment directory; 'Data' and 'settings'
            sub-directories and 'DsDA_Timing_schedule.csv' are resolved
            relative to it.
        min_rt: start time passed to each ``Environment``.
        max_rt: end time passed to each ``Environment``.
        N: passed to ``create_dsda_schedule`` and ``TopNController``.
        isolation_window: passed to the controller / scan parameters.
        mz_tol: passed to the controller / scan parameters.
        rt_tol: passed to the controller / scan parameters.
        min_ms1_intensity: passed to ``TopNController``.
        mzmine_files: optional sequence of picked-peak files, indexed in
            parallel with ``datasets``.
        rt_tolerance: passed to ``RoiAligner``.
        progress_bar: forwarded to ``Environment``; when False a short
            message is printed per dataset instead.

    Returns:
        ``(env_list, evaluation)``, or ``(None, None)`` when ``datasets``
        is empty.
    """
    # Nothing to simulate. Checked first because datasets[0] is needed below.
    if len(datasets) == 0:
        return None, None

    data_dir = os.path.join(base_dir, 'Data')
    schedule_dir = os.path.join(base_dir, 'settings')
    mass_spec = IndependentMassSpectrometer(
        POSITIVE, datasets[0])  # necessary to get timings for schedule
    create_dsda_schedule(mass_spec, N, min_rt, max_rt, base_dir)
    print('Please open and run R script now')
    time.sleep(1)
    template_file = os.path.join(base_dir, 'DsDA_Timing_schedule.csv')

    env_list = []
    mzml_files = []
    source_files = ['sample_%03d' % i for i in range(len(datasets))]
    for i, source_file in enumerate(source_files):
        mass_spec = IndependentMassSpectrometer(POSITIVE, datasets[i])
        if i == 0:
            # No schedule exists yet for the first injection: plain Top-N.
            controller = TopNController(POSITIVE,
                                        N,
                                        isolation_window,
                                        mz_tol,
                                        rt_tol,
                                        min_ms1_intensity,
                                        ms1_shift=0,
                                        initial_exclusion_list=None,
                                        force_N=False)
        else:
            print('Looking for next schedule')
            new_schedule = get_schedule(i, schedule_dir)
            print('Found next schedule')
            time.sleep(1)
            schedule_param_list = dsda_get_scan_params(new_schedule,
                                                       template_file,
                                                       isolation_window,
                                                       mz_tol, rt_tol)
            controller = FixedScansController(schedule=schedule_param_list)

        env = Environment(mass_spec,
                          controller,
                          min_rt,
                          max_rt,
                          progress_bar=progress_bar)
        env.run()
        if progress_bar is False:
            print('Processed dataset ' + str(i))
        env_list.append(env)

        mzml_name = source_file + '.mzml'
        mzml_files.append(os.path.join(data_dir, mzml_name))
        print("Processed ", i + 1, " files")
        env.write_mzML(data_dir, mzml_name)
        print("Waiting for R to process .mzML files")

    # Evaluate only after every dataset has been simulated. Doing this inside
    # the loop would return after the first dataset.
    if mzmine_files is None:
        evaluation = evaluate_multiple_simulated_env(env_list)
    else:
        roi_aligner = RoiAligner(rt_tolerance=rt_tolerance)
        for j, mzml_file in enumerate(mzml_files):
            roi_aligner.add_picked_peaks(mzml_file, mzmine_files[j],
                                         source_files[j], 'mzmine')
        evaluation = evaluate_multi_peak_roi_aligner(
            roi_aligner, source_files)
    return env_list, evaluation
Exemplo n.º 18
0
    def test_ms2_matching(self):
        """Run Top-N over several chemical lists and check MS2 matching.

        A base set of chemicals is sampled and expanded into four samples
        (two 'control', two 'case'). Each sample is simulated with a
        TopNController seeded with the exclusion items collected from the
        previous run, and written to '<index>.mzML'. Finally ``ms2_main`` is
        called on the first file alone and on the whole output folder; the
        second returned value must be equal for both calls and the first
        returned value must be larger for the folder.
        """
        rt_intensity_sampler = UniformRTAndIntensitySampler(min_rt=10,
                                                            max_rt=20)
        formula_sampler = UniformMZFormulaSampler()
        adduct_priors = {POSITIVE: {'M+H': 1}}
        mixture_creator = ChemicalMixtureCreator(
            formula_sampler,
            rt_and_intensity_sampler=rt_intensity_sampler,
            adduct_prior_dict=adduct_priors)
        base_chems = mixture_creator.sample(300, 2)

        groups = ['control', 'control', 'case', 'case']
        group_settings = {
            'control': {
                'missing_probability': 0.0,
                'changing_probability': 0.0
            },
            'case': {
                'missing_probability': 0.0,
                'changing_probability': 1.0
            },
        }
        multi_creator = MultipleMixtureCreator(base_chems, groups,
                                               group_settings)
        chemical_lists = multi_creator.generate_chemical_lists()

        # Top-N controller settings shared by every run
        top_n = 10
        iso_width = 0.7
        mz_tolerance = 0.001
        rt_tolerance = 30
        ms1_threshold = 0

        set_log_level_warning()

        output_folder = os.path.join(OUT_DIR, 'ms2_matching')
        write_msp(base_chems, 'mmm.msp', out_dir=output_folder)

        # Exclusion items are carried over from one run to the next
        carried_exclusions = []
        for run_idx, chems in enumerate(chemical_lists):
            controller = TopNController(
                POSITIVE,
                top_n,
                iso_width,
                mz_tolerance,
                rt_tolerance,
                ms1_threshold,
                initial_exclusion_list=carried_exclusions)
            mass_spec = IndependentMassSpectrometer(POSITIVE, chems)
            env = Environment(mass_spec, controller, 10, 30,
                              progress_bar=True)
            env.run()
            env.write_mzML(output_folder, '{}.mzML'.format(run_idx))

            exclusion_boxes = controller.exclusion.exclusion_list
            mz_intervals = list(exclusion_boxes.boxes_mz.items())
            rt_intervals = list(exclusion_boxes.boxes_rt.items())
            mz_items = {interval.data for interval in mz_intervals}
            rt_items = {interval.data for interval in rt_intervals}
            assert len(mz_items) == len(rt_items)

            carried_exclusions = list(mz_items)
            logger.warning(len(carried_exclusions))

        set_log_level_debug()
        msp_file = os.path.join(output_folder, 'mmm.msp')
        first_mzml = os.path.join(output_folder, '0.mzML')
        # check with just the first file
        single_first, single_second = ms2_main(first_mzml, msp_file, 1, 0.7)
        # check with all
        all_first, all_second = ms2_main(output_folder, msp_file, 1, 0.7)

        assert single_second == all_second
        assert all_first > single_first
Exemplo n.º 19
0
def topn_processor():
    """Simulate a Top-N run from a real beer mzML file and compare both.

    Steps, in order: train the spectral feature database from the real
    file, extract its ROIs, turn the ROIs that exceed the MS1 intensity
    threshold into chemicals, simulate a Top-N acquisition over them, write
    the simulated mzML, then compare real vs simulated scans and precursors
    and save three plots.

    Returns:
        list of output paths: the simulated mzML followed by the three
        generated PNG files.
    """
    # --- input / output locations ---
    base_dir = 'documents/simple_ms1/example_data'
    results_dir = os.path.join(base_dir, 'results')
    mzml_path = os.path.join(base_dir, 'beers', 'fragmentation', 'mzML')
    file_name = 'Beer_multibeers_1_T10_POS.mzML'
    mzml_file = os.path.join(mzml_path, file_name)

    experiment_out_dir = os.path.join(results_dir, 'mzml_compare')
    mzml_filename = 'simulated.mzML'
    mzml_out = os.path.join(experiment_out_dir, mzml_filename)
    pathlist = [mzml_out]

    # --- parameters ---
    min_rt = 0
    max_rt = 1441
    kde_min_ms1_intensity = 0  # min intensity to be selected for kdes
    kde_min_ms2_intensity = 0
    bandwidth_mz_intensity_rt = 1.0
    bandwidth_n_peaks = 1.0

    roi_mz_tol = 10
    roi_min_length = 1
    roi_min_intensity = 0

    isolation_width = 1  # the (full) isolation width in Dalton around a selected precursor m/z
    ionisation_mode = POSITIVE
    N = 10
    rt_tol = 15
    mz_tol = 10
    min_ms1_intensity = 1.75E5  # minimum ms1 intensity to fragment

    print('#' * 10, 'Train densities')
    ds = DataSource()
    ds.load_data(mzml_path, file_name=file_name)
    ps = get_spectral_feature_database(ds, file_name, kde_min_ms1_intensity,
                                       kde_min_ms2_intensity, min_rt, max_rt,
                                       bandwidth_mz_intensity_rt,
                                       bandwidth_n_peaks)

    print('#' * 10, 'Extract all ROIs')
    good_roi, junk = make_roi(mzml_file,
                              mz_tol=roi_mz_tol,
                              mz_units='ppm',
                              min_length=roi_min_length,
                              min_intensity=roi_min_intensity,
                              start_rt=min_rt,
                              stop_rt=max_rt)
    all_roi = good_roi + junk
    print('#' * 10, len(all_roi))

    # keep only ROIs with at least one point above the fragmentation threshold
    all_roi = [
        roi for roi in all_roi
        if (np.array(roi.intensity_list) > min_ms1_intensity).any()
    ]

    set_log_level_debug()
    data = RoiToChemicalCreator(ps, all_roi).chemicals
    save_obj(data, os.path.join(experiment_out_dir, 'dataset.p'))

    set_log_level_warning()
    mass_spec = IndependentMassSpectrometer(ionisation_mode, data, ps)
    controller = TopNController(ionisation_mode, N, isolation_width, mz_tol,
                                rt_tol, min_ms1_intensity)
    # the environment drives both the mass spec and the controller
    env = Environment(mass_spec, controller, min_rt, max_rt, progress_bar=True)

    # keep logging quiet while the simulation runs, then restore DEBUG
    set_log_level_warning()
    env.run()
    set_log_level_debug()
    env.write_mzML(experiment_out_dir, mzml_filename)

    print('#' * 10, 'Compare Results')
    matplotlib.use('agg')
    (simulated_mzs, simulated_rts, simulated_intensities,
     simulated_cumsum_ms1, simulated_cumsum_ms2) = count_stuff(
         mzml_out, min_rt, max_rt)
    (real_mzs, real_rts, real_intensities, real_cumsum_ms1,
     real_cumsum_ms2) = count_stuff(mzml_file, min_rt, max_rt)

    plt.rcParams.update({'font.size': 14})
    scans_png = os.path.join(results_dir, 'topN_num_scans.png')
    pathlist.append(scans_png)
    plot_num_scans(real_cumsum_ms1, real_cumsum_ms2, simulated_cumsum_ms1,
                   simulated_cumsum_ms2, scans_png)

    # Match precursors at increasing RT tolerances (seconds). The m/z
    # tolerance is None, i.e. 2 decimal places are used for matching the m/z.
    # The matches from the last tolerance feed the plots below.
    match_mz_tol = None
    for match_rt_tol in (5, 10, 15):
        matches = match_peaklist(real_mzs, real_rts, real_intensities,
                                 simulated_mzs, simulated_rts,
                                 simulated_intensities, match_mz_tol,
                                 match_rt_tol)
        check_found_matches(matches, 'Real', 'Simulated')

    # the intensity is the third element of each match key
    unmatched_intensities = [
        key[2] for key, value in matches.items() if value is None
    ]
    matched_intensities = [
        key[2] for key, value in matches.items() if value is not None
    ]
    plt.rcParams.update({'font.size': 18})

    intensities_png = os.path.join(results_dir,
                                   'topN_matched_intensities.png')
    plot_matched_intensities(matched_intensities, unmatched_intensities,
                             intensities_png)
    pathlist.append(intensities_png)

    precursors_png = os.path.join(results_dir, 'topN_matched_precursors.png')
    plot_matched_precursors(matches, 50, 1000, 180, 1260, precursors_png)
    pathlist.append(precursors_png)
    return pathlist
def dia_processor():
    """Run four DIA simulations in sequence and write the last one to mzML.

    The simulations use a ``TreeController`` with these settings, in order:
    basic design with 1 window, basic design with 10 windows, Kaufmann
    'tree' design with 10 windows, and Kaufmann 'nested' design with 10
    windows. Each one gets a fresh ``IndependentMassSpectrometer`` built
    from the same pickled dataset and peak sampler. Only the environment of
    the final (nested) run is written out.

    Returns:
        str: relative path of the written file,
        'documents/prepared_data_dia/results/dia_controller.mzML'
        (joined with '/').
    """
    mypath = 'documents/prepared_data_dia'
    data_dir = os.path.join(os.getcwd(), mypath)

    dataset = load_obj(os.path.join(data_dir, 'QCB_22May19_1.p'))
    ps = load_obj(
        Path(data_dir, 'peak_sampler_mz_rt_int_beerqcb_fragmentation.p'))

    # settings shared by every run
    min_rt, max_rt = 0, 1440
    window_type = 'even'
    extra_bins = 0

    def build_env(dia_design, kaufmann_design, num_windows):
        """Create a fresh mass spec, controller and environment for one run."""
        mass_spec = IndependentMassSpectrometer(POSITIVE, dataset, ps)
        controller = TreeController(dia_design, window_type, kaufmann_design,
                                    extra_bins, num_windows)
        env = Environment(mass_spec, controller, min_rt, max_rt,
                          progress_bar=True)
        # set the log level to WARNING so we don't see too many messages
        # when environment is running
        set_log_level_warning()
        return env

    # First run: basic design, single window. Its banner is printed right
    # before the run rather than before the set-up.
    env = build_env('basic', None, 1)
    print('#' * 10, 'run the simulation')
    env.run()

    # Remaining runs: banner first, then set-up and run. num_windows is
    # passed explicitly for every run so none relies on a left-over value.
    remaining_runs = (
        ('Run Fixed Window DIA', 'basic', None, 10),
        ('Run Tree DIA method of Kauffman and Walker', 'kaufmann', 'tree',
         10),
        ('Run Nested DIA method of Kauffman and Walker', 'kaufmann', 'nested',
         10),
    )
    for banner, dia_design, kaufmann_design, num_windows in remaining_runs:
        print('#' * 10, banner)
        env = build_env(dia_design, kaufmann_design, num_windows)
        env.run()

    # only the last environment (nested design) is saved
    mzml_filename = 'dia_controller.mzML'
    out_dir = os.path.join(data_dir, 'results')
    env.write_mzML(out_dir, mzml_filename)
    return str(Path(mypath, 'results')) + '/' + mzml_filename
Exemplo n.º 21
0
    # Spike noise object shared by both simulations below.
    # NOTE(review): meaning of the 0.01 argument is not visible here - confirm
    # against UniformSpikeNoise.
    spike_noise = UniformSpikeNoise(0.01, args.spike_max)

    # First simulation: Top-N controller over the dataset in positive mode.
    ms = IndependentMassSpectrometer(POSITIVE_IONISATION_MODE,
                                     dataset,
                                     spike_noise=spike_noise)

    controller = TopNController(POSITIVE_IONISATION_MODE, 10, 0.7, 0.01, 15,
                                1e3)

    # The simulated time range is padded by 50 on each side of the
    # requested RT range.
    env = Environment(ms,
                      controller,
                      min_time=args.min_rt - 50,
                      max_time=args.max_rt + 50)

    set_log_level_warning()
    env.run()

    # The output directory is passed as None, with args.output_mzml_file
    # as the file name.
    env.write_mzML(None, args.output_mzml_file)

    # Optional second simulation with a SWATH controller, run only when a
    # SWATH output file was requested. A fresh mass spectrometer is built
    # with the same dataset and spike noise.
    if args.output_swath_file is not None:
        sw = SWATH(args.min_mz, args.max_mz, 100, 0.0)
        ms = IndependentMassSpectrometer(POSITIVE_IONISATION_MODE,
                                         dataset,
                                         spike_noise=spike_noise)
        env = Environment(ms,
                          sw,
                          min_time=args.min_rt - 50,
                          max_time=args.max_rt + 50)
        env.run()
        env.write_mzML(None, args.output_swath_file)