def top_n_roi_experiment(datasets, base_chemicals, rt_range, isolation_width,
                         mz_tol, min_ms1_intensity, min_roi_intensity,
                         min_roi_length, N, rt_tol, ionisation_mode=POSITIVE):
    """
    Simulate a TopN_RoiController run for every dataset and evaluate them jointly.

    :param datasets: sequence of chemical collections, one simulated run each
    :param base_chemicals: forwarded to ``evaluate_multiple_simulated_env``
    :param rt_range: indexable whose items 0 and 1 are passed to the
        environment as its two RT arguments
    :param isolation_width: forwarded to the controller
    :param mz_tol: forwarded to the controller
    :param min_ms1_intensity: forwarded to the controller
    :param min_roi_intensity: forwarded to the controller
    :param min_roi_length: forwarded to the controller
    :param N: forwarded to the controller as ``N``
    :param rt_tol: forwarded to the controller as ``rt_tol``
    :param ionisation_mode: used for both the mass spec and the controller
    :return: tuple ``(env_list, final_evaluation)``
    """

    def simulate(chems):
        # A fresh mass spec and controller are built for every dataset.
        spectrometer = IndependentMassSpectrometer(ionisation_mode, chems)
        roi_controller = TopN_RoiController(ionisation_mode, isolation_width,
                                            mz_tol, min_ms1_intensity,
                                            min_roi_intensity, min_roi_length,
                                            N=N, rt_tol=rt_tol)
        environment = Environment(spectrometer, roi_controller,
                                  rt_range[0], rt_range[1], progress_bar=True)
        environment.run()
        return environment

    env_list = [simulate(chems) for chems in datasets]
    final_evaluation = evaluate_multiple_simulated_env(
        env_list, base_chemicals=base_chemicals)
    return env_list, final_evaluation
def test_negative_fixed(self):
    """Check the integer m/z values of MS1 and MS2 scans in NEGATIVE mode."""
    formula_sampler = EvenMZFormulaSampler()
    ms2_sampler = FixedMS2Sampler()
    rt_intensity_sampler = UniformRTAndIntensitySampler(min_rt=100, max_rt=101)
    chromatogram_sampler = ConstantChromatogramSampler()
    mixture = ChemicalMixtureCreator(
        formula_sampler,
        ms2_sampler=ms2_sampler,
        rt_and_intensity_sampler=rt_intensity_sampler,
        chromatogram_sampler=chromatogram_sampler)
    dataset = mixture.sample(3, 2)

    N = 10
    isolation_width = 0.7
    mz_tol = 10
    rt_tol = 15
    mass_spec = IndependentMassSpectrometer(NEGATIVE, dataset)
    controller = TopNController(NEGATIVE, N, isolation_width, mz_tol, rt_tol,
                                MIN_MS1_INTENSITY)
    env = Environment(mass_spec, controller, 102, 110, progress_bar=True)
    set_log_level_warning()
    env.run()

    # The first MS1 scan is compared position by position.
    expected_vals = [98, 198, 298]
    observed_vals = [int(mz) for mz in controller.scans[1][0].mzs]
    for position, observed in enumerate(observed_vals):
        assert observed == expected_vals[position]

    # Every MS2 peak must truncate to one of the expected fragment values.
    expected_frags = {88, 78, 188, 178, 288, 278}
    for ms2_scan in controller.scans[2]:
        for fragment_mz in ms2_scan.mzs:
            assert int(fragment_mz) in expected_frags
def run_single_WeightedDEW(params):
    """
    Run one WeightedDEW simulation described by ``params`` and write its mzML.

    Nothing is simulated when the output file already exists in
    ``params['out_dir']`` or when ``params['t0']`` is greater than
    ``params['r']``; both cases log a warning and return.

    :param params: dictionary providing ``sample_name``, ``t0``, ``r``,
        ``out_dir``, ``ionisation_mode``, ``N``, ``isolation_width``,
        ``mz_tol``, ``min_ms1_intensity``, ``chems``, ``scan_duration``,
        ``min_rt`` and ``max_rt``
    :return: None
    """
    out_file = 'WeightedDEW_{}_{}_{}.mzml'.format(
        params['sample_name'], params['t0'], params['r'])
    logger.warning('Generating %s' % out_file)

    target_path = os.path.join(params['out_dir'], out_file)
    if os.path.isfile(target_path):
        logger.warning('Already done')
        return

    t0_exceeds_r = params['t0'] > params['r']
    if t0_exceeds_r:
        logger.warning('Impossible combination')
        return

    mode = params['ionisation_mode']
    controller = WeightedDEWController(mode,
                                       params['N'],
                                       params['isolation_width'],
                                       params['mz_tol'],
                                       params['r'],
                                       params['min_ms1_intensity'],
                                       exclusion_t_0=params['t0'],
                                       log_intensity=True)
    mass_spec = IndependentMassSpectrometer(
        params['ionisation_mode'], params['chems'],
        scan_duration=params['scan_duration'])
    env = Environment(mass_spec, controller,
                      params['min_rt'], params['max_rt'],
                      progress_bar=True,
                      out_dir=params['out_dir'],
                      out_file=out_file)
    env.run()
def run_TopN(chems, scan_duration, params, out_dir):
    """
    Simulate a TopN controller and let the environment write the result.

    The output file name is derived from ``params['controller_name']`` and
    ``params['sample_name']``.

    :param chems: chemicals handed to the simulated mass spectrometer
    :param scan_duration: forwarded to the mass spectrometer as ``scan_duration``
    :param params: a dictionary of parameters (``controller_name``,
        ``sample_name``, ``ionisation_mode``, ``N``, ``isolation_width``,
        ``mz_tol``, ``rt_tol``, ``min_ms1_intensity``, ``min_rt``, ``max_rt``)
    :param out_dir: output directory passed to the environment
    :return: None
    """
    logger.info('Running TopN simulation')
    logger.info(params)

    name_parts = (params['controller_name'], params['sample_name'])
    out_file = '%s_%s.mzML' % name_parts

    mode = params['ionisation_mode']
    controller = TopNController(mode,
                                params['N'],
                                params['isolation_width'],
                                params['mz_tol'],
                                params['rt_tol'],
                                params['min_ms1_intensity'])
    mass_spec = IndependentMassSpectrometer(params['ionisation_mode'], chems,
                                            scan_duration=scan_duration)
    env = Environment(mass_spec, controller,
                      params['min_rt'], params['max_rt'],
                      progress_bar=True, out_dir=out_dir, out_file=out_file)

    logger.info('Generating %s' % out_file)
    env.run()
def generate_mzmls(self, output_dir, params):
    """
    Run one MS1-only simulation per scan-duration dict and write each to mzML.

    :param output_dir: directory the mzML files are written to; also used to
        build default file names when ``self.filenames`` is None
    :param params: passed to ``self.time_gen``; its length must match the
        number of file names
    :raises ValueError: if ``len(params)`` differs from ``len(self.filenames)``
    """
    scan_duration_dicts = self.time_gen(params)
    # Default file names are generated only once; later calls reuse them.
    if (self.filenames is None):
        self.filenames = [
            os.path.join(output_dir, "time_exp_data_{:04d}.mzML".format(i))
            for i, _ in enumerate(scan_duration_dicts)
        ]
    # NOTE(review): assumed to run on every call rather than only when the
    # file names were just generated -- confirm against the intended nesting.
    self.file_counter += len(scan_duration_dicts)
    if (len(params) != len(self.filenames)):
        raise ValueError(
            "Parameter and filename list not the same length!")
    for f, d in zip(self.filenames, scan_duration_dicts):
        mass_spec = IndependentMassSpectrometer(POSITIVE,
                                                self.chems,
                                                None,
                                                scan_duration_dict=d)
        controller = SimpleMs1Controller()
        env = Environment(mass_spec,
                          controller,
                          self.min_rt,
                          self.max_rt,
                          progress_bar=True)
        set_log_level_warning()
        env.run()
        # NOTE(review): the warning level is set again here instead of being
        # restored to another level -- confirm this is intentional.
        set_log_level_warning()
        # Only the base name of f is used, so the file is written under
        # output_dir regardless of the directory part of f.
        env.write_mzML(output_dir, os.path.basename(f))
def run_experiment(self, idx):
    """
    Run the scheduled experiment at row ``idx`` unless its mzML already exists.

    :param idx: index into ``self.controller_schedule``
    :return: tuple ``(mzml_file, controller_name)``; the path is built from
        ``self.base_dir`` and the sample ID whether or not a file was written
    """
    controller_name = self.controller_schedule['Sample ID'][idx]
    mzml_files = glob.glob(os.path.join(self.base_dir, '*.mzML'))
    # Only simulate when <Sample ID>.mzML is not already in base_dir.
    if controller_name + '.mzML' not in [
            os.path.basename(file) for file in mzml_files
    ]:
        controller, ms_params = super().run_experiment(idx)
        # load data and set up MS
        logger.info(self.controller_schedule.iloc[[idx]].to_dict())
        method = self.controller_schedule['Controller Method'][idx]
        dataset = self.controller_schedule['Dataset'][idx]
        # NOTE(review): the simulation below is assumed to be nested under this
        # check, so nothing is written when method or dataset is None -- confirm.
        if method is not None and dataset is not None:
            dataset = load_obj(self.controller_schedule['Dataset'][idx])
            mass_spec = IndependentMassSpectrometer(
                ms_params['ionisation_mode'], dataset)
            # Run sample
            env = Environment(mass_spec,
                              controller,
                              self.rt_range[0][0],
                              self.rt_range[0][1],
                              progress_bar=self.progress_bar)
            env.run()
            env.write_mzML(self.base_dir, controller_name + '.mzML')
            # Optionally save the controller object next to the mzML.
            if self.write_env:
                save_obj(
                    controller,
                    os.path.join(self.base_dir, controller_name + '.p'))
    else:
        logger.info('Experiment already completed. Skipping...')
    mzml_file = os.path.join(self.base_dir, controller_name + '.mzML')
    return mzml_file, controller_name
def test_multiple_adducts(self):
    """Check that no MS1 scan has more peaks than chemicals times adducts."""
    adduct_prior_dict = {POSITIVE: {'M+H': 100, 'M+Na': 100, 'M+K': 100}}
    n_chems = 5

    formula_sampler = DatabaseFormulaSampler(HMDB)
    rt_intensity_sampler = UniformRTAndIntensitySampler(min_rt=100, max_rt=101)
    chromatogram_sampler = ConstantChromatogramSampler()
    mixture = ChemicalMixtureCreator(
        formula_sampler,
        rt_and_intensity_sampler=rt_intensity_sampler,
        chromatogram_sampler=chromatogram_sampler,
        adduct_prior_dict=adduct_prior_dict,
        adduct_proportion_cutoff=0.0)
    n_adducts = len(adduct_prior_dict[POSITIVE])
    dataset = mixture.sample(n_chems, 2)

    # Replace each chemical's isotopes with a single "Mono" entry.
    for chem in dataset:
        chem.isotopes = [(chem.mass, 1, "Mono")]

    # should be 15 peaks or less all the time
    # some adducts might not be sampled if the probability is less than 0.2
    controller = SimpleMs1Controller()
    mass_spec = IndependentMassSpectrometer(POSITIVE, dataset)
    env = Environment(mass_spec, controller, 102, 110, progress_bar=True)
    set_log_level_warning()
    env.run()

    max_peaks = n_chems * n_adducts
    for ms1_scan in controller.scans[1]:
        assert len(ms1_scan.mzs) <= max_peaks
def test_mass_spec(self):
    """Run a Top-N simulation on BEER_CHEMS using a TaskManager-backed mass spec."""
    logger.info(
        'Testing mass spec using the Top-N controller and QC beer chemicals'
    )
    ionisation_mode = POSITIVE
    N, isolation_width = 10, 1
    mz_tol, rt_tol = 10, 15

    task_manager = TaskManager(buffer_size=3)
    mass_spec = IndependentMassSpectrometer(ionisation_mode, BEER_CHEMS,
                                            task_manager=task_manager)
    controller = TopNController(ionisation_mode, N, isolation_width, mz_tol,
                                rt_tol, MIN_MS1_INTENSITY)

    # the environment drives both the mass spec and the controller
    env = Environment(mass_spec, controller, BEER_MIN_BOUND, BEER_MAX_BOUND,
                      progress_bar=True)
    env.run()

    # at least one MS2 scan must be non-empty
    check_non_empty_MS2(controller)

    # write the simulated output to an mzML file and check it
    check_mzML(env, OUT_DIR, 'test_mass_spec.mzML')
def test_roi_controller_with_beer_chems(self):
    """Run the ROI controller ("Top N" mode) on the beer chemicals and write mzML."""
    logger.info('Testing ROI controller with QC beer chemicals')

    ionisation_mode = POSITIVE
    isolation_width = 1  # the isolation window in Dalton around a selected precursor ion
    N = 10
    mz_tol, rt_tol = 10, 15
    min_roi_intensity, min_roi_length = 5000, 10

    # simulated mass spec with noise, plus the ROI controller
    mass_spec = IndependentMassSpectrometer(ionisation_mode, beer_chems,
                                            self.ps, add_noise=True)
    controller = RoiController(ionisation_mode, isolation_width, mz_tol,
                               min_ms1_intensity, min_roi_intensity,
                               min_roi_length, "Top N", N, rt_tol)

    # the environment drives both the mass spec and the controller
    env = Environment(mass_spec, controller, min_rt, max_rt,
                      progress_bar=True)

    # keep logging quiet (WARNING) while the simulation runs, then go back to DEBUG
    set_log_level_warning()
    env.run()
    set_log_level_debug()

    # write the simulated output and check that the file exists
    filename = 'roi_controller_qcbeer_chems.mzML'
    expected_path = os.path.join(out_dir, filename)
    env.write_mzML(out_dir, filename)
    self.assertTrue(os.path.exists(expected_path))
    print()
def test_ms1_controller_with_qcbeer_chems(self):
    """Run the simple MS1 controller on the beer chemicals and write mzML."""
    logger.info('Testing MS1 controller with QC beer chemicals')

    # simulated mass spec and MS1 controller, driven together by the environment
    mass_spec = IndependentMassSpectrometer(POSITIVE, beer_chems, self.ps)
    controller = SimpleMs1Controller()
    env = Environment(mass_spec, controller, min_rt, max_rt,
                      progress_bar=True)

    # keep logging quiet (WARNING) while the simulation runs, then go back to DEBUG
    set_log_level_warning()
    env.run()
    set_log_level_debug()

    # write the simulated output and check that the file exists
    filename = 'ms1_controller_qcbeer_chems.mzML'
    expected_path = os.path.join(out_dir, filename)
    env.write_mzML(out_dir, filename)
    self.assertTrue(os.path.exists(expected_path))
    print()
def test_ms1_controller_with_simulated_chems(self):
    """Sample chemicals, run the simple MS1 controller on them and write mzML."""
    logger.info('Testing MS1 controller with simulated chemicals')

    # sample the chemical objects and check that the requested number came back
    creator = ChemicalCreator(self.ps, ROI_Sources, hmdb)
    dataset = creator.sample(mz_range, rt_range, min_ms1_intensity, n_chems,
                             self.ms_level)
    self.assertEqual(len(dataset), n_chems)

    # simulated mass spec and MS1 controller, driven together by the environment
    mass_spec = IndependentMassSpectrometer(POSITIVE, dataset, self.ps)
    controller = SimpleMs1Controller()
    env = Environment(mass_spec, controller, min_rt, max_rt,
                      progress_bar=True)

    # keep logging quiet (WARNING) while the simulation runs, then go back to DEBUG
    set_log_level_warning()
    env.run()
    set_log_level_debug()

    # write the simulated output and check that the file exists
    filename = 'ms1_controller_simulated_chems.mzML'
    expected_path = os.path.join(out_dir, filename)
    env.write_mzML(out_dir, filename)
    self.assertTrue(os.path.exists(expected_path))
    print()
def test_mean_scan_time_from_mzml(self):
    """Run Top-N using scan durations from MzMLScanTimeSampler(use_mean=True)."""
    ionisation_mode = POSITIVE
    N, isolation_width = 10, 0.7
    mz_tol, rt_tol = 0.01, 15
    min_ms1_intensity = 10
    controller = TopNController(ionisation_mode, N, isolation_width, mz_tol,
                                rt_tol, min_ms1_intensity)

    # extract chemicals from the mzML file
    roi_params = RoiParams(min_intensity=10, min_length=5)
    mixture = ChemicalMixtureFromMZML(MZML_FILE, roi_params=roi_params)
    chems = mixture.sample(None, 2)

    # scan durations come from the same mzML file, sampled with use_mean=True
    scan_time_sampler = MzMLScanTimeSampler(MZML_FILE, use_mean=True)
    mass_spec = IndependentMassSpectrometer(ionisation_mode, chems,
                                            scan_duration=scan_time_sampler)

    # run the simulation
    env = Environment(mass_spec, controller, 500, 600, progress_bar=True)
    set_log_level_warning()
    env.run()

    check_mzML(env, OUT_DIR, 'test_scan_time_mean_from_mzml.mzML')
def test_TreeDiaController_percentile(self, simple_dataset):
    """Run DiaController with the 'tree' design and 'percentile' windows."""
    logger.info('Testing TreeDiaController percentile')

    # controller parameters
    window_type, kaufmann_design = 'percentile', 'tree'
    num_windows, scan_overlap = 64, 0
    min_mz, max_mz = 100, 1000

    # mass spec and environment parameters
    ionisation_mode = POSITIVE
    scan_time_dict = {1: 0.12, 2: 0.06}
    min_rt, max_rt = 0, 400

    # run controller
    mass_spec = IndependentMassSpectrometer(ionisation_mode, simple_dataset,
                                            scan_duration=scan_time_dict)
    controller = DiaController(min_mz, max_mz, window_type, kaufmann_design,
                               num_windows, scan_overlap=scan_overlap)
    env = Environment(mass_spec, controller, min_rt, max_rt,
                      progress_bar=True)
    set_log_level_warning()
    env.run()

    # at least one MS2 scan must be non-empty
    check_non_empty_MS2(controller)

    # write the simulated output to an mzML file and check it
    check_mzML(env, OUT_DIR, 'tree_dia_percentile.mzml')
def test_swath(self, ten_chems):
    """Run the SWATH controller on ``ten_chems`` with spike noise enabled."""
    min_mz, max_mz = 100, 1000
    width, scan_overlap = 100, 10
    ionisation_mode = POSITIVE

    controller = SWATH(min_mz, max_mz, width, scan_overlap=scan_overlap)

    scan_time_dict = {1: 0.124, 2: 0.124}
    spike_noise = UniformSpikeNoise(0.1, 1)
    mass_spec = IndependentMassSpectrometer(ionisation_mode, ten_chems,
                                            spike_noise=spike_noise,
                                            scan_duration=scan_time_dict)

    env = Environment(mass_spec, controller, 200, 300, progress_bar=True)
    set_log_level_warning()
    env.run()

    check_non_empty_MS2(controller)
    check_mzML(env, OUT_DIR, 'SWATH_ten_chems.mzML')
def test_aif_with_fixed_chems(self):
    """Check the alternating MS1 / AIF peaks for a single fixed chemical."""
    formula_sampler = EvenMZFormulaSampler()
    ms2_sampler = FixedMS2Sampler(n_frags=2)
    chromatogram_sampler = ConstantChromatogramSampler()
    rt_intensity_sampler = UniformRTAndIntensitySampler(min_rt=0, max_rt=1)
    mixture = ChemicalMixtureCreator(
        formula_sampler,
        ms2_sampler=ms2_sampler,
        chromatogram_sampler=chromatogram_sampler,
        rt_and_intensity_sampler=rt_intensity_sampler)
    dataset = mixture.sample(1, 2)

    ms1_source_cid_energy = 30
    controller = AIF(ms1_source_cid_energy)
    ionisation_mode = POSITIVE
    mass_spec = IndependentMassSpectrometer(ionisation_mode, dataset)
    env = Environment(mass_spec, controller, 10, 20, progress_bar=True)
    set_log_level_warning()
    env.run()

    for position, scan in enumerate(controller.scans[1]):
        integer_mzs = [int(mz) for mz in scan.mzs]
        if position % 2 == 1:
            # odd scan, AIF, should have two peaks at 81 and 91
            integer_mzs = sorted(integer_mzs)
            assert integer_mzs[0] == 81
            assert integer_mzs[1] == 91
        else:
            # even scan, MS1 - should have a single peak at integer value of 101
            assert integer_mzs[0] == 101
def test_TopN_controller_with_beer_chems(self):
    """Run the Top-N controller on the beer chemicals without noise and write mzML."""
    logger.info('Testing Top-N controller with QC beer chemicals')

    ionisation_mode = POSITIVE
    N, isolation_width = 10, 1
    mz_tol, rt_tol = 10, 15

    # simulated mass spec without noise, plus the Top-N controller
    mass_spec = IndependentMassSpectrometer(ionisation_mode, beer_chems,
                                            self.ps, add_noise=False)
    controller = TopNController(ionisation_mode, N, isolation_width, mz_tol,
                                rt_tol, min_ms1_intensity)

    # the environment drives both the mass spec and the controller
    env = Environment(mass_spec, controller, min_rt, max_rt,
                      progress_bar=True)

    # keep logging quiet (WARNING) while the simulation runs, then go back to DEBUG
    set_log_level_warning()
    env.run()
    set_log_level_debug()

    # write the simulated output and check that the file exists
    filename = 'topN_controller_qcbeer_chems_no_noise.mzML'
    expected_path = os.path.join(out_dir, filename)
    env.write_mzML(out_dir, filename)
    self.assertTrue(os.path.exists(expected_path))
    print()
def simple_ms1_processor():
    """
    Sample a dataset, run the simple MS1 controller on it and write an mzML file.

    Inputs are loaded from ``documents/simple_ms1/example_data`` (resolved
    against the current working directory); the sampled dataset and the mzML
    are written below its ``results/MS1_single`` sub-directory.

    :return: the relative path of the written mzML file as a string, built
        from ``mypath``, ``results/MS1_single`` and the file name joined by '/'
    """
    # Adjacent string literals are used instead of a backslash continuation
    # inside the literal, which embedded the source indentation in the output.
    print('#' * 10,
          'Load previously trained spectral feature database and the list of '
          'extracted metabolites, created in 01. Download Data')

    mypath = 'documents/simple_ms1/example_data'
    base_dir = os.path.abspath(mypath)

    # NOTE(review): load_obj appears to unpickle these files -- only use it
    # with trusted data.
    ps = load_obj(Path(base_dir, 'peak_sampler_mz_rt_int_19_beers_fullscan.p'))
    hmdb = load_obj(Path(base_dir, 'hmdb_compounds.p'))

    out_dir = Path(base_dir, 'results', 'MS1_single')

    # the list of ROI sources created in the previous notebook '01. Download Data.ipynb'
    ROI_Sources = [
        str(Path(base_dir, 'DsDA', 'DsDA_Beer', 'beer_t10_simulator_files'))
    ]

    # minimum MS1 intensity of chemicals
    min_ms1_intensity = 1.75E5

    # m/z and RT range of chemicals
    rt_range = [(0, 1440)]
    mz_range = [(0, 1050)]

    # the number of chemicals in the sample
    n_chems = 6500

    # maximum MS level (we do not generate fragmentation peaks when this value is 1)
    ms_level = 1

    chems = ChemicalCreator(ps, ROI_Sources, hmdb)
    dataset = chems.sample(mz_range, rt_range, min_ms1_intensity, n_chems,
                           ms_level)
    save_obj(dataset, Path(out_dir, 'dataset.p'))

    # show the first few sampled chemicals
    for chem in dataset[0:10]:
        print(chem)

    print('#' * 10, 'Run MS1 controller on the samples and generate .mzML files')
    min_rt, max_rt = rt_range[0]

    mass_spec = IndependentMassSpectrometer(POSITIVE, dataset, ps)
    controller = SimpleMs1Controller()

    # create an environment to run both the mass spec and controller
    env = Environment(mass_spec, controller, min_rt, max_rt,
                      progress_bar=True)

    # Keep logging at WARNING while the simulation runs; the finally clause
    # restores DEBUG even when env.run() raises.
    set_log_level_warning()
    try:
        env.run()
    finally:
        set_log_level_debug()

    mzml_filename = 'ms1_controller.mzML'
    env.write_mzML(out_dir, mzml_filename)

    return str(Path(mypath, 'results', 'MS1_single')) + '/' + mzml_filename
def test_fullscan_from_mzml(self, chems_from_mzml):
    """Run the simple MS1 controller on chemicals taken from an mzML fixture."""
    controller = SimpleMs1Controller()
    mass_spec = IndependentMassSpectrometer(POSITIVE, chems_from_mzml)
    env = Environment(mass_spec, controller, 500, 600, progress_bar=True)

    set_log_level_warning()
    env.run()

    check_mzML(env, OUT_DIR, 'fullscan_from_mzml.mzML')
def top_n_roi_experiment_evaluation(datasets, min_rt, max_rt, N,
                                    isolation_window, mz_tol, rt_tol,
                                    min_ms1_intensity, min_roi_intensity,
                                    min_roi_length, base_chemicals=None,
                                    mzmine_files=None, rt_tolerance=100,
                                    experiment_dir=None, progress_bar=False):
    """
    Simulate a TopN_RoiController run per dataset and evaluate the results.

    With ``base_chemicals`` the environments are evaluated directly. Otherwise
    each run is written as ``sample_<i>.mzml`` into ``experiment_dir`` and
    evaluated through a ``RoiAligner`` fed with ``mzmine_files``.

    :param datasets: sequence of chemical collections, one simulated run each
    :param min_rt: first RT argument passed to the environment
    :param max_rt: second RT argument passed to the environment
    :param N: forwarded to the controller as ``N``
    :param isolation_window: forwarded to the controller
    :param mz_tol: forwarded to the controller
    :param rt_tol: forwarded to the controller as ``rt_tol``
    :param min_ms1_intensity: forwarded to the controller
    :param min_roi_intensity: forwarded to the controller
    :param min_roi_length: forwarded to the controller
    :param base_chemicals: if not None, used for the evaluation
    :param mzmine_files: picked-peak files indexed like ``datasets``; only
        used when ``base_chemicals`` is None
    :param rt_tolerance: passed to ``RoiAligner``
    :param experiment_dir: directory for the written mzML files; only used
        when ``base_chemicals`` is None
    :param progress_bar: forwarded to the environment; when it is ``False`` a
        line is printed after each dataset instead
    :return: ``(env_list, evaluation)``, or ``(None, None)`` when both
        ``base_chemicals`` and ``mzmine_files`` are None
    """
    use_chemicals = base_chemicals is not None
    if not use_chemicals and mzmine_files is None:
        return None, None

    env_list = []
    mzml_files = []
    source_files = ['sample_' + str(i) for i in range(len(datasets))]

    for i, chems in enumerate(datasets):
        mass_spec = IndependentMassSpectrometer(POSITIVE, chems)
        controller = TopN_RoiController(POSITIVE, isolation_window, mz_tol,
                                        min_ms1_intensity, min_roi_intensity,
                                        min_roi_length, N=N, rt_tol=rt_tol)
        env = Environment(mass_spec, controller, min_rt, max_rt,
                          progress_bar=progress_bar)
        env.run()
        if progress_bar is False:
            print('Processed dataset ' + str(i))
        env_list.append(env)

        # mzML files are only needed for the peak-picking based evaluation
        if not use_chemicals:
            mzml_name = source_files[i] + '.mzml'
            mzml_files.append(os.path.join(experiment_dir, mzml_name))
            env.write_mzML(experiment_dir, mzml_name)

    if use_chemicals:
        evaluation = evaluate_multiple_simulated_env(
            env_list, base_chemicals=base_chemicals)
    else:
        roi_aligner = RoiAligner(rt_tolerance=rt_tolerance)
        for i, mzml_file in enumerate(mzml_files):
            roi_aligner.add_picked_peaks(mzml_file, mzmine_files[i],
                                         source_files[i], 'mzmine')
        evaluation = evaluate_multi_peak_roi_aligner(roi_aligner,
                                                     source_files)
    return env_list, evaluation
def test_acquisition(self, two_fixed_chems):
    """Check that a two-target MS2 scan yields the peaks of both single-target scans."""
    mz_to_target = [chem.mass + 1.0 for chem in two_fixed_chems]

    isolation_width = DEFAULT_ISOLATION_WIDTH
    mz_tol, rt_tol = 0.1, 15
    min_rt, max_rt = 110, 112
    ionisation_mode = POSITIVE

    controller = FixedScansController()
    mass_spec = IndependentMassSpectrometer(ionisation_mode, two_fixed_chems)
    env = Environment(mass_spec, controller, min_rt, max_rt)

    # one MS1 scan, one MS2 scan per target, then one MS2 scan on both targets
    ms1_scan = get_default_scan_params(polarity=ionisation_mode)
    first_target_scan = get_dda_scan_param(mz_to_target[0], 0.0, None,
                                           isolation_width, mz_tol, rt_tol,
                                           polarity=ionisation_mode)
    second_target_scan = get_dda_scan_param(mz_to_target[1], 0.0, None,
                                            isolation_width, mz_tol, rt_tol,
                                            polarity=ionisation_mode)
    both_targets_scan = get_dda_scan_param(mz_to_target, [0.0, 0.0], None,
                                           isolation_width, mz_tol, rt_tol,
                                           polarity=ionisation_mode)
    schedule = [ms1_scan, first_target_scan, second_target_scan,
                both_targets_scan]
    controller.set_tasks(schedule)

    set_log_level_warning()
    env.run()

    assert len(controller.scans[2]) == 3
    n_peaks = [scan.num_peaks for scan in controller.scans[2]]
    assert n_peaks[0] > 0
    assert n_peaks[1] > 0
    assert n_peaks[2] == n_peaks[0] + n_peaks[1]

    env.write_mzML(OUT_DIR, 'multi_windows.mzML')
def run_env(mass_spec, controller, min_rt, max_rt, mzml_file):
    """
    Run a simulation, write it to ``mzml_file`` and return the chemical coverage.

    Coverage is the number of distinct chemical reprs among fragmentation
    events with ``ms_level > 1``, divided by the number of chemicals in the
    mass spec.

    :param mass_spec: mass spectrometer handed to the environment
    :param controller: controller handed to the environment
    :param min_rt: first RT argument passed to the environment
    :param max_rt: second RT argument passed to the environment
    :param mzml_file: passed to ``write_mzML`` as the file name, with ``None``
        as the directory
    :return: the coverage ratio
    """
    env = Environment(mass_spec, controller, min_rt, max_rt)
    env.run()
    env.write_mzML(None, mzml_file)

    fragmented = []
    for event in env.mass_spec.fragmentation_events:
        if event.ms_level > 1:
            fragmented.append(repr(event.chem))

    n_fragmented = len(np.unique(np.array(fragmented)))
    n_total = len(env.mass_spec.chemicals)
    chemical_coverage = n_fragmented / n_total
    return chemical_coverage
def test_hybrid_controller_with_beer_chems(self):
    """Run the hybrid controller on the beer chemicals and write mzML."""
    logger.info('Testing hybrid controller with QC beer chemicals')

    # controller settings
    N = [5]
    isolation_window = [1]  # the isolation window in Dalton around a selected precursor ion
    mz_tol = [10]
    rt_tol = [10]
    min_ms1_intensity = 1.75E5
    scan_param_changepoints = None

    rt_range = [(0, 400)]
    min_rt = rt_range[0][0]
    max_rt = rt_range[0][1]

    # purity scan settings
    n_purity_scans = N[0]
    purity_shift = 0.2
    purity_threshold = 1
    purity_add_ms1 = True  # this seems to be the broken bit
    purity_randomise = True

    # these settings change the Mass Spec type. They arent necessary to run the Top-N ROI Controller
    isolation_transition_window = 'gaussian'
    isolation_transition_window_params = [0.5]

    mass_spec = IndependentMassSpectrometer(
        POSITIVE, beer_chems, self.ps,
        add_noise=True,
        isolation_transition_window=isolation_transition_window,
        isolation_transition_window_params=isolation_transition_window_params)
    controller = HybridController(
        mass_spec, N, scan_param_changepoints, isolation_window, mz_tol,
        rt_tol, min_ms1_intensity, n_purity_scans, purity_shift,
        purity_threshold,
        purity_add_ms1=purity_add_ms1,
        purity_randomise=purity_randomise)

    # the environment drives both the mass spec and the controller
    env = Environment(mass_spec, controller, min_rt, max_rt,
                      progress_bar=True)

    # keep logging quiet (WARNING) while the simulation runs, then go back to DEBUG
    set_log_level_warning()
    env.run()
    set_log_level_debug()

    # write the simulated output and check that the file exists
    filename = 'hybrid_controller_qcbeer_chems.mzML'
    expected_path = os.path.join(out_dir, filename)
    env.write_mzML(out_dir, filename)
    self.assertTrue(os.path.exists(expected_path))
    print()
def test_topn_from_mzml(self, chems_from_mzml):
    """Run the Top-N controller on chemicals taken from an mzML fixture."""
    ionisation_mode = POSITIVE
    N, isolation_width = 10, 0.7
    mz_tol, rt_tol = 0.01, 15
    min_ms1_intensity = 10

    controller = TopNController(ionisation_mode, N, isolation_width, mz_tol,
                                rt_tol, min_ms1_intensity)
    mass_spec = IndependentMassSpectrometer(ionisation_mode, chems_from_mzml)
    env = Environment(mass_spec, controller, 500, 600, progress_bar=True)

    set_log_level_warning()
    env.run()

    check_non_empty_MS2(controller)
    check_mzML(env, OUT_DIR, 'topn_from_mzml.mzML')
def test_FixedScansController(self, two_fixed_chems):
    """Check that a schedule of one MS1 and three MS2 scans is acquired as given."""
    logger.info('Testing FixedScansController')
    mz_to_target = [chem.mass + 1.0 for chem in two_fixed_chems]

    isolation_width = DEFAULT_ISOLATION_WIDTH
    mz_tol, rt_tol = 0.1, 15
    min_rt, max_rt = 110, 112
    ionisation_mode = POSITIVE

    controller = FixedScansController(schedule=None)
    mass_spec = IndependentMassSpectrometer(ionisation_mode, two_fixed_chems)
    env = Environment(mass_spec, controller, min_rt, max_rt)

    # one MS1 scan followed by three MS2 scans, all on the first target m/z
    ms1_scan = get_default_scan_params(polarity=ionisation_mode)
    ms2_scans = [
        get_dda_scan_param(mz_to_target[0], 0.0, None, isolation_width,
                           mz_tol, rt_tol, polarity=ionisation_mode)
        for _ in range(3)
    ]
    schedule = [ms1_scan] + ms2_scans
    controller.set_tasks(schedule)

    set_log_level_warning()
    env.run()

    assert len(controller.scans[1]) == 1
    assert len(controller.scans[2]) == 3
    for ms2_scan in controller.scans[2]:
        assert ms2_scan.num_peaks > 0

    env.write_mzML(OUT_DIR, 'fixedScansController.mzML')
def run_experiment(param):
    '''
    Runs a Top-N experiment

    The simulation is skipped when both output files already exist. Otherwise
    the mzML is written to ``param['mzml_out']`` and the controller is saved
    to ``param['pickle_out']``.

    :param param: the experimental parameters, a dictionary providing
        ``analysis_name``, ``mzml_out``, ``pickle_out``, ``N``, ``rt_tol``,
        ``peak_sampler``, ``ionisation_mode``, ``data``, ``isolation_width``,
        ``mz_tol``, ``min_ms1_intensity``, ``min_rt``, ``max_rt`` and ``pbar``
        (plus ``mzml_path`` and ``fragfiles`` when ``peak_sampler`` is None)
    :return: the analysis name that has been successfully ran
    '''
    analysis_name = param['analysis_name']
    mzml_out = param['mzml_out']
    pickle_out = param['pickle_out']
    N = param['N']
    rt_tol = param['rt_tol']

    if os.path.isfile(mzml_out) and os.path.isfile(pickle_out):
        logger.debug('Skipping %s' % (analysis_name))
        return analysis_name

    logger.debug('Processing %s' % (analysis_name))

    peak_sampler = param['peak_sampler']
    if peak_sampler is None:
        # extract density from the fragmentation file
        mzml_path = param['mzml_path']
        fragfiles = param['fragfiles']
        fragfile = fragfiles[(N, rt_tol)]
        min_rt = param['min_rt']
        max_rt = param['max_rt']
        # NOTE(review): the sampler built here is not used by the code below;
        # the call is kept because it may have side effects or raise -- confirm
        # whether it can be removed.
        peak_sampler = get_peak_sampler(mzml_path, fragfile, min_rt, max_rt)

    mass_spec = IndependentMassSpectrometer(param['ionisation_mode'],
                                            param['data'])
    controller = TopNController(param['ionisation_mode'], param['N'],
                                param['isolation_width'], param['mz_tol'],
                                param['rt_tol'], param['min_ms1_intensity'])

    # create an environment to run both the mass spec and controller
    env = Environment(mass_spec, controller, param['min_rt'],
                      param['max_rt'], progress_bar=param['pbar'])

    # Keep logging at WARNING while the simulation runs; the finally clause
    # restores DEBUG even when env.run() raises.
    set_log_level_warning()
    try:
        env.run()
    finally:
        set_log_level_debug()

    env.write_mzML(None, mzml_out)
    save_obj(controller, pickle_out)
    return analysis_name
def test_targeted(self):
    """Check that every target gets ``n_replicates`` scans per collision energy."""
    formula_sampler = EvenMZFormulaSampler()
    rt_intensity_sampler = UniformRTAndIntensitySampler(min_rt=0, max_rt=10)
    chromatogram_sampler = ConstantChromatogramSampler()
    ms2_sampler = FixedMS2Sampler()
    mixture = ChemicalMixtureCreator(
        formula_sampler,
        rt_and_intensity_sampler=rt_intensity_sampler,
        chromatogram_sampler=chromatogram_sampler,
        ms2_sampler=ms2_sampler)
    dataset = mixture.sample(2, 2)  # sample chems with m/z = 100 and 200

    ionisation_mode = POSITIVE
    targets = [
        Target(101, 100, 102, 10, 20, adduct='M+H'),
        Target(201, 200, 202, 10, 20, metadata={'a': 1}),
    ]
    ce_values = [10, 20, 30]
    n_replicates = 4

    controller = TargetedController(targets, ce_values,
                                    n_replicates=n_replicates,
                                    limit_acquisition=True)
    mass_spec = IndependentMassSpectrometer(ionisation_mode, dataset)
    env = Environment(mass_spec, controller, 5, 25, progress_bar=True)
    set_log_level_warning()
    env.run()

    # check that we got all the scans we wanted
    for ms_level in controller.scans:
        assert len(controller.scans[ms_level]) > 0
    set_log_level_debug()

    # count the MS2 scans per (target, collision energy) pair
    target_counts = {t: {c: 0 for c in ce_values} for t in targets}
    for scan in controller.scans[2]:
        params = scan.scan_params
        pmz = params.get(ScanParameters.PRECURSOR_MZ)[0].precursor_mz
        # exactly one target window must contain the scan's RT and precursor m/z
        matching = [
            t for t in targets
            if (t.from_rt <= scan.rt <= t.to_rt)
            and (t.from_mz <= pmz <= t.to_mz)
        ]
        assert len(matching) == 1
        ce = params.get(ScanParameters.COLLISION_ENERGY)
        target_counts[matching[0]][ce] += 1

    for per_energy in target_counts.values():
        for count in per_energy.values():
            assert count == n_replicates
def top_n_box_experiment(datasets, base_chemicals, rt_range, boxes_params,
                         dataset_group_list, isolation_width, mz_tol,
                         min_ms1_intensity, min_roi_intensity, min_roi_length,
                         N, rt_tol, ionisation_mode=POSITIVE):
    """
    Simulate TopNBoxRoiController runs that share boxes across datasets.

    After each run its ROIs are added to a shared ``RoiAligner``; the boxes
    and max fragmentation intensities from the aligner are passed to the
    controller of the next run. The first run receives ``None`` for both.

    :param datasets: sequence of chemical collections, one simulated run each
    :param base_chemicals: forwarded to ``evaluate_multiple_simulated_env``
    :param rt_range: indexable whose items 0 and 1 are passed to the
        environment as its two RT arguments
    :param boxes_params: forwarded to the controller
    :param dataset_group_list: indexed like ``datasets``; passed to
        ``RoiAligner.add_sample`` together with the sample name
    :param isolation_width: forwarded to the controller
    :param mz_tol: forwarded to the controller
    :param min_ms1_intensity: forwarded to the controller
    :param min_roi_intensity: forwarded to the controller
    :param min_roi_length: forwarded to the controller
    :param N: forwarded to the controller as ``N``
    :param rt_tol: forwarded to the controller as ``rt_tol``
    :param ionisation_mode: used for both the mass spec and the controller
    :return: tuple ``(env_list, final_evaluation)``
    """
    env_list = []
    aligner = RoiAligner()
    boxes, boxes_intensity = None, None

    for index, chems in enumerate(datasets):
        mass_spec = IndependentMassSpectrometer(ionisation_mode, chems)
        controller = TopNBoxRoiController(ionisation_mode, isolation_width,
                                          mz_tol, min_ms1_intensity,
                                          min_roi_intensity, min_roi_length,
                                          boxes_params=boxes_params,
                                          boxes=boxes,
                                          boxes_intensity=boxes_intensity,
                                          N=N, rt_tol=rt_tol)
        env = Environment(mass_spec, controller, rt_range[0], rt_range[1],
                          progress_bar=True)
        env.run()
        env_list.append(env)

        # feed this run's ROIs into the aligner for the next controller
        rois = env.controller.live_roi + env.controller.dead_roi
        sample_name = 'sample_' + str(index)
        aligner.add_sample(rois, sample_name, dataset_group_list[index])
        boxes = aligner.get_boxes()
        boxes_intensity = aligner.get_max_frag_intensities()

    final_evaluation = evaluate_multiple_simulated_env(
        env_list, base_chemicals=base_chemicals)
    return env_list, final_evaluation
def test_default_scan_time(self, chems_from_mzml):
    """Run Top-N with DEFAULT_SCAN_TIME_DICT as the scan durations."""
    ionisation_mode = POSITIVE
    N, isolation_width = 10, 0.7
    mz_tol, rt_tol = 0.01, 15
    min_ms1_intensity = 10
    controller = TopNController(ionisation_mode, N, isolation_width, mz_tol,
                                rt_tol, min_ms1_intensity)

    # run simulation using default scan times
    mass_spec = IndependentMassSpectrometer(
        ionisation_mode, chems_from_mzml,
        scan_duration=DEFAULT_SCAN_TIME_DICT)
    env = Environment(mass_spec, controller, 500, 600, progress_bar=True)
    set_log_level_warning()
    env.run()

    check_mzML(env, OUT_DIR, 'test_scan_time_default.mzML')
def test_multiple_isolation(self):
    """Check that multi-precursor MS2 scans are the union of the single ones."""
    N = 3
    formula_sampler = EvenMZFormulaSampler()
    rt_intensity_sampler = UniformRTAndIntensitySampler(min_rt=0, max_rt=10)
    chromatogram_sampler = ConstantChromatogramSampler()
    ms2_sampler = FixedMS2Sampler()
    mixture = ChemicalMixtureCreator(
        formula_sampler,
        rt_and_intensity_sampler=rt_intensity_sampler,
        chromatogram_sampler=chromatogram_sampler,
        ms2_sampler=ms2_sampler)
    dataset = mixture.sample(3, 2)  # sample chems with m/z = 100 and 200

    controller = MultiIsolationController(N)
    mass_spec = IndependentMassSpectrometer(POSITIVE, dataset)
    env = Environment(mass_spec, controller, 10, 20, progress_bar=True)
    set_log_level_warning()
    env.run()

    assert len(controller.scans[1]) > 0
    assert len(controller.scans[2]) > 0

    # look at the first block of MS2 scans and check that they are the
    # correct super-positions: the first three scans hit the individual
    # precursors, the next three hit the pairs, the final one hits all three
    precursor_sets = [(0, ), (1, ), (2, ), (0, 1), (0, 2), (1, 2), (0, 1, 2)]
    mm = {
        combo: controller.scans[2][position]
        for position, combo in enumerate(precursor_sets)
    }

    for combo, combined_scan in mm.items():
        actual_mz_vals = set(combined_scan.mzs)
        expected_mz_vals = set()
        for precursor in combo:
            for mz in mm[(precursor, )].mzs:
                expected_mz_vals.add(mz)
        assert expected_mz_vals == actual_mz_vals
def non_overlap_experiment(datasets, base_chemicals, rt_range,
                           isolation_width, mz_tol, min_ms1_intensity,
                           min_roi_intensity, min_roi_length, N, rt_tol,
                           min_roi_length_for_fragmentation, rt_box_size,
                           mz_box_size, ionisation_mode=POSITIVE):
    """
    Simulate NonOverlapController runs that share one grid across datasets.

    :param datasets: sequence of chemical collections, one simulated run each
    :param base_chemicals: forwarded to ``evaluate_multiple_simulated_env``
    :param rt_range: indexable whose items 0 and 1 bound both the grid and
        each environment
    :param isolation_width: forwarded to the controller
    :param mz_tol: forwarded to the controller
    :param min_ms1_intensity: forwarded to the controller
    :param min_roi_intensity: forwarded to the controller
    :param min_roi_length: forwarded to the controller
    :param N: forwarded to the controller
    :param rt_tol: forwarded to the controller as ``rt_tol``
    :param min_roi_length_for_fragmentation: forwarded to the controller
    :param rt_box_size: passed to ``LocatorGrid`` after the RT bounds
    :param mz_box_size: passed to ``LocatorGrid`` after the 0 and 3000 arguments
    :param ionisation_mode: used for both the mass spec and the controller
    :return: tuple ``(env_list, final_evaluation)``
    """
    # A single grid estimator is built once and handed to every controller.
    locator = LocatorGrid(rt_range[0], rt_range[1], rt_box_size, 0, 3000,
                          mz_box_size)
    grid = GridEstimator(locator, IdentityDrift())

    env_list = []
    for chems in datasets:
        mass_spec = IndependentMassSpectrometer(ionisation_mode, chems)
        controller = NonOverlapController(
            ionisation_mode, isolation_width, mz_tol, min_ms1_intensity,
            min_roi_intensity, min_roi_length, N, grid,
            rt_tol=rt_tol,
            min_roi_length_for_fragmentation=min_roi_length_for_fragmentation)
        env = Environment(mass_spec, controller, rt_range[0], rt_range[1],
                          progress_bar=True)
        env.run()
        env_list.append(env)

    final_evaluation = evaluate_multiple_simulated_env(
        env_list, base_chemicals=base_chemicals)
    return env_list, final_evaluation