Exemplo n.º 1
0
    def test_mean_scan_time_from_mzml(self):
        """Run a Top-N simulation with scan durations sampled from an mzML.

        Both the chemicals and the scan timings are derived from
        ``MZML_FILE``. After the run, the environment is handed to
        ``check_mzML`` together with the output directory and file name.
        """
        # Top-N controller settings
        polarity = POSITIVE
        top_n = 10
        iso_width = 0.7
        mz_tolerance = 0.01
        rt_tolerance = 15
        ms1_threshold = 10
        topn_controller = TopNController(polarity, top_n, iso_width,
                                         mz_tolerance, rt_tolerance,
                                         ms1_threshold)

        # chemicals are extracted from the mzML file
        mixture = ChemicalMixtureFromMZML(
            MZML_FILE, roi_params=RoiParams(min_intensity=10, min_length=5))
        chemicals = mixture.sample(None, 2)

        # scan durations use the mean timing per scan level from the mzML
        scan_timer = MzMLScanTimeSampler(MZML_FILE, use_mean=True)
        mass_spec = IndependentMassSpectrometer(polarity, chemicals,
                                                scan_duration=scan_timer)

        # simulate, then pass the finished environment to the checker
        environment = Environment(mass_spec, topn_controller, 500, 600,
                                  progress_bar=True)
        set_log_level_warning()
        environment.run()
        check_mzML(environment, OUT_DIR,
                   'test_scan_time_mean_from_mzml.mzML')
Exemplo n.º 2
0
def get_chemicals(mzML_file,
                  mz_tol,
                  min_ms1_intensity,
                  start_rt,
                  stop_rt,
                  min_length=1):
    """
    Build chemicals from the ROIs found in an mzML file.

    ROIs are extracted with a minimum intensity of 0. Only those having at
    least one intensity value strictly above ``min_ms1_intensity`` are
    handed to ``RoiToChemicalCreator``.

    :param mzML_file: input mzML file
    :param mz_tol: mz tolerance for ROI extraction
    :param min_ms1_intensity: an ROI is kept only if it has at least one
    point above this threshold
    :param start_rt: start RT to extract ROI
    :param stop_rt: end RT to extract ROI
    :param min_length: ``min_length`` setting forwarded to ``RoiParams``
    :return: a numpy array built from the creator's ``chemicals``
    (UnknownChemical objects, per the original documentation)
    """
    extraction_params = RoiParams(mz_tol=mz_tol,
                                  min_length=min_length,
                                  min_intensity=0,
                                  start_rt=start_rt,
                                  stop_rt=stop_rt)
    candidate_rois = make_roi(mzML_file, extraction_params)

    # retain ROIs with at least one point above the fragmentation threshold
    selected = [
        candidate for candidate in candidate_rois
        if np.any(np.array(candidate.intensity_list) > min_ms1_intensity)
    ]

    # the first argument is an old, unused experimental option
    creator = RoiToChemicalCreator(None, selected)
    return np.array(creator.chemicals)
Exemplo n.º 3
0
    def __init__(self,
                 sequence_manager,
                 controller_method,
                 mass_spec_param_dict,
                 dataset_file,
                 variable_params_dict,
                 base_params_dict,
                 mzml_file=None,
                 roi_params=RoiParams(min_intensity=10, min_length=5),
                 ps=None,
                 parallel=True):
        """
        Store the configuration, make sure a dataset file exists, and build
        the controller schedule before initialising the parent class.

        When ``dataset_file`` is None, chemicals are sampled from
        ``mzml_file``, saved under ``sequence_manager.base_dir`` as
        ``<mzml stem>.p``, and that path becomes ``self.dataset_file``. In
        that same case, if the sequence manager has no
        ``ms1_picked_peaks_file`` but has at least one evaluation method,
        peaks are picked from the mzML file.

        :param sequence_manager: sequence manager; receives the generated
        controller schedule and is passed to the parent initialiser
        :param controller_method: stored on the instance
        :param mass_spec_param_dict: stored on the instance
        :param dataset_file: path of an existing dataset, or None to create
        one from ``mzml_file``
        :param variable_params_dict: stored on the instance
        :param base_params_dict: stored on the instance
        :param mzml_file: mzML file used when ``dataset_file`` is None
        :param roi_params: ROI settings, used only when the dataset is
        created from ``mzml_file``
        :param ps: forwarded to the parent initialiser
        :param parallel: stored on the instance and forwarded to the parent
        initialiser
        """
        # NOTE: the default ``roi_params`` object is created once, when the
        # function is defined, and is shared by every call that omits it.
        self.sequence_manager = sequence_manager
        self.controller_method = controller_method
        self.mass_spec_param_dict = mass_spec_param_dict
        self.variable_params_dict = variable_params_dict
        self.base_params_dict = base_params_dict
        self.parallel = parallel
        self.mzml_file = mzml_file
        self.dataset_file = dataset_file

        if dataset_file is None:
            # no dataset was supplied, so derive one from the mzML file
            mixture = ChemicalMixtureFromMZML(mzml_file,
                                              roi_params=roi_params)
            sampled = mixture.sample(None, 2)
            pickle_path = os.path.join(sequence_manager.base_dir,
                                       Path(mzml_file).stem + '.p')
            save_obj(sampled, pickle_path)
            self.dataset_file = pickle_path

            needs_peak_picking = (
                sequence_manager.ms1_picked_peaks_file is None
                and len(sequence_manager.evaluation_methods) > 0)
            if needs_peak_picking:
                picked = sequence_manager.pick_peaks(mzml_file, None, 1)
                sequence_manager.ms1_picked_peaks_file = picked

        schedule = self._generate_controller_schedule()
        sequence_manager.controller_schedule = schedule
        super().__init__(sequence_manager, self.parallel, ps=ps)
0
    def __init__(self, mzml_file_name, roi_params=None):
        """
        Store the mzML file name and ROI settings, then extract the ROIs.

        :param mzml_file_name: name of the mzML file; stored as given
        :param roi_params: ROI settings; a default-constructed ``RoiParams``
        is used when this is None
        """
        self.mzml_file_name = mzml_file_name
        self.roi_params = RoiParams() if roi_params is None else roi_params
        self.good_rois = self._extract_rois()
Exemplo n.º 5
0
def get_rois(mzml, min_roi_length, mzml2chems_dict=QCB_MZML2CHEMS_DICT):
    """
    Extract ROIs from an mzML file.

    :param mzml: the mzML file passed to ``make_roi``
    :param min_roi_length: value used for the ``min_length`` ROI setting
    :param mzml2chems_dict: dictionary providing the 'mz_tol', 'mz_units',
    'min_intensity', 'start_rt' and 'stop_rt' settings
    :return: whatever ``make_roi`` returns for these settings
    """
    settings = {
        'mz_tol': mzml2chems_dict['mz_tol'],
        'mz_units': mzml2chems_dict['mz_units'],
        'min_length': min_roi_length,
        'min_intensity': mzml2chems_dict['min_intensity'],
        'start_rt': mzml2chems_dict['start_rt'],
        'stop_rt': mzml2chems_dict['stop_rt'],
    }
    return make_roi(mzml, RoiParams(**settings))
Exemplo n.º 6
0
    def __init__(self, mzml_file_name, ms2_sampler=UniformMS2Sampler(),
                 roi_params=None):
        """
        Store the inputs and extract ROIs, requiring at least one of them.

        :param mzml_file_name: name of the mzML file; stored as given
        :param ms2_sampler: MS2 sampler stored on the instance
        :param roi_params: ROI settings; a default-constructed ``RoiParams``
        is used when this is None
        """
        # NOTE: the default ``ms2_sampler`` object is created once, when the
        # function is defined, and is shared by every call that omits it.
        self.mzml_file_name = mzml_file_name
        self.ms2_sampler = ms2_sampler
        self.roi_params = RoiParams() if roi_params is None else roi_params

        extracted = self._extract_rois()
        self.good_rois = extracted
        assert len(extracted) > 0
Exemplo n.º 7
0
 def __init__(self, mzml_file_name, n_intensity_bins=10, min_rt=0,
              max_rt=1600, min_log_intensity=np.log(1e4),
              max_log_intensity=np.log(1e7), roi_params=None):
     """
     Store the settings and then call ``_get_distributions``.

     :param mzml_file_name: name of the mzML file; stored as given
     :param n_intensity_bins: stored on the instance
     :param min_rt: stored on the instance
     :param max_rt: stored on the instance
     :param min_log_intensity: stored on the instance
     :param max_log_intensity: stored on the instance
     :param roi_params: ROI settings; a default-constructed ``RoiParams``
     is used when this is None
     """
     self.mzml_file_name = mzml_file_name
     self.n_intensity_bins = n_intensity_bins
     self.min_rt, self.max_rt = min_rt, max_rt
     self.min_log_intensity = min_log_intensity
     self.max_log_intensity = max_log_intensity
     self.roi_params = RoiParams() if roi_params is None else roi_params

     # every attribute is in place before the distributions are computed
     self._get_distributions()
Exemplo n.º 8
0
def extract_chemicals(seed_file, params_dict):
    """
    Log the inputs, then sample chemicals from a seed file.

    :param seed_file: the seed file in mzML format, should be a DDA file
    (containing MS1 and MS2 scans)
    :param params_dict: a dictionary of keyword arguments used to build the
    ``RoiParams`` for ROI extraction
    :return: the result of ``ChemicalMixtureFromMZML.sample(None, 2)``
    (UnknownChemical objects, per the original documentation)
    """
    # Messages are formatted before the call on purpose: the formatting
    # style of ``logger`` is not visible from here.
    seed_message = 'Seed file = %s' % seed_file
    params_message = 'params = %s' % params_dict
    logger.info(seed_message)
    logger.info(params_message)

    mixture = ChemicalMixtureFromMZML(seed_file,
                                      roi_params=RoiParams(**params_dict))
    return mixture.sample(None, 2)
Exemplo n.º 9
0
def extract_roi(file_names,
                out_dir,
                pattern,
                mzml_path,
                param_dict=DEFAULT_MZML_CHEMICAL_CREATOR_PARAMS):
    """
    Extract ROI for all mzML files listed in file_names, and turn them
    into Chemical objects.

    :param file_names: an iterable of mzML file names
    :param out_dir: output directory to store the saved chemicals. If None,
    each dataset is saved next to its mzML file, with the file extension
    replaced by '.p'
    :param pattern: %-style pattern for the output file name, used only
    when out_dir is not None. It is formatted with the integer found in the
    third '_'-separated field of the file's base name, so that field must
    exist and parse as an int
    :param mzml_path: input directory containing all the mzML files in
    file_names. If None, the file names are used as given
    :param param_dict: dictionary of keyword arguments for ``RoiParams``
    :return: a list of extracted Chemicals, one for each mzML file
    """
    datasets = []
    # iterate the names directly so that any iterable is accepted
    for file_name in file_names:

        # if mzml_path is provided, use that as the front part of filename
        if mzml_path is not None:
            mzml_file = os.path.join(mzml_path, file_name)
        else:
            mzml_file = file_name

        # a fresh RoiParams is built for every file
        roi_params = RoiParams(**param_dict)
        mixture = ChemicalMixtureFromMZML(mzml_file, roi_params=roi_params)
        dataset = mixture.sample(None, 2)
        datasets.append(dataset)

        # save extracted chemicals
        if out_dir is None:
            # if no out_dir provided, then save in the same location
            # as the mzML file
            dataset_name = os.path.splitext(mzml_file)[0] + '.p'
            save_obj(dataset, dataset_name)
        else:
            # else save the chemicals in out_dir, using pattern as the
            # filename
            basename = os.path.basename(file_name)
            out_name = pattern % int(basename.split('_')[2])
            save_obj(dataset, os.path.join(out_dir, out_name))

    return datasets
Exemplo n.º 10
0
 def __init__(self,
              sequence_manager,
              parallel=True,
              mzml_file_list=None,
              roi_params=RoiParams(min_intensity=10, min_length=5),
              ps=None):
     """
     Store the settings, complete the sequence manager and initialise the
     parent class with it.

     Default controller parameters are added to the sequence manager
     first. Dataset files are then added from ``mzml_file_list``, but only
     when that list is given and every 'Dataset' entry of the controller
     schedule compares equal to None.

     :param sequence_manager: sequence manager to complete and pass to the
     parent initialiser
     :param parallel: stored on the instance
     :param mzml_file_list: optional list of mzML files used to add dataset
     files
     :param roi_params: ROI settings stored on the instance
     :param ps: stored on the instance
     """
     # NOTE: the default ``roi_params`` object is created once, when the
     # function is defined, and is shared by every call that omits it.
     self.ps = ps
     self.roi_params = roi_params
     self.parallel = parallel

     sequence_manager = self.add_defaults_controller_params(
         sequence_manager)

     if mzml_file_list is not None:
         scheduled = np.array(
             sequence_manager.controller_schedule['Dataset'])
         # `== None` rather than `is None`: numpy compares entry by entry
         if all(scheduled == None):  # noqa: E711
             sequence_manager = self.add_dataset_files(sequence_manager,
                                                       mzml_file_list)

     super().__init__(sequence_manager)
Exemplo n.º 11
0
 def test_chemical_mixture_from_mzml(self):
     """Sampling from the mixture yields one item per good ROI."""
     mixture = ChemicalMixtureFromMZML(
         MZML_FILE, roi_params=RoiParams(min_intensity=10, min_length=5))
     sampled = mixture.sample(None, 2)
     assert len(sampled) == len(mixture.good_rois)
Exemplo n.º 12
0
def chems_from_mzml():
    """
    Seed ``np.random`` and ``rand`` with 0, then sample chemicals from
    ``MZML_FILE``.

    :return: the result of ``ChemicalMixtureFromMZML.sample(None, 2)``
    """
    # fixed seeds, numpy first and then ``rand``
    for seed_with in (np.random.seed, rand.seed):
        seed_with(0)

    mixture = ChemicalMixtureFromMZML(
        MZML_FILE, roi_params=RoiParams(min_intensity=10, min_length=5))
    return mixture.sample(None, 2)