Пример #1
0
def reader(path: str, on_disc: bool = True):
    """
    Open the mzML file at `path` with pyopenms.

    With `on_disc` enabled the file is opened through an
    OnDiscMSExperiment; if that raises a RuntimeError a notice is printed
    and the file is loaded into an in-memory MSExperiment instead.

    Parameters
    ----------
    path : str
        Path of the mzML file to read.
    on_disc : bool
        If True, try the on-disc reader first; if False, load the whole
        file into an MSExperiment straight away.

    Returns
    -------
    pyopenms.OnDiscMSExperiment or pyopenms.MSExperiment
    """
    if on_disc:
        try:
            disc_exp = pyopenms.OnDiscMSExperiment()
            disc_exp.openFile(path)
            return disc_exp
        except RuntimeError:
            notice = "{} is not an indexed mzML file, switching to MSExperiment"
            print(notice.format(path))
    # Reached when on_disc is False or the on-disc attempt raised.
    memory_exp = pyopenms.MSExperiment()
    pyopenms.MzMLFile().load(path, memory_exp)
    return memory_exp
Пример #2
0
    def main(self):
        """Pick peaks in every source file and store the resulting maps.

        For each file name produced by ``get_list_full_names(self.src)`` the
        entity is re-initialised via ``self.init_entity``, the mzML file is
        loaded, passed through ``self.pp.entity.pickExperiment`` and written
        to a destination path below ``self.dst``.
        """
        print("Peak Picking implementation")

        for src_file in get_list_full_names(self.src):
            # Re-initialise the entity before handling each file.
            self.init_entity(**self.kw)

            print("source file:", src_file)

            # Step 1: load the input map.
            profile_map = oms.MSExperiment()
            oms.MzMLFile().load(src_file, profile_map)

            # Step 2: pick peaks into a new, empty experiment.
            picked_map = oms.MSExperiment()
            self.pp.entity.pickExperiment(profile_map, picked_map)
            picked_map.updateRanges()

            # Step 3: derive the destination file name and store the result.
            dst_name = convert_src_to_dst_file_name(
                src_file, self.dst, self.suffix_dst_files,
                self.ext_dst_files)
            dst_full_file_name = os.path.join(self.dst, dst_name)
            oms.MzMLFile().store(dst_full_file_name, picked_map)

            print("Picked data stored into:", dst_full_file_name)
Пример #3
0
def maxq(ctx, filename, zipurl, rawname):
    """Calculate all possible metrics for these files. These data sources will be included in set metrics."""
    # NOTE(review): the docstring is kept verbatim because this looks like a
    # click command, where it would double as the --help text -- confirm.
    exp = oms.MSExperiment()
    oms.MzMLFile().load(click.format_filename(filename), exp)
    rq = basicqc.getBasicQuality(exp)

    # Pick the MS2 spectrum count out of the basic metrics; if the metric is
    # listed more than once, the last occurrence wins.
    ms2num = 0
    for metric in rq.qualityMetrics:
        if metric.name == "Number of MS2 spectra":
            ms2num = metric.value

    if ms2num < 1:
        # Adjacent literals instead of backslash continuations, so the
        # source indentation no longer ends up inside the logged message.
        logging.warning(
            "We seem to have found no MS2 spectra which is unlikely to be "
            "true since you have also given some identifications. "
            "We continue with symbolic value of 1 for the number of MS2 "
            "spectra, however this means some metrics will invariably be "
            "incorrect! Please make sure, we have the right inputs.")
        ms2num = 1

    try:
        mq, params = idqcmq.loadMQZippedResults(zipurl)
        if not rawname:
            logging.warning("Infering rawname from mzML")
            rawname = basename(
                exp.getExperimentalSettings().getSourceFiles()
                [0].getNameOfFile().decode())  # TODO split extensions

        rq.qualityMetrics.extend(
            idqcmq.getMQMetrics(rawname, params, mq, ms2num))
        rqs.append(rq)
    except Exception:
        # Still best effort, but KeyboardInterrupt/SystemExit now propagate
        # and the traceback is logged instead of being discarded.
        logging.warning("Retrieving any results from the URL failed.",
                        exc_info=True)

    finale()
Пример #4
0
def test0():
    """Stream test2.mzML through a consumer object and check what it saw.

    The consumer records the MS level and retention time of every spectrum
    handed to it by ``MzMLFile.transform``; the assertions then check the
    per-level spectrum counts and the smallest / largest retention time.
    """
    here = os.path.dirname(os.path.abspath(__file__))
    path = os.path.join(here, "test2.mzML").encode()

    class Consumer(object):
        """Collects MS levels and retention times of consumed spectra."""

        def __init__(self):
            self.speclevels = []
            self.rts = []

        def consumeSpectrum(self, spec):
            self.speclevels.append(spec.getMSLevel())
            self.rts.append(spec.getRT())

        def consumeChromatogram(self, chromo):
            raise Exception(
                "should never be called as we have no chromoatograms in example file"
            )

        def setExpectedSize(self, num_specs, num_chromo):
            # The example file is expected to announce 5 spectra and no
            # chromatograms.
            assert num_specs == 5, num_specs
            assert num_chromo == 0, num_chromo

        def setExperimentalSettings(self, exp):
            assert isinstance(exp, pyopenms.ExperimentalSettings)

    recorder = Consumer()
    pyopenms.MzMLFile().transform(path, recorder)

    level_counts = Counter(recorder.speclevels)
    assert set(level_counts) == {1, 2}
    assert level_counts[1] == 2
    assert level_counts[2] == 3

    earliest, latest = min(recorder.rts), max(recorder.rts)
    assert abs(earliest - 4200.76) < 0.01
    assert abs(latest - 4202.03) < 0.01
Пример #5
0
    def test_extractor(self):
        """Extract chromatograms for the loaded transitions and sanity-check them.

        Only checks that data comes back non-empty (i.e. that it is relayed
        to Python correctly); the extraction functionality itself is not
        tested here.
        """
        targeted = pyopenms.TargetedExperiment()
        pyopenms.TraMLFile().load(self.filename, targeted)

        exp = pyopenms.MSExperiment()
        pyopenms.MzMLFile().load(self.filename_mzml, exp)

        trafo = pyopenms.TransformationDescription()

        tmp_out = pyopenms.MSExperiment()
        pyopenms.ChromatogramExtractor().extractChromatograms(
            exp, tmp_out, targeted, 10, False, trafo, -1, "tophat")

        chromatograms = tmp_out.getChromatograms()
        self.assertEqual(len(chromatograms), len(targeted.getTransitions()))
        self.assertNotEqual(len(chromatograms), 0)

        # Only index into the result once the length checks have passed.
        first_chrom = chromatograms[0]
        self.assertEqual(first_chrom.size(), exp.size())
        self.assertNotEqual(first_chrom.size(), 0)

        first_point = first_chrom[0]
        self.assertNotEqual(first_point.getRT(), 0)
        self.assertNotEqual(first_point.getIntensity(), 0)
Пример #6
0
 def __get_mzml(self, file):
     """Write the uploaded bytes to a temp file and produce mzML where needed.

     The bytes in ``file.bcore`` are stored through ``store_byte_in_tmp``
     with a suffix chosen from ``self.ext``:

     * ``'raw'``   -> ``.RAW``, then the msconvert command is built and run;
     * ``'mzml'``  -> ``.mzML``, nothing further happens;
     * ``'mzxml'`` -> ``.mzXML``, then the file is loaded with pyopenms and
       stored again as mzML at the path from ``__get_mzml_path``.

     Any other extension leaves the object untouched.
     """
     b_content = file.bcore
     ext = self.ext

     if ext == 'raw':
         tmp_suffix = '.RAW'
     elif ext == 'mzml':
         tmp_suffix = '.mzML'
     elif ext == 'mzxml':
         tmp_suffix = '.mzXML'
     else:
         return

     self.tf = store_byte_in_tmp(
         b_content,
         prefix=self.fname,
         suffix=tmp_suffix,
         directory=self.target_dir.absolute().as_posix()
     )

     if ext == 'raw':
         self.cmd_msconvert = self.__build_cmd_msconvert()
         self.__run_cmd()
     elif ext == 'mzxml':
         experiment = pyopenms.MSExperiment()
         pyopenms.MzXMLFile().load(self.tf.name, experiment)
         mzml_target = self.__get_mzml_path().absolute().as_posix()
         pyopenms.MzMLFile().store(mzml_target, experiment)
Пример #7
0
    def compute_bin_im(self, run: int, bin: int, dir: str = '.') -> float:
        """Computes the intensity-weighted average IM value for a given bin.

        The bin is read from ``<dir>/b-<run>-<bin>.mzML``. Every point
        returned by ``util.get_spectrum_points`` contributes ``point[3]``
        weighted by ``point[2]`` divided by the sum of all ``point[2]``.

        Keyword arguments:
        run: the pass that the bin is in (1 or 2)
        bin: the bin to compute the average IM for
        dir: the directory to write and read temporary files to

        Returns: the intensity-weighted average IM value for a given bin,
        or 0 when the summed weight is 0.
        """
        bin_path = dir + '/b-' + str(run) + '-' + str(bin) + '.mzML'
        exp = ms.MSExperiment()
        ms.MzMLFile().load(bin_path, exp)

        points = []
        for index in range(exp.getNrSpectra()):
            points += util.get_spectrum_points(exp.getSpectrum(index))

        # Explicit accumulation keeps the original left-to-right summation
        # order of the floating point values.
        total_intensity = 0
        for point in points:
            total_intensity += point[2]

        if total_intensity == 0:
            return 0

        average_im = 0
        for point in points:
            average_im += point[3] * (point[2] / total_intensity)
        return average_im
Пример #8
0
def _write_spectra_mzml(filename: str, spectra: Iterable[sus.MsmsSpectrum]) \
        -> None:
    """
    Store the given spectra as MS level 2 spectra in an mzML file.

    Each spectrum becomes a pyopenms MSSpectrum with its identifier as the
    native ID, a single precursor (m/z and charge) and its peaks. The
    optional attributes ``retention_time``, ``filename``, ``scan`` and
    ``cluster`` are copied over when the spectrum has them.

    Parameters
    ----------
    filename : str
        The mzML file name where the spectra will be written.
    spectra : Iterable[sus.MsmsSpectrum]
        The spectra to be written to the mzML file.
    """
    experiment = pyopenms.MSExperiment()
    progress = tqdm.tqdm(spectra, desc='Spectra written', unit='spectra')
    for source in progress:
        target = pyopenms.MSSpectrum()
        target.setMSLevel(2)
        target.setNativeID(source.identifier)

        precursor = pyopenms.Precursor()
        precursor.setMZ(source.precursor_mz)
        precursor.setCharge(source.precursor_charge)
        target.setPrecursors([precursor])

        target.set_peaks([source.mz, source.intensity])

        if hasattr(source, 'retention_time'):
            target.setRT(source.retention_time)
        if hasattr(source, 'filename'):
            # The file name is encoded as-is; the two values below are
            # converted to str first.
            target.setMetaValue('filename', str.encode(source.filename))
        for meta_key in ('scan', 'cluster'):
            if hasattr(source, meta_key):
                target.setMetaValue(
                    meta_key, str.encode(str(getattr(source, meta_key))))

        experiment.addSpectrum(target)

    pyopenms.MzMLFile().store(filename, experiment)
Пример #9
0
    def main(self):
        """Run the three-stage feature finding chain on every source file.

        For each file name from ``get_list_full_names(self.src)`` the entity
        is re-initialised, the peak map is loaded and handed to
        ``self.mtd``, ``self.epd`` and ``self.ffm`` in turn; the resulting
        feature map is stored as featureXML below ``self.dst``.
        """
        print("FeatureFindingMetabo implementation")

        for src_file in get_list_full_names(self.src):
            print("Source file:", src_file)

            # Re-initialise the entity before handling each file.
            self.init_entity(**self.kw)

            # Step 1: load the peak map; the feature map starts out empty.
            peak_map = oms.PeakMap()
            feature_map = oms.FeatureMap()
            oms.MzMLFile().load(src_file, peak_map)

            # Step 2: run the chain, each stage feeding the next.
            self.mtd.entity.run(peak_map, self.output_mt)
            self.epd.entity.detectPeaks(self.output_mt, self.splitted_mt)
            self.ffm.entity.run(self.splitted_mt, feature_map,
                                self.filtered_mt)

            # Step 3: derive the destination file name and store the result.
            dst_name = convert_src_to_dst_file_name(
                src_file, self.dst, self.suffix_dst_files,
                self.ext_dst_files)
            dst_full_file_name = os.path.join(self.dst, dst_name)
            oms.FeatureXMLFile().store(dst_full_file_name, feature_map)

            print("Centroided data stored into:", dst_full_file_name)
Пример #10
0
def main(options):
    """Load chromatograms and transitions, run `algorithm`, store the result.

    Reads ``precursor_tolerance``, ``product_tolerance``, ``outfile``,
    ``infile`` and ``traml_in`` from `options`. The chromatogram file is
    loaded through ``pyopenms.FileHandler`` and the TraML file through
    ``pyopenms.TraMLFile``; whatever ``algorithm`` returns is written to
    ``outfile`` as mzML.
    """
    # Read every option up front so a missing attribute fails before any
    # file is touched.
    precursor_tolerance = options.precursor_tolerance
    product_tolerance = options.product_tolerance
    out = options.outfile
    chromat_in = options.infile
    traml_in = options.traml_in

    chromatogram_map = pyopenms.MSExperiment()
    pyopenms.FileHandler().loadExperiment(chromat_in, chromatogram_map)

    targeted = pyopenms.TargetedExperiment()
    pyopenms.TraMLFile().load(traml_in, targeted)

    output = algorithm(chromatogram_map, targeted, precursor_tolerance,
                       product_tolerance)

    pyopenms.MzMLFile().store(out, output)
Пример #11
0
def basic(filename):
    """Calculate the basic metrics available from virtually every mzML file."""
    # Load the file, compute its basic quality record, queue the record in
    # `rqs` and hand over to `finale`.
    mzml_path = click.format_filename(filename)
    experiment = oms.MSExperiment()
    oms.MzMLFile().load(mzml_path, experiment)
    rqs.append(basicqc.getBasicQuality(experiment))

    finale()
Пример #12
0
    def open_profile_mzml(self):
        """Load the mzML file named by ``self.profile_mzml`` into ``self.profile_map``."""
        message = 'Loading profile data from `%s`.' % self.profile_mzml
        self._log(message)

        # The map belongs to the peak picking step, so it is created here
        # rather than in __init__.
        self.profile_map = oms.MSExperiment()
        mzml_reader = oms.MzMLFile()
        mzml_reader.load(self.profile_mzml, self.profile_map)
Пример #13
0
def read_mzml(infile):
    """Load the mzML file `infile` and return it as an ``oms.MSExperiment``."""
    experiment = oms.MSExperiment()
    oms.MzMLFile().load(infile, experiment)
    return experiment
Пример #14
0
def main():
    """Example pyTOPP tool: peak-pick an mzML file with PeakPickerHiRes.

    Builds a CTD model with one input and one output mzML argument, exposes
    the PeakPickerHiRes default parameters as command line options, parses
    ``sys.argv`` and then loads, picks, annotates and stores the data.
    """
    # Describe the tool itself.
    model = CTDModel(
        name='NameOfThePyTOPPTool',  # required
        version='1.0',  # required
        description=
        'This is an example tool how to write pyTOPP tools compatible with the OpenMS workflow ecosystem.',
        manual='RTF',
        docurl='http://dummy.url/docurl.html',
        category='Example',
        executableName='exampletool',
        executablePath='/path/to/exec/exampletool-1.0/exampletool')

    # Register the input and output arguments; both are required single
    # files restricted to the mzML format.
    file_arguments = (
        ('input', 'input-file', 'Input file'),
        ('output', 'output-file', 'Output file'),
    )
    for arg_name, arg_type, arg_help in file_arguments:
        model.add(
            arg_name,
            required=True,
            type=arg_type,
            is_list=False,
            file_formats=['mzML'],  # filename restrictions
            description=arg_help)

    # Expose the algorithm parameters as command line options.
    defaults = pms.PeakPickerHiRes().getDefaults()
    addParamToCTDopts(defaults, model)

    # Parse the command line:
    # if -write_ini is provided, store model in CTD file, exit with error code 0
    # if -ini is provided, load CTD file into defaults Param object and
    # return new model with parameters set as defaults
    arg_dict, openms_params = parseCTDCommandLine(sys.argv, model, defaults)

    # Load the input map.
    mzml_reader = pms.MzMLFile()
    mzml_reader.setLogType(pms.LogType.CMD)
    input_map = pms.MSExperiment()
    mzml_reader.load(arg_dict["input"], input_map)

    # Pick peaks with the parsed parameters.
    picker = pms.PeakPickerHiRes()
    picker.setParameters(openms_params)
    picked_map = pms.MSExperiment()
    picker.pickExperiment(input_map, picked_map)

    # Record the processing step and write the result.
    picked_map = addDataProcessing(
        picked_map, openms_params,
        pms.DataProcessing.ProcessingAction.PEAK_PICKING)
    pms.FileHandler().storeExperiment(arg_dict["output"], picked_map)
Пример #15
0
    def test_acquisitioninfomemberaccess(self):
        """Exercise AcquisitionInfo / Acquisition access through the bindings.

        Loads ``self.filename_mzml`` and checks element access, meta values,
        writing modified entries back into the experiment, push_back,
        iteration and resize.
        """
        exp = pyopenms.MSExperiment()
        pyopenms.MzMLFile().load(self.filename_mzml, exp)

        # Basically test that the output is non-zero (e.g. the data is
        # correctly relayed to python)
        # The functionality is not tested here!

        # starting point
        self.assertEqual(exp[0].getAcquisitionInfo().size(), 1)
        self.assertNotEqual(exp[0].getAcquisitionInfo().size(), 0)

        # metainfo
        exp[0].getAcquisitionInfo().size()  # is 1
        self.assertEqual(exp[0].getAcquisitionInfo()[0].isMetaEmpty(),
                         True)  # is True

        # Modify one entry and write it back level by level (entry ->
        # acquisition info -> spectrum -> experiment); the assertion below
        # reads the value through a fresh exp[0] lookup.
        spectra = exp.getSpectra()
        aqis = spectra[0].getAcquisitionInfo()
        aqi = aqis[0]  # get a copy
        aqi.setMetaValue('key', 420)  # modify it
        aqis[0] = aqi  # and set entry
        spectra[0].setAcquisitionInfo(aqis)
        exp.setSpectra(spectra)
        self.assertEqual(exp[0].getAcquisitionInfo()[0].getMetaValue('key'),
                         420)  # should be 420

        # A standalone Acquisition carrying one meta value.
        acin = pyopenms.Acquisition()
        acin.setMetaValue('key', 42)
        self.assertEqual(acin.getMetaValue('key'), 42)  # is 42
        self.assertEqual(acin.isMetaEmpty(), False)  # is False

        # list/vector assignment
        magicnumber = 3
        neac = pyopenms.AcquisitionInfo()
        for i in range(0, magicnumber):
            neac.push_back(acin)

        self.assertEqual(neac.size(), magicnumber)  # is magicnumber

        # iteration
        for i in neac:
            self.assertEqual(i.isMetaEmpty(), False)  # always is False

        # accession already tested in 2nd section
        # Replace the first spectrum's acquisition info with the new one and
        # check that it can be read back from the experiment.
        tmp = exp.getSpectra()
        tmp[0].setAcquisitionInfo(neac)
        exp.setSpectra(tmp)

        self.assertEqual(exp[0].getAcquisitionInfo().size(),
                         magicnumber)  # should be magicnumber

        for i in exp[0].getAcquisitionInfo():
            self.assertEqual(i.isMetaEmpty(), False)  # should always be False

        # resize
        neac.resize(0)
        self.assertEqual(neac.size(), 0)
    def test_readfile_content(self):
        """Extract three chromatograms and check that data comes back.

        Only checks that the output is non-zero (i.e. the data is relayed to
        Python correctly); the extraction functionality is not tested here.
        """
        exp = pyopenms.MSExperiment()
        pyopenms.MzMLFile().load(self.filename, exp)
        exp_size = exp.size()
        saccess = pyopenms.SpectrumAccessOpenMS(exp)

        # Fields of an extraction coordinate:
        #   mz        -- mz around which should be extracted
        #   rt_start  -- rt start of extraction (in seconds)
        #   rt_end    -- rt end of extraction (in seconds)
        #   id        -- identifier
        targeted = []
        for target_mz, target_id in ((618.31, b"tr3"),
                                     (628.45, b"tr1"),
                                     (654.38, b"tr2")):
            coord = pyopenms.ExtractionCoordinates()
            coord.mz = target_mz
            coord.rt_start = 4000
            coord.rt_end = 5000
            coord.id = target_id
            targeted.append(coord)

        trafo = pyopenms.TransformationDescription()

        # Start with length zero
        tmp_out = [pyopenms.OSChromatogram() for _ in targeted]
        self.assertEqual(len(tmp_out[0].getIntensityArray()), 0)

        mz_extraction_window = 10.0
        ppm = False
        extractor = pyopenms.ChromatogramExtractorAlgorithm()
        extractor.extractChromatograms(saccess, tmp_out, targeted,
                                       mz_extraction_window, ppm, -1.0,
                                       b"tophat")

        self.assertEqual(len(tmp_out), len(targeted))
        self.assertNotEqual(len(tmp_out), 0)

        # End with different length. The first chromatogram is looked up
        # again here, after the extraction call, rather than held across it.
        first_chrom = tmp_out[0]
        intensities = first_chrom.getIntensityArray()
        self.assertEqual(len(intensities), exp_size)
        self.assertNotEqual(len(intensities), 0)
        times = first_chrom.getTimeArray()
        self.assertNotEqual(len(times), 0)
        self.assertNotEqual(intensities[0], 0)
        self.assertNotEqual(times[0], 0)
Пример #17
0
 def convert(self):
     """
     Generates MGF format MS2 spectra and writes them into the output file.

     Loads ``self.mzml_file`` with pyopenms and writes ``self.mgf_file``:
     a fixed header followed by one ``BEGIN IONS`` / ``END IONS`` section
     for every spectrum whose MS level is not 1. A spectrum without a
     precursor gets ``PEPMASS=unknown``; the charge line is only written
     for charges above zero. If nothing but MS1 spectra is found, a message
     is passed to ``self._log``.
     """

     msdata = oms.MSExperiment()
     oms.MzMLFile().load(self.mzml_file, msdata)

     nr_ms2_spectra = 0

     # The context manager closes the output file even if reading a
     # spectrum or writing a line raises half-way through.
     with open(self.mgf_file, "w") as outfile:

         # Create header
         outfile.write("COM=Testfile\n")
         outfile.write("ITOL=1\n")
         outfile.write("ITOLU=Da\n")
         outfile.write("CLE=Trypsin\n")
         outfile.write("CHARGE=1,2,3\n")

         # Iterate through all spectra,
         # skip all MS1 spectra and then write mgf format
         for spectrum in msdata:

             if spectrum.getMSLevel() == 1:
                 continue

             nr_ms2_spectra += 1
             outfile.write("\nBEGIN IONS\n")
             outfile.write("TITLE=%s\n" % spectrum.getNativeID())
             outfile.write("RTINSECONDS=%s\n" % spectrum.getRT())

             # Only the precursor lookup is guarded, so an IndexError from
             # anywhere else is no longer mistaken for a missing precursor.
             try:
                 precursor = spectrum.getPrecursors()[0]
             except IndexError:
                 outfile.write("PEPMASS=unknown\n")
             else:
                 outfile.write("PEPMASS=%s\n" % precursor.getMZ())
                 ch = precursor.getCharge()

                 if ch > 0:
                     outfile.write("CHARGE=%s\n" % ch)

             for peak in spectrum:
                 outfile.write(
                     "%s %s\n" % (peak.getMZ(), peak.getIntensity()))

             outfile.write("END IONS\n")

     if nr_ms2_spectra == 0:

         self._log(
             'Could not find any MS2 spectra in the input, '
             'thus the output MGF file is empty!',
             -1,
         )
Пример #18
0
 def get_openms_file_type(self, suffix):
     """Return a new pyopenms file object for `suffix` (case-insensitive).

     ``.mzxml`` gives an MzXMLFile, ``.mzml`` an MzMLFile and ``.mzdata``
     an MzDataFile. For any other suffix a message is printed and None is
     returned.
     """
     import pyopenms
     normalized = suffix.lower()
     if normalized == '.mzxml':
         return pyopenms.MzXMLFile()
     if normalized == '.mzml':
         return pyopenms.MzMLFile()
     if normalized == '.mzdata':
         return pyopenms.MzDataFile()
     print('Data format is not supported!!')
     return None
Пример #19
0
    def write_exps(self, dir: str) -> None:
        """Writes the "cached" experiments to disk.

        For pass 0 (``self.num_bins`` bins) and pass 1 (``self.num_bins + 1``
        bins), each bin file ``<dir>/b-<pass>-<i>.mzML`` is loaded if
        possible, combined with the cached experiment ``self.exps[pass][i]``
        via ``util.combine_experiments`` and stored back to the same path.
        Finally the write cache is reset.

        Keyword arguments:
        dir: the directory holding the per-bin mzML files
        """
        exp = ms.MSExperiment()  # Maybe use an OnDiscExperiment?
        # NOTE(review): `exp` is shared by all bins. If a failed load leaves
        # the previous bin's data in it, that data would be merged into the
        # current bin -- confirm that MzMLFile.load resets its target.
        passes = ((0, self.num_bins), (1, self.num_bins + 1))
        for run, bin_count in passes:
            for i in range(bin_count):
                bin_path = dir + '/b-' + str(run) + '-' + str(i) + '.mzML'
                try:
                    ms.MzMLFile().load(bin_path, exp)
                except Exception:
                    # Best effort: a failed load (presumably the bin file
                    # not existing yet) is ignored. Unlike the former bare
                    # `except`, KeyboardInterrupt and SystemExit propagate.
                    pass
                util.combine_experiments(exp, self.exps[run][i])
                ms.MzMLFile().store(bin_path, exp)

        self.reset_write_cache()
Пример #20
0
def _getFileHandeler(iftype: FileType):
    """Return a new file handler object for the given input file type.

    MZML and MZXML map to the pyopenms readers, MS2 and MGF to
    ``MS2File`` and ``MascotGenericFile``.

    Raises
    ------
    NotImplementedError
        If `iftype` is none of the four handled types.
    """
    if iftype == FileType.MZML:
        return pyopenms.MzMLFile()
    if iftype == FileType.MZXML:
        return pyopenms.MzXMLFile()
    if iftype == FileType.MS2:
        return MS2File()
    if iftype == FileType.MGF:
        return MascotGenericFile()
    raise NotImplementedError('{} not implemented!'.format(iftype.value))
Пример #21
0
def main(options):
    """Extract chromatograms for the TraML transitions from all input files.

    Loads the transitions from ``options.traml_in``, then for every file in
    ``options.infiles`` extracts chromatograms and appends them to a single
    output experiment. With ``options.is_swath`` set, the transitions are
    first narrowed down by ``checkSwathMapAndSelectTransitions``, and the
    file is skipped when that call returns a false value. Each collected
    chromatogram gets a data processing entry appended before the output is
    stored as mzML in ``options.outfile``.
    """
    # load TraML file
    targeted = pyopenms.TargetedExperiment()
    pyopenms.TraMLFile().load(options.traml_in, targeted)

    trafo = pyopenms.TransformationDescription()
    output = pyopenms.MSExperiment()

    # load input
    for infile in options.infiles:
        exp = pyopenms.MSExperiment()
        pyopenms.FileHandler().loadExperiment(infile, exp)

        transition_exp_used = pyopenms.TargetedExperiment()

        do_continue = True
        if options.is_swath:
            do_continue = pyopenms.OpenSwathHelper(
            ).checkSwathMapAndSelectTransitions(exp, targeted,
                                                transition_exp_used,
                                                options.min_upper_edge_dist)
        else:
            transition_exp_used = targeted

        if do_continue:
            # set up extractor and run
            tmp_out = pyopenms.MSExperiment()
            extractor = pyopenms.ChromatogramExtractor()
            # Extract with the transitions selected for this map. Passing
            # `targeted` here, as before, ignored the SWATH selection made
            # just above.
            extractor.extractChromatograms(exp, tmp_out, transition_exp_used,
                                           options.extraction_window,
                                           options.ppm, trafo,
                                           options.rt_extraction_window,
                                           options.extraction_function)
            # add all chromatograms to the output
            for chrom in tmp_out.getChromatograms():
                output.addChromatogram(chrom)

    dp = pyopenms.DataProcessing()
    pa = pyopenms.ProcessingAction().SMOOTHING
    dp.setProcessingActions({pa})

    # The chromatograms are fetched, modified and explicitly set back on the
    # output experiment.
    chromatograms = output.getChromatograms()
    for chrom in chromatograms:
        this_dp = chrom.getDataProcessing()
        this_dp.append(dp)
        chrom.setDataProcessing(this_dp)

    output.setChromatograms(chromatograms)

    pyopenms.MzMLFile().store(options.outfile, output)
Пример #22
0
    def test_readfile_content(self):
        """Check a few m/z and intensity values of the spectrum with id 0."""
        exp = pyopenms.MSExperiment()
        pyopenms.MzMLFile().load(self.filename, exp)
        saccess = pyopenms.SpectrumAccessOpenMS(exp)
        spectrum = saccess.getSpectrumById(0)
        mz = spectrum.getMZArray()
        intensity = spectrum.getIntensityArray()

        # (index, expected m/z, expected intensity)
        expected_points = (
            (0, 350.0000305, 0.0),
            (10, 358.075134277, 9210.931640625),
        )
        for index, expected_mz, expected_intensity in expected_points:
            self.assertAlmostEqual(mz[index], expected_mz)
            self.assertAlmostEqual(intensity[index], expected_intensity)
Пример #23
0
def mzMLReader(in_file):
    """
    Thin wrapper around OpenMS mzML reading.

    Parameters:
    -----------------------
    in_file: str,
              location of the mzML file.

    Returns:
    -----------------------
    The ``oms.MSExperiment`` the file was loaded into.
    """
    experiment = oms.MSExperiment()
    oms.MzMLFile().load(in_file, experiment)
    return experiment
Пример #24
0
def main(options):
    """Peak-pick only the spectrum types that look like continuous data.

    Loads ``options.infile``, groups the result of
    ``continousSpectrumCheck`` by the spectrum type reported by
    ``getSpectrumType``, and lets ``percentile75`` decide per type whether
    it is treated as continuous. Spectra of continuous types are run
    through PeakPickerHiRes, all others are copied unchanged; the result is
    stored with compression enabled in ``options.outfile``.
    """
    # generate fragmentationtype lookup
    lookup = {}
    methods = pyopenms.ActivationMethod()
    for attr in dir(methods):
        value = getattr(methods, attr)
        if isinstance(value, int):
            lookup[value] = attr

    # print() with a single argument produces the same output on Python 2
    # and Python 3; the former print statements fail to parse on Python 3.
    print("loading MS Experiment ")
    exp = pyopenms.MSExperiment()
    fh = pyopenms.FileHandler()
    fh.loadExperiment(options.infile, exp)

    print("checking spectra types:")
    fragmentationTypes = {}
    for s in exp:
        typ = getSpectrumType(s, lookup)
        cont = continousSpectrumCheck(s)
        # Append in place instead of rebuilding the list for every spectrum.
        fragmentationTypes.setdefault(typ, []).append(cont)

    isContinousSpectrum = {}
    for typ in fragmentationTypes:
        check = percentile75(fragmentationTypes[typ])
        isContinousSpectrum[typ] = check
        # The explicit comparison is kept: the return type of percentile75
        # is not visible here, so plain truthiness could behave differently.
        if check == True:
            print("\t" + typ + " has continous spectra data")
        else:
            print("\t" + typ + " has centroided spectra data")

    print("picking spectra")
    expNew = pyopenms.MSExperiment()
    picker = pyopenms.PeakPickerHiRes()
    for s in exp:
        typ = getSpectrumType(s, lookup)
        if isContinousSpectrum[typ] == True:
            newSpec = pyopenms.MSSpectrum()
            picker.pick(s, newSpec)
            expNew.addSpectrum(newSpec)
        else:
            expNew.addSpectrum(s)

    # The two spaces reproduce what the former `print "... ", x` emitted.
    print("saving file to  {}".format(options.outfile))
    mzFile = pyopenms.MzMLFile()
    fileoptions = mzFile.getOptions()
    fileoptions.setCompression(True)
    mzFile.setOptions(fileoptions)
    mzFile.store(options.outfile, expNew)
    print("finished")
Пример #25
0
def read_mzml_or_mzxml_impl(path, psms, theoretical, max_delta_ppm, filetype):
    """Load an mzML/mzXML file and collect annotated peaks for every PSM.

    `filetype` must be ``'mzml'`` or ``'mzxml'`` (checked with an assert).
    Each row of `psms` is unpacked as (scan_id, modified_peptide,
    precursor_charge) and passed to ``psm_df`` together with the loaded
    map. The per-PSM results are flattened into one DataFrame that keeps
    the maximum intensity for each identically annotated peak. Without any
    PSM an empty DataFrame with the same column names is returned.
    """
    assert filetype in ('mzml', 'mzxml')
    if filetype == 'mzml':
        fh = po.MzMLFile()
    else:
        fh = po.MzXMLFile()
    fh.setLogType(po.LogType.CMD)
    input_map = po.MSExperiment()
    fh.load(path, input_map)

    peaks_list = [
        psm_df(input_map, theoretical, max_delta_ppm, scan_id,
               modified_peptide, precursor_charge)
        for scan_id, modified_peptide, precursor_charge
        in psms.itertuples(index=None)
    ]

    if not peaks_list:
        return pd.DataFrame({
            'scan_id': [],
            'modified_peptide': [],
            'precursor_charge': [],
            'precursor_mz': [],
            'fragment': [],
            'product_mz': [],
            'intensity': []
        })

    def field(position):
        # The entry at `position` of every per-PSM result, in PSM order.
        return [entry[position] for entry in peaks_list]

    # Entry 0 of each result is used as the repeat count for the per-PSM
    # scalar fields (entries 4-7), aligning them with the concatenated
    # per-peak arrays (entries 1-3).
    reps = np.array(field(0))
    transitions = pd.DataFrame({
        'fragment': np.concatenate(field(1)),
        'product_mz': np.concatenate(field(2)),
        'intensity': np.concatenate(field(3)),
        'scan_id': np.repeat(field(4), reps),
        'precursor_mz': np.repeat(field(5), reps),
        'modified_peptide': np.repeat(field(6), reps),
        'precursor_charge': np.repeat(field(7), reps)
    })

    # Multiple peaks might be identically annotated, only use most intense
    group_keys = [
        'scan_id', 'modified_peptide', 'precursor_charge', 'precursor_mz',
        'fragment', 'product_mz'
    ]
    return transitions.groupby(group_keys)['intensity'].max().reset_index()
Пример #26
0
    def open_centroid_mzml(self):
        """Load the mzML file named by ``self.centroid_mzml``.

        The reader, the experiment and the reader options are kept on the
        instance (``centroid_mzml_fh``, ``centroid_input_map``,
        ``centroid_mzml_options``). The options set a maximum data pool size
        of 10000 and MS levels ``[1, 1]``; the ranges of the loaded map are
        updated afterwards.
        """
        self._log('Loading centroid data from `%s`.' % self.centroid_mzml)

        # Create the three objects in the same order as they are stored on
        # the instance; the local names just shorten the calls below.
        reader = self.centroid_mzml_fh = oms.MzMLFile()
        experiment = self.centroid_input_map = oms.MSExperiment()
        options = self.centroid_mzml_options = oms.PeakFileOptions()

        options.setMaxDataPoolSize(10000)
        options.setMSLevels([1, 1])
        reader.setOptions(options)

        reader.load(self.centroid_mzml, experiment)
        experiment.updateRanges()
Пример #27
0
    def deisotope_spectra(self,
                          infile,
                          in_type="MGF",
                          n_jobs=-1,
                          return_type="spectrum",
                          show_progress=False):
        """
        Function to deisotope spectra

        Reads the spectra from `infile` and runs ``self.parallel_helper`` on
        each of them through joblib's ``Parallel``.

        Parameters
        ----------
        infile
            File to read the spectra from.
        in_type : str
            ``"MGF"`` or ``"mzML"``, compared case-insensitively. For mzML
            only MS level 2 spectra are processed, each with the values of
            its first precursor. Any other value prints a message and exits
            via ``sys.exit()``.
        n_jobs : int
            Passed on to ``Parallel``.
        return_type : str
            Passed on to ``parallel_helper``; with ``"df"`` the individual
            results are concatenated with ``pd.concat``.
        show_progress : bool
            Passed on to ``parallel_helper``.

        Returns
        -------
        The concatenated results if `return_type` is ``"df"``, otherwise
        the list of per-spectrum results.
        """
        # The MGF branch used to be case-sensitive while mzML was not; both
        # are now matched case-insensitively.
        kind = in_type.lower()

        if kind == "mgf":
            spectra = PFR.MGF_Reader()
            spectra.load(infile)

        elif kind == "mzml":
            exp = oms.MSExperiment()
            oms.MzMLFile().load(infile, exp)

            # get the MS2 spectra
            spectra = []
            for spectrum in exp:
                if spectrum.getMSLevel() != 2:
                    continue
                # Fetch the first precursor once instead of three times.
                precursor = spectrum.getPrecursors()[0]
                spectra.append(
                    PFR.MS2_spectrum(
                        spectrum.getNativeID(), spectrum.getRT(),
                        precursor.getMZ(),
                        precursor.getIntensity(),
                        precursor.getCharge(),
                        np.matrix(spectrum.get_peaks()).transpose()))
        else:
            print("In type is not supported.")
            sys.exit()

        # Both input types share the same parallel dispatch.
        results_store = Parallel(n_jobs=n_jobs)(
            delayed(self.parallel_helper)(spectrum, return_type,
                                          show_progress, ii)
            for ii, spectrum in enumerate(spectra))

        if return_type == "df":
            return pd.concat(results_store)
        return results_store
def main(options):
    """Split an experiment by fragment spectrum type and zip the parts.

    Loads ``options.infile`` and collects the types reported by
    ``getSpectrumType`` for all spectra whose MS level is not 1. For every
    type a compressed mzML file named ``<outfile stem>_<type>.mzML`` is
    written, containing all MS1 spectra plus the spectra of that type. The
    written files are then added to the zip archive ``options.outfile``.
    """
    # generate fragmentationtype lookup
    lookup = {}
    methods = pyopenms.ActivationMethod()
    for attr in dir(methods):
        value = getattr(methods, attr)
        if isinstance(value, int):
            lookup[value] = attr

    # print() with a single argument produces the same output on Python 2
    # and Python 3; the former print statements fail to parse on Python 3.
    print("loading MS Experiment ")
    exp = pyopenms.MSExperiment()
    fh = pyopenms.FileHandler()
    fh.loadExperiment(options.infile, exp)

    print("getting fragment spectra types:")
    fragmentationTypes = set()
    for s in exp:
        if s.getMSLevel() != 1:
            typ = getSpectrumType(s, lookup)
            fragmentationTypes.add(typ)

    # writing new files
    filepart, suffix = os.path.splitext(options.outfile)
    filenames = []
    for typ in fragmentationTypes:
        expNew = pyopenms.MSExperiment()
        for s in exp:
            if s.getMSLevel() == 1 or getSpectrumType(s, lookup) == typ:
                expNew.addSpectrum(s)

        print("saving file")
        mzFile = pyopenms.MzMLFile()
        fileoptions = mzFile.getOptions()
        fileoptions.setCompression(True)
        mzFile.setOptions(fileoptions)
        name = filepart + "_" + typ + ".mzML"
        filenames.append(name)
        mzFile.store(name, expNew)
        # Drop the copy before the next iteration builds a new one.
        del expNew

    print("create zip file {}".format(options.outfile))
    # The context manager closes the archive even if adding a file fails.
    with zipfile.ZipFile(options.outfile, "w", allowZip64=True) as zipFile:
        for name in filenames:
            zipFile.write(name, os.path.basename(name))
    print("finished")
Пример #29
0
def main(options):
    """Extract the spectra of one fragment spectrum type into a new file.

    Loads ``options.infile``, counts the spectra with an MS level other
    than 1 per type reported by ``getSpectrumType`` and prints the counts.
    All MS1 spectra plus the spectra whose type equals
    ``options.extractType`` are then stored, with compression enabled, in
    ``options.outfile``.
    """
    # generate fragmentationtype lookup
    lookup = {}
    methods = pyopenms.ActivationMethod()
    for attr in dir(methods):
        value = getattr(methods, attr)
        if isinstance(value, int):
            lookup[value] = attr

    # print() with a single argument produces the same output on Python 2
    # and Python 3; the former print statements fail to parse on Python 3.
    print("loading MS Experiment ")
    exp = pyopenms.MSExperiment()
    fh = pyopenms.FileHandler()
    fh.loadExperiment(options.infile, exp)

    print("getting fragment spectra types:")
    fragmentationTypes = {}
    for s in exp:
        if s.getMSLevel() != 1:
            typ = getSpectrumType(s, lookup)
            fragmentationTypes[typ] = fragmentationTypes.get(typ, 0) + 1
    print("found the following spectra types:")
    for typ in fragmentationTypes:
        print("typ '" + typ + "' with " + str(
            fragmentationTypes[typ]) + " spectra")

    print("extracting all " + str(
        fragmentationTypes.get(
            options.extractType,
            0)) + " spectra with type " + options.extractType)

    expNew = pyopenms.MSExperiment()
    for s in exp:
        if s.getMSLevel() == 1 or getSpectrumType(
                s, lookup) == options.extractType:
            expNew.addSpectrum(s)

    print("saving file")
    mzFile = pyopenms.MzMLFile()
    fileoptions = mzFile.getOptions()
    fileoptions.setCompression(True)
    mzFile.setOptions(fileoptions)

    mzFile.store(options.outfile, expNew)

    print("finished")
Пример #30
0
 def __init__(self, path2file: str) -> None:
     """class constructor, constructs an MzML object from an MzML file

     The file is loaded into ``self.exp``; ``self._spectra_tree`` starts
     out as None.

     :param path2file: the path to the mzML file
     :type path2file: str
     :raises IOError: In case loading the file failed
     """
     # The return annotation used to be the class itself. __init__ returns
     # None, and unless annotations are evaluated lazily the class name is
     # not bound yet while the class body is still executing.
     self.exp = poms.MSExperiment()
     self._spectra_tree = None
     try:
         poms.MzMLFile().load(path2file, self.exp)
     except RuntimeError as err:
         # Chain the original error so its traceback is kept.
         raise IOError(
             f'While loading the input file: {path2file} the following error was encountered: {err}'
         ) from err