def test0():
    """Stream ``test2.mzXML`` through a consumer and verify what it receives.

    The example file sits next to this test module. The consumer records the
    MS level and retention time of each spectrum handed to it, and the test
    then checks the per-level counts and the retention-time range.
    """

    class Consumer(object):
        """Collects MS level and retention time of every consumed spectrum."""

        def __init__(self):
            self.speclevels = []
            self.rts = []

        def consumeSpectrum(self, spec):
            self.speclevels.append(spec.getMSLevel())
            self.rts.append(spec.getRT())

        def consumeChromatogram(self, chromo):
            raise Exception(
                "should never be called as we have no chromoatograms in example file"
            )

        def setExpectedSize(self, num_specs, num_chromo):
            # The example file is expected to announce 5 spectra and
            # no chromatograms.
            assert num_specs == 5, num_specs
            assert num_chromo == 0, num_chromo

        def setExperimentalSettings(self, exp):
            assert isinstance(exp, pyopenms.ExperimentalSettings)

    handler = pyopenms.MzXMLFile()
    test_dir = os.path.dirname(os.path.abspath(__file__))
    # transform() is given the path as bytes, hence the encode().
    mzxml_path = os.path.join(test_dir, "test2.mzXML").encode()

    consumer = Consumer()
    handler.transform(mzxml_path, consumer)

    level_counts = Counter(consumer.speclevels)
    assert set(level_counts.keys()) == {1, 2}
    assert level_counts[1] == 2
    assert level_counts[2] == 3

    # Retention times are compared with a 0.01 tolerance.
    assert abs(min(consumer.rts) - 4200.76) < 0.01
    assert abs(max(consumer.rts) - 4202.03) < 0.01
def __get_mzml(self, file):
    """Materialise ``file.bcore`` as a temp file and derive an mzML from it.

    Behaviour depends on ``self.ext``:

    * ``'raw'``   -- write a ``.RAW`` temp file, build the msconvert command
      and run it.
    * ``'mzml'``  -- write a ``.mzML`` temp file; nothing else is done.
    * ``'mzxml'`` -- write a ``.mzXML`` temp file, load it with pyopenms and
      store it as mzML at the path given by ``__get_mzml_path()``.

    Any other extension leaves the instance untouched.
    """
    payload = file.bcore

    tmp_suffix = {'raw': '.RAW', 'mzml': '.mzML', 'mzxml': '.mzXML'}.get(self.ext)
    if tmp_suffix is None:
        return

    self.tf = store_byte_in_tmp(
        payload,
        prefix=self.fname,
        suffix=tmp_suffix,
        directory=self.target_dir.absolute().as_posix(),
    )

    if self.ext == 'raw':
        self.cmd_msconvert = self.__build_cmd_msconvert()
        self.__run_cmd()
    elif self.ext == 'mzxml':
        # NOTE(review): the conversion is assumed to belong to the mzXML
        # branch only -- confirm against the original layout.
        experiment = pyopenms.MSExperiment()
        pyopenms.MzXMLFile().load(self.tf.name, experiment)
        mzml_target = self.__get_mzml_path().absolute().as_posix()
        pyopenms.MzMLFile().store(mzml_target, experiment)
def _getFileHandeler(iftype: FileType):
    """Return a new file-handler object for the given ``FileType``.

    Raises:
        NotImplementedError: if ``iftype`` is not one of MZML, MZXML, MS2
            or MGF.
    """
    if iftype == FileType.MZML:
        return pyopenms.MzMLFile()
    if iftype == FileType.MZXML:
        return pyopenms.MzXMLFile()
    if iftype == FileType.MS2:
        return MS2File()
    if iftype == FileType.MGF:
        return MascotGenericFile()

    # Nothing matched: report the unsupported type by its enum value.
    raise NotImplementedError('{} not implemented!'.format(iftype.value))
def get_openms_file_type(self, suffix):
    """Return the pyopenms file object matching a file suffix.

    The comparison is case-insensitive and recognises ``.mzxml``, ``.mzml``
    and ``.mzdata``. For any other suffix a message is printed and ``None``
    is returned.
    """
    import pyopenms

    normalized = suffix.lower()
    if normalized == '.mzxml':
        return pyopenms.MzXMLFile()
    if normalized == '.mzml':
        return pyopenms.MzMLFile()
    if normalized == '.mzdata':
        return pyopenms.MzDataFile()

    print('Data format is not supported!!')
    return None
def read_mzml_or_mzxml_impl(path, psms, theoretical, max_delta_ppm, filetype):
    """Load a spectrum file and build a transitions table for the given PSMs.

    Args:
        path: location of the mzML / mzXML file to load.
        psms: DataFrame whose rows unpack (without the index) into
            ``scan_id, modified_peptide, precursor_charge``.
        theoretical: passed through unchanged to ``psm_df``.
        max_delta_ppm: passed through unchanged to ``psm_df``.
        filetype: ``'mzml'`` or ``'mzxml'``; selects the pyopenms reader.

    Returns:
        DataFrame with columns ``scan_id``, ``modified_peptide``,
        ``precursor_charge``, ``precursor_mz``, ``fragment``, ``product_mz``
        and ``intensity``. It is empty when ``psms`` yields no rows.
    """
    assert filetype in ('mzml', 'mzxml')

    if filetype == 'mzml':
        reader = po.MzMLFile()
    else:
        reader = po.MzXMLFile()
    reader.setLogType(po.LogType.CMD)

    experiment = po.MSExperiment()
    reader.load(path, experiment)

    annotated = [
        psm_df(experiment, theoretical, max_delta_ppm, scan_id,
               modified_peptide, precursor_charge)
        for scan_id, modified_peptide, precursor_charge in psms.itertuples(
            index=None)
    ]

    if not annotated:
        return pd.DataFrame({
            'scan_id': [],
            'modified_peptide': [],
            'precursor_charge': [],
            'precursor_mz': [],
            'fragment': [],
            'product_mz': [],
            'intensity': []
        })

    def field(position):
        """Collect element ``position`` of every ``psm_df`` result."""
        return [entry[position] for entry in annotated]

    # Element 0 of each result is used as the repeat count that expands the
    # per-PSM scalars (elements 4-7) to the length of the per-peak arrays
    # (elements 1-3).
    repeats = np.array(field(0))
    transitions = pd.DataFrame({
        'fragment': np.concatenate(field(1)),
        'product_mz': np.concatenate(field(2)),
        'intensity': np.concatenate(field(3)),
        'scan_id': np.repeat(field(4), repeats),
        'precursor_mz': np.repeat(field(5), repeats),
        'modified_peptide': np.repeat(field(6), repeats),
        'precursor_charge': np.repeat(field(7), repeats)
    })

    # Multiple peaks might be identically annotated, only use most intense
    group_keys = [
        'scan_id', 'modified_peptide', 'precursor_charge', 'precursor_mz',
        'fragment', 'product_mz'
    ]
    return transitions.groupby(group_keys)['intensity'].max().reset_index()
def oms_ffmetabo_single_file(filename, max_peaks_per_file=5000):
    """Run the metabolomics feature-finding chain on one mzML/mzXML file.

    The file is loaded with the reader options restricted to MS level 1.
    Its spectra are copied into a peak map, which then goes through
    MassTraceDetection, ElutionPeakDetection and FeatureFindingMetabo.

    Args:
        filename: path ending in ``.mzxml`` or ``.mzml`` (case-insensitive).
        max_peaks_per_file: third argument given to
            ``MassTraceDetection.run``.

    Returns:
        The resulting feature map after ``sortByOverallQuality()``.
    """
    ms1_only = oms.PeakFileOptions()
    ms1_only.setMSLevels([1])

    lowered = filename.lower()
    if lowered.endswith('.mzxml'):
        reader = oms.MzXMLFile()
    elif lowered.endswith('.mzml'):
        reader = oms.MzMLFile()
    else:
        assert False, filename
    reader.setOptions(ms1_only)

    experiment = oms.MSExperiment()
    reader.load(filename, experiment)

    # Only spectra are transferred to the peak map; chromatograms are not.
    peak_map = oms.PeakMap()
    for spectrum in experiment.getSpectra():
        peak_map.addSpectrum(spectrum)

    traces = []
    oms.MassTraceDetection().run(peak_map, traces, max_peaks_per_file)

    split_traces = []
    oms.ElutionPeakDetection().detectPeaks(traces, split_traces)

    feature_map = oms.FeatureMap()
    filtered_traces = []
    oms.FeatureFindingMetabo().run(split_traces, feature_map, filtered_traces)

    feature_map.sortByOverallQuality()
    return feature_map
def read_mzml_or_mzxml_impl(path, psms, theoretical, max_delta_ppm, filetype):
    """Annotate the spectra referenced by ``psms`` and return transitions.

    For each PSM row the spectrum at index ``scan_id - 1`` is fetched, every
    peak is passed to ``annotate_mass`` and the peaks that receive a fragment
    annotation are kept. Intensities are rescaled per PSM so the most intense
    annotated peak becomes 10000 (skipped unless that maximum is > 0).

    Args:
        path: location of the mzML / mzXML file to load.
        psms: DataFrame with ``scan_id``, ``modified_peptide`` and
            ``precursor_charge`` columns.
        theoretical: mapping indexed as ``theoretical[peptide][charge]``
            whose value is handed to ``annotate_mass``.
        max_delta_ppm: passed through unchanged to ``annotate_mass``.
        filetype: ``'mzml'`` or ``'mzxml'``; selects the pyopenms reader.

    Returns:
        DataFrame with columns ``scan_id``, ``modified_peptide``,
        ``precursor_charge``, ``precursor_mz``, ``fragment``, ``product_mz``
        and ``intensity``. It is empty when ``psms`` has no rows.
    """
    assert filetype in ('mzml', 'mzxml')

    if filetype == 'mzml':
        reader = po.MzMLFile()
    else:
        reader = po.MzXMLFile()
    reader.setLogType(po.LogType.CMD)

    experiment = po.MSExperiment()
    reader.load(path, experiment)

    frames = []
    for _, row in psms.iterrows():
        scan_id = row['scan_id']
        peptide = row['modified_peptide']
        ionseries = theoretical[peptide][row['precursor_charge']]
        charge = row['precursor_charge']

        # Spectra are addressed with a zero-based index, scan_id minus one.
        spectrum = experiment.getSpectrum(scan_id - 1)

        fragments = []
        product_mzs = []
        intensities = []
        for peak in spectrum:
            fragment, product_mz = annotate_mass(
                peak.getMZ(), ionseries, max_delta_ppm)
            if fragment is None:
                continue
            fragments.append(fragment)
            product_mzs.append(product_mz)
            intensities.append(peak.getIntensity())

        peaks = pd.DataFrame({
            'fragment': fragments,
            'product_mz': product_mzs,
            'intensity': intensities,
        })
        peaks['scan_id'] = scan_id
        sequence = po.AASequence.fromString(po.String(peptide))
        mono_weight = sequence.getMonoWeight(po.Residue.ResidueType.Full, charge)
        peaks['precursor_mz'] = mono_weight / charge
        peaks['modified_peptide'] = peptide
        peaks['precursor_charge'] = charge

        # Baseline normalization to highest annotated peak
        top_intensity = np.max(peaks['intensity'])
        if top_intensity > 0:
            peaks['intensity'] = peaks['intensity'] * (10000 / top_intensity)

        # NOTE(review): the frame is assumed to be collected whether or not
        # normalisation ran -- confirm against the original layout.
        frames.append(peaks)

    if not frames:
        return pd.DataFrame({
            'scan_id': [],
            'modified_peptide': [],
            'precursor_charge': [],
            'precursor_mz': [],
            'fragment': [],
            'product_mz': [],
            'intensity': [],
        })

    # Multiple peaks might be identically annotated, only use most intense
    group_keys = ['scan_id', 'modified_peptide', 'precursor_charge',
                  'precursor_mz', 'fragment', 'product_mz']
    transitions = pd.concat(frames)
    return transitions.groupby(group_keys)['intensity'].max().reset_index()
def testMxxxFile():
    """
    @tests:
     MzDataFile.__init__
     MzDataFile.endProgress
     MzDataFile.getLogType
     MzDataFile.load
     MzDataFile.setLogType
     MzDataFile.setProgress
     MzDataFile.startProgress
     MzDataFile.store
     MzMLFile.__init__
     MzMLFile.endProgress
     MzMLFile.getLogType
     MzMLFile.load
     MzMLFile.setLogType
     MzMLFile.setProgress
     MzMLFile.startProgress
     MzMLFile.store
     MzXMLFile.__init__
     MzXMLFile.endProgress
     MzXMLFile.getLogType
     MzXMLFile.load
     MzXMLFile.setLogType
     MzXMLFile.setProgress
     MzXMLFile.startProgress
     MzXMLFile.store
    """
    # A single experiment object is shared by all three round trips.
    experiment = pyopenms.MSExperiment()

    round_trips = (
        (pyopenms.MzDataFile, "test.mzData"),
        (pyopenms.MzMLFile, "test.mzML"),
        (pyopenms.MzXMLFile, "test.mzXML"),
    )
    for handler_cls, target in round_trips:
        handler = handler_cls()
        # Exercise the progress-logger interface, then store and re-load.
        _testProgressLogger(handler)
        handler.store(target, experiment)
        handler.load(target, experiment)
def readms(input_file):
    """Read the MS1 spectra of an mzXML, mzML or mzData file.

    The format is chosen from the file extension (case-insensitive). Within
    each MS level 1 spectrum, zero-intensity peaks are dropped and the
    remaining peaks are ordered by descending intensity.

    Args:
        input_file: path of the file to read.

    Returns:
        A tuple ``(ms, intensity, rt, rt_mean_interval)``:
        ``ms`` and ``intensity`` are lists holding one numpy array per MS1
        spectrum (m/z values and intensities in the same order), ``rt`` is
        the list of retention times, and ``rt_mean_interval`` is the mean
        difference between consecutive retention times.

    Raises:
        Exception: if the extension is not one of the supported formats.
    """
    import pyopenms

    extension = os.path.splitext(input_file)[1].lower()
    experiment = pyopenms.MSExperiment()

    handlers = {
        '.mzxml': pyopenms.MzXMLFile,
        '.mzml': pyopenms.MzMLFile,
        '.mzdata': pyopenms.MzDataFile,
    }
    handler_cls = handlers.get(extension)
    if handler_cls is None:
        raise Exception('ERROR: %s is wrong format' % input_file)
    handler_cls().load('%s' % input_file, experiment)

    ms = []
    intensity = []
    rt = []
    for spectrum in experiment:
        if spectrum.getMSLevel() != 1:
            continue
        rt.append(spectrum.getRT())

        mz_values = []
        peak_heights = []
        for peak in spectrum:
            height = peak.getIntensity()
            if height != 0:
                mz_values.append(peak.getMZ())
                peak_heights.append(height)

        # Negating the intensities makes argsort yield a descending order.
        order = np.argsort(-np.array(peak_heights))
        ms.append(np.array(mz_values)[order])
        intensity.append(np.array(peak_heights)[order])

    rt_mean_interval = np.mean(np.diff(np.array(rt)))
    return ms, intensity, rt, rt_mean_interval
def main():
    """Command-line entry point for the PeakPickerHiRes wrapper.

    Two modes are supported:

    * run mode -- ``-in`` and ``-out`` together with ``-ini`` or
      ``-dict_ini``: load the mzXML input, apply the parameters and call
      ``run_peak_picker``.
    * write mode -- ``-write_ini`` and/or ``-write_dict_ini``: dump the
      default PeakPickerHiRes parameters to the given file(s).

    If the arguments satisfy neither mode, argparse reports a usage error
    and exits.

    Raises:
        Exception: if the ``-dict_ini`` file cannot be read or evaluated;
            the underlying error is attached as ``__cause__``.
    """
    parser = argparse.ArgumentParser(description="PeakPickerHiRes")
    parser.add_argument(
        "-in",
        action="store",
        type=str,
        dest="in_",  # "in" is a Python keyword, so use another attribute name
        metavar="input_file",
    )
    simple_options = (
        ("-out", "output_file"),
        ("-ini", "ini_file"),
        ("-dict_ini", "python_dict_ini_file"),
        ("-write_ini", "ini_file"),
        ("-write_dict_ini", "python_dict_ini_file"),
    )
    for flag, metavar in simple_options:
        parser.add_argument(flag, action="store", type=str, metavar=metavar)

    args = parser.parse_args()

    run_mode = (
        args.in_ is not None
        and args.out is not None
        and (args.ini is not None or args.dict_ini is not None)
    )
    write_mode = args.write_ini is not None or args.write_dict_ini is not None
    if not (run_mode or write_mode):
        parser.error("either specify -in, -out and -(dict)ini for running "
                     "the peakpicker\nor -write(dict)ini for creating std "
                     "ini file")

    defaults = pms.PeakPickerHiRes().getDefaults()

    if args.write_dict_ini or args.write_ini:
        if args.write_dict_ini:
            with open(args.write_dict_ini, "w") as fp:
                pprint.pprint(defaults.asDict(), stream=fp)
        if args.write_ini:
            defaults.store(args.write_ini)
        return

    if args.ini:
        param = pms.Param()
        param.load(args.ini)
        defaults.update(param, False, False)
    elif args.dict_ini:
        with open(args.dict_ini, "r") as fp:
            try:
                # NOTE(security): eval() executes arbitrary Python from the
                # file. Only use -dict_ini with trusted files; consider
                # ast.literal_eval if plain literals are sufficient.
                dd = eval(fp.read())
            except Exception as exc:
                # A bare ``except:`` would also trap KeyboardInterrupt and
                # SystemExit; catch real errors only and keep the cause.
                raise Exception(
                    "could not parse %s" % args.dict_ini) from exc
        defaults.updateFrom(dd)

    fh = pms.MzXMLFile()
    fh.setLogType(pms.LogType.CMD)
    input_map = pms.MSExperiment()
    fh.load(args.in_, input_map)

    run_peak_picker(input_map, defaults, args.out)
def load_ms_file(ms_experiment, path_to_file):
    """Load an mzML / mzXML file into ``ms_experiment``, MS level 1 only.

    The reader is restricted to MS level 1 because, per the original note,
    the peak finders only support that level ("FeatureFinder can only
    operate on MS level 1 data").

    :param ms_experiment: experiment object that receives the loaded data
    :param path_to_file: path of the ``.mzXML`` or ``.mzML`` file
        (the suffix is matched case-insensitively)
    :return: the same ``ms_experiment`` after ``updateRanges()``
    :raises UnsupportedGCMSFiletypeError: for any other file suffix
    """
    ms1_options = oms.PeakFileOptions()
    ms1_options.setMSLevels([1])  # MS1 is comparable to mcc-ims input

    readers = {".mzxml": oms.MzXMLFile, ".mzml": oms.MzMLFile}
    reader_cls = readers.get(Path(path_to_file).suffix.lower())
    if reader_cls is None:
        raise UnsupportedGCMSFiletypeError(
            "Unsupported filetype. Only mzXML and mzML format is supported.")

    reader = reader_cls()
    reader.setOptions(ms1_options)

    # If loading fails, let the pyopenms error bubble up.
    reader.load(str(path_to_file), ms_experiment)
    ms_experiment.updateRanges()
    return ms_experiment