def toMSSpectrum(self):
    """Return a new ``pyopenms.MSSpectrum`` filled from this object.

    Retention time, MS level, polarity, precursors and peaks are copied.
    ``self.polarity`` has to be one of ``'0'``, ``'+'`` or ``'-'``; any
    other value raises ``KeyError``.
    """
    result = pyopenms.MSSpectrum()
    result.setRT(self.rt)
    result.setMSLevel(self.msLevel)

    # The polarity is stored on the instrument settings, which are fetched,
    # modified and written back to the spectrum.
    settings = result.getInstrumentSettings()
    polarity_by_symbol = {
        '0': pyopenms.IonSource.Polarity.POLNULL,
        '+': pyopenms.IonSource.Polarity.POSITIVE,
        '-': pyopenms.IonSource.Polarity.NEGATIVE,
    }
    settings.setPolarity(polarity_by_symbol[self.polarity])
    result.setInstrumentSettings(settings)

    # self.precursors yields (m/z, intensity) pairs.
    converted = []
    for precursor_mz, precursor_intensity in self.precursors:
        precursor = pyopenms.Precursor()
        precursor.setMZ(precursor_mz)
        precursor.setIntensity(precursor_intensity)
        converted.append(precursor)
    result.setPrecursors(converted)

    # With IS_PYOPENMS_2 set the peaks are handed over as separate m/z and
    # intensity columns, otherwise as the two-column array itself.
    if not IS_PYOPENMS_2:
        result.set_peaks(self.peaks)
    else:
        result.set_peaks((self.peaks[:, 0], self.peaks[:, 1]))

    result.updateRanges()
    return result
def _write_spectra_mzml(filename: str, spectra: Iterable[sus.MsmsSpectrum]) \
        -> None:
    """
    Export spectra as MS2 scans to an mzML file.

    Each spectrum contributes its identifier (native ID), precursor m/z and
    charge, and its peaks. The attributes ``retention_time``, ``filename``,
    ``scan`` and ``cluster`` are exported only if the spectrum has them.

    Parameters
    ----------
    filename : str
        Path of the mzML file to create.
    spectra : Iterable[sus.MsmsSpectrum]
        Spectra to export.
    """
    experiment = pyopenms.MSExperiment()
    progress = tqdm.tqdm(spectra, desc='Spectra written', unit='spectra')
    for spectrum in progress:
        out_spectrum = pyopenms.MSSpectrum()
        out_spectrum.setMSLevel(2)
        out_spectrum.setNativeID(spectrum.identifier)

        out_precursor = pyopenms.Precursor()
        out_precursor.setMZ(spectrum.precursor_mz)
        out_precursor.setCharge(spectrum.precursor_charge)
        out_spectrum.setPrecursors([out_precursor])

        out_spectrum.set_peaks([spectrum.mz, spectrum.intensity])

        if hasattr(spectrum, 'retention_time'):
            out_spectrum.setRT(spectrum.retention_time)
        # Meta values are stored as bytes; 'filename' is encoded directly
        # while 'scan' and 'cluster' are converted to text first.
        if hasattr(spectrum, 'filename'):
            out_spectrum.setMetaValue('filename',
                                      str.encode(spectrum.filename))
        for meta_key in ('scan', 'cluster'):
            if hasattr(spectrum, meta_key):
                out_spectrum.setMetaValue(
                    meta_key, str.encode(str(getattr(spectrum, meta_key))))

        experiment.addSpectrum(out_spectrum)

    pyopenms.MzMLFile().store(filename, experiment)
def handle_compressed_frame(allmz, allint, allim, mslevel, rtime, center, width):
    """Merge per-scan arrays into a single ``pyopenms.MSSpectrum``.

    ``allmz``, ``allint`` and ``allim`` are sequences of per-scan arrays
    (m/z, intensity and ion mobility); each sequence is concatenated. The
    mobility values go into a float data array named "Ion Mobility" and
    the m/z / intensity values become the peaks. For ``mslevel == 2`` the
    precursor gets ``center`` as m/z and ``width / 2`` as both isolation
    window offsets. The returned spectrum is sorted by position.
    """
    merged_mz = np.concatenate(allmz)
    merged_intensity = np.concatenate(allint)
    merged_mobility = np.concatenate(allim)

    # One mobility entry per peak, copied element by element.
    mobility_array = pyopenms.FloatDataArray()
    mobility_array.setName("Ion Mobility")
    mobility_array.resize(len(merged_mz))
    for position, mobility in enumerate(merged_mobility):
        mobility_array[position] = mobility

    frame_spectrum = pyopenms.MSSpectrum()
    frame_spectrum.setMSLevel(mslevel)
    frame_spectrum.setRT(rtime)
    frame_spectrum.setFloatDataArrays([mobility_array])

    # A precursor is always attached; only MS2 frames fill in the isolation
    # window.
    precursor = pyopenms.Precursor()
    if mslevel == 2:
        half_width = width / 2.0
        precursor.setMZ(center)
        precursor.setIsolationWindowUpperOffset(half_width)
        precursor.setIsolationWindowLowerOffset(half_width)
    frame_spectrum.setPrecursors([precursor])

    frame_spectrum.set_peaks((merged_mz, merged_intensity))
    frame_spectrum.sortByPosition()
    return frame_spectrum
def run_list_conversions(self):
    """Repeatedly pass a 500-element precursor list to fresh spectra.

    Builds 500 spectra, assigning the same precursor list twice to each,
    and reports memory via ``show_mem`` on every 100th iteration.
    """
    n_runs = 500
    shared_precursor = pyopenms.Precursor()
    precursor_list = [shared_precursor] * n_runs

    spectra = []
    for run_idx in range(n_runs):
        if (run_idx + 1) % 100 == 0:
            show_mem("%4d runs" % run_idx)
        spectrum = pyopenms.MSSpectrum()
        # The list is assigned twice on purpose; both calls are kept.
        spectrum.setPrecursors(precursor_list)
        spectrum.setPrecursors(precursor_list)
        spectra.append(spectrum)
        del spectrum
    del spectra
def testPrecursor():
    """
    @tests:
     Precursor.__init__
     Precursor.getIntensity
     Precursor.getMZ
     Precursor.setIntensity
     Precursor.setMZ
    """
    # Values written through the setters must be read back unchanged.
    expected_mz = 123.0
    expected_intensity = 12.0

    precursor = pyopenms.Precursor()
    precursor.setMZ(expected_mz)
    precursor.setIntensity(expected_intensity)

    assert precursor.getMZ() == expected_mz
    assert precursor.getIntensity() == expected_intensity
def getSwathExperiment(nr_scans, nr_swathes, precusorsisolation):
    """Build a synthetic SWATH ``pyopenms.MSExperiment``.

    For every scan counter in ``range(1, nr_scans)`` one MS1 spectrum with
    two peaks is added, followed by ``nr_swathes`` MS2 spectra with three
    peaks each. Peak intensities follow ``norm.pdf`` around the middle
    scan; for swath ``i`` the centre is shifted by ``i`` scans and the
    intensity is scaled by 1.2.

    ``precusorsisolation`` selects how the isolation window is written on
    the MS2 precursors:

    * ``"OpenSwath"`` - lower/upper offsets ``400 + i * 25`` / ``425 + i * 25``
    * ``"Pwiz"``      - lower/upper offsets of 12.5 each
    * ``"Missing"``   - no offsets are set

    Any other value raises ``Exception`` when the first MS2 spectrum is
    built.
    """
    experiment = pyopenms.MSExperiment()
    half_way = nr_scans / 2

    for spec_cnt in range(1, nr_scans):
        # --- MS1 spectrum ---------------------------------------------------
        survey = pyopenms.MSSpectrum()
        survey.setMSLevel(1)
        survey.setRT(spec_cnt * 10)
        intensity = norm.pdf((spec_cnt - half_way) / 4.0)
        survey_peaks = numpy.array(
            [[500.01, intensity * 3000], [510.05, intensity * 3000]],
            dtype=numpy.float32)
        survey.set_peaks(survey_peaks)
        experiment.addSpectrum(survey)

        # --- MS2 spectra: swath i has peaks at 500.01 + i, 500.15 + i and
        # 500.25 + i ----------------------------------------------------------
        for i in range(nr_swathes):
            # The centre of the gaussian moves by one scan per swath.
            intensity = norm.pdf((spec_cnt - (half_way + i)) / 4.0)
            intensity *= 1.2

            if precusorsisolation == "OpenSwath":
                window_offsets = (400 + i * 25, 425 + i * 25)
            elif precusorsisolation == "Pwiz":
                window_offsets = (12.5, 12.5)
            elif precusorsisolation == "Missing":
                window_offsets = None
            else:
                raise Exception(
                    "precusorsisolation needs to be {Missing,Pwiz,OpenSwath}")

            fragment = pyopenms.MSSpectrum()
            fragment.setMSLevel(2)
            fragment.setRT(spec_cnt * 10 + i + 1)

            precursor = pyopenms.Precursor()
            if window_offsets is not None:
                lower_offset, upper_offset = window_offsets
                precursor.setIsolationWindowLowerOffset(lower_offset)
                precursor.setIsolationWindowUpperOffset(upper_offset)
            precursor.setMZ(400 + i * 25 + 12.5)
            fragment.setPrecursors([precursor])

            fragment_peaks = numpy.array(
                [[500.01 + i, intensity * 3000],
                 [500.15 + i, intensity * 3000 / 2.0],
                 [500.25 + i, intensity * 3000 / 3.0]],
                dtype=numpy.float32)
            fragment.set_peaks(fragment_peaks)
            experiment.addSpectrum(fragment)

    return experiment
def store_frame(frame_id, td, conn, exp, verbose=False, compressFrame=True):
    """
    Convert one frame into spectra and hand them to ``exp.consumeSpectrum``.

    There are two ways to store the data:

      (i) Multiple spectra per frame (for visualization), compressFrame is
      False. Every scan of the frame becomes its own spectrum; all of them
      carry the frame time as RT and the ion mobility value is stored as
      the drift time of the precursor.

      (ii) One spectrum per frame, compressFrame is True. This puts all
      peaks into a single spectrum (while storing the IM data in an extra
      array, see ``handle_compressed_frame``). This is more efficient for
      storage and allows analysis that is ignorant of the IM dimension.

    Note that msms = 2 means that we have an MS2 scan whereas msms = 8
    stands for pasef scan. For pasef frames in compressed mode one spectrum
    is emitted each time the scan index reaches the next switch scan.

    Parameters:
      frame_id      -- frame identifier, used in the SQL queries and passed
                       to the ``td`` accessors
      td            -- data accessor providing ``scanNumToOneOverK0``,
                       ``readScans`` and ``indexToMz``
      conn          -- database connection; ``execute`` must return an
                       object offering ``fetchone`` / ``fetchall``
      exp           -- consumer receiving the spectra via ``consumeSpectrum``
      verbose       -- print progress information
      compressFrame -- choose mode (ii) instead of mode (i)
    """
    # Get a projected mass spectrum:
    q = conn.execute(
        "SELECT NumScans, Time, Polarity, MsMsType FROM Frames WHERE Id={0}".
        format(frame_id))
    tmp = q.fetchone()
    num_scans = tmp[0]
    time = tmp[1]
    pol = tmp[2]  # read but not used below
    msms = int(tmp[3])

    # Defaults describe an MS1 frame: no isolation window, no scan switch.
    center = -1
    width = -1
    next_scan_switch = -1
    mslevel = 1
    scan_data = []  # NOTE(review): never used; the code below uses `scandata`
    scan_data_it = 0

    # Check whether we have a MS2 or a PASEF scan
    if msms == 2:
        q = conn.execute(
            "SELECT TriggerMass, IsolationWidth, PrecursorCharge, CollisionEnergy FROM FrameMsMsInfo WHERE Frame={0}"
            .format(frame_id))
        tmp = q.fetchone()
        center = float(tmp[0])
        width = float(tmp[1])
        mslevel = 2
    elif msms == 8:
        q = conn.execute(
            "SELECT IsolationMz, IsolationWidth, ScanNumBegin, ScanNumEnd, CollisionEnergy FROM PasefFrameMsMsInfo WHERE Frame={0} ORDER BY IsolationMz ASC"
            .format(frame_id))
        # `scandata` is only bound in this branch.
        scandata = q.fetchall()
        tmp = scandata[scan_data_it]
        center = float(tmp[0])
        width = float(tmp[1])
        scan_start = int(tmp[2])
        scan_end = int(tmp[3])  # read but not used below
        # The first spectrum is emitted when the loop reaches this scan.
        next_scan_switch = scan_start
        mslevel = 2

    if verbose:
        print "mslevel", mslevel, msms

    # Get the mapping of the ion mobility axis
    scan_number_axis = np.arange(num_scans, dtype=np.float64)
    ook0_axis = td.scanNumToOneOverK0(frame_id, scan_number_axis)

    # Per-scan m/z, intensity and mobility arrays collected in compressed mode.
    allmz = []
    allint = []
    allim = []

    # Traverse in reversed order to get low ion mobilities first
    for k, scan in reversed(
            list(enumerate(td.readScans(frame_id, 0, num_scans)))):
        index = np.array(scan[0], dtype=np.float64)
        mz = td.indexToMz(frame_id, index)
        intens = scan[1]
        drift_time = ook0_axis[k]
        if compressFrame:
            allmz.append(mz)
            allint.append(intens)
            # One mobility value per peak of this scan.
            allim.append([drift_time for dr_time in mz])

            # We have multiple MS2 spectra in each frame, we need to separate
            # them based on the information from PasefFrameMsMsInfo which
            # indicates the switch scan and the isolation parameter for each
            # quadrupole isolation.
            if next_scan_switch != -1 and next_scan_switch == k:
                if verbose:
                    print "Switch to new scan at", k, "with mapping", scandata
                sframe = handle_compressed_frame(allmz, allint, allim, mslevel, time, center, width)
                exp.consumeSpectrum(sframe)
                allmz = []
                allint = []
                allim = []
                if k == 0: continue
                # NOTE(review): no bounds check - this looks like it raises
                # IndexError once the last row of `scandata` has been used
                # and k is not 0; also the rows are ordered by IsolationMz
                # while the loop walks scan numbers downwards. Confirm.
                scan_data_it += 1
                tmp = scandata[scan_data_it]
                center = float(tmp[0])
                width = float(tmp[1])
                scan_start = int(tmp[2])
                scan_end = int(tmp[3])
                next_scan_switch = scan_start
            continue

        # Store data in OpenMS Spectrum file -> each TOF push is an individual
        # spectrum and we store the ion mobility in the precursor. The frame
        # can be reconstructed by grouping all spectra with the same RT.
        s = pyopenms.MSSpectrum()
        s.setMSLevel(mslevel)
        s.set_peaks((mz, intens))
        s.setRT(time)
        p = pyopenms.Precursor()
        p.setDriftTime(drift_time)
        if mslevel == 2:
            p.setMZ(center)
            p.setIsolationWindowUpperOffset(width / 2.0)
            p.setIsolationWindowLowerOffset(width / 2.0)
        s.setPrecursors([p])
        exp.consumeSpectrum(s)

    # Frames without a switch scan (MS1 and msms == 2) are stored as a single
    # compressed spectrum.
    if compressFrame and next_scan_switch == -1:
        sframe = handle_compressed_frame(allmz, allint, allim, mslevel, time, center, width)
        exp.consumeSpectrum(sframe)
print "Will filter with criteria ", filter_criteria, "(inverse: %s)" % inverse chroms_out = [] try: # PyMzML path import pymzml exp2 = pyopenms.MSExperiment() run = pymzml.run.Reader(infile, build_index_from_scratch=True) for key in run.info['offsets'].keys(): if key == "indexList": continue if key == "TIC": continue if (inverse and not re.search(filter_criteria, key)) \ or (not inverse and re.search(filter_criteria, key)): c = pyopenms.MSChromatogram() c.setNativeID(str(key)) pr = pyopenms.Precursor() chrom = run[key] #try: pr.setMZ(chrom["precursors"][0]["mz"]) #except Exception: pass c.setPrecursor(pr) timea = numpy.array(chrom.time, dtype=numpy.float32) inta = numpy.array(chrom.i, dtype=numpy.float32) peaks = numpy.ndarray(shape=(len(timea), 2), dtype=numpy.float32) peaks[:, 0] = timea peaks[:, 1] = inta c.set_peaks(peaks) chroms_out.append(c) except ImportError:
def store_frame(frame_id, td, conn, exp, verbose=False, compressFrame=True,
                keep_frames=False):
    """
    Convert one frame into spectra and hand them to ``exp.consumeSpectrum``.

    There are two ways to store the data:

      (i) Multiple spectra per frame (for visualization), compressFrame is
      False. Every scan of the frame becomes its own spectrum; all of them
      carry the frame time as RT and the ion mobility value is stored as
      the drift time of the precursor.

      (ii) One spectrum per frame, compressFrame is True. This puts all
      peaks into a single spectrum (while storing the IM data in an extra
      array, see ``handle_compressed_frame``). This is more efficient for
      storage and allows analysis that is ignorant of the IM dimension.
      For PASEF frames one spectrum per isolation window is emitted unless
      ``keep_frames`` is True, in which case the frame stays one spectrum.

    Note that msms = 2 means that we have an MS2 scan whereas msms = 8
    stands for pasef scan. (New tdf 5.1 has msms = 9 for pasef scan)

    Parameters:
      frame_id      -- frame identifier, used in the SQL queries and passed
                       to the ``td`` accessors
      td            -- data accessor providing ``scanNumToOneOverK0``,
                       ``readScans`` and ``indexToMz``
      conn          -- database connection; ``execute`` must return an
                       object offering ``fetchone`` / ``fetchall``
      exp           -- consumer receiving the spectra via ``consumeSpectrum``
      verbose       -- print progress information
      compressFrame -- choose mode (ii) instead of mode (i)
      keep_frames   -- do not split PASEF frames into isolation windows

    Raises:
      Exception -- if windows were split (compressFrame is True and
                   keep_frames is False) and the number of emitted spectra
                   differs from the number of isolation windows
    """
    # Get a projected mass spectrum:
    q = conn.execute(
        "SELECT NumScans, Time, Polarity, MsMsType FROM Frames WHERE Id={0}".
        format(frame_id))
    tmp = q.fetchone()
    num_scans = tmp[0]
    time = tmp[1]
    pol = tmp[2]
    msms = int(tmp[3])

    # Defaults describe an MS1 frame: no isolation window, no scan switch.
    center = -1
    width = -1
    next_scan_switch = -1
    mslevel = 1
    scan_data = []
    scan_data_it = 0
    in_scan = False
    scandata = None

    # Check whether we have a MS2 or a PASEF scan
    if msms == 2:
        q = conn.execute(
            "SELECT TriggerMass, IsolationWidth, PrecursorCharge, CollisionEnergy FROM FrameMsMsInfo WHERE Frame={0}"
            .format(frame_id))
        tmp = q.fetchone()
        center = float(tmp[0])
        width = float(tmp[1])
        mslevel = 2
    # new tdf 5.1 has pasef scan msms = 9
    elif msms == 9:
        q = conn.execute(
            "SELECT IsolationMz, IsolationWidth, ScanNumBegin, ScanNumEnd, CollisionEnergy, Frame FROM DiaFrameMsMsWindows INNER JOIN DiaFrameMsMsInfo ON DiaFrameMsMsWindows.WindowGroup = DiaFrameMsMsInfo.WindowGroup WHERE Frame={0} ORDER BY ScanNumBegin DESC"
            .format(frame_id))
        scandata = q.fetchall()
        tmp = scandata[scan_data_it]
        center = float(tmp[0])
        width = float(tmp[1])
        scan_start = int(tmp[2])
        scan_end = int(tmp[3])
        next_scan_switch = scan_end
        # Check if we already are in the new scan (if there is no
        # gap between scans, happens for diaPASEF):
        if next_scan_switch == num_scans:
            next_scan_switch = scan_start
            in_scan = True
        mslevel = 2
    elif msms == 8:
        q = conn.execute(
            "SELECT IsolationMz, IsolationWidth, ScanNumBegin, ScanNumEnd, CollisionEnergy FROM PasefFrameMsMsInfo WHERE Frame={0} ORDER BY ScanNumBegin DESC"
            .format(frame_id))
        scandata = q.fetchall()
        tmp = scandata[scan_data_it]
        center = float(tmp[0])
        width = float(tmp[1])
        scan_start = int(tmp[2])
        scan_end = int(tmp[3])
        next_scan_switch = scan_end
        # Check if we already are in the new scan (if there is no
        # gap between scans, happens for diaPASEF):
        if next_scan_switch == num_scans:
            next_scan_switch = scan_start
            in_scan = True
        mslevel = 2
    else:
        # MS1
        pass

    if verbose:
        print("Frame", frame_id, "mslevel", mslevel, msms,
              "contains nr scans:", num_scans, "and nr pasef scans",
              len(scandata) if scandata else -1)
        print("Scandata for PASEF:", scandata)

    # Disabling the switch scan makes the loop below treat the frame like a
    # frame without isolation windows.
    if keep_frames:
        next_scan_switch = -1

    # Get the mapping of the ion mobility axis
    scan_number_axis = np.arange(num_scans, dtype=np.float64)
    ook0_axis = td.scanNumToOneOverK0(frame_id, scan_number_axis)

    nr_scans_created = 0
    allmz = []
    allint = []
    allim = []

    # Traverse in reversed order to get low ion mobilities first (and high scan times first)
    for k, scan in reversed(
            list(enumerate(td.readScans(frame_id, 0, num_scans)))):
        index = np.array(scan[0], dtype=np.float64)
        mz = td.indexToMz(frame_id, index)
        intens = scan[1]
        drift_time = ook0_axis[k]
        if compressFrame:
            allmz.append(mz)
            allint.append(intens)
            # One mobility value per peak of this scan.
            allim.append([drift_time for dr_time in mz])

            # We have multiple MS2 spectra in each frame, we need to separate
            # them based on the information from PasefFrameMsMsInfo which
            # indicates the switch scan and the isolation parameter for each
            # quadrupole isolation.
            if next_scan_switch >= 0 and next_scan_switch >= k:

                if verbose:
                    print("Switch to new scan at", k, "/", next_scan_switch,
                          "store scan of size", len(allmz))

                if in_scan:
                    # Only store spectrum when actually inside a scan, skip the "between scan" pushes
                    sframe = handle_compressed_frame(allmz, allint, allim,
                                                     mslevel, time, center,
                                                     width)
                    sframe.setNativeID("frame=%s_scan=%s" %
                                       (frame_id, next_scan_switch))
                    exp.consumeSpectrum(sframe)
                    nr_scans_created += 1

                allmz = []
                allint = []
                allim = []
                if k == 0:
                    continue

                if in_scan:
                    scan_data_it += 1

                    if scan_data_it >= len(scandata):
                        if verbose:
                            print(
                                "LEFT the last scan, nothing else to do here")
                        # -2 disables further switches and also keeps the
                        # whole-frame branch after the loop from firing.
                        next_scan_switch = -2
                        continue

                    # Already prepare for next scan
                    tmp = scandata[scan_data_it]
                    center = float(tmp[0])
                    width = float(tmp[1])
                    scan_start = int(tmp[2])
                    scan_end = int(tmp[3])

                    in_scan = False
                    next_scan_switch = scan_end
                    if verbose:
                        print("LEAVING scan now, next scan starts at:",
                              next_scan_switch)

                    # Check if we already are in the new scan (if there is no
                    # gap between scans, happens for diaPASEF):
                    if k == next_scan_switch:
                        if verbose:
                            print("STARTING new scan immediately at", k, ":",
                                  center - width / 2.0, center + width / 2.0,
                                  "scan will end at:", next_scan_switch)
                        next_scan_switch = scan_start
                        in_scan = True
                else:
                    in_scan = True
                    next_scan_switch = scan_start
                    if verbose:
                        print("STARTING new scan at", k, ":",
                              center - width / 2.0, center + width / 2.0,
                              "scan will end at:", next_scan_switch)

            continue

        # Store data in OpenMS Spectrum file -> each TOF push is an individual
        # spectrum and we store the ion mobility in the precursor. The frame
        # can be reconstructed by grouping all spectra with the same RT.
        s = pyopenms.MSSpectrum()
        s.setMSLevel(mslevel)
        s.set_peaks((mz, intens))
        s.setRT(time)
        s.setNativeID("frame=%s spec %s" % (frame_id, k))
        p = pyopenms.Precursor()
        p.setDriftTime(drift_time)
        if mslevel == 2:
            p.setMZ(center)
            p.setIsolationWindowUpperOffset(width / 2.0)
            p.setIsolationWindowLowerOffset(width / 2.0)
        s.setPrecursors([p])
        exp.consumeSpectrum(s)

    # Store data compressed for cases where the whole frame represents a single spectrum (e.g. MS1)
    if compressFrame and next_scan_switch == -1:
        sframe = handle_compressed_frame(allmz, allint, allim, mslevel, time,
                                         center, width)
        sframe.setNativeID("frame=%s" % frame_id)
        exp.consumeSpectrum(sframe)
        nr_scans_created += 1

    # "One spectrum per isolation window" only holds when the windows were
    # actually split out. With compressFrame=False nothing is counted, and
    # with keep_frames=True exactly one spectrum is written on purpose, so
    # checking in those modes would reject every valid PASEF frame.
    if (compressFrame and not keep_frames and scandata is not None
            and nr_scans_created != len(scandata)):
        raise Exception("Something went quite wrong here, we expected",
                        len(scandata), "scans, but only created",
                        nr_scans_created)
def store_frame(frame_id, td, conn, exp, verbose=False, compressFrame=True): """ Store a single frame as an individual mzML file Note that there are two ways to store the data: (i) Multiple spectra per frame (for visualization), compressFrame is False. This is the easiest way to visualize and process the data but involves a few hacks, namely storing the IM axis as the RT of each spectrum. (ii) One spectrum per frame, compressFrame is True. This puts all peaks into a single spectrum (while storing the IM data in an extra array). This is more efficient for storage and allows analysis that is ignorant of the IM dimension. """ # Get a projected mass spectrum: q = conn.execute("SELECT NumScans, Time, Polarity, MsMsType FROM Frames WHERE Id={0}".format(frame_id)) tmp = q.fetchone() num_scans = tmp[0] time = tmp[1] pol = tmp[2] msms = int(tmp[3]) center = -1 width = -1 mslevel = 1 if msms == 2: q = conn.execute("SELECT TriggerMass, IsolationWidth, PrecursorCharge, CollisionEnergy FROM FrameMsMsInfo WHERE Frame={0}".format(frame_id)) tmp = q.fetchone() center = float(tmp[0]) width = float(tmp[1]) mslevel = 2 if verbose: print "mslevel", mslevel, msms # Get the mapping of the ion mobility axis scan_number_axis = np.arange(num_scans, dtype=np.float64) ook0_axis = td.scanNumToOneOverK0(frame_id, scan_number_axis) allmz = [] allint = [] allim = [] # Traverse in reversed order to get low ion mobilities first for k, scan in reversed(list(enumerate(td.readScans(frame_id, 0, num_scans)))): index = np.array(scan[0], dtype=np.float64) mz = td.indexToMz(frame_id, index) intens = scan[1] drift_time = ook0_axis [k] if compressFrame: allmz.append(mz) allint.append(intens) allim.append([drift_time for k in mz]) continue # Store data in OpenMS Spectrum file -> each TOF push is an individual # spectrum and we store the ion mobility in the precursor. The frame # can be reconstructed by grouping all spectra with the same RT. 
s = pyopenms.MSSpectrum() s.setMSLevel(mslevel) s.set_peaks( (mz, intens) ) s.setRT(time) p = pyopenms.Precursor() p.setDriftTime(drift_time) if msms == 2: p.setMZ(center) p.setIsolationWindowUpperOffset(width / 2.0) p.setIsolationWindowLowerOffset(width / 2.0) s.setPrecursors([p]) exp.consumeSpectrum(s) if compressFrame: mz = np.concatenate(allmz) intens = np.concatenate(allint) ims = np.concatenate(allim) # print " leeen", len(mz), len(intens) fda = pyopenms.FloatDataArray() fda.setName("Ion Mobility") fda.resize(len(mz)) for k,val in enumerate(ims): fda[k] = val sframe = pyopenms.MSSpectrum() sframe.setMSLevel(mslevel) sframe.setRT(time) sframe.setFloatDataArrays([fda]) p = pyopenms.Precursor() if msms == 2: p.setMZ(center) p.setIsolationWindowUpperOffset(width / 2.0) p.setIsolationWindowLowerOffset(width / 2.0) sframe.setPrecursors([p]) sframe.set_peaks( (mz, intens) ) sframe.sortByPosition() exp.consumeSpectrum(sframe)
def load(self, ifname: str, peakMap: pyopenms.MSExperiment):
    """Parse an MGF-style text file and store its spectra in ``peakMap``.

    Every ``BEGIN IONS`` ... ``END IONS`` section becomes one MS level 2
    ``pyopenms.MSSpectrum``. Header lines of the form ``KEY=VALUE`` are
    evaluated for these keys:

    * ``TITLE`` - comma separated fields; a field starting with ``scan=``
      or ``scan_`` followed by digits sets the native ID ``scan=<digits>``
    * ``PEPMASS`` - precursor m/z, optionally followed by its intensity
    * ``CHARGE`` - a single digit with an optional ``+`` / ``-`` suffix
    * ``RTINSECONDS`` - retention time

    All other header lines and blank lines are ignored. Peak lines contain
    ``m/z intensity`` separated by one space. A section without peak lines
    yields a spectrum without peaks.

    Parameters
    ----------
    ifname : str
        Path of the file to read.
    peakMap : pyopenms.MSExperiment
        Experiment that receives the parsed spectra via ``setSpectra``.

    Raises
    ------
    ValueError
        If a numeric field cannot be converted or a ``CHARGE`` value has
        an unsupported format.
    AssertionError
        If ``PEPMASS`` has more than two fields or a peak line does not
        have exactly two fields.
    """
    # The context manager releases the file handle even if reading fails.
    with open(ifname, 'r') as inF:
        lines = inF.read().splitlines()
    curLine = 0
    nLines = len(lines)

    # generate spectrum list
    spectraList = list()
    while curLine < nLines:
        if lines[curLine] == 'BEGIN IONS':
            spectrum = pyopenms.MSSpectrum()
            spectrum.setMSLevel(2)
            precursor = pyopenms.Precursor()

            curLine += 1
            while curLine < nLines:
                line = lines[curLine]
                if line == 'END IONS':
                    # Section without peak lines: keep the empty spectrum.
                    break
                if not line:
                    # Blank line; nothing to index into.
                    curLine += 1
                    continue

                if line[0].isalpha():
                    match = re.search('^([A-Z]+)=(.+)$', line)
                    # Lines that do not look like KEY=VALUE (e.g. keys with
                    # digits) are skipped just like unknown keys.
                    key = match.group(1) if match else None
                    if key == 'TITLE':
                        for s in match.group(2).split(','):
                            scanMatch = re.search('^scan[_=]([0-9]+)', s)
                            if scanMatch:
                                spectrum.setNativeID('scan={}'.format(
                                    scanMatch.group(1)))
                    elif key == 'PEPMASS':
                        preMZ = [
                            float(x) for x in match.group(2).split(' ')
                        ]
                        assert (len(preMZ) <= 2)
                        precursor.setMZ(preMZ[0])
                        if len(preMZ) > 1:
                            precursor.setIntensity(preMZ[1])
                    elif key == 'CHARGE':
                        chargeMatch = re.search('^([0-9])[+-]{0,1}$',
                                                match.group(2))
                        if chargeMatch is None:
                            raise ValueError(
                                'Unsupported CHARGE value {!r} in line {}'.
                                format(match.group(2), curLine + 1))
                        precursor.setCharge(int(chargeMatch.group(1)))
                    elif key == 'RTINSECONDS':
                        spectrum.setRT(float(match.group(2)))
                elif line[0].isnumeric():
                    # First peak line: everything up to END IONS is peaks.
                    while curLine < nLines and lines[curLine] != 'END IONS':
                        if lines[curLine]:
                            ion = [
                                float(x) for x in lines[curLine].split(' ')
                            ]
                            assert (len(ion) == 2)
                            ion_temp = pyopenms.Peak1D()
                            ion_temp.setMZ(ion[0])
                            ion_temp.setIntensity(ion[1])
                            spectrum.push_back(ion_temp)
                        curLine += 1
                    break
                curLine += 1

            spectrum.setPrecursors([precursor])
            spectraList.append(spectrum)
        curLine += 1

    peakMap.setSpectra(spectraList)