def BillerBiemann(im, points=3, scans=1):
    """
    @summary: BillerBiemann Deconvolution

        Deconvolution based on the algorithm of Biller and Biemann (1974).
        One Peak is created for every row of the maxima matrix whose
        summed intensity is greater than zero.

    @param im: An IntensityMatrix object
    @type im: pyms.GCMS.Class.IntensityMatrix
    @param points: Peak if maxima over 'points' number of scans (Default 3)
    @type points: IntType
    @param scans: To compensate for spectra skewing, peaks from 'scans'
        scans are combined (Default 1).
    @type scans: IntType

    @return: List of Peak objects
    @rtype: ListType

    @author: Andrew Isaac
    """

    rt_list = im.get_time_list()
    mass_list = im.get_mass_list()
    maxima_im = get_maxima_matrix(im, points, scans)

    peak_list = []
    for row, intensities in enumerate(maxima_im):
        # rows without any positive total intensity yield no peak
        if not sum(intensities) > 0:
            continue

        peak = Peak(rt_list[row], MassSpectrum(mass_list, intensities))
        peak.set_pt_bounds([0, row, 0])  # store IM index for convenience
        peak_list.append(peak)

    return peak_list
def BillerBiemann(im, points=3, scans=1):
    """
    @summary: BillerBiemann Deconvolution

        Deconvolution based on the algorithm of Biller and Biemann (1974).
        Each row of the maxima matrix with a positive intensity sum is
        turned into a Peak at the retention time of that row.

    @param im: An IntensityMatrix object
    @type im: pyms.GCMS.Class.IntensityMatrix
    @param points: Peak if maxima over 'points' number of scans (Default 3)
    @type points: IntType
    @param scans: To compensate for spectra skewing, peaks from 'scans'
        scans are combined (Default 1).
    @type scans: IntType

    @return: List of Peak objects
    @rtype: ListType

    @author: Andrew Isaac
    """

    rt_list = im.get_time_list()
    mass_list = im.get_mass_list()
    maxima_im = get_maxima_matrix(im, points, scans)

    peak_list = []
    for row, maxima in enumerate(maxima_im):
        has_signal = sum(maxima) > 0
        if has_signal:
            spectrum = MassSpectrum(mass_list, maxima)
            found = Peak(rt_list[row], spectrum)
            # store IM index for convenience
            found.set_pt_bounds([0, row, 0])
            peak_list.append(found)

    return peak_list
def BillerBiemann(im: IntensityMatrix, points: int = 3, scans: int = 1) -> List[Peak]:
    """
    Deconvolution based on the algorithm of Biller and Biemann (1974)

    A peak is reported for every row of the maxima matrix whose summed
    intensity is greater than zero.

    :param im: An :class:`~pyms.IntensityMatrix.IntensityMatrix` object
    :type im: ~pyms.IntensityMatrix.IntensityMatrix
    :param points: Number of scans over which to consider a maxima to be a peak. Default ``3``
    :type points: int, optional
    :param scans: Number of scans to combine peaks from to compensate for spectra skewing. Default ``1``
    :type scans: int, optional

    :return: List of detected peaks
    :rtype: List[:class:`pyms.Peak.Class.Peak`]

    :raises TypeError: if ``im`` is not an IntensityMatrix, or ``points``
        or ``scans`` is not an integer

    :authors: Andrew Isaac, Dominic Davis-Foster (type assertions)
    """

    # Validate the arguments in declaration order; the first failure wins.
    type_checks = (
        (im, IntensityMatrix, "'im' must be an IntensityMatrix object"),
        (points, int, "'points' must be an integer"),
        (scans, int, "'scans' must be an integer"),
    )
    for value, expected_type, message in type_checks:
        if not isinstance(value, expected_type):
            raise TypeError(message)

    rt_list = im.time_list
    mass_list = im.mass_list
    maxima_im = get_maxima_matrix(im, points, scans)

    peak_list = []
    for row, intensities in enumerate(maxima_im):
        # rows without any positive total intensity yield no peak
        if not sum(intensities) > 0:
            continue

        peak = Peak(rt_list[row], MassSpectrum(mass_list, intensities))
        peak.bounds = [0, row, 0]  # store IM index for convenience
        peak_list.append(peak)

    return peak_list
def BillerBiemann(im: BaseIntensityMatrix, points: int = 3, scans: int = 1) -> List[Peak]:
    """
    Deconvolution based on the algorithm of Biller and Biemann (1974).

    A peak is reported for every row of the maxima matrix whose summed
    intensity is greater than zero.

    :param im: The intensity matrix to find peaks in.
    :param points: Number of scans over which to consider a maxima to be a peak.
    :param scans: Number of scans to combine peaks from to compensate for spectra skewing.

    :return: List of detected peaks

    :raises TypeError: if ``im`` is not a BaseIntensityMatrix, or ``points``
        or ``scans`` is not an integer

    :authors: Andrew Isaac, Dominic Davis-Foster (type assertions)
    """

    # Validate the arguments in declaration order; the first failure wins.
    type_checks = (
        (im, BaseIntensityMatrix, "'im' must be an IntensityMatrix object"),
        (points, int, "'points' must be an integer"),
        (scans, int, "'scans' must be an integer"),
    )
    for value, expected_type, message in type_checks:
        if not isinstance(value, expected_type):
            raise TypeError(message)

    rt_list = im.time_list
    mass_list = im.mass_list
    maxima_im = get_maxima_matrix(im, points, scans)

    peak_list = []
    for row_idx, row in enumerate(maxima_im):
        # rows without any positive total intensity yield no peak
        if not sum(row) > 0:
            continue

        peak = Peak(rt_list[row_idx], MassSpectrum(mass_list, row))
        # store IM index for convenience
        # TODO: can the bounds be determined from the intensity matrix?
        peak.bounds = (0, row_idx, 0)
        peak_list.append(peak)

    return peak_list
def composite_peak(peak_list, minutes=False):
    """
    @summary: Create a peak that consists of a composite spectrum from all
        spectra in the list of peaks.

        Every spectrum is rescaled so that its largest intensity is 100
        before the spectra are averaged. The retention time of the
        composite is the mean of the individual retention times. Entries
        of 'peak_list' that are None are ignored.

    @param peak_list: A list of peak objects
    @type peak_list: ListType
    @param minutes: Return retention time as minutes
    @type minutes: BooleanType

    @return: Peak Object with combined mass spectra of 'peak_list', or
        None if 'peak_list' holds no peaks
    @rtype: pyms.Peak.Class.Peak

    @author: Andrew Isaac
    """

    count = 0
    avg_rt = 0
    avg_spec = None
    mass_list = None

    for peak in peak_list:
        if peak is None:
            continue

        ms = peak.get_mass_spectrum()
        spec = numpy.array(ms.mass_spec, dtype='d')

        # the first real peak fixes the length and mass axis of the result
        if avg_spec is None:
            avg_spec = numpy.zeros(len(ms.mass_spec), dtype='d')
            mass_list = ms.mass_list

        # scale all intensities to [0,100]
        max_spec = max(spec) / 100.0
        if max_spec > 0:
            spec = spec / max_spec
        else:
            spec = spec * 0

        avg_rt += peak.get_rt()
        avg_spec += spec
        count += 1

    if count == 0:
        return None

    avg_rt = avg_rt / count
    if minutes:
        # get_rt() reports seconds, while Peak(rt, ms, minutes=True) takes
        # its 'rt' argument in minutes; convert so the composite peak is
        # not created with a retention time that is 60 times too large.
        avg_rt = avg_rt / 60.0

    avg_spec = avg_spec / count
    avg_spec = avg_spec.tolist()  # list more compact than ndarray
    new_ms = MassSpectrum(mass_list, avg_spec)

    return Peak(avg_rt, new_ms, minutes)
def fill_peaks(data, peak_list, D, minutes=False):
    """
    @summary: Gets the best matching Retention Time and spectra from 'data'
        for each peak in the peak list.

        For every peak the scans of 'data' inside a retention time window
        around the peak are scored by the cosine similarity between the
        scan and the peak spectrum, multiplied by a Gaussian weight on the
        retention time difference. The best scoring scan becomes the new
        peak. The Gaussian is narrowed when a neighbouring peak (the
        previous or next entry of 'peak_list') is close, so that the
        weight at the nearest neighbour is 0.5. Scans or spectra with no
        intensity at all score zero.

    @param data: A data IntensityMatrix that has the same mass range as
        the peaks in the peak list
    @type data: pyms.GCMS.Class.IntensityMatrix
    @param peak_list: A list of peak objects
    @type peak_list: ListType
    @param D: Peak width standard deviation in seconds. Determines search
        window width.
    @type D: FloatType
    @param minutes: Return retention time as minutes
    @type minutes: BooleanType

    @return: List of Peak Objects
    @rtype: ListType

    @author: Andrew Isaac
    """

    # Test for best match in range where RT weight is greater than _TOL
    _TOL = 0.001
    cutoff = D * math.sqrt(-2.0 * math.log(_TOL))

    # Penalise for neighboring peaks
    # reweight so RT weight at nearest peak is _PEN
    _PEN = 0.5
    pen_scale = math.sqrt(-2.0 * math.log(_PEN))

    datamat = data.get_matrix_list()
    mass_list = data.get_mass_list()
    datatimes = data.get_time_list()
    minrt = min(datatimes)
    maxrt = max(datatimes)

    last_ii = len(peak_list) - 1
    new_peak_list = []

    for ii, peak in enumerate(peak_list):
        spec = numpy.array(peak.get_mass_spectrum().mass_spec, dtype='d')
        rt = peak.get_rt()
        spec_SS = numpy.sum(spec**2, axis=0)

        # Distances to the neighbours that actually exist. The first peak
        # has no left neighbour and the last peak no right neighbour;
        # a missing side must not contribute a distance of its own.
        gaps = []
        if ii > 0:
            gaps.append(abs(rt - peak_list[ii - 1].get_rt()))
        if ii < last_ii:
            gaps.append(abs(rt - peak_list[ii + 1].get_rt()))

        # adjust weighting for neighbours
        Dclose = D
        if gaps:
            neighbour_D = min(gaps) / pen_scale
            # a neighbour at exactly the same RT gives no usable width
            if neighbour_D > 0:
                Dclose = min(D, neighbour_D)

        # Get bounds, clipped to the time range covered by 'data'
        rtlow = max(rt - cutoff, minrt)
        lowii = data.get_index_at_time(rtlow)
        rtup = min(rt + cutoff, maxrt)
        upii = data.get_index_at_time(rtup)

        # Get sub matrix of scans in bounds
        submat = numpy.array(datamat[lowii:upii + 1], dtype='d')
        subrts = numpy.array(datatimes[lowii:upii + 1], dtype='d')
        submat_SS = numpy.sum(submat**2, axis=1)

        # dot product of every scan (row) with the peak spectrum,
        # flattened back to a 1-D array
        spec_col = numpy.transpose([spec])
        toparr = numpy.dot(submat, spec_col).ravel()
        botarr = numpy.sqrt(spec_SS * submat_SS)

        # scaled dot product of each scan; a zero norm would give 0/0 = NaN
        # and argmax() would then select that empty scan, so score it 0
        cosarr = numpy.zeros(len(botarr), dtype='d')
        nonzero = botarr > 0
        cosarr[nonzero] = toparr[nonzero] / botarr[nonzero]

        # RT weight of each scan
        rtimearr = numpy.exp(-((subrts - rt) / float(Dclose))**2 / 2.0)

        # weighted scores
        scorearr = cosarr * rtimearr

        # index of best score
        best_ii = scorearr.argmax()

        # Add new peak
        bestrt = subrts[best_ii]
        bestspec = submat[best_ii].tolist()
        ms = MassSpectrum(mass_list, bestspec)
        new_peak_list.append(Peak(bestrt, ms, minutes))

    return new_peak_list
# stdlib import os from copy import copy, deepcopy from timeit import timeit # pyms from pyms.GCMS.IO.JCAMP import JCAMP_reader from pyms.IntensityMatrix import build_intensity_matrix_i from pyms.Peak.Class import Peak data = JCAMP_reader(os.path.join("data", "ELEY_1_SUBTRACT.JDX")) im_i = build_intensity_matrix_i(data) scan_i = im_i.get_index_at_time(31.17 * 60.0) ms = im_i.get_ms_at_index(scan_i) peak = Peak(12.34, ms) def copy_peak(): return copy(peak) def deepcopy_peak(): return deepcopy(peak) print(timeit(copy_peak)) print(timeit(deepcopy_peak)) def copy_ms():
def composite_peak(peak_list, minutes=False):
    """
    @summary: Create a peak that consists of a composite spectrum from all
        spectra in the list of peaks

        When more than three peaks are given, peaks whose retention time
        is flagged by median_outliers() are marked (their 'isoutlier'
        attribute is set to True on the peak objects passed in) and left
        out of the composite. Every remaining spectrum is rescaled so that
        its largest intensity is 100 before the spectra are averaged.
        Entries of 'peak_list' that are None are ignored throughout.

    @param peak_list: A list of peak objects
    @type peak_list: ListType
    @param minutes: Return retention time as minutes
    @type minutes: BooleanType

    @return: Peak Object with combined mass spectra of 'peak_list', or
        None if no peak contributes to the composite
    @rtype: pyms.Peak.Class.Peak

    @author: Andrew Isaac
    @author: David Kainer
    """

    # None entries carry no data; drop them once so that the outlier
    # pre-pass cannot fail on them with an AttributeError.
    valid_peaks = [peak for peak in peak_list if peak is not None]

    # DK: first mark peaks in the list that are outliers by RT, but only if
    # there are more than 3 peaks in the list
    if len(valid_peaks) > 3:
        rts = [peak.get_rt() for peak in valid_peaks]
        for peak, flagged in zip(valid_peaks, median_outliers(rts)):
            if flagged:
                peak.isoutlier = True

    count = 0
    avg_rt = 0
    avg_spec = None
    mass_list = None

    # DK: the average RT and average mass spec for the compo peak is now
    # calculated from peaks that are NOT outliers. This should improve the
    # ability to order peaks and figure out badly aligned entries
    for peak in valid_peaks:
        if peak.check_outlier():
            continue

        ms = peak.get_mass_spectrum()
        spec = numpy.array(ms.mass_spec, dtype='d')

        # the first contributing peak fixes the length and mass axis
        if avg_spec is None:
            avg_spec = numpy.zeros(len(ms.mass_spec), dtype='d')
            mass_list = ms.mass_list

        # scale all intensities to [0,100]
        max_spec = max(spec) / 100.0
        if max_spec > 0:
            spec = spec / max_spec
        else:
            spec = spec * 0

        avg_rt += peak.get_rt()
        avg_spec += spec
        count += 1

    if count == 0:
        return None

    avg_rt = avg_rt / count
    if minutes:
        # the averaged value is converted before being handed to
        # Peak(..., minutes=True)
        avg_rt = avg_rt / 60.0

    avg_spec = avg_spec / count
    avg_spec = avg_spec.tolist()  # list more compact than ndarray
    new_ms = MassSpectrum(mass_list, avg_spec)

    return Peak(avg_rt, new_ms, minutes)
from pyms.GCMS.IO.ANDI.Function import ANDI_reader
from pyms.Peak.Class import Peak

# NOTE(review): build_intensity_matrix_i is used below but not imported in
# this fragment; presumably it is imported elsewhere -- confirm.

# read file and convert to intensity matrix
andi_file = "/x/PyMS/data/gc01_0812_066.cdf"
data = ANDI_reader(andi_file)
im = build_intensity_matrix_i(data)

# Get the scan of a known TIC peak (at RT 31.17 minutes)
# get the index of the scan nearest to 31.17 minutes (converted to seconds)
scan_i = im.get_index_at_time(31.17*60.0)
# get the MassSpectrum Object
ms = im.get_ms_at_index(scan_i)

# create a Peak object
peak = Peak(31.17, ms, minutes=True)
# print(...) with a single argument gives the same output on Python 2 and 3
print(peak.get_UID())

# modify the range and null TMS ions
peak.crop_mass(60, 450)
peak.null_mass(73)
peak.null_mass(147)
# New UID after modification
print(peak.get_UID())

# Create another peak from an isomer of the first peak (at RT 31.44 minutes)
scan_i = im.get_index_at_time(31.44*60.0)
ms = im.get_ms_at_index(scan_i)
# Extract the |MassSpectrum| at 31.17 minutes in this example. # In[4]: index = im.get_index_at_time(31.17*60.0) ms = im.get_ms_at_index(index) # Create a |Peak| object for the given retention time. # In[5]: from pyms.Peak.Class import Peak peak = Peak(31.17, ms, minutes=True) # By default the retention time is assumed to be in seconds. The parameter # ``minutes`` can be set to ``True`` if the retention time is given in minutes. # Internally, PyMassSpec stores retention times in seconds, so the ``minutes`` # parameter ensures the input and output of the retention time are in the same # units. # # ## Peak Object properties # # The retention time of the peak, in seconds, can be returned with |pyms.Peak.Class.Peak.rt|. # The mass spectrum can be returned with |pyms.Peak.Class.Peak.mass_spectrum|. # # The |Peak| object constructs a unique identification (UID) based on the spectrum # and retention time. This helps in managing lists of peaks (covered in the next
from pyms.GCMS.IO.ANDI.Function import ANDI_reader
from pyms.Peak.Class import Peak

# NOTE(review): build_intensity_matrix_i is used below but not imported in
# this fragment; presumably it is imported elsewhere -- confirm.

# read file and convert to intensity matrix
andi_file = "/x/PyMS/data/gc01_0812_066.cdf"
data = ANDI_reader(andi_file)
im = build_intensity_matrix_i(data)

# Get the scan of a known TIC peak (at RT 31.17 minutes)
# get the index of the scan nearest to 31.17 minutes (converted to seconds)
scan_i = im.get_index_at_time(31.17 * 60.0)
# get the MassSpectrum Object
ms = im.get_ms_at_index(scan_i)

# create a Peak object
peak = Peak(31.17, ms, minutes=True)

# Get the retention time (in seconds)
# print(...) with a single argument gives the same output on Python 2 and 3
print(peak.get_rt())

# Get the peaks unique ID
# Consists of the two most abundant ions and their ratio,
# and the retention time (in the format set by minutes=True or False)
print(peak.get_UID())

# Create another peak from an isomer of the first peak (at RT 31.44 minutes)
scan_i = im.get_index_at_time(31.44 * 60.0)
ms = im.get_ms_at_index(scan_i)
peak2 = Peak(31.44, ms, minutes=True)
print(peak2.get_UID())
def peak(im_i):
    """
    Build a Peak from the mass spectrum of one scan of 'im_i'.

    The scan is the one whose index 'im_i' returns for the time
    31.17 * 60.0; the Peak itself is created with a retention time
    of 12.34.

    @param im_i: Object providing get_index_at_time() and
        get_ms_at_index()

    @return: The newly created Peak
    """

    spectrum = im_i.get_ms_at_index(im_i.get_index_at_time(31.17 * 60.0))
    return Peak(12.34, spectrum)
from pyms.GCMS.IO.ANDI.Function import ANDI_reader
from pyms.Peak.Class import Peak

# NOTE(review): build_intensity_matrix_i is used below but not imported in
# this fragment; presumably it is imported elsewhere -- confirm.

# read file and convert to intensity matrix
andi_file = "/x/PyMS/data/gc01_0812_066.cdf"
data = ANDI_reader(andi_file)
im = build_intensity_matrix_i(data)

# Get the scan of a known TIC peak (at RT 31.17 minutes)
# get the index of the scan nearest to 31.17 minutes (converted to seconds)
scan_i = im.get_index_at_time(31.17*60.0)
# get the MassSpectrum Object
ms = im.get_ms_at_index(scan_i)

# create a Peak object
peak = Peak(31.17, ms, minutes=True)

# Get the retention time (in seconds)
# print(...) with a single argument gives the same output on Python 2 and 3
print(peak.get_rt())

# Get the peaks unique ID
# Consists of the two most abundant ions and their ratio,
# and the retention time (in the format set by minutes=True or False)
print(peak.get_UID())

# Create another peak from an isomer of the first peak (at RT 31.44 minutes)
scan_i = im.get_index_at_time(31.44*60.0)
ms = im.get_ms_at_index(scan_i)
peak2 = Peak(31.44, ms, minutes=True)
print(peak2.get_UID())
from pyms.GCMS.IO.ANDI import ANDI_reader
from pyms.Peak.Class import Peak

# NOTE(review): build_intensity_matrix_i is used below but not imported in
# this fragment; presumably it is imported elsewhere -- confirm.

# Load the ANDI-MS file and turn it into an intensity matrix
andi_file = "data/gc01_0812_066.cdf"
data = ANDI_reader(andi_file)
im = build_intensity_matrix_i(data)

# First peak: a known TIC peak at RT 31.17 minutes.
# The intensity matrix is queried in seconds, hence the factor of 60.
scan_i = im.get_index_at_time(31.17*60.0)
# mass spectrum of that scan
ms = im.get_ms_at_index(scan_i)

# The retention time is passed in minutes, as flagged by minutes=True
peak = Peak(31.17, ms, minutes=True)

# Retention time of the peak (in seconds)
print(peak.rt)

# Unique ID of the peak:
# the two most abundant ions and their ratio, followed by the
# retention time (in the format set by minutes=True or False)
print(peak.UID)

# Second peak: an isomer of the first peak, at RT 31.44 minutes
scan_i = im.get_index_at_time(31.44*60.0)
ms = im.get_ms_at_index(scan_i)
peak2 = Peak(31.44, ms, minutes=True)
print(peak2.UID)
from pyms.GCMS.IO.ANDI.Function import ANDI_reader
from pyms.Peak.Class import Peak

# NOTE(review): build_intensity_matrix_i is used below but not imported in
# this fragment; presumably it is imported elsewhere -- confirm.

# read file and convert to intensity matrix
andi_file = "/x/PyMS/data/gc01_0812_066.cdf"
data = ANDI_reader(andi_file)
im = build_intensity_matrix_i(data)

# Get the scan of a known TIC peak (at RT 31.17 minutes)
# get the index of the scan nearest to 31.17 minutes (converted to seconds)
scan_i = im.get_index_at_time(31.17 * 60.0)
# get the MassSpectrum Object
ms = im.get_ms_at_index(scan_i)

# create a Peak object
peak = Peak(31.17, ms, minutes=True)
# print(...) with a single argument gives the same output on Python 2 and 3
print(peak.get_UID())

# modify the range and null TMS ions
peak.crop_mass(60, 450)
peak.null_mass(73)
peak.null_mass(147)
# New UID after modification
print(peak.get_UID())

# Create another peak from an isomer of the first peak (at RT 31.44 minutes)
scan_i = im.get_index_at_time(31.44 * 60.0)
ms = im.get_ms_at_index(scan_i)