def BillerBiemann(im, points=3, scans=1):
    """
    @summary: BillerBiemann Deconvolution

        Deconvolution based on the algorithm of Biller and Biemann (1974).
        One peak is produced for every scan whose row in the maxima
        matrix has a positive total intensity.

    @param im: An IntensityMatrix object
    @type im: pyms.GCMS.Class.IntensityMatrix
    @param points: Peak if maxima over 'points' number of scans (Default 3)
    @type points: IntType
    @param scans: To compensate for spectra skewing, peaks from 'scans'
        scans are combined (Default 1).
    @type scans: IntType

    @return: List of Peak objects
    @rtype: ListType

    @author: Andrew Isaac
    """

    retention_times = im.get_time_list()
    masses = im.get_mass_list()
    maxima = get_maxima_matrix(im, points, scans)

    peaks = []
    for scan_index in range(len(maxima)):
        intensities = maxima[scan_index]

        # Scans without any local maximum carry no peak
        if not (sum(intensities) > 0):
            continue

        scan_rt = retention_times[scan_index]
        spectrum = MassSpectrum(masses, intensities)
        found = Peak(scan_rt, spectrum)
        # store IM index for convenience
        found.set_pt_bounds([0, scan_index, 0])
        peaks.append(found)

    return peaks
def normalize_mass_spec(
        mass_spec: MassSpectrum,
        relative_to: Optional[float] = None,
        inplace: bool = False,
        max_intensity: float = 100,
        ) -> MassSpectrum:
    """
    Normalize the intensities in the given Mass Spectrum to values between
    0 and ``max_intensity``.

    A spectrum with no signal (empty, or with a reference intensity of zero)
    is normalized to all zeros instead of raising an error.

    :param mass_spec: The Mass Spectrum to normalize
    :type mass_spec: :class:`pyms.Spectrum.MassSpectrum`
    :param relative_to: The largest intensity in the original data set.
        If not None the intensities are computed relative to this value.
        If None the value is calculated from the mass spectrum.
        This can be useful when normalizing several mass spectra to each other.
    :type relative_to: int or float
    :param inplace: Whether the normalization should be applied to the
        :class:`~pyms.Spectrum.MassSpectrum` object given, or to a copy
        (default behaviour).
    :type inplace: bool, optional.
    :param max_intensity: The maximum intensity in the normalized spectrum.
        If an integer the normalized intensities are rounded to integers.
        The default is the integer ``100``, so by default the result is
        rounded; pass ``100.0`` to keep fractional intensities.
    :type max_intensity: int, float

    :return: The normalized mass spectrum. This is ``mass_spec`` itself when
        ``inplace`` is true, otherwise a new object.
    :rtype: :class:`pyms.Spectrum.MassSpectrum`
    """

    intensities = mass_spec.intensity_list

    if relative_to is None:
        # default=0 lets an empty spectrum fall through to the zero branch
        relative_to = max(intensities, default=0)

    divisor = float(relative_to)

    if divisor == 0:
        # Nothing to scale against: avoid dividing by zero and report no signal
        normalized_intensity_list = [0.0 for _ in intensities]
    else:
        normalized_intensity_list = [
                (x / divisor) * max_intensity for x in intensities
                ]

    if isinstance(max_intensity, int):
        normalized_intensity_list = [round(x) for x in normalized_intensity_list]

    if inplace:
        mass_spec.intensity_list = normalized_intensity_list
        return mass_spec

    return MassSpectrum(mass_spec.mass_list, normalized_intensity_list)
def composite_peak(peak_list, minutes=False):
    """
    @summary: Build a single peak whose spectrum is the average of the
        spectra of all peaks in the list. Each spectrum is first scaled
        so that its largest intensity is 100. The retention time is the
        mean retention time of the peaks. 'None' entries are skipped.

    @param peak_list: A list of peak objects
    @type peak_list: ListType
    @param minutes: Return retention time as minutes (passed on to Peak)
    @type minutes: BooleanType

    @return: Peak Object with combined mass spectra of 'peak_list', or
        None if the list holds no peaks
    @type: pyms.Peak.Class.Peak

    @author: Andrew Isaac
    """

    spectrum_sum = None
    masses = None
    rt_sum = 0
    used = 0

    for candidate in peak_list:
        if candidate is None:
            continue

        mass_spectrum = candidate.get_mass_spectrum()
        intensities = numpy.array(mass_spectrum.mass_spec, dtype='d')

        # The first real peak fixes the accumulator size and the mass axis
        if spectrum_sum is None:
            spectrum_sum = numpy.zeros(len(mass_spectrum.mass_spec), dtype='d')
            masses = mass_spectrum.mass_list

        # scale all intensities to [0,100]
        scale = max(intensities) / 100.0
        if scale > 0:
            intensities = intensities / scale
        else:
            intensities = intensities * 0

        rt_sum += candidate.get_rt()
        spectrum_sum += intensities
        used += 1

    if used == 0:
        return None

    mean_rt = rt_sum / used
    # list more compact than ndarray
    mean_spec = (spectrum_sum / used).tolist()
    return Peak(mean_rt, MassSpectrum(masses, mean_spec), minutes)
def fill_peaks(data, peak_list, D, minutes=False):
    """
    @summary: Gets the best matching Retention Time and spectra from 'data'
        for each peak in the peak list.

        For every peak, the scans of 'data' inside a retention time window
        around the peak are scored by the cosine similarity between the
        scan and the peak spectrum, multiplied by a Gaussian retention time
        weight. The best scoring scan supplies the new peak.

        The Gaussian width is narrowed when a neighbouring peak in
        'peak_list' is close, so that the weight at the nearest neighbour
        is 0.5. Only neighbours that actually exist are considered (the
        first peak has no left neighbour, the last has no right neighbour).
        Scans or spectra with zero total intensity score 0.

    @param data: A data IntensityMatrix that has the same mass range as the
        peaks in the peak list
    @type data: pyms.GCMS.Class.IntensityMatrix
    @param peak_list: A list of peak objects
    @type peak_list: ListType
    @param D: Peak width standard deviation in seconds. Determines search
        window width.
    @type D: FloatType
    @param minutes: Return retention time as minutes (passed on to Peak)
    @type minutes: BooleanType

    @return: List of Peak Objects
    @type: ListType

    @author: Andrew Isaac
    """

    # Test for best match in range where RT weight is greater than _TOL
    _TOL = 0.001
    cutoff = D * math.sqrt(-2.0 * math.log(_TOL))

    # Penalise for neighbouring peaks:
    # reweight so RT weight at nearest peak is _PEN
    _PEN = 0.5
    pen_scale = math.sqrt(-2.0 * math.log(_PEN))

    datamat = data.get_matrix_list()
    mass_list = data.get_mass_list()
    datatimes = data.get_time_list()
    minrt = min(datatimes)
    maxrt = max(datatimes)

    last = len(peak_list) - 1
    new_peak_list = []

    for ii, peak in enumerate(peak_list):
        spec = numpy.array(peak.get_mass_spectrum().mass_spec, dtype='d')
        rt = peak.get_rt()
        spec_SS = numpy.sum(spec**2, axis=0)

        # Distances to the neighbours that really exist. Reusing a stale
        # right-hand RT for the last peak would give a distance of zero
        # and silently switch the penalty off.
        gaps = []
        if ii > 0:
            gaps.append(abs(rt - peak_list[ii - 1].rt))
        if ii < last:
            gaps.append(abs(rt - peak_list[ii + 1].rt))

        # adjust weighting for neighbours; fall back to D when there is no
        # neighbour or the nearest one coincides with this peak
        Dclose = min(gaps) / pen_scale if gaps else 0
        if Dclose > 0:
            Dclose = min(D, Dclose)
        else:
            Dclose = D

        # Get bounds, clipped to the time range of the data
        rtlow = max(rt - cutoff, minrt)
        lowii = data.get_index_at_time(rtlow)
        rtup = min(rt + cutoff, maxrt)
        upii = data.get_index_at_time(rtup)

        # Get sub matrix of scans in bounds
        submat = numpy.array(datamat[lowii:upii + 1], dtype='d')
        subrts = numpy.array(datatimes[lowii:upii + 1], dtype='d')
        submat_SS = numpy.sum(submat**2, axis=1)

        # dot product of every scan (row) with the peak spectrum
        toparr = numpy.dot(submat, spec)
        botarr = numpy.sqrt(spec_SS * submat_SS)

        # scaled dot product of each scan; a zero norm would give NaN,
        # which argmax treats as the maximum, so score those scans 0
        cosarr = numpy.divide(
            toparr, botarr, out=numpy.zeros_like(toparr), where=botarr > 0)

        # RT weight of each scan
        rtimearr = numpy.exp(-((subrts - rt) / float(Dclose))**2 / 2.0)

        # weighted scores
        scorearr = cosarr * rtimearr

        # index of best score
        best_ii = scorearr.argmax()

        # Add new peak
        bestrt = subrts[best_ii]
        bestspec = submat[best_ii].tolist()
        ms = MassSpectrum(mass_list, bestspec)
        new_peak_list.append(Peak(bestrt, ms, minutes))

    return new_peak_list
def composite_peak(peak_list, minutes=False):
    """
    @summary: Create a peak that consists of a composite spectrum from all
        spectra in the list of peaks

        When the list holds more than 3 peaks, peaks whose retention time
        is flagged by median_outliers() are marked (isoutlier = True) and
        left out of the composite. Each remaining spectrum is scaled so
        that its largest intensity is 100 and the spectra are averaged.
        The retention time is the mean of the remaining peaks.

        'None' entries in the list are ignored, both when looking for
        outliers and when averaging.

    @param peak_list: A list of peak objects (entries may be None)
    @type peak_list: ListType
    @param minutes: Return retention time as minutes. When True the mean
        retention time is divided by 60 before being handed, together
        with this flag, to Peak
    @type minutes: BooleanType

    @return: Peak Object with combined mass spectra of 'peak_list', or
        None if no peak contributed
    @type: pyms.Peak.Class.Peak

    @author: Andrew Isaac
    @author: David Kainer
    """

    # Missing entries are None; drop them once so the outlier flags below
    # are paired with the right peaks
    real_peaks = [peak for peak in peak_list if peak is not None]

    # DK: first mark peaks in the list that are outliers by RT, but only if
    # there are more than 3 peaks in the list
    if len(real_peaks) > 3:
        rts = [peak.get_rt() for peak in real_peaks]
        for peak, flagged in zip(real_peaks, median_outliers(rts)):
            if flagged:
                peak.isoutlier = True

    # DK: the average RT and average mass spec for the compo peak is now
    # calculated from peaks that are NOT outliers. This should improve the
    # ability to order peaks and figure out badly aligned entries
    avg_spec = None
    mass_list = None
    avg_rt = 0
    count = 0

    for peak in real_peaks:
        # explicit comparison kept: the return type of check_outlier()
        # is not visible from here
        if peak.check_outlier() == False:  # noqa: E712
            ms = peak.get_mass_spectrum()
            spec = numpy.array(ms.mass_spec, dtype='d')

            # the first contributing peak fixes the size and the mass axis
            if avg_spec is None:
                avg_spec = numpy.zeros(len(ms.mass_spec), dtype='d')
                mass_list = ms.mass_list

            # scale all intensities to [0,100]
            max_spec = max(spec) / 100.0
            if max_spec > 0:
                spec = spec / max_spec
            else:
                spec = spec * 0

            avg_rt += peak.get_rt()
            avg_spec += spec
            count += 1

    if count == 0:
        return None

    avg_rt = avg_rt / count
    if minutes == True:  # noqa: E712
        avg_rt = avg_rt / 60.0

    # list more compact than ndarray
    avg_spec = (avg_spec / count).tolist()
    new_ms = MassSpectrum(mass_list, avg_spec)
    return Peak(avg_rt, new_ms, minutes)