Пример #1
0
def BillerBiemann(im, points=3, scans=1):
    """
    @summary: Deconvolution following Biller and Biemann (1974)

        A maxima matrix is obtained from get_maxima_matrix(); every row of
        that matrix whose values sum to more than zero is turned into a Peak
        at the retention time of that row.

    @param im: An IntensityMatrix object
    @type im: pyms.GCMS.Class.IntensityMatrix
    @param points: Peak if maxima over 'points' number of scans (Default 3)
    @type points: IntType
    @param scans: To compensate for spectra skewing,
        peaks from 'scans' scans are combined (Default 1).
    @type scans: IntType

    @return: List of Peak objects, in row order of the maxima matrix
    @rtype: ListType

    @author: Andrew Isaac
    """

    times = im.get_time_list()
    masses = im.get_mass_list()
    maxima_rows = get_maxima_matrix(im, points, scans)

    found = []
    for scan_index, maxima in enumerate(maxima_rows):
        has_signal = sum(maxima) > 0
        if not has_signal:
            # nothing was flagged as a maximum in this row
            continue

        spectrum = MassSpectrum(masses, maxima)
        new_peak = Peak(times[scan_index], spectrum)
        # store IM index for convenience
        new_peak.set_pt_bounds([0, scan_index, 0])
        found.append(new_peak)

    return found
Пример #2
0
def normalize_mass_spec(mass_spec: MassSpectrum, relative_to: Optional[float] = None, inplace: bool = False, max_intensity: float = 100) -> MassSpectrum:
    """
    Normalize the intensities in the given Mass Spectrum to values between 0 and ``max_intensity``,
    which by default is 100.0.

    A spectrum with no intensities, or whose intensities are all zero, has no
    peak to scale against; it is normalized to an equally long list of zeros
    instead of raising.

    :param mass_spec: The Mass Spectrum to normalize
    :type mass_spec: :class:`pyms.Spectrum.MassSpectrum`
    :param relative_to: The largest intensity in the original data set.
        If not None the intensities are computed relative to this value.
        If None the value is calculated from the mass spectrum.
        This can be useful when normalizing several mass spectra to each other.
    :type relative_to: int or float
    :param inplace: Whether the normalization should be applied to the
        :class:`~pyms.Spectrum.MassSpectrum` object given, or to a copy (default behaviour).
    :type inplace: bool, optional.
    :param max_intensity: The maximum intensity in the normalized spectrum.
        If omitted the range 0-100.0 is used.
        If an integer the normalized intensities will be integers.
    :type max_intensity: int, float
    :return: The normalized mass spectrum. This is ``mass_spec`` itself when
        ``inplace`` is true, otherwise a newly constructed spectrum.
    :rtype: :class:`pyms.Spectrum.MassSpectrum`
    :raises ZeroDivisionError: if ``relative_to`` is 0 while the spectrum holds
        non-zero intensities (for plain Python numbers).
    """

    intensities = mass_spec.intensity_list

    if relative_to is None:
        # default=0 keeps an empty spectrum from raising ValueError in max()
        relative_to = max(intensities, default=0)

    if relative_to == 0 and not any(intensities):
        # Empty or all-zero spectrum: dividing by the (zero) maximum would
        # fail, and the only sensible normalized form is all zeros.
        normalized_intensity_list = [0.0 for _ in intensities]
    else:
        divisor = float(relative_to)
        normalized_intensity_list = [
            (x / divisor) * max_intensity
            for x in intensities
        ]

    if isinstance(max_intensity, int):
        # an integer ceiling requests integer intensities
        normalized_intensity_list = [round(x) for x in normalized_intensity_list]

    if inplace:
        mass_spec.intensity_list = normalized_intensity_list
        return mass_spec

    return MassSpectrum(mass_spec.mass_list, normalized_intensity_list)
Пример #3
0
def composite_peak(peak_list, minutes=False):
    """
    @summary: Build a single peak whose spectrum is the average of the
        spectra of all peaks in the list.

        Entries that are None are ignored.  Each spectrum is rescaled so
        that its largest intensity is 100 before averaging; the retention
        time of the result is the mean retention time of the peaks used.

    @param peak_list: A list of peak objects
    @type peak_list: ListType
    @param minutes: Return retention time as minutes
    @type minutes: BooleanType

    @return: Peak Object with combined mass spectra of 'peak_list', or
        None when the list holds no peaks
    @rtype: pyms.Peak.Class.Peak

    @author: Andrew Isaac
    """

    used = 0
    rt_total = 0
    spec_total = None  # allocated once the first real peak is seen
    masses = None

    for candidate in peak_list:
        if candidate is None:
            continue

        spectrum = candidate.get_mass_spectrum()
        intensities = numpy.array(spectrum.mass_spec, dtype='d')

        if spec_total is None:
            # the first peak fixes the spectrum length and the mass axis
            spec_total = numpy.zeros(len(spectrum.mass_spec), dtype='d')
            masses = spectrum.mass_list

        # scale all intensities to [0,100]
        unit = max(intensities) / 100.0
        if unit > 0:
            scaled = intensities / unit
        else:
            scaled = intensities * 0

        rt_total += candidate.get_rt()
        spec_total += scaled
        used += 1

    if used == 0:
        return None

    # NOTE: the averaged retention time is not converted here; 'minutes'
    # is handed to Peak unchanged.
    mean_rt = rt_total / used
    # list more compact than ndarray
    mean_spec = (spec_total / used).tolist()
    return Peak(mean_rt, MassSpectrum(masses, mean_spec), minutes)
Пример #4
0
def fill_peaks(data, peak_list, D, minutes=False):
    """
    @summary: Gets the best matching Retention Time and spectra from 'data' for
        each peak in the peak list.

        For every peak, the scans of 'data' inside a retention time window
        around the peak are scored by the cosine similarity between the scan
        and the peak's mass spectrum, multiplied by a Gaussian weight on the
        retention time difference.  The best scoring scan supplies the
        retention time and the spectrum of the new peak.

    @param data: A data IntensityMatrix that has the same mass range as the
        peaks in the peak list
    @type data: pyms.GCMS.Class.IntensityMatrix
    @param peak_list: A list of peak objects
    @type peak_list: ListType
    @param D: Peak width standard deviation in seconds.  Determines search
        window width.
    @type D: FloatType
    @param minutes: Return retention time as minutes
    @type minutes: BooleanType

    @return: List of new Peak Objects, one per entry of 'peak_list' and in
        the same order
    @rtype: ListType

    @author: Andrew Isaac
    """

    # Test for best match in range where RT weight is greater than _TOL
    _TOL = 0.001
    cutoff = D * math.sqrt(-2.0 * math.log(_TOL))

    # Penalise for neighboring peaks
    # reweight so RT weight at nearest peak is _PEN
    _PEN = 0.5
    pen_scale = math.sqrt(-2.0 * math.log(_PEN))

    datamat = data.get_matrix_list()
    mass_list = data.get_mass_list()
    datatimes = data.get_time_list()
    minrt = min(datatimes)
    maxrt = max(datatimes)
    last_ii = len(peak_list) - 1

    new_peak_list = []
    for ii, peak in enumerate(peak_list):
        spec = numpy.array(peak.get_mass_spectrum().mass_spec, dtype='d')
        rt = peak.get_rt()
        spec_SS = numpy.sum(spec**2, axis=0)

        # Distance to each neighbour that actually exists.  Only real
        # neighbours are considered: a missing neighbour must not be
        # replaced by a placeholder or by a value left over from the
        # previous iteration, otherwise the first/last peak gets a wrong
        # (or no) penalty.
        gaps = []
        if ii > 0:
            gaps.append(abs(rt - peak_list[ii - 1].rt))
        if ii < last_ii:
            gaps.append(abs(rt - peak_list[ii + 1].rt))

        # adjust weighting for neighbours; fall back to D when there is no
        # neighbour or the nearest neighbour has the same retention time
        Dclose = D
        if gaps:
            Dnear = min(gaps) / pen_scale
            if Dnear > 0:
                Dclose = min(D, Dnear)

        # Get bounds, clamped to the time range covered by the data
        lowii = data.get_index_at_time(max(rt - cutoff, minrt))
        upii = data.get_index_at_time(min(rt + cutoff, maxrt))

        # Get sub matrix of scans in bounds
        submat = numpy.array(datamat[lowii:upii + 1], dtype='d')
        subrts = numpy.array(datatimes[lowii:upii + 1], dtype='d')

        submat_SS = numpy.sum(submat**2, axis=1)

        # dot product of every scan (row) with the peak spectrum
        toparr = numpy.dot(submat, spec)
        botarr = numpy.sqrt(spec_SS * submat_SS)

        # scaled dot product of each scan.  A scan (or peak spectrum) with
        # no intensity has a zero norm; score it 0 instead of dividing,
        # because 0/0 gives NaN and argmax would then pick that scan.
        cosarr = numpy.zeros(len(botarr), dtype='d')
        valid = botarr > 0
        cosarr[valid] = toparr[valid] / botarr[valid]

        # RT weight of each scan
        rtimearr = numpy.exp(-((subrts - rt) / float(Dclose))**2 / 2.0)

        # weighted scores
        scorearr = cosarr * rtimearr

        # index of best score
        best_ii = scorearr.argmax()

        # Add new peak
        bestrt = subrts[best_ii]
        bestspec = submat[best_ii].tolist()
        ms = MassSpectrum(mass_list, bestspec)
        new_peak_list.append(Peak(bestrt, ms, minutes))

    return new_peak_list
Пример #5
0
def composite_peak(peak_list, minutes=False):

    """
    @summary: Create a peak that consists of a composite spectrum from all
        spectra in the list of peaks

        Entries that are None are ignored throughout.  When more than three
        peaks are present, peaks flagged by median_outliers() on their
        retention times get 'isoutlier' set to True (the peak objects in
        'peak_list' are modified in place).  Peaks for which
        check_outlier() does not compare equal to False are left out of the
        average.  Each remaining spectrum is rescaled so that its largest
        intensity is 100 before averaging.

    @param peak_list: A list of peak objects
    @type peak_list: ListType
    @param minutes: Return retention time as minutes
    @type minutes: BooleanType

    @return: Peak Object with combined mass spectra of 'peak_list', or
        None when no peak contributes to the average
    @rtype: pyms.Peak.Class.Peak

    @author: Andrew Isaac
    @author: David Kainer
    """

    # The list may contain None placeholders; drop them once so that the
    # outlier pass and the averaging pass agree on which entries are peaks.
    real_peaks = [peak for peak in peak_list if peak is not None]

    # DK: first mark peaks in the list that are outliers by RT, but only if
    # there are more than 3 peaks in the list
    if len(real_peaks) > 3:
        rts = [peak.get_rt() for peak in real_peaks]
        for peak, flagged in zip(real_peaks, median_outliers(rts)):
            if flagged:
                peak.isoutlier = True

    # DK: the average RT and average mass spec for the compo peak is now
    # calculated from peaks that are NOT outliers.
    # This should improve the ability to order peaks and figure out badly
    # aligned entries
    count = 0
    avg_rt = 0
    avg_spec = None  # allocated once the first usable peak is seen
    mass_list = None

    for peak in real_peaks:
        # '== False' is kept on purpose: check_outlier() is defined
        # elsewhere and may not return a strict bool.
        if peak.check_outlier() == False:  # noqa: E712
            ms = peak.get_mass_spectrum()
            spec = numpy.array(ms.mass_spec, dtype='d')
            if avg_spec is None:
                # the first peak fixes the spectrum length and the mass axis
                avg_spec = numpy.zeros(len(ms.mass_spec), dtype='d')
                mass_list = ms.mass_list
            # scale all intensities to [0,100]
            max_spec = max(spec) / 100.0
            if max_spec > 0:
                spec = spec / max_spec
            else:
                spec = spec * 0
            avg_rt += peak.get_rt()
            avg_spec += spec
            count += 1

    if count == 0:
        return None

    avg_rt = avg_rt / count
    if minutes:
        avg_rt = avg_rt / 60.0
    # list more compact than ndarray
    avg_spec = (avg_spec / count).tolist()
    new_ms = MassSpectrum(mass_list, avg_spec)
    return Peak(avg_rt, new_ms, minutes)