Пример #1
0
    def from_mz_int_pairs(cls, mz_int_pairs):
        """
        Construct an instance from a list of ``(m/z, intensity)`` pairs.

        Only the first pair is inspected up front; every pair is then
        unpacked into exactly two values which are converted with ``float``.

        :param mz_int_pairs: The ``(m/z, intensity)`` pairs.
        :type mz_int_pairs: list of tuple

        :return: ``cls(mass_list, intensity_list)`` built from the pairs.

        :raises TypeError: If ``mz_int_pairs`` or its first element is not a sequence.
        :raises ValueError: If the first pair does not have exactly two elements.
        """

        err_msg = "`mz_int_pairs` must be a list of (m/z, intensity) tuples."

        # Both the container and its first element have to be sequences.
        if not (is_sequence(mz_int_pairs) and is_sequence(mz_int_pairs[0])):
            raise TypeError(err_msg)

        if len(mz_int_pairs[0]) != 2:
            raise ValueError(err_msg)

        # Convert pair by pair (mass first, then intensity) so that any
        # unpacking or conversion error surfaces in input order.
        converted = [
            (float(mass), float(intensity)) for mass, intensity in mz_int_pairs
        ]

        mass_list = [mass for mass, _ in converted]
        intensity_list = [intensity for _, intensity in converted]

        return cls(mass_list, intensity_list)
Пример #2
0
def rmsd(list1: Union[Sequence, numpy.ndarray],
         list2: Union[Sequence, numpy.ndarray]) -> float:
    """
    Return the root-mean-square deviation between two data sets.

    Elements are paired by index over the length of ``list1``; the sum of
    squared differences is divided by ``len(list1)`` before the square
    root is taken.

    :param list1: First data set
    :type list1: list, tuple, or numpy.core.ndarray
    :param list2: Second data set
    :type list2: list, tuple, or numpy.core.ndarray

    :return: RMSD value
    :rtype: float

    :raises TypeError: If either argument is not a sequence.

    :author: Qiao Wang
    :author: Andrew Isaac
    :author: Vladimir Likic
    """

    if not is_sequence(list1):
        raise TypeError("'list1' must be a Sequence")

    if not is_sequence(list2):
        raise TypeError("'list2' must be a Sequence")

    squared_error = 0.0
    for index, value in enumerate(list1):
        difference = value - list2[index]
        squared_error = squared_error + difference**2

    return math.sqrt(squared_error / len(list1))
Пример #3
0
def exprl2alignment(expr_list: List[Experiment]) -> List[Alignment]:
    """
    Wrap each experiment in its own alignment object.

    :param expr_list: The experiments to be converted into alignment objects
    :type expr_list: list of :class:`pyms.Experiment.Experiment`

    :return: One alignment object per experiment, in input order
    :rtype: list of :class:`pyms.DPA.Alignment.Alignment`

    :raises TypeError: If ``expr_list`` is not a sequence, or an item is
        not an ``Experiment`` instance.

    :author: Vladimir Likic
    """

    if not is_sequence(expr_list):
        raise TypeError("'expr_list' must be a Sequence")

    def _to_alignment(candidate):
        # Each item is checked immediately before it is converted, so
        # validation and construction stay interleaved item by item.
        if not isinstance(candidate, Experiment):
            raise TypeError("list items must be 'Experiment' instances")
        return Alignment(candidate)

    return [_to_alignment(candidate) for candidate in expr_list]
Пример #4
0
	def __init__(
			self,
			time_list: Sequence[float],
			mass_list: Sequence[float],
			intensity_array: Union[Sequence[Sequence[float]], numpy.ndarray],
			):
		"""
		Validate and store the time axis, mass axis and intensity data.

		:param time_list: Sequence of numbers, one per row of ``intensity_array``.
		:param mass_list: Sequence of numbers, one per column of ``intensity_array``.
		:param intensity_array: Sequence of sequences of numbers; converted
			to a numpy array if it is not one already.

		:raises TypeError: If an argument fails its type check.
		:raises ValueError: If the axis lengths do not match ``intensity_array``.
		"""

		# Type checks. Only the first row of the intensity data is inspected.
		if not is_sequence_of(time_list, _number_types):
			raise TypeError("'time_list' must be a Sequence of numbers")

		if not is_sequence_of(mass_list, _number_types):
			raise TypeError("'mass_list' must be a Sequence of numbers")

		if not (is_sequence(intensity_array) and is_sequence_of(intensity_array[0], _number_types)):
			raise TypeError("'intensity_array' must be a Sequence, of Sequences, of numbers")

		intensity_array = (
				intensity_array
				if isinstance(intensity_array, numpy.ndarray)
				else numpy.array(intensity_array)
				)

		# Shape checks: rows follow the time axis, columns follow the mass axis.
		if len(time_list) != len(intensity_array):
			raise ValueError("'time_list' is not the same length as 'intensity_array'")

		if len(mass_list) != len(intensity_array[0]):
			raise ValueError("'mass_list' is not the same size as 'intensity_array'")

		# The axes are stored as list copies of the arguments.
		self._time_list = list(time_list)
		self._mass_list = list(mass_list)

		self._intensity_array = intensity_array

		# Cache the extremes of both axes.
		self._min_rt, self._max_rt = min(time_list), max(time_list)
		self._min_mass, self._max_mass = min(mass_list), max(mass_list)
Пример #5
0
    def __init__(self, ia, time_list, mass=None):
        """
        Validate and store the data of an ion chromatogram.

        :param ia: Ion chromatogram intensity values
        :type ia: numpy.array
        :param time_list: A list of ion chromatogram retention times
        :type time_list: list
        :param mass: Mass of ion chromatogram (``None`` if TIC)
        :type mass: int or float

        :raises TypeError: If ``ia`` is not a numpy array, ``time_list`` is
            not a sequence of numbers, or ``mass`` is neither ``None`` nor
            a number.
        :raises ValueError: If ``ia`` and ``time_list`` differ in length.

        :author: Lewis Lee, Vladimir Likic
        """

        if not isinstance(ia, numpy.ndarray):
            raise TypeError("'ia' must be a numpy array")

        if not is_sequence(time_list) or not all(
                isinstance(rt, Number) for rt in time_list):
            raise TypeError("'time_list' must be a list of numbers")

        if len(ia) != len(time_list):
            raise ValueError("Intensity array and time list differ in length")

        # Compare against None explicitly: a truthiness test would let falsy
        # non-numeric values ("", [], {}) bypass the type check, while a
        # legitimate mass of 0 is still accepted here.
        if mass is not None and not isinstance(mass, Number):
            raise TypeError("'mass' must be a number")

        self._intensity_array = ia
        self._time_list = time_list
        self._mass = mass
        # NOTE(review): __calc_time_step is defined elsewhere in the class and
        # presumably reads self._time_list, so it is called only after the
        # attributes above are set - confirm before reordering.
        self._time_step = self.__calc_time_step()
        self._min_rt = min(time_list)
        self._max_rt = max(time_list)
Пример #6
0
def median_bounds(im: BaseIntensityMatrix,
                  peak: Peak,
                  shared: bool = True) -> Tuple[float, float]:
    """
    Calculates the median of the left and right bounds found for each apexing peak mass.

    For every ion of the peak's mass spectrum with non-zero intensity, the
    ion chromatogram is extracted from ``im`` and passed to ``ion_area``
    together with the apex index; the left and right values it reports are
    collected and their medians returned.

    :param im: The originating IntensityMatrix object.
    :param peak: The peak whose boundaries are estimated.
    :param shared: Include shared ions shared with neighbouring peak.

    :return: Median left and right boundary offset in points
        (``0.0`` for a side with no collected boundaries).

    :raises TypeError: If an argument is not of the expected type.

    :authors: Andrew Isaac, Dominic Davis-Foster
    """

    if not isinstance(im, BaseIntensityMatrix):
        raise TypeError("'im' must be an IntensityMatrix object")
    if not isinstance(peak, Peak):
        raise TypeError("'peak' must be a Peak object")
    if not isinstance(shared, bool):
        raise TypeError("'shared' must be a boolean")

    intensities = im.intensity_array
    spectrum = peak.mass_spectrum

    # Apex index derived from the peak's retention time.
    apex = im.get_index_at_time(peak.rt)

    # Prefer the stored apex index when it is similar to the RT based one.
    if is_sequence(peak.bounds):
        stored_apex = cast(Sequence, peak.bounds)[1]
        if apex - 1 < stored_apex < apex + 1:
            apex = stored_apex

    # Indices of the peak masses with non-zero intensity.
    nonzero_ions = [
        idx for idx in range(len(spectrum.mass_list))
        if spectrum.mass_spec[idx] > 0
    ]

    left_bounds = []
    right_bounds = []

    for idx in nonzero_ions:
        # Ion chromatogram of this mass as a plain list, one value per scan.
        chromatogram = [scan_row[idx] for scan_row in intensities]
        _area, left, right, left_shared, right_shared = ion_area(chromatogram, apex)

        if shared or not left_shared:
            left_bounds.append(left)
        if shared or not right_shared:
            right_bounds.append(right)

    # Either list can be empty (no non-zero ions, or every boundary was
    # shared while ``shared`` is False); that side falls back to 0.0.
    left_median = median(left_bounds) if left_bounds else 0.0
    right_median = median(right_bounds) if right_bounds else 0.0

    return left_median, right_median
Пример #7
0
def load_peaks(file_name: Union[str, pathlib.Path]) -> Peak:
    """
    Loads the peak_list stored with 'store_peaks'

    :param file_name: File name of peak list
    :type file_name: str or os.PathLike

    :return: The list of Peak objects
    :rtype: :class:`list` of :class:`pyms.Peak.Class.Peak`

    :raises TypeError: If ``file_name`` is not a string or PathLike object.
    :raises IOError: If the unpickled object is not a sequence, is empty,
        or its first element is not a ``Peak``.

    :author: Andrew Isaac
    :author: Dominic Davis-Foster (pathlib support)
    """

    # NOTE(review): the return annotation says ``Peak`` but a list of Peak
    # objects is returned; it is left unchanged so the signature stays as is.

    if not is_path(file_name):
        raise TypeError("'file_name' must be a string or a PathLike object")

    file_name = prepare_filepath(file_name, mkdirs=False)

    # SECURITY: unpickling executes arbitrary code embedded in the file.
    # Only load peak lists that come from a trusted source.
    # The context manager closes the file even if unpickling fails.
    with file_name.open('rb') as fp:
        peak_list = pickle.load(fp)

    if not is_sequence(peak_list):
        raise IOError("The selected file is not a List")
    # Only the first element is type-checked.
    if not len(peak_list) > 0 or not isinstance(peak_list[0], Peak):
        raise IOError("The selected file is not a list of Peak objects")

    return peak_list
Пример #8
0
def MAD(v: Union[Sequence, numpy.ndarray]) -> float:
    """
    Median absolute deviation

    Takes the median of ``v``, then the median of the absolute deviations
    from it, and divides that by ``0.6745``.

    :param v: List of values to calculate the median absolute deviation of
    :type v: list, tuple, or numpy.core.ndarray

    :return: median absolute deviation
    :rtype: float

    :raises TypeError: If ``v`` is not a sequence.

    :author: Vladimir Likic
    """

    if not is_sequence(v):
        raise TypeError("'v' must be a Sequence")

    centre = median(v)
    deviations = [math.fabs(value - centre) for value in v]

    return median(deviations) / 0.6745
Пример #9
0
	def __init__(self, time_list, mass_list, intensity_array):
		"""
		Initialize the IntensityMatrix data

		:param time_list: Sequence of Numbers, one per row of ``intensity_array``.
		:param mass_list: Sequence of Numbers, one per column of ``intensity_array``.
		:param intensity_array: Sequence of Sequences of Numbers; converted
			to a numpy array if it is not one already.

		:raises TypeError: If an argument fails its type check.
		:raises ValueError: If the axis lengths do not match ``intensity_array``.
		"""

		# Type checks. Only the first row of the intensity data is inspected.
		if not is_sequence_of(time_list, Number):
			raise TypeError("'time_list' must be a Sequence of Numbers")

		if not is_sequence_of(mass_list, Number):
			raise TypeError("'mass_list' must be a Sequence of Numbers")

		if not (is_sequence(intensity_array) and is_sequence_of(intensity_array[0], Number)):
			raise TypeError("'intensity_array' must be a Sequence, of Sequences, of Numbers")

		if not isinstance(intensity_array, numpy.ndarray):
			intensity_array = numpy.array(intensity_array)

		# Shape checks: rows follow the time axis, columns follow the mass axis.
		if len(time_list) != len(intensity_array):
			raise ValueError("'time_list' is not the same length as 'intensity_array'")

		if len(mass_list) != len(intensity_array[0]):
			raise ValueError("'mass_list' is not the same size as 'intensity_array'")

		# The axes are stored as given (no copy is made).
		self._time_list = time_list
		self._mass_list = mass_list

		self._intensity_array = intensity_array

		self._min_rt, self._max_rt = min(time_list), max(time_list)
		self._min_mass, self._max_mass = min(mass_list), max(mass_list)

		# Optional parallelism: when mpi4py is importable, record this
		# process's rank and its share of the rows and columns.
		try:
			from mpi4py import MPI

			communicator = MPI.COMM_WORLD
			rank_count = communicator.Get_size()
			this_rank = communicator.Get_rank()

			total_rows = len(intensity_array)
			total_cols = len(intensity_array[0])

			# NOTE(review): "/" is true division on Python 3, so these range
			# bounds (and the sizes derived from them) are floats - confirm
			# that this is what the consumers of these attributes expect.
			row_range = (
					this_rank * total_rows / rank_count,
					(this_rank + 1) * total_rows / rank_count,
					)
			col_range = (
					this_rank * total_cols / rank_count,
					(this_rank + 1) * total_cols / rank_count,
					)
			local_rows = row_range[1] - row_range[0]
			local_cols = col_range[1] - col_range[0]

			self.comm = communicator
			self.num_ranks = rank_count
			self.rank = this_rank
			self.M = total_rows
			self.N = total_cols
			self.local_row_range = row_range
			self.local_col_range = col_range
			self.m = local_rows
			self.n = local_cols

		# If we can't import mpi4py then continue in serial.
		except ModuleNotFoundError:
			pass
Пример #10
0
    def sele_rt_range(self, rt_range: Sequence[str]):
        """
        Discards all peaks which have the retention time outside the specified range.

        The stored peak list is replaced by the result of
        ``sele_peaks_by_rt`` for the given range.

        :param rt_range: Min, max retention time given as a sequence ``[rt_min, rt_max]``.

        :raises TypeError: If ``rt_range`` is not a sequence.
        """

        if not is_sequence(rt_range):
            raise TypeError("'rt_range' must be a Sequence")

        self._peak_list = sele_peaks_by_rt(self._peak_list, rt_range)
Пример #11
0
    def sele_rt_range(self, rt_range):
        """
        Discards all peaks which have the retention time outside the specified range

        The stored peak list is replaced by the result of
        ``sele_peaks_by_rt`` for the given range.

        :param rt_range: Min, max retention time given as a list [rt_min, rt_max]
        :type rt_range: ~collections.abc.Sequence

        :raises TypeError: If ``rt_range`` is not a sequence.
        """

        if not is_sequence(rt_range):
            raise TypeError("'rt_range' must be a Sequence")

        current_peaks = self._peak_list
        self._peak_list = sele_peaks_by_rt(current_peaks, rt_range)
Пример #12
0
    def intensity_array(self, ia: Union[Sequence, numpy.ndarray]):
        """
        Sets the value for the intensity array.

        A value that is not already a numpy array is converted with
        ``numpy.array`` before being stored.

        :param ia: An array of new intensity values

        :raises TypeError: If ``ia`` is not a sequence.

        :author: Vladimir Likic
        """

        if not is_sequence(ia):
            raise TypeError("'intensity_array' must be a Sequence")

        self._intensity_array = ia if isinstance(ia, numpy.ndarray) else numpy.array(ia)
Пример #13
0
def sele_peaks_by_rt(peaks: Union[Sequence, numpy.ndarray],
                     rt_range: Sequence[str]) -> Peak:
    """
    Selects peaks from a retention time range

    Both limits are converted with ``time_str_secs``; a peak is kept only
    when its retention time lies strictly between them.

    :param peaks: A list of peak objects
    :type peaks: list or tuple or numpy.ndarray
    :param rt_range: A list of two time strings, specifying lower and
           upper retention times
    :type rt_range: ~collections.abc.Sequence[str]

    :return: A list of peak objects
    :rtype: :class:`list` of :class:`pyms.Peak.Class.Peak`

    :raises TypeError: If ``peaks`` is not a sequence of Peak objects,
        ``rt_range`` is not a sequence, or a limit is not a string.
    :raises ValueError: If ``rt_range`` does not have exactly two elements,
        or the lower limit is not less than the upper limit.
    """

    if not is_peak_list(peaks):
        raise TypeError("'peaks' must be a Sequence of Peak objects")

    if not is_sequence(rt_range):
        raise TypeError("'rt_range' must be a Sequence")

    if len(rt_range) != 2:
        raise ValueError("'rt_range' must have exactly two elements")

    lower, upper = rt_range[0], rt_range[1]

    if not (isinstance(lower, str) and isinstance(upper, str)):
        raise TypeError(
            "lower/upper retention time limits must be strings")

    rt_lo = time_str_secs(lower)
    rt_hi = time_str_secs(upper)

    if rt_lo >= rt_hi:
        raise ValueError("lower retention time limit must be less than upper")

    # Both limits are exclusive.
    return [peak for peak in peaks if rt_lo < peak.rt < rt_hi]
Пример #14
0
    def bounds(self, value: Sequence[int]):
        """
        Sets peak boundaries in points.

        The value is stored as a tuple.

        :param value: A 3-element tuple containing the left, apex, and right
            peak boundaries in points. Left and right are offsets.

        :raises TypeError: If ``value`` is not a sequence, or one of its
            elements is not an integer.
        :raises ValueError: If ``value`` does not have exactly 3 elements.
        """

        if not is_sequence(value):
            raise TypeError("'Peak.bounds' must be a Sequence")

        if len(value) != 3:
            raise ValueError("'Peak.bounds' must have exactly 3 elements")

        # Position of the first element that is not an integer, if any.
        index = next(
            (position for position, item in enumerate(value)
             if not isinstance(item, int)),
            None,
        )
        if index is not None:
            raise TypeError(
                f"'Peak.bounds' element #{index} must be an integer")

        as_tuple = tuple(value[:3])
        self._pt_bounds = cast(Tuple[int, int, int], as_tuple)
Пример #15
0
    def bounds(self, value):
        """
        Sets peak boundaries in points

        The value is stored as given; no copy is made.

        :param value: A list containing left, apex, and right
            peak boundaries in points, left and right are offsets
        :type value: list

        :raises TypeError: If ``value`` is not a sequence, or one of its
            elements is not an integer.
        :raises ValueError: If ``value`` does not have exactly 3 elements.
        """

        if not is_sequence(value):
            raise TypeError("'Peak.bounds' must be a list")

        if len(value) != 3:
            raise ValueError("'Peak.bounds' must have exactly 3 elements")

        index = 0
        for item in value:
            if not isinstance(item, int):
                raise TypeError(
                    f"'Peak.bounds' element #{index} must be an integer")
            index += 1

        self._pt_bounds = value