def test_read_expr_list(filtered_peak_list, datadir, expr_filename):
    """
    Exercise ``read_expr_list`` on ``read_expr_list.txt`` from ``datadir``,
    then check the exceptions raised for invalid arguments.
    """
    experiments = read_expr_list(datadir / "read_expr_list.txt")
    assert isinstance(experiments, list)
    assert is_sequence_of(experiments, Experiment)

    # Only the first experiment is inspected in detail.
    first = experiments[0]
    assert isinstance(first.expr_code, str)
    assert first.expr_code == "ELEY_1_SUBTRACT"
    assert isinstance(first.peak_list, list)
    assert is_sequence_of(first.peak_list, Peak)
    assert first.peak_list == filtered_peak_list

    first.sele_rt_range(["6.5m", "21m"])

    # Errors
    invalid_inputs = [*test_numbers, test_dict, *test_lists]
    for bad_value in invalid_inputs:
        with pytest.raises(TypeError):
            read_expr_list(bad_value)

    # Each unreadable / unsuitable file name is paired with the exception(s) expected for it.
    file_failures = [
        (IOError, "non-existent.expr"),
        ((IOError, UnicodeDecodeError), "not-an-experiment.expr"),
        (IOError, "__init__.py"),
    ]
    for expected_error, filename in file_failures:
        with pytest.raises(expected_error):
            read_expr_list(filename)
def test_read_expr_list(filtered_peak_list, pyms_datadir, expr_filename, tmp_pathplus):
    """
    Write a list file naming ``expr_filename`` five times, load it with
    ``read_expr_list`` and check the result, then check the error cases.
    """
    list_file = tmp_pathplus / "read_expr_list.txt"
    list_file.write_lines([str(expr_filename)] * 5)

    experiments = read_expr_list(list_file)
    assert isinstance(experiments, list)
    assert is_sequence_of(experiments, Experiment)

    # Only the first experiment is inspected in detail.
    first = experiments[0]
    assert isinstance(first.expr_code, str)
    assert first.expr_code == "ELEY_1_SUBTRACT"
    assert isinstance(first.peak_list, list)
    assert is_sequence_of(first.peak_list, Peak)
    assert first.peak_list == filtered_peak_list

    first.sele_rt_range(["6.5m", "21m"])

    # Errors
    for bad_value in [*test_numbers, test_dict, *test_lists]:
        with pytest.raises(TypeError):
            read_expr_list(bad_value)  # type: ignore

    # (file name, regex the FileNotFoundError message must match)
    missing_file_cases = [
        ("non-existent.expr", "No such file or directory: .*non-existent.expr.*"),
        ("not-an-experiment.expr", "No such file or directory: 'not-an-experiment.expr'"),
        ("__init__.py", "No such file or directory: .*__init__.py.*"),
    ]
    for filename, pattern in missing_file_cases:
        with pytest.raises(FileNotFoundError, match=pattern):
            read_expr_list(filename)
def __init__(
    self,
    intensity_list: Union[Sequence[float], numpy.ndarray],
    time_list: Sequence[float],
    mass: Optional[float] = None,
):
    """
    Validate and store the intensities, the matching time points and the optional mass.

    :param intensity_list: Sequence (or numpy array) of numbers; stored as a numpy array.
    :param time_list: Sequence of numbers with the same length as ``intensity_list``;
        stored as a new list.
    :param mass: A number, or :py:obj:`None`.

    :raises TypeError: If either sequence is not a sequence of numbers,
        or ``mass`` is neither a number nor :py:obj:`None`.
    :raises ValueError: If the two sequences differ in length.
    """
    # Both sequences get the same element-type check; the order decides which error wins.
    for values, message in (
        (intensity_list, "'intensity_list' must be a Sequence of numbers"),
        (time_list, "'time_list' must be a Sequence of numbers"),
    ):
        if not is_sequence_of(values, _number_types):
            raise TypeError(message)

    if len(intensity_list) != len(time_list):
        raise ValueError(
            "'intensity_list' and 'time_list' differ in length")

    if not (mass is None or is_number(mass)):
        raise TypeError("'mass' must be a number or None")

    # An existing ndarray is stored as-is (no copy); anything else is converted.
    if isinstance(intensity_list, numpy.ndarray):
        intensities = intensity_list
    else:
        intensities = numpy.array(intensity_list)

    self._intensity_array = intensities
    self._time_list = list(time_list)
    self._mass: Optional[float] = mass

    # Derived values are computed only after the raw data is in place.
    self._time_step = self._calc_time_step()
    self._min_rt = min(time_list)
    self._max_rt = max(time_list)
def __init__(
    self,
    time_list: Sequence[float],
    mass_list: Sequence[float],
    intensity_array: Union[Sequence[Sequence[float]], numpy.ndarray],
):
    """
    Validate and store the time axis, the mass axis and the intensity data.

    :param time_list: Sequence of numbers, one per row of ``intensity_array``.
    :param mass_list: Sequence of numbers, one per column of ``intensity_array``
        (checked against the length of the first row only).
    :param intensity_array: Sequence of sequences of numbers, or a numpy array;
        stored as a numpy array.

    :raises TypeError: If any argument fails its element-type check.
    :raises ValueError: If the axis lengths do not match ``intensity_array``.
    """
    # sanity check
    for values, message in (
        (time_list, "'time_list' must be a Sequence of numbers"),
        (mass_list, "'mass_list' must be a Sequence of numbers"),
    ):
        if not is_sequence_of(values, _number_types):
            raise TypeError(message)

    # Only the first row's elements are type-checked.
    if not (is_sequence(intensity_array) and is_sequence_of(intensity_array[0], _number_types)):
        raise TypeError("'intensity_array' must be a Sequence, of Sequences, of numbers")

    if isinstance(intensity_array, numpy.ndarray):
        matrix = intensity_array
    else:
        matrix = numpy.array(intensity_array)

    if len(time_list) != len(matrix):
        raise ValueError("'time_list' is not the same length as 'intensity_array'")

    if len(mass_list) != len(matrix[0]):
        raise ValueError("'mass_list' is not the same size as 'intensity_array'")

    # The axes are copied into new lists; the matrix is stored as a numpy array.
    self._time_list = list(time_list)
    self._mass_list = list(mass_list)
    self._intensity_array = matrix

    self._min_rt = min(time_list)
    self._max_rt = max(time_list)

    self._min_mass = min(mass_list)
    self._max_mass = max(mass_list)
def __init__(self, time_list, mass_list, intensity_array):
    """
    Initialize the IntensityMatrix data

    :param time_list: Sequence of Numbers, one per row of ``intensity_array``.
    :param mass_list: Sequence of Numbers, one per column of ``intensity_array``
        (checked against the length of the first row only).
    :param intensity_array: Sequence of Sequences of Numbers, or a numpy array;
        converted to a numpy array if it is not one already.

    :raises TypeError: If any argument fails its element-type check.
    :raises ValueError: If the axis lengths do not match ``intensity_array``.

    When ``mpi4py`` can be imported, the communicator, rank information and this
    rank's row / column ranges are additionally stored on the instance.
    """

    # sanity check
    if not is_sequence_of(time_list, Number):
        raise TypeError("'time_list' must be a Sequence of Numbers")

    if not is_sequence_of(mass_list, Number):
        raise TypeError("'mass_list' must be a Sequence of Numbers")

    # Only the first row's elements are type-checked.
    if not is_sequence(intensity_array) or not is_sequence_of(intensity_array[0], Number):
        raise TypeError("'intensity_array' must be a Sequence, of Sequences, of Numbers")

    if not isinstance(intensity_array, numpy.ndarray):
        intensity_array = numpy.array(intensity_array)

    if not len(time_list) == len(intensity_array):
        raise ValueError("'time_list' is not the same length as 'intensity_array'")

    if not len(mass_list) == len(intensity_array[0]):
        raise ValueError("'mass_list' is not the same size as 'intensity_array'")

    self._time_list = time_list
    self._mass_list = mass_list
    self._intensity_array = intensity_array

    self._min_rt = min(time_list)
    self._max_rt = max(time_list)

    self._min_mass = min(mass_list)
    self._max_mass = max(mass_list)

    # Try to include parallelism.
    # Only the import is guarded, so errors in the setup below are not masked.
    # ImportError (the parent of ModuleNotFoundError) also covers an installed
    # mpi4py whose import fails for another reason.
    try:
        from mpi4py import MPI
    except ImportError:
        # If we can't import mpi4py then continue in serial.
        pass
    else:
        comm = MPI.COMM_WORLD
        num_ranks = comm.Get_size()
        rank = comm.Get_rank()

        M, N = len(intensity_array), len(intensity_array[0])

        # Floor division keeps the range bounds (and the counts derived from
        # them) integral; true division would produce floats.
        lrr = (rank * M // num_ranks, (rank + 1) * M // num_ranks)
        lcr = (rank * N // num_ranks, (rank + 1) * N // num_ranks)
        m, n = (lrr[1] - lrr[0], lcr[1] - lcr[0])

        self.comm = comm
        self.num_ranks = num_ranks
        self.rank = rank

        self.M = M  # total number of rows
        self.N = N  # total number of columns

        self.local_row_range = lrr  # (start, stop) rows for this rank
        self.local_col_range = lcr  # (start, stop) columns for this rank

        self.m = m  # number of rows in this rank's range
        self.n = n  # number of columns in this rank's range
def __init__(self, time_list: Sequence[float], scan_list: Sequence[Scan]):
    """
    Validate and store the time points and scans, then run the setup helpers.

    :param time_list: Sequence of numbers; stored as a new list.
    :param scan_list: Sequence of ``Scan`` objects; stored as a new list.

    :raises TypeError: If either argument fails its element-type check.
    """
    # (value, required element type, error message), checked in this order
    requirements = (
        (time_list, _number_types, "'time_list' must be a Sequence of numbers"),
        (scan_list, Scan, "'scan_list' must be a Sequence of Scan objects"),
    )
    for values, element_type, message in requirements:
        if not is_sequence_of(values, element_type):
            raise TypeError(message)

    self._time_list = list(time_list)
    self._scan_list = list(scan_list)

    # The helpers run after both lists have been stored, in this order.
    self._set_time()
    self._set_min_max_mass()
    self._calc_tic()
def test_load_expr(filtered_peak_list, pyms_datadir, expr_filename):
    """
    Load ``expr_filename`` with ``load_expr`` and check the resulting experiment,
    then check the exceptions raised for invalid arguments and files.
    """
    loaded = load_expr(expr_filename)
    assert isinstance(loaded, Experiment)

    assert isinstance(loaded.expr_code, str)
    assert loaded.expr_code == "ELEY_1_SUBTRACT"

    assert isinstance(loaded.peak_list, list)
    assert is_sequence_of(loaded.peak_list, Peak)
    assert loaded.peak_list == filtered_peak_list

    loaded.sele_rt_range(["6.5m", "21m"])

    # Errors
    for bad_value in [*test_numbers, test_dict, *test_lists]:
        with pytest.raises(TypeError):
            load_expr(bad_value)  # type: ignore

    # (expected exception, regex for its message, file name inside pyms_datadir)
    file_failures = [
        (FileNotFoundError, "No such file or directory: .*non-existent.expr.*", "non-existent.expr"),
        (TypeError, "The loaded file is not an experiment file", "not-an-experiment.expr"),
    ]
    for expected_error, pattern, filename in file_failures:
        with pytest.raises(expected_error, match=pattern):
            load_expr(pyms_datadir / filename)
def __init__(self, time_list, scan_list):
    """
    Initialize the GC-MS data

    :param time_list: Sequence of numbers; stored without copying.
    :param scan_list: Sequence of ``Scan`` objects; stored without copying.

    :raises TypeError: If either argument fails its element-type check.
    """
    # (value, required element type, error message), checked in this order
    requirements = (
        (time_list, Number, "'time_list' must be a Sequence of numbers"),
        (scan_list, Scan, "'scan_list' must be a Sequence of Scan objects"),
    )
    for values, element_type, message in requirements:
        if not is_sequence_of(values, element_type):
            raise TypeError(message)

    self._time_list = time_list
    self._scan_list = scan_list

    # The private helpers run after both lists have been stored, in this order.
    self.__set_time()
    self.__set_min_max_mass()
    self.__calc_tic()
def is_peak_list(peaks: Any) -> bool:
    """
    Check ``peaks`` with ``is_sequence_of(peaks, Peak)`` and return the result.

    :param peaks: The object to check.

    :author: Dominic Davis-Foster
    """
    all_peaks = is_sequence_of(peaks, Peak)
    return all_peaks
def get_maxima_indices(ion_intensities: Union[Sequence, numpy.ndarray], points: int = 3) -> List:
    """
    Find local maxima.

    A window of ``points`` values (forced to an odd size) slides over the data.
    An index is reported when the middle value is strictly greater than every
    other value in the window, or when it lies midway along a plateau that is
    entered by a rise and left by a fall.

    :param ion_intensities: A list of intensities for a single ion
    :type ion_intensities: ~collections.abc.Sequence or numpy.ndarray
    :param points: Number of scans over which to consider a maxima to be a peak.
        Default ``3``
    :type points: int, optional

    :return: A list of scan indices
    :rtype: list

    :raises TypeError: If ``ion_intensities`` is not a sequence of Numbers,
        or ``points`` is not an integer.
    :raises ValueError: From ``max()`` when ``points`` is 0 or 1, because the
        windows either side of the middle value are then empty.

    :author: Andrew Isaac, Dominic Davis-Foster (type assertions)
    """

    if not is_sequence_of(ion_intensities, Number):
        raise TypeError("'ion_intensities' must be a List of Numbers")

    if not isinstance(points, int):
        raise TypeError("'points' must be an integer")

    # find peak inflection points
    # use a 'points' point window
    # for a plateau after a rise, need to check if it is the left edge of
    # a peak
    peak_point = []
    edge = -1  # index where the current candidate plateau began; -1 means none
    points = int(points)
    half = int(points / 2)
    points = 2 * half + 1  # ensure odd number of points

    for index in range(len(ion_intensities) - points + 1):
        left = ion_intensities[index:index + half]
        mid = ion_intensities[index + half]
        right = ion_intensities[index + half + 1:index + points]

        # max in middle
        if mid > max(left) and mid > max(right):
            peak_point.append(index + half)
            edge = -1  # ignore previous rising edge

        # flat from rise (left of peak?)
        if mid > max(left) and mid == max(right):
            edge = index + half  # ignore previous rising edge, update latest

        # fall from flat
        if mid == max(left) and mid > max(right):
            if edge > -1:
                centre = int((edge + index + half) / 2)  # mid point
                peak_point.append(centre)
            edge = -1

    return peak_point
def is_peak_list(peaks: List) -> bool:
    """
    Check ``peaks`` with ``is_sequence_of(peaks, Peak)`` and return the result.

    :param peaks: A list of peak objects
    :type peaks: list

    :return: The result of the ``is_sequence_of`` check
    :rtype: bool

    :author: Dominic Davis-Foster
    """
    all_peaks = is_sequence_of(peaks, Peak)
    return all_peaks
def __init__(self, alignments: List[Alignment], D: float, gap: float):
    """
    Validate and store the arguments, then call ``_sim_matrix``, ``_dist_matrix``
    and ``_guide_tree`` in that order.

    :param alignments: Sequence of ``Alignment`` objects.
    :param D: Must be a :class:`float` instance (an :class:`int` is rejected).
    :param gap: Must be a :class:`float` instance (an :class:`int` is rejected).

    :raises TypeError: If any argument has the wrong type.
    """
    if not is_sequence_of(alignments, Alignment):
        raise TypeError(
            "'alignments' must be a Sequence of Alignment objects")

    # Both scalar parameters share the same strict float check.
    for value, message in (
        (D, "'D' must be a float"),
        (gap, "'gap' must be a float"),
    ):
        if not isinstance(value, float):
            raise TypeError(message)

    self.alignments = alignments
    self.D = D
    self.gap = gap

    # The helpers run after the attributes above have been set, in this order.
    self._sim_matrix()
    self._dist_matrix()
    self._guide_tree()
def __init__(self, alignments, D, gap):
    """
    Models pairwise alignment of alignments

    :param alignments: Sequence of ``Alignment`` objects.
    :param D: Must be a :class:`float` instance (an :class:`int` is rejected).
    :param gap: Must be a :class:`float` instance (an :class:`int` is rejected).

    :raises TypeError: If any argument has the wrong type.
    """
    alignments_ok = is_sequence_of(alignments, Alignment)
    if not alignments_ok:
        raise TypeError("'alignments' must be a Sequence of Alignment objects")

    # Both scalar parameters share the same strict float check.
    float_checks = (
        (D, "'D' must be a float"),
        (gap, "'gap' must be a float"),
    )
    for value, message in float_checks:
        if not isinstance(value, float):
            raise TypeError(message)

    self.alignments = alignments
    self.D = D
    self.gap = gap

    # The helpers run after the attributes above have been set, in this order.
    self._sim_matrix()
    self._dist_matrix()
    self._guide_tree()
def test_load_expr(filtered_peak_list, datadir, expr_filename):
    """
    Load ``expr_filename`` with ``load_expr`` and check the resulting experiment,
    then check the exceptions raised for invalid arguments and files.
    """
    loaded = load_expr(expr_filename)
    assert isinstance(loaded, Experiment)

    assert isinstance(loaded.expr_code, str)
    assert loaded.expr_code == "ELEY_1_SUBTRACT"

    assert isinstance(loaded.peak_list, list)
    assert is_sequence_of(loaded.peak_list, Peak)
    assert loaded.peak_list == filtered_peak_list

    loaded.sele_rt_range(["6.5m", "21m"])

    # Errors
    invalid_inputs = [*test_numbers, test_dict, *test_lists]
    for bad_value in invalid_inputs:
        with pytest.raises(TypeError):
            load_expr(bad_value)

    # Both a missing file and a non-experiment file are expected to raise IOError.
    for filename in ("non-existent.expr", "not-an-experiment.expr"):
        with pytest.raises(IOError):
            load_expr(datadir / filename)
def test_peak_list(expr, filtered_peak_list):
    """
    Check that ``expr.peak_list`` is a list equal to ``filtered_peak_list``,
    and that ``filtered_peak_list`` is a sequence of ``Peak`` objects.
    """
    peaks = expr.peak_list
    assert isinstance(peaks, list)
    assert is_sequence_of(filtered_peak_list, Peak)
    assert expr.peak_list == filtered_peak_list
def write_common_ion_csv(self, area_file_name: Union[str, pathlib.Path], top_ion_list: List, minutes: bool = True):
    """
    Writes the alignment of common-ion peak areas to a CSV file

    The header row is ``"UID","RTavg","Quant Ion"`` followed by one quoted
    column per entry of ``self.expr_code``. Each following row holds the
    composite peak's UID, the average retention time of the aligned peaks,
    the ion taken from ``top_ion_list``, and one area per entry of
    ``self.peakpos`` (``NA`` where no area is available).

    :param area_file_name: The name for the areas alignment file
    :type area_file_name: str or os.PathLike
    :param top_ion_list: A list of the highest intensity common ion along the
        aligned peaks
    :type top_ion_list: ~collections.abc.Sequence
    :param minutes: An optional indicator whether to save retention times in
        minutes. If False, retention time will be saved in seconds
    :type minutes: bool, optional

    :raises TypeError: If ``area_file_name`` is not a string or PathLike object,
        or ``top_ion_list`` is not a Sequence of Numbers.

    :author: Woon Wai Keen
    :author: Andrew Isaac
    :author: Sean O'Callaghan
    :author: Vladimir Likic
    :author: Dominic Davis-Foster (pathlib support)
    """

    if not is_path(area_file_name):
        raise TypeError(
            "'area_file_name' must be a string or a PathLike object")

    if not is_sequence_of(top_ion_list, Number):
        raise TypeError("'top_ion_list' must be a Sequence of Numbers")

    area_file_name = prepare_filepath(area_file_name)

    with area_file_name.open("w") as fp:

        # create and write the header
        header = ['"UID"', '"RTavg"', '"Quant Ion"']
        header.extend(f'"{item}"' for item in self.expr_code)
        fp.write(",".join(header) + "\n")

        rtsums = []
        rtcounts = []

        # The following two arrays will become list of lists
        # such that:
        # areas = [  [align1_peak1, align2_peak1, .....,alignn_peak1]
        #            [align1_peak2, ................................]
        #              .............................................
        #            [align1_peakm,....................,alignn_peakm]  ]
        areas: List[List] = []
        new_peak_lists: List[List[Peak]] = []

        for peak_list in self.peakpos:
            for index, peak in enumerate(peak_list):
                # on the first iteration, populate the lists
                if len(areas) < len(peak_list):
                    areas.append([])
                    new_peak_lists.append([])
                    rtsums.append(0)
                    rtcounts.append(0)

                if peak is None:
                    # no peak at this position: written out as "NA" below
                    areas[index].append(None)
                    continue

                # get the area of the common ion for the peak
                areas[index].append(peak.get_ion_area(top_ion_list[index]))
                new_peak_lists[index].append(peak)

                # running totals for the average retention time
                rtsums[index] += peak.rt
                rtcounts[index] += 1

        # now write the rows for the file
        for index, area_list in enumerate(areas):
            # write initial info:
            # peak unique id, peak average rt
            compo_peak = composite_peak(new_peak_lists[index])

            rt_avg = rtsums[index] / rtcounts[index]
            if minutes:
                # the average is divided by 60 only when minutes were requested
                rt_avg /= 60

            fields = [
                f'"{compo_peak.UID}"',
                f"{rt_avg:.3f}",
                f"{top_ion_list[index]:f}",
            ]
            fields.extend(
                "NA" if area is None else f"{area:.4f}"
                for area in area_list
            )

            fp.write(",".join(fields) + "\n")
def get_maxima_indices(ion_intensities: Union[Sequence, numpy.ndarray], points: int = 3) -> List[int]:
    """
    Locate the apexes of an ion's intensity trace and return their scan indices.

    A window of ``points`` values (forced to an odd size) slides over the data.
    An index is reported when the middle value is strictly greater than every
    other value in the window, or when it lies midway along a plateau that is
    entered by a rise and left by a fall.

    :param ion_intensities: A list of intensities for a single ion.
    :param points: Number of scans over which to consider a maxima to be a peak.

    :raises TypeError: If ``ion_intensities`` is not a sequence of numbers,
        or ``points`` is not an integer.

    :author: Andrew Isaac, Dominic Davis-Foster (type assertions)

    **Example:**

    .. code-block:: python

        >>> # A trivial set of data with two clear peaks
        >>> data = [1, 2, 3, 4, 5, 4, 3, 2, 1, 2, 3, 4, 5, 6, 5, 4, 3, 2, 1]
        >>> get_maxima_indices(data)
        [4, 13]
        >>> # Wider window (more points)
        >>> get_maxima_indices(data, points=10)
        [13]
    """

    if not is_sequence_of(ion_intensities, _number_types):
        raise TypeError("'ion_intensities' must be a sequence of numbers")

    if not isinstance(points, int):
        raise TypeError("'points' must be an integer")

    half_width = int(int(points) / 2)
    window = 2 * half_width + 1  # always an odd number of points

    maxima = []
    plateau_start = -1  # index where a plateau began after a rise; -1 means none pending

    for start in range(len(ion_intensities) - window + 1):
        middle = start + half_width
        value = ion_intensities[middle]
        left_max = max(ion_intensities[start:middle])

        if value > left_max:
            right_max = max(ion_intensities[middle + 1:start + window])
            if value > right_max:
                # strict maximum in the middle of the window
                maxima.append(middle)
                plateau_start = -1  # forget any earlier rising edge
            elif value == right_max:
                # a rise into a plateau: remember the most recent one
                plateau_start = middle
        elif value == left_max:
            if value > max(ion_intensities[middle + 1:start + window]):
                # falling off a plateau: report its mid point if it began with a rise
                if plateau_start > -1:
                    maxima.append(int((plateau_start + middle) / 2))
                plateau_start = -1

    return maxima