def test_stack():
    '''Check that `Stack` hands single objects back untouched, concatenates
    lists of PointData / LineData and flattens a nested list.
    '''
    generator = np.random.default_rng(0)
    point_data = pept.PointData(generator.random((10, 4)) * 100, sample_size=4)
    line_data = pept.LineData(generator.random((10, 7)) * 500, sample_size=4)

    # A lone PointData must come back as the very same object
    stacked_points = Stack().fit(point_data)
    assert stacked_points is point_data, \
        "Stack did not return a single PointData back"

    # Same for a lone LineData
    stacked_lines = Stack().fit(line_data)
    assert stacked_lines is line_data, \
        "Stack did not return a single LineData back"

    # A list of two PointData: the leading rows must equal the first input
    doubled_points = Stack().fit([point_data, point_data])
    assert np.all(doubled_points.points[:10] == point_data.points[:10])

    # A list of two LineData: the leading rows must equal the first input
    doubled_lines = Stack().fit([line_data, line_data])
    assert np.all(doubled_lines.lines[:10] == line_data.lines[:10])

    # A list nested inside a list is flattened
    flattened = Stack().fit([[1, 2, 3]])
    assert flattened == [1, 2, 3], "List flattening wrong"
def test_good_data(self):
    '''Check `pept.LineData` built from well-formed input (`self.good_data`):
    private attributes, public properties and the property setters.
    '''
    samples = pept.LineData(
        self.good_data,
        sample_size=200,
        overlap=10,
        verbose=False,
    )

    # Test private attributes
    assert samples._index == 0, "_index was not set to 0"
    assert samples._overlap == 10, "_overlap was not set correctly"
    assert samples._sample_size == 200, "_sample_size was not set correctly"
    # Assert the boolean result directly rather than comparing it to True
    assert np.array_equal(samples._line_data, self.good_data)
    assert samples._line_data.flags['C_CONTIGUOUS'], \
        "_line_data is not C-contiguous"
    assert samples._number_of_lines == len(samples._line_data), \
        "_number_of_lines was not set correctly"

    # Test properties
    assert np.array_equal(samples.line_data, samples._line_data)
    assert samples.sample_size == samples._sample_size
    assert samples.overlap == samples._overlap
    assert samples.number_of_samples == 2, \
        "number of samples was not calculated correctly"
    assert samples.number_of_lines == samples._number_of_lines

    # Test property setters; `_index` must still be 0 after each change
    samples.sample_size = 300
    assert samples.sample_size == 300
    assert samples._index == 0
    samples.sample_size = 200

    samples.overlap = 50
    assert samples.overlap == 50
    assert samples._index == 0
    samples.overlap = 10
def test_error_overlap(self):
    '''Assign an overlap equal to the sample size on a `pept.LineData`.

    NOTE(review): the assignment is meant to be rejected, yet no
    expected-failure marker or `raises` context is visible in this block -
    presumably one is applied outside it; confirm.
    '''
    line_data = pept.LineData(
        self.good_data,
        sample_size=200,
        overlap=100,
        verbose=False,
    )

    # Should not be able to set sample_size <= overlap
    line_data.overlap = 200
def test_minpoints():
    '''Compare `pept.tracking.Minpoints` against direct calls to
    `pept.utilities.find_minpoints`, for one sample and for every sample.
    '''
    generator = np.random.default_rng(0)
    lines = pept.LineData(generator.random((10, 7)) * 100, sample_size=4)

    # Settings shared by the filter and the reference computation
    num_lines = 3
    max_distance = 1000
    cutoffs = np.array([0, 100, 0, 100, 0, 100], dtype=float)

    minpoints = pept.tracking.Minpoints(num_lines, max_distance, cutoffs)
    print(minpoints)

    def reference(sample):
        # Minpoints of one sample, computed without going through the filter
        return pept.utilities.find_minpoints(
            sample.lines, num_lines, max_distance, cutoffs,
        )

    # Test `fit_sample`
    from_filter = minpoints.fit_sample(lines[0]).points
    expected = reference(lines[0])
    assert (from_filter == expected).all(), "Cutpoints not found correctly"

    # Test `fit`
    traversed = minpoints.fit(lines)
    manual = [reference(sample) for sample in lines]
    matches = [(t.points == m).all() for t, m in zip(traversed, manual)]
    assert all(matches), "Traversed list of cutpoints not found correctly"
def test_fpi():
    '''Smoke test: voxelize random lines, run `FPI` on the voxels and print
    the result. Nothing is asserted.
    '''
    generator = np.random.default_rng(0)
    lines = pept.LineData(generator.random((1000, 7)) * 100, sample_size=200)

    executor = "sequential"
    voxels = Voxelize((50, 50, 50)).fit(lines, executor)
    positions = FPI().fit(voxels, executor)
    print(positions)
def lines_trace(
    lines,
    width=2.0,
    color=None,
    opacity=0.6,
    colorbar=True,
    colorbar_col=0,
    colorscale="Magma",
    colorbar_title=None,
):
    '''Static method for creating a Plotly trace of lines. See
    `PlotlyGrapher.add_lines` for the full documentation.
    '''
    if not isinstance(lines, pept.LineData):
        lines = pept.LineData(lines)

    # Colour the lines by a data column only when a colorbar is requested and
    # no explicit colour was given
    colour_from_data = colorbar and color is None

    line_style = dict(width=width, color=color)
    if colorbar:
        line_style.update(colorscale=colorscale)
        if colorbar_title is not None:
            line_style.update(colorbar=dict(title=colorbar_title))

    def interleave(first_col, second_col):
        # Three slots per line: first endpoint, second endpoint, NaN. The NaN
        # presumably stops consecutive lines from being joined - TODO confirm.
        coords = np.full(3 * len(lines.lines), np.nan)
        coords[0::3] = lines.lines[:, first_col]
        coords[1::3] = lines.lines[:, second_col]
        return coords

    coords_x = interleave(1, 4)
    coords_y = interleave(2, 5)
    coords_z = interleave(3, 6)

    if colour_from_data:
        # A string selects a named column, anything else indexes the raw array
        if isinstance(colorbar_col, str):
            color_data = lines[colorbar_col]
        else:
            color_data = lines.lines[:, colorbar_col]

        # One colour value per coordinate slot, i.e. three per line
        line_style['color'] = np.repeat(color_data, 3)

    return go.Scatter3d(
        x=coords_x,
        y=coords_y,
        z=coords_z,
        mode='lines',
        opacity=opacity,
        line=line_style,
    )
def test_birmingham_method():
    '''Smoke test: run `BirminghamMethod` on one sample and on all samples,
    printing both results. Nothing is asserted.
    '''
    generator = np.random.default_rng(0)
    lines = pept.LineData(generator.random((5000, 7)) * 100, sample_size=200)

    # Single sample, also requesting the lines that were used
    single = BirminghamMethod(0.5, get_used=True).fit_sample(lines[0])
    print(single)

    # Every sample
    everything = BirminghamMethod(0.5).fit(lines, "sequential")
    print(everything)
def test_lines_centroids():
    '''Smoke test: run `LinesCentroids` on the whole data as one sample, on
    every sample, and on an empty selection. Nothing is asserted.
    '''
    generator = np.random.default_rng(0)
    lines = pept.LineData(generator.random((1000, 7)) * 100, sample_size=200)

    LinesCentroids().fit_sample(lines)

    executor = "sequential"
    LinesCentroids().fit(lines, executor)

    # `lines[0:0]` selects no samples
    nothing = lines[0:0]
    LinesCentroids().fit(nothing, executor)
def test_voxelizer():
    '''Check `Voxelize` on the whole data as one sample and across all
    samples.
    '''
    rng = np.random.default_rng(0)
    lines_raw = rng.random((1000, 7)) * 100
    lines = pept.LineData(lines_raw, sample_size=200)

    # The voxelized sample must keep a reference to its lines
    vox = Voxelize((20, 20, 20)).fit_sample(lines)
    assert "_lines" in vox.attrs

    ex = "sequential"

    # Exercise `Voxelize.fit` itself; previously this test only re-ran
    # `LinesCentroids` here, so the voxelizer's `fit` was never called
    Voxelize((20, 20, 20)).fit(lines, ex)

    # Calls kept from the original test.
    # NOTE(review): voxelizing the empty selection `lines[0:0]` is not added
    # because its behaviour cannot be confirmed from this file.
    LinesCentroids().fit(lines, ex)
    LinesCentroids().fit(lines[0:0], ex)
def test_hdbscan():
    '''Smoke test: cluster cutpoints with `HDBSCAN`, then cluster the result
    again, printing both outputs. Nothing is asserted.
    '''
    generator = np.random.default_rng(0)
    lines = pept.LineData(generator.random((5000, 7)) * 100, sample_size=200)

    executor = "sequential"
    cutpoints = Cutpoints(0.5).fit(lines, executor)

    first_pass = HDBSCAN(0.15, 2).fit(cutpoints, executor)
    print(first_pass)

    # Feed the already-clustered points through a second, identical clusterer
    second_pass = HDBSCAN(0.15, 2).fit(first_pass, executor)
    print(second_pass)
def test_peptml():
    '''Smoke test of a two-pass chain: cutpoints -> HDBSCAN -> centroids ->
    HDBSCAN -> centroids. The final centres are printed; nothing is asserted.
    '''
    generator = np.random.default_rng(0)
    lines = pept.LineData(generator.random((5000, 7)) * 100, sample_size=200)

    executor = "sequential"

    # First pass: cluster the cutpoints, reduce each cluster to a centre and
    # restack the centres
    cutpoints = Cutpoints(0.5).fit(lines, executor)
    clustered = HDBSCAN(0.15, 2).fit(cutpoints, executor)
    first_pipeline = SplitLabels() + Centroids() + Stack(30, 29)
    centres = first_pipeline.fit(clustered, executor)

    # Second pass: cluster the centres and reduce again
    clustered2 = HDBSCAN(0.6, 2).fit(centres, executor)
    second_pipeline = SplitLabels() + Centroids()
    centres2 = second_pipeline.fit(clustered2, executor)

    print(centres2)
def copy(self):
    '''Return a deep copy of this instance, backed by a freshly copied
    inner `lines` array.

    Returns
    -------
    pept.LineData
        A new `pept.LineData` carrying the same `sample_size` and `overlap`
        as this instance and its own copy of the lines.
    '''
    # Copy the underlying array, requesting C order
    duplicated_lines = self._lines.copy(order="C")

    return pept.LineData(
        duplicated_lines,
        sample_size=self._sample_size,
        overlap=self._overlap,
        verbose=False,
    )
def fit_sample(self, sample_lines):
    '''Compute the minpoints of one sample of lines.

    Parameters
    ----------
    sample_lines : pept.LineData or anything `pept.LineData` accepts
        The sample of lines; it is wrapped in a `pept.LineData` if needed.

    Returns
    -------
    pept.PointData
        The minpoints with columns `["t", "x", "y", "z"]`, followed by
        `line_index1..line_indexN` (N = `self.num_lines`) when
        `self.append_indices` is set. The settings used are stored in
        `attrs` under `_num_lines`, `_max_distance` and `_cutoffs`; with
        `self.append_indices`, the input sample is stored under `_lines`.
    '''
    if not isinstance(sample_lines, pept.LineData):
        sample_lines = pept.LineData(sample_lines)

    # Use the configured cutoffs, or derive them from this sample's lines
    if self.cutoffs is None:
        cutoffs = get_cutoffs(sample_lines.lines)
    else:
        cutoffs = self.cutoffs

    if len(sample_lines.lines) < self.num_lines:
        # Too few lines for a single combination: return an empty array with
        # the number of columns the full computation would have produced
        ncols = 4
        if self.append_indices:
            ncols += self.num_lines
        sample_minpoints = np.empty((0, ncols))
    else:
        sample_minpoints = pept.utilities.find_minpoints(
            sample_lines.lines,
            self.num_lines,
            self.max_distance,
            cutoffs,
            append_indices=self.append_indices,
        )

    # Column names
    columns = ["t", "x", "y", "z"]
    if self.append_indices:
        for i in range(self.num_lines):
            columns.append(f"line_index{i + 1}")

    # Encapsulate minpoints in a PointData
    points = pept.PointData(sample_minpoints, columns=columns)

    # Add optional metadata to the points; because they have an underscore,
    # they won't be propagated when new objects are constructed
    points.attrs["_num_lines"] = self.num_lines
    points.attrs["_max_distance"] = self.max_distance
    points.attrs["_cutoffs"] = cutoffs

    # If line indices were appended, also include the constituent lines
    if self.append_indices:
        points.attrs["_lines"] = sample_lines

    return points
def test_split_labels():
    '''Check `SplitLabels` on a labelled sample, on an empty sample, with an
    attached `_lines` attribute, and under several constructor settings.
    '''
    generator = np.random.default_rng(0)
    points_raw = generator.random((10, 4)) * 100
    labels = generator.integers(3, size=10)
    line_index = generator.integers(10, size=10)

    points = pept.PointData(
        np.c_[points_raw, labels, line_index],
        columns=["t", "x", "y", "z", "label", "line_index"],
    )
    points.samples_indices = [[0, 10], [5, 5], [5, 10]]

    # Check each split label: entry `k` must hold exactly the rows labelled k
    split = SplitLabels().fit_sample(points[0])
    for label in range(3):
        assert np.all(split[label].points[:, :4] == points_raw[labels == label])

    # Check with empty sample (second sample spans [5, 5])
    empty_split = SplitLabels().fit_sample(points[1])
    assert len(empty_split[0].data) == 0

    # Extracting `_lines`
    lines = pept.LineData(generator.random((10, 7)) * 500, sample_size=4)
    points.attrs["_lines"] = lines

    with_attr = SplitLabels().fit_sample(points[0])
    assert "_lines" in with_attr[0].attrs

    extracted = SplitLabels(extract_lines=True).fit_sample(points[0])
    assert isinstance(extracted[0], pept.LineData)

    # Test different settings
    all_options = (
        {},
        {"remove_labels": False},
        {"noise": True},
        {"extract_lines": True},
    )
    for options in all_options:
        SplitLabels(**options).fit(points, "sequential")
lor = [ t[i], x[i], y[i], z[i], t[i + 1], x[i + 1], y[i + 1], z[i + 1] ] for item in lor: f.write("%s\t" % str(item)) f.write('\n') else: continue f.close() filename = 'data/lors.txt' makeLORs(data, mask, filename) lors = np.loadtxt(filename, usecols=(0, 1, 2, 3, 5, 6, 7)) lors = pept.LineData(lors) # Create a PlotlyGrapher instance, then have it create a Plotly figure. grapher = PlotlyGrapher() # Add a Plotly trace from the LoRs grapher.add_lines(lors) grapher.show()
def fit_sample(self, sample):
    '''Use the Birmingham method to track a tracer location from one sample
    of LoRs.

    For the given `sample` of LoRs, this function minimises the distance
    between all of the LoRs, rejecting a fraction of lines that lie furthest
    away from the calculated distance. The process is repeated iteratively
    until a specified fraction (`self.fopt`) of the original subset of LoRs
    remains.

    The behaviour is configured on the instance, not through arguments:
    `self.fopt` is forwarded to `birmingham_method`, and `self.get_used`
    controls whether the lines used are attached to the result.

    Parameters
    ----------
    sample : pept.LineData or (N, M>=7) numpy.ndarray
        The sample of LoRs. Anything that is not already a `pept.LineData`
        is wrapped in one. Each LoR is expressed as a timestamp and a line
        defined by two points; the data columns are then
        `[time, x1, y1, z1, x2, y2, z2, extra...]`.

    Returns
    -------
    locations : pept.PointData
        A single tracked location with columns
        `["t", "x", "y", "z", "error"]`, carrying the extra attributes of
        `sample`. If `self.get_used` is true, `locations.attrs["_lines"]`
        holds a copy of `sample` with an additional `"used"` column, built
        from the second value returned by `birmingham_method`.

    Raises
    ------
    ValueError
        If `sample` cannot be converted to a `pept.LineData`, e.g. it is not
        of shape (N, M), where M >= 7.
    '''
    if not isinstance(sample, pept.LineData):
        sample = pept.LineData(sample)

    location, used = birmingham_method(sample.lines, self.fopt)

    # Propagate any LineData attributes besides `columns`
    attrs = sample.extra_attrs()

    # Wrap the single location as a one-row PointData
    locations = pept.PointData(
        [location],
        columns=["t", "x", "y", "z", "error"],
        **attrs,
    )

    # If `get_used`, also attach a `._lines` attribute with the lines used
    if self.get_used:
        locations.attrs["_lines"] = sample.copy(
            data=np.c_[sample.lines, used],
            columns=sample.columns + ["used"],
        )

    return locations
def fit(self, lines):
    '''Build a new `pept.LineData` from `lines` and return it.'''
    combined = pept.LineData(lines)
    return combined
def find_minpoints(sample_lines, num_lines, max_distance, cutoffs=None,
                   append_indices=False):
    '''Compute the minimum distance points (MDPs) from all combinations of
    `num_lines` lines given in an array of lines `sample_lines`.

    Given a sample of lines, this functions computes the minimum distance
    points (MDPs) for every possible combination of `num_lines` lines. The
    returned points contain all MDPs that satisfy the following:

    1. Are within the `cutoffs`.
    2. Are closer to all the constituent LoRs than `max_distance`.

    Parameters
    ----------
    sample_lines: pept.LineData or (M, N) numpy.ndarray
        A 2D array of lines, where each line is defined by two points such
        that every row is formatted as `[t, x1, y1, z1, x2, y2, z2, etc.]`.
        Anything that is not already a `pept.LineData` is wrapped in one.
        It *must* have at least 2 lines and the combination size `num_lines`
        *must* be smaller or equal to the number of lines. Put differently:
        2 <= num_lines <= len(sample_lines).

    num_lines: int
        The number of lines in each combination of LoRs used to compute the
        MDP. This function considers every combination of `num_lines` from
        the input `sample_lines`. It must be smaller or equal to the number
        of input lines `sample_lines`.

    max_distance: float
        The maximum allowed distance between an MDP and its constituent
        lines. If any distance from the MDP to one of its lines is larger
        than `max_distance`, the MDP is thrown away.

    cutoffs: (6,) numpy.ndarray, optional
        An array of spatial cutoff coordinates with *exactly 6 elements* as
        [x_min, x_max, y_min, y_max, z_min, z_max]. If any MDP lies outside
        this region, it is thrown away. If it is `None`, they are computed
        automatically by calling `get_cutoffs` on the array of lines. The
        default is `None`.

    append_indices: bool, default False
        A boolean specifying whether to include the indices of the lines
        used to compute each MDP. If `False`, the output will only contain
        the [time, x, y, z] of the MDPs. If `True`, the output will have
        extra columns [time, x, y, z, line_idx(1), ..., line_idx(n)] where
        n = `num_lines`.

    Returns
    -------
    minpoints: pept.PointData
        The MDPs, with columns `["t", "x", "y", "z"]`. The time is computed
        as the average of the constituent lines. If `append_indices` is
        `True`, then `num_lines` columns `line_index1, line_index2, ..` with
        the indices of the constituent lines are appended. The first column
        (for time) is sorted. The settings used are attached as the
        attributes `_max_distance`, `_cutoffs` and `_num_lines`; with
        `append_indices`, the input lines are attached as `_lines`.

    Raises
    ------
    ValueError
        If `sample_lines` is not a numpy array with shape (N, M >= 7).

    ValueError
        If 2 <= num_lines <= len(sample_lines) is not satisfied.

    ValueError
        If `cutoffs` is not a one-dimensional array with values
        `[min_x, max_x, min_y, max_y, min_z, max_z]`

    See Also
    --------
    pept.tracking.peptml.Minpoints : Compute minpoints from `pept.LineData`.
    pept.utilities.read_csv : Fast CSV file reading into numpy arrays.
    '''
    if not isinstance(sample_lines, pept.LineData):
        sample_lines = pept.LineData(sample_lines)

    lines = sample_lines.lines
    lines = np.asarray(lines, order='C', dtype=float)

    num_lines = int(num_lines)
    max_distance = float(max_distance)

    # If cutoffs were not defined, automatically compute them from the raw
    # array of lines - `get_cutoffs` is given the array, not the LineData
    # wrapper, exactly as the other callers in this file do
    if cutoffs is None:
        cutoffs = get_cutoffs(lines)
    else:
        cutoffs = np.asarray(cutoffs, order='C', dtype=float)
        if cutoffs.ndim != 1 or len(cutoffs) != 6:
            raise ValueError(
                ("\n[ERROR]: cutoffs should be a one-dimensional array with "
                 "values [min_x, max_x, min_y, max_y, min_z, max_z]. Received "
                 f"{cutoffs}.\n"))

    sample_minpoints = pept.utilities.find_minpoints(
        lines, num_lines, max_distance, cutoffs,
        append_indices=append_indices)

    columns = ["t", "x", "y", "z"]
    if append_indices:
        columns += [f"line_index{i + 1}" for i in range(num_lines)]

    points = pept.PointData(sample_minpoints, columns=columns)

    # Add optional metadata to the points; because they have an underscore,
    # they won't be propagated when new objects are constructed
    points._max_distance = max_distance
    points._cutoffs = cutoffs
    points._num_lines = num_lines

    # If line indices were appended, also include the constituent lines
    if append_indices:
        points._lines = sample_lines

    return points
def test_pipeline():
    '''Exercise `pept.base.Pipeline` assembled from filters and reducers,
    comparing each pipeline result against the same steps applied by hand.

    The two filters modify their input in place, so the statements below are
    order-sensitive.
    '''
    class F1(pept.base.LineDataFilter):
        # Adds 1 to every value and tags the sample with `attr1`
        def fit_sample(self, sample_lines):
            sample_lines.lines[:] += 1
            sample_lines.attrs["attr1"] = "New attribute added by F1"
            return sample_lines

    class F2(pept.base.LineDataFilter):
        # Adds 2 to every value and tags the sample with `attr2`
        def fit_sample(self, sample_lines):
            sample_lines.lines[:] += 2
            sample_lines.attrs["attr2"] = "New attribute added by F2"
            return sample_lines

    class R1(pept.base.Reducer):
        # Collects the samples into a tuple
        def fit(self, lines):
            return tuple(lines)

    class R2(pept.base.Reducer):
        # Builds one LineData from the samples
        def fit(self, lines):
            return pept.LineData(lines)

    def same_lines(left, right):
        # Pairwise comparison of the `.lines` arrays of two sequences
        return all([(a.lines == b.lines).all() for a, b in zip(left, right)])

    # Generate some dummy LineData
    lines = pept.LineData(np.arange(70).reshape(10, 7), sample_size=4)

    # Test pipeline creation
    assert isinstance(F1() + F2(), pept.base.Pipeline)
    assert isinstance(pept.base.Pipeline([F1(), F2()]), pept.base.Pipeline)
    assert isinstance(F1() + F2() + R1(), pept.base.Pipeline)

    # Test fit_sample
    pipe = F1() + F2()
    print(pipe)

    piped = pipe.fit_sample(lines[0]).lines
    manual = F2().fit_sample(F1().fit_sample(lines[0])).lines
    assert (piped == manual).all(), "Apply simple pipeline steps manually"

    pipe = F1() + F2() + R1()
    print(pipe)

    piped = pipe.fit_sample(lines[0])
    manual = F1().fit_sample(lines[0])
    manual = F2().fit_sample(manual)
    manual = R1().fit([manual])
    assert isinstance(piped, tuple), "Final pipeline reducer to tuple"
    assert isinstance(manual, tuple), "Final manual reducer to tuple"
    assert (piped[0].lines == manual[0].lines).all(), "Apply steps manually"

    # Test the attribute is added by the first filter
    assert "attr1" in F1().fit_sample(lines[0]).attrs
    assert "attr1" in pept.base.Pipeline([F1()]).fit_sample(lines[0]).attrs

    # Test fit
    # Simple filter-only pipeline
    pipe = F1() + F2()

    piped = pipe.fit(lines)
    manual = F2().fit(F1().fit(lines))
    assert isinstance(piped, list)
    assert isinstance(manual, list)
    assert len(piped) == len(manual) == len(lines)
    assert same_lines(piped, manual)

    # Pipeline ending in reducer
    pipe = F1() + F2() + R1()
    print(pipe)

    piped = pipe.fit(lines)
    manual = F1().fit(lines)
    manual = F2().fit(manual)
    manual = R1().fit(manual)
    assert isinstance(piped, tuple)
    assert isinstance(manual, tuple)
    assert len(piped) == len(manual) == len(lines)
    assert same_lines(piped, manual)

    # Complex pipeline
    pipe = F1() + F2() + R2() + F1() + R1()
    print(pipe)

    piped = pipe.fit(lines)
    manual = F1().fit(lines)
    manual = F2().fit(manual)
    manual = R2().fit(manual)
    manual = F1().fit(manual)
    manual = R1().fit(manual)
    assert isinstance(piped, tuple)
    assert isinstance(manual, tuple)
    assert len(piped) == len(manual) == len(lines)
    assert same_lines(piped, manual)

    # Test the attribute is added by the first filter
    assert "attr1" in F1().fit(lines)[0].attrs
    assert "attr1" in pept.base.Pipeline([F1()]).fit(lines)[0].attrs
def find_cutpoints(sample_lines, max_distance, cutoffs=None,
                   append_indices=False):
    '''Find the cutpoints from a sample / array of LoRs.

    A cutpoint is the point in 3D space that minimises the distance between
    any two lines. For any two non-parallel 3D lines, this point corresponds
    to the midpoint of the unique segment that is perpendicular to both
    lines.

    Every pair of lines in `sample_lines` is considered, and the cutpoints
    satisfying both conditions below are kept:

    1. The distance between the two lines is smaller than `max_distance`.
    2. The cutpoint is within the `cutoffs`.

    Parameters
    ----------
    sample_lines : pept.LineData or (N, M >= 7) numpy.ndarray
        A sample of LoRs, where each row is `[time, x1, y1, z1, x2, y2, z2]`,
        such that every line is defined by the points `[x1, y1, z1]` and
        `[x2, y2, z2]`. Anything that is not already a `pept.LineData` is
        wrapped in one.

    max_distance : float
        The maximum distance between any two lines for their cutpoint to be
        considered. A good starting value would be 0.1 mm for small tracers
        and/or clean data, or 0.2 mm for larger tracers and/or noisy data.

    cutoffs : list, optional
        The cutoffs for each dimension, formatted as
        `[x_min, x_max, y_min, y_max, z_min, z_max]`. If it is `None`, they
        are computed automatically by calling `get_cutoffs`. The default is
        `None`.

    append_indices : bool, optional
        If set to `True`, the indices of the two LoRs that were used to
        compute each cutpoint are also included in the result. Default is
        `False`.

    Returns
    -------
    cutpoints : pept.PointData
        The calculated cutpoints, with columns `["t", "x", "y", "z"]`; with
        `append_indices`, the columns `line_index1` and `line_index2` follow.
        The time is the average between the timestamps of the two LoRs that
        were used to compute the cutpoint. The first column (for time) is
        sorted. The settings used are attached as the attributes
        `_max_distance` and `_cutoffs`; with `append_indices`, the input
        lines are attached as `_lines`.

    Raises
    ------
    ValueError
        If `sample_lines` is not a numpy array with shape (N, M >= 7).

    ValueError
        If `cutoffs` is not a one-dimensional array with values
        `[min_x, max_x, min_y, max_y, min_z, max_z]`

    See Also
    --------
    pept.tracking.peptml.Cutpoints : Compute cutpoints from `pept.LineData`.
    pept.utilities.read_csv : Fast CSV file reading into numpy arrays.
    '''
    if not isinstance(sample_lines, pept.LineData):
        sample_lines = pept.LineData(sample_lines)

    # C-ordered float view of the raw lines, as handed to the low-level routine
    lors = np.asarray(sample_lines.lines, order='C', dtype=float)
    max_distance = float(max_distance)

    if cutoffs is not None:
        # User-supplied cutoffs: normalise, then check there are exactly six
        cutoffs = np.asarray(cutoffs, order='C', dtype=float)
        if cutoffs.ndim != 1 or len(cutoffs) != 6:
            raise ValueError(
                ("\n[ERROR]: cutoffs should be a one-dimensional array with "
                 "values [min_x, max_x, min_y, max_y, min_z, max_z]. Received "
                 f"{cutoffs}.\n"))
    else:
        # If cutoffs were not defined, automatically compute them
        cutoffs = get_cutoffs(lors)

    sample_cutpoints = pept.utilities.find_cutpoints(
        lors,
        max_distance,
        cutoffs,
        append_indices=append_indices,
    )

    columns = ["t", "x", "y", "z"]
    if append_indices:
        columns.extend(["line_index1", "line_index2"])

    points = pept.PointData(sample_cutpoints, columns=columns)

    # Add optional metadata to the points; because they have an underscore,
    # they won't be propagated when new objects are constructed
    points._max_distance = max_distance
    points._cutoffs = cutoffs

    # If line indices were appended, also include the constituent lines
    if append_indices:
        points._lines = sample_lines

    return points
def test_line_data():
    '''Check `pept.LineData`: sampling, copying, sample size / overlap
    changes, the accepted constructor inputs, propagation of columns and
    attributes, and rejection of illegal settings and array shapes.

    The setters change the object under test, so the statements below are
    order-sensitive.
    '''
    # Test simple sample size, no overlap
    raw = np.arange(70).reshape(10, 7)
    lines = pept.LineData(raw, sample_size=4)
    print(lines)

    assert (lines[0].lines == raw[:4]).all(), "Incorrect first sample"
    assert (lines[1].lines == raw[4:8]).all(), "Incorrect second sample"
    assert len(lines) == 2, "Incorrent number of samples"
    assert np.all(lines["t"] == raw[:, 0]), "Incorrect string indexing"

    # Test copying
    assert lines.copy() is not lines, "Copy is not deep"
    assert (lines.copy().lines == lines.lines).all(), "Incorrect copying"

    # Test changing sample size and overlap (int)
    lines.sample_size = 3
    lines.overlap = 2
    assert (lines[0].lines == raw[:3]).all(), "Incorrect ssize changing"
    assert (lines[1].lines == raw[1:4]).all(), "Incorrect overlapping"
    assert len(lines) == 8, "Incorrect number of samples after overlap"

    # Test changing sample size to List[Int]
    lines.sample_size = [3, 4, 2, 0]
    assert lines.overlap is None, "Overlap was not set to None"
    assert len(lines) == 4, "Incorrect number of samples"

    # Expected [start, stop) rows of each sample for sizes [3, 4, 2, 0]
    expected_bounds = [(0, 3), (3, 7), (7, 9), (9, 9)]
    for index, (start, stop) in enumerate(expected_bounds):
        assert (lines[index].lines == raw[start:stop]).all(), "List sample size"

    # Test copying
    assert lines.copy().lines is not lines.lines, "Copy is not deep"
    assert lines.copy(deep=False).lines is lines.lines, "Not shallow copy"
    assert (lines.copy().lines == lines.lines).all(), "Incorrect copying"
    assert np.all(lines.copy().samples_indices == lines.samples_indices)

    lines.samples_indices = [[0, 5], [5, 5], [5, 10]]
    assert np.all(lines.copy().samples_indices == lines.samples_indices)

    # Test different constructors: copy, iterable, numpy-like
    raw = np.arange(80).reshape(10, 8)
    columns = ["t", "x1", "y1", "z1", "x2", "y2", "z2", "error"]
    lines = pept.LineData(raw, columns=columns)

    pept.LineData(lines)
    pept.LineData([lines, lines])
    pept.LineData([range(7), range(7)])

    # Test unnamed columns
    pept.LineData([range(8), range(8)])
    pept.LineData([range(7)], columns=list("abcdefghi"))

    # Test columns propagation
    assert "error" in pept.LineData(lines).columns
    assert "error" in pept.LineData([lines, lines]).columns

    # Test attrs propagation
    lines.attrs["_lines"] = 123
    lines.attrs["_attr2"] = [1, 2, 3]

    assert "_lines" in pept.LineData(lines).attrs
    assert "_attr2" in pept.LineData([lines, lines]).attrs
    assert "_lines" in lines[0].attrs
    assert "_attr2" in lines.copy().attrs

    # Test illegal changes to sample size and overlap
    with pytest.raises(ValueError):
        lines.sample_size = 3
        lines.overlap = 3

    with pytest.raises(ValueError):
        lines.sample_size = 0
        lines.overlap = 3
        lines.sample_size = 3

    with pytest.raises(ValueError):
        lines.sample_size = -1

    # Test illegal array shapes: 1D, too few columns, 3D
    with pytest.raises(ValueError):
        pept.LineData(np.arange(12))

    with pytest.raises(ValueError):
        pept.LineData(np.arange(12).reshape(2, 6))

    with pytest.raises(ValueError):
        pept.LineData(np.arange(12).reshape(2, 2, 3))