def test_picker_nloci():
    """Picker must report exactly two foci for the `gauss12` input."""
    foci_coords, _unused_mat = cud.picker(gauss12, precision=1)
    n_foci = len(foci_coords)
    assert n_foci == 2
def test_picker_idx(patterns, matrix):
    """Picker coordinates must match the expected pattern position (no shift)."""
    foci_coords, _unused_mat = cud.picker(matrix, precision=None)
    first_focus = foci_coords[0]
    assert np.all(first_focus == patterns)
def test_picker_speckles():
    """Picker must return None for both outputs on the `point_mat` input."""
    foci_coords, foci_mat = cud.picker(point_mat, precision=None)
    # Both the coordinates and the foci matrix are expected to be discarded.
    assert foci_coords is None
    assert foci_mat is None
def change_detection_pipeline(
    cool_files: Iterable[str],
    conditions: Iterable[str],
    kernel: Union[np.ndarray, str] = "loops",
    bed2d_file: Optional[str] = None,
    region: Optional[Union[Iterable[str], str]] = None,
    max_dist: Optional[int] = None,
    subsample: bool = True,
    percentile_thresh: float = 95.0,
    n_cpus: int = 4,
) -> pd.DataFrame:
    """
    Run end to end pattern change detection pipeline on input cool files.

    A list of conditions of the same length as the sample list must be
    provided. The first condition in the list is used as the reference
    (control) state.

    Changes for a specific pattern are computed. A valid chromosight pattern
    name can be supplied (e.g. loops, borders, hairpins, ...) or a kernel
    matrix can be supplied directly instead.

    Positions with significant changes will be reported in a pandas
    dataframe. Optionally, a 2D bed file with positions of interest can be
    specified, in which case the change value at these positions will be
    reported instead. Positive diff_scores mean the pattern intensity was
    increased relative to control (first condition).

    Parameters
    ----------
    cool_files :
        Paths to the input cool files, one per sample.
    conditions :
        Condition label of each sample, in the same order as `cool_files`.
    kernel :
        Either the name of a chromosight pattern or a numpy array used
        directly as the kernel.
    bed2d_file :
        Optional path to a 2D bed file. When given, scores are reported at
        these positions instead of at detected foci.
    region :
        A single region string, an iterable of region strings, or None to
        process every chromosome of the first sample.
    max_dist, subsample, percentile_thresh, n_cpus :
        Forwarded unchanged to `detection_matrix` for every region.

    Returns
    -------
    pandas.DataFrame
        Table with columns chrom1, start1, end1, chrom2, start2, end2, bin1,
        bin2 and diff_score.

    Raises
    ------
    ValueError
        If `cool_files` and `conditions` differ in length, or if `kernel` is
        neither a string nor a numpy array.
    AttributeError
        If `kernel` is a string that is not a known pattern name.
    """
    # Make sure each sample has an associated condition
    if len(cool_files) != len(conditions):
        raise ValueError(
            "The lists of cool files and conditions must have the same length")

    # If a pattern name was provided, load corresponding chromosight kernel
    if isinstance(kernel, str):
        kernel_name = kernel
        try:
            kernel = getattr(ck, kernel_name)["kernels"][0]
        except AttributeError as err:
            raise AttributeError(
                f"{kernel_name} is not a valid pattern name") from err
    elif not isinstance(kernel, np.ndarray):
        raise ValueError(
            "Kernel must either be a valid chromosight pattern name, or a 2D numpy.ndarray of floats"
        )

    # Associate samples with their conditions
    samples = pd.DataFrame({
        "cond": conditions,
        "cool": pai.get_coolers(cool_files)
    })
    print(
        f"Changes will be computed relative to condition: {samples.cond.values[0]}"
    )

    # Define each chromosome as a region, if None specified. The first
    # sample's cooler is used as the reference for bins and chromosomes.
    clr = samples.cool.values[0]
    if region is None:
        regions = clr.chroms()[:]["name"].tolist()
    elif isinstance(region, str):
        regions = [region]
    else:
        regions = list(region)

    pos_cols = [
        "chrom1",
        "start1",
        "end1",
        "chrom2",
        "start2",
        "end2",
        "bin1",
        "bin2",
        "diff_score",
    ]
    if bed2d_file:
        positions = cio.load_bed2d(bed2d_file)
        # Pre-create the output columns so they exist even when no region
        # contains any position of interest.
        for col in ["diff_score", "bin1", "bin2"]:
            positions[col] = np.nan
    else:
        positions = pd.DataFrame(columns=pos_cols)

    for reg in regions:
        # Subset bins to the range of interest
        bins = clr.bins().fetch(reg).reset_index(drop=True)
        diff, thresh = detection_matrix(
            samples,
            kernel,
            region=reg,
            subsample=subsample,
            max_dist=max_dist,
            percentile_thresh=percentile_thresh,
            n_cpus=n_cpus,
        )
        # If positions were provided, return the change value for each of them
        if bed2d_file:
            tmp_chr = reg.split(":")[0]
            tmp_rows = (positions.chrom1 == tmp_chr) & (positions.chrom2 == tmp_chr)
            # If there are no positions of interest on this chromosome, just
            # skip it
            if not np.any(tmp_rows):
                continue
            # Work on an explicit copy: helper columns are added below and
            # must not leak into (or warn about) the full table.
            tmp_pos = positions.loc[tmp_rows, :].copy()
            # Convert both coordinates from genomic coords to bins
            for i in [1, 2]:
                tmp_pos["chrom"] = tmp_pos[f"chrom{i}"]
                tmp_pos["pos"] = (tmp_pos[f"start{i}"] + tmp_pos[f"end{i}"]) // 2
                tmp_pos[f"bin{i}"] = coords_to_bins(clr, tmp_pos).astype(int)
                # Save bin coordinates from current chromosome to the full table
                positions.loc[tmp_rows, f"bin{i}"] = tmp_pos[f"bin{i}"]
            tmp_pos = tmp_pos.drop(columns=["pos", "chrom"])
            # Retrieve diff values for each coordinate.
            # NOTE(review): the matrix is indexed by start // binsize rather
            # than by the bin columns computed above; this presumably assumes
            # the region matrix starts at the chromosome origin - TODO confirm.
            positions.loc[tmp_rows, "diff_score"] = diff[
                tmp_pos.start1 // clr.binsize,
                tmp_pos.start2 // clr.binsize,
            ].A1
        # Otherwise report individual spots of change using chromosight
        else:
            # Pick "foci" of changed pixels and their local maxima
            tmp_pos, _ = cud.picker(abs(diff), thresh)
            # picker may return None in place of coordinates when no focus is
            # retained; there is then nothing to report for this region.
            if tmp_pos is None:
                continue
            # Get genomic positions from matrix coordinates
            tmp_pos = pd.DataFrame(tmp_pos, columns=["bin1", "bin2"])
            for i in [1, 2]:
                coords = (
                    bins.loc[tmp_pos[f"bin{i}"], ["chrom", "start", "end"]]
                    .reset_index(drop=True)
                    .rename(
                        columns={
                            "chrom": f"chrom{i}",
                            "start": f"start{i}",
                            "end": f"end{i}",
                        })
                )
                # Add axis' columns to dataframe
                tmp_pos = pd.concat([coords, tmp_pos], axis=1)
            # Retrieve diff values for each coordinate
            tmp_pos["diff_score"] = diff[tmp_pos.bin1, tmp_pos.bin2].A1
            # Append new chromosome's rows
            positions = pd.concat([positions, tmp_pos], axis=0)

    # Keep only the documented columns, in a fixed order
    positions = positions.loc[:, pos_cols]
    return positions
def test_picker_nloci():
    """Picker must report exactly two foci when thresholding `gauss12` at its mean."""
    mean_thresh = gauss12.data.mean()
    foci_coords, _unused_mat = cud.picker(gauss12, pearson=mean_thresh)
    n_foci = len(foci_coords)
    assert n_foci == 2
def test_picker_idx(patterns, matrix):
    """Picker coordinates must match the expected pattern position (no shift)."""
    mean_thresh = matrix.data.mean()
    foci_coords, _unused_mat = cud.picker(matrix, pearson=mean_thresh)
    first_focus = foci_coords[0]
    assert np.all(first_focus == patterns)