def test_hic_genome_subsample(self, path):
    """Verify HicGenome.subsample results and its rejection of bad values.

    Two things are checked:

    * a negative number or a string passed to ``subsample`` raises
      ``ValueError``;
    * after subsampling, the total of the contact matrix matches the
      requested amount, both when the request is a proportion (0.7) and
      when it is a contact count (20% of the initial total).

    Parameters
    ----------
    path :
        Input handed to ``ccm.HicGenome``.
    """
    genome = ccm.HicGenome(path)

    # Invalid requests must be refused before anything is sampled.
    for invalid in (-1, "a"):
        with self.assertRaises(ValueError):
            genome.subsample(invalid)

    # Request expressed as a proportion of the existing contacts.
    total_before = genome.matrix.sum()
    genome.subsample(0.7)
    total_after = genome.matrix.sum()
    assert np.isclose(total_after, 0.7 * total_before)

    # Request expressed as a number of contacts, on a freshly loaded object
    # so the previous subsampling does not interfere.
    genome = ccm.HicGenome(path)
    wanted = genome.matrix.sum() * 0.2
    genome.subsample(wanted)
    assert np.isclose(genome.matrix.sum(), wanted)
def test_hic_genome(path):
    """Check HicGenome construction and splitting into sub matrices.

    The object is built twice: once with default arguments, and once with
    ``inter=True`` and the ``ck.loops`` kernel configuration. After each
    build, ``make_sub_matrices`` is called and the number of sub matrices,
    ``max_dist`` and ``largest_kernel`` are compared with the expected
    values.

    Parameters
    ----------
    path :
        Input handed to ``ccm.HicGenome``.
    """
    # Default construction: no kernel config, no inter matrices.
    genome = ccm.HicGenome(path)
    genome.make_sub_matrices()
    n_chroms = len(genome.clr.chromnames)
    assert genome.sub_mats.shape[0] == n_chroms
    assert genome.max_dist is None

    # Construction with inter matrices and a kernel configuration.
    genome = ccm.HicGenome(path, inter=True, kernel_config=ck.loops)
    genome.make_sub_matrices()
    n_sub_mats = genome.sub_mats.shape[0]
    assert n_sub_mats == n_chroms**2 - n_chroms

    # max_dist is expected in bins: the configured distance divided by
    # the bin size.
    expected_max_dist = ck.loops["max_dist"] // genome.clr.binsize
    assert genome.max_dist == expected_max_dist

    # largest_kernel is compared to the first dimension of the first kernel.
    expected_kernel_size = ck.loops["kernels"][0].shape[0]
    assert genome.largest_kernel == expected_kernel_size
def test_hic_genome_coords_to_bins(path):
    """Test conversion of genomic coordinates to bin indices.

    Two (chrom, pos) records are passed to ``coords_to_bins`` and the
    result is compared element-wise with the expected bin indices.

    Parameters
    ----------
    path :
        Input handed to ``ccm.HicGenome``.
    """
    genome = ccm.HicGenome(path)
    positions = pd.DataFrame(
        {
            "chrom": ["chr1", "chr2"],
            "pos": [150, 4000],
        }
    )
    expected = np.array([0, 131])
    observed = genome.coords_to_bins(positions)
    assert np.all(expected == observed)
def test_hic_genome_bins_to_coords(path):
    """Test conversion of bin indices to genomic coordinates.

    The rows returned by ``bins_to_coords`` for a few bin indices must be
    equal to the rows at the same positions in the object's ``bins`` table.

    Parameters
    ----------
    path :
        Input handed to ``ccm.HicGenome``.
    """
    genome = ccm.HicGenome(path)
    bin_ids = [0, 5, 8]
    # Reference: positional lookup in the bins table, keeping all columns.
    expected = genome.bins.iloc[bin_ids, :]
    observed = genome.bins_to_coords(bin_ids)
    assert np.all(expected == observed)
def test_hic_genome_get_sub_mat_pattern(path):
    """Test conversion of full-matrix bins to sub-matrix bins.

    Dummy pattern coordinates are converted for the chr1 x chr1 sub
    matrix; the test expects ``bin1`` and ``bin2`` to come back unchanged.

    Parameters
    ----------
    path :
        Input handed to ``ccm.HicGenome``.
    """
    genome = ccm.HicGenome(path)
    patterns = pd.DataFrame({"bin1": [0, 10, 50], "bin2": [1, 11, 51]})
    converted = genome.get_sub_mat_pattern("chr1", "chr1", patterns)
    # NOTE(review): identical coordinates are expected presumably because
    # chr1 starts at bin 0 of the full matrix -- confirm against test data.
    for column in ("bin1", "bin2"):
        assert np.all(getattr(converted, column) == getattr(patterns, column))
# NOTE(review): this file contains later definitions with the same name; if
# they live in the same module, the last one wins and this test is never
# collected -- confirm and rename if needed.
def test_hic_genome_normalize(path):
    """Check that normalizing a HicGenome yields bin sums close to 1.

    Detectable bins are computed on the matrix before normalization. After
    ``normalize(iterations=100)``, the matrix is restricted to those bins
    and every bin sum must be within 5% (relative) of 1.

    Parameters
    ----------
    path :
        Input handed to ``ccm.HicGenome``.
    """
    genome = ccm.HicGenome(path)
    # Must be computed before normalize() modifies the matrix.
    detectable = preproc.get_detectable_bins(genome.matrix, n_mads=5)
    genome.normalize(iterations=100)

    # Keep only detectable rows, then only detectable columns.
    keep_rows, keep_cols = detectable[0], detectable[1]
    balanced = genome.matrix.tocsr()[keep_rows, :][:, keep_cols]

    sums = preproc.sum_mat_bins(balanced)
    assert np.allclose(sums, 1, rtol=0.05)
def contact_map_subsample(self, path):
    """Check results and error handling of contacts subsampling.

    First verifies that constructing ``ccm.HicGenome`` with an invalid
    ``sample`` value (negative number or string) raises ``ValueError``.
    Then, for every sub matrix, the balanced matrix is reloaded, NaNs are
    zeroed, and the sub matrix is subsampled to 70% with ``balance=False``;
    the resulting total is compared with 70% of the raw (unbalanced) total.

    Parameters
    ----------
    path :
        Input handed to ``ccm.HicGenome``.

    Notes
    -----
    The name does not start with ``test_``, so test runners that collect by
    that prefix will not pick this function up.
    """
    with self.assertRaises(ValueError):
        ccm.HicGenome(path, sample=-1)
    with self.assertRaises(ValueError):
        ccm.HicGenome(path, sample="a")

    hic_genome = ccm.HicGenome(path)
    hic_genome.make_sub_matrices()
    for sub in hic_genome.sub_mats.contact_map:
        (s1, e1), (s2, e2) = sub.extent
        # Reload the balanced values for this sub matrix and drop the NaNs
        # so they do not end up stored as explicit entries.
        sub.matrix = sub.clr.matrix(sparse=True, balance=True)[s1:e1, s2:e2]
        sub.matrix.data[np.isnan(sub.matrix.data)] = 0
        sub.matrix.eliminate_zeros()
        # Reference total comes from the raw (unbalanced) counts.
        ori_sum = sub.clr.matrix(sparse=True, balance=False)[s1:e1, s2:e2].sum()
        sub.subsample(0.7, balance=False)
        # The leftover breakpoint() was removed: it would stop any
        # non-interactive run. The values the debug print used to show are
        # now reported in the assertion message instead.
        expected = int(0.7 * ori_sum)
        observed = sub.matrix.sum()
        assert np.isclose(expected, observed), (
            f"expected {expected} contacts after subsampling "
            f"(0.7 * {ori_sum}), got {observed}"
        )
# NOTE(review): this name is defined more than once in this file; if all
# definitions share a module, only the last one is kept -- confirm.
def test_hic_genome_normalize(path):
    """Smoke test: forced normalization of a HicGenome must not raise.

    Nothing is asserted about the result; the test only exercises
    ``normalize(force_norm=True)``.

    Parameters
    ----------
    path :
        Input handed to ``ccm.HicGenome``.
    """
    ccm.HicGenome(path).normalize(force_norm=True)
# NOTE(review): this name is defined more than once in this file; if all
# definitions share a module, only the last one is kept -- confirm.
def test_hic_genome_normalize(path):
    """Smoke test: normalization runs with each tested ``norm`` mode.

    ``normalize`` is called on the same object with ``norm='raw'`` and then
    ``norm='auto'``; the test passes if neither call raises.

    Parameters
    ----------
    path :
        Input handed to ``ccm.HicGenome``.
    """
    genome = ccm.HicGenome(path)
    for mode in ('raw', 'auto'):
        genome.normalize(norm=mode)