def test_genome_wide_norm(self):
    """Test genome-wide Obs/Exp pairing scores with median normalization.

    Opens the synthetic cooler (``/resolutions/10000`` group), computes the
    expected values for the single ``chrSyn`` arm and calls
    ``HT.get_pairing_score_obs_exp`` with ``norm=True`` and without a
    ``regions`` argument. The result is compared to the stored reference
    table.
    """
    # No position frame is loaded here: the genome-wide call takes no
    # ``regions`` argument, so such a frame would never be used.
    arms = pd.DataFrame(
        {"chrom": "chrSyn", "start": 0, "end": 4990000}, index=[0]
    )
    cooler_file = cooler.Cooler("testFiles/test2.mcool::/resolutions/10000")
    exp_f = HT.get_expected(cooler_file, arms, ignore_diagonals=0)
    pairing_score = HT.get_pairing_score_obs_exp(
        cooler_file, exp_f, 50000, arms=arms, norm=True
    )
    # Read the reference with the dtypes of the computed frame so that CSV
    # type inference cannot make the frame comparison fail on dtype alone.
    expected = pd.read_csv(
        "testFiles/test_pairingScore_obsExp_genomeWide_Norm.csv",
        dtype=pairing_score.dtypes.to_dict(),
    )
    assert_frame_equal(pairing_score, expected)
def test_no_collapse_real_data(self):
    """Test the non-collapsed Obs/Exp pileup on the real-data cooler.

    Loads the region table, fetches the hg19 arms, computes expected
    values and calls ``HT.do_pileup_obs_exp`` with ``collapse=False``.
    The result is compared to the stored ``.npy`` reference.
    """
    regions = pd.read_csv("testFiles/testAssignRegions.csv")
    hg19_arms = HT.get_arms_hg19()
    clr = cooler.Cooler("testFiles/test3_realdata.mcool::resolutions/50000")
    expected_counts = HT.get_expected(clr, hg19_arms)
    pileup = HT.do_pileup_obs_exp(
        clr, expected_counts, regions, proc=1, collapse=False
    )
    reference = np.load("testFiles/real_data_obsexp_pileup_not_collapsed.npy")
    # NaN entries at matching positions count as equal.
    matches = np.allclose(pileup, reference, equal_nan=True)
    self.assertTrue(matches)
def test_center_enrichment(self):
    """Test that a sliding diamond over a gaussian peaks at the center.

    Runs ``HT.sliding_diamond`` with ``side_len=6`` on ``self.gaussian``
    (a fixture defined outside this method) and requires the mean of the
    values with ``|x| < 1`` to exceed five times the mean of the first
    five values.
    """
    positions, scores = HT.sliding_diamond(self.gaussian, side_len=6)
    near_center = np.abs(positions) < 1
    center_mean = np.mean(scores[near_center])
    border_mean = np.mean(scores[:5])
    self.assertGreater(center_mean, 5 * border_mean)
def test_specific_regions_without_diag(self):  # Simulated cooler
    """Test ICCF pairing scores at specific regions, main diagonal blanked.

    Uses the synthetic cooler and the positions from ``posPileups.csv``
    (with a ``mid`` column copied from ``pos``). Calls
    ``HT.get_pairing_score`` with ``norm=False`` and ``blank_diag=True``
    and compares the result to the stored reference table.
    """
    regions = pd.read_csv("testFiles/posPileups.csv")
    regions.loc[:, "mid"] = regions["pos"]
    arm_spec = {"chrom": "chrSyn", "start": 0, "end": 4990000}
    arms = pd.DataFrame(arm_spec, index=[0])
    clr = cooler.Cooler("testFiles/test2.mcool::/resolutions/10000")
    scores = HT.get_pairing_score(
        clr, 50000, regions=regions, arms=arms, norm=False, blank_diag=True
    )
    reference = pd.read_csv(
        "testFiles/test_pairingScore_ICCF_specificRegions_withoutDiag.csv"
    )
    assert_frame_equal(scores, reference)
def test_genome_wide_norm_without_diag(self):
    """Test genome-wide, normalized ICCF pairing scores, diagonal blanked.

    Calls ``HT.get_pairing_score`` on the synthetic cooler with
    ``norm=True`` and ``blank_diag=True`` and no ``regions`` argument,
    then compares the result to the stored reference table.
    """
    arm_spec = {"chrom": "chrSyn", "start": 0, "end": 4990000}
    arms = pd.DataFrame(arm_spec, index=[0])
    clr = cooler.Cooler("testFiles/test2.mcool::/resolutions/10000")
    scores = HT.get_pairing_score(
        clr, 50000, arms=arms, norm=True, blank_diag=True
    )
    # Reuse the dtypes of the computed frame when parsing the reference;
    # the first CSV column is read back as the index.
    column_types = dict(scores.dtypes.items())
    reference = pd.read_csv(
        "testFiles/test_pairingScore_ICCF_genomeWide_withoutDiag_norm.csv",
        dtype=column_types,
        index_col=0,
    )
    assert_frame_equal(scores, reference)
def test_odd_diamond_xnorm(self):
    """Test an odd-sized sliding diamond with x normalization.

    Calls ``HT.sliding_diamond`` on ``self.test_matrix`` with
    ``side_len=3`` and ``center_x=True`` and checks both returned arrays
    against hard-coded values.
    """
    positions, scores = HT.sliding_diamond(
        self.test_matrix, side_len=3, center_x=True
    )
    wanted_positions = np.array([-1.5, -0.5, 0.5, 1.5])
    wanted_scores = np.array([3.666666666, 4.11111111, 4.77777777, 2.55555555])
    self.assertTrue(np.allclose(positions, wanted_positions))
    self.assertTrue(np.allclose(scores, wanted_scores))
def test_even_diamond(self):
    """Test an even-sized sliding diamond without x normalization.

    Calls ``HT.sliding_diamond`` on ``self.test_matrix`` with
    ``side_len=2`` and ``center_x=False`` and checks both returned arrays
    against hard-coded values.
    """
    positions, scores = HT.sliding_diamond(
        self.test_matrix, side_len=2, center_x=False
    )
    wanted_positions = np.array([0.5, 1.5, 2.5, 3.5, 4.5])
    wanted_scores = np.array([3.5, 4.75, 4.75, 3.25, 2.0])
    self.assertTrue(np.allclose(positions, wanted_positions))
    self.assertTrue(np.allclose(scores, wanted_scores))
def test_collapse(self):
    """Test the collapsed ICCF pileup on synthetic Hi-C data.

    Assigns the positions from ``posPileups.csv`` to the single
    ``chrSyn`` arm via ``HT.assign_regions``, runs ``HT.do_pileup_iccf``
    with ``collapse=True`` and compares the result to the stored
    ``.npy`` reference.
    """
    positions = pd.read_csv("testFiles/posPileups.csv")
    arm_spec = {"chrom": "chrSyn", "start": 0, "end": 4990000}
    arms = pd.DataFrame(arm_spec, index=[0])
    regions = HT.assign_regions(
        50000, 10000, positions["chrom"], positions["pos"], arms
    )
    clr = cooler.Cooler("testFiles/test2.mcool::/resolutions/10000")
    pileup = HT.do_pileup_iccf(clr, regions, proc=1, collapse=True)
    reference = np.load("testFiles/test_pileups_iccf_collapse.npy")
    matches = np.allclose(pileup, reference)
    self.assertTrue(matches)
def test_odd_diamond(self):
    """Test an odd-sized sliding diamond without x normalization.

    Calls ``HT.sliding_diamond`` on ``self.test_matrix`` with
    ``side_len=3`` and ``center_x=False`` and checks both returned arrays
    against hard-coded values.
    """
    positions, scores = HT.sliding_diamond(
        self.test_matrix, side_len=3, center_x=False
    )
    wanted_positions = np.array([1, 2, 3, 4])
    wanted_scores = np.array([3.666666666, 4.11111111, 4.77777777, 2.55555555])
    self.assertTrue(np.allclose(positions, wanted_positions))
    self.assertTrue(np.allclose(scores, wanted_scores))
def test_collapse_real_data(self):
    """Test the collapsed ICCF pileup on the real-data cooler.

    Feeds the pre-assigned regions from ``testAssignRegions.csv`` to
    ``HT.do_pileup_iccf`` with ``collapse=True`` and compares the result
    to the stored ``.npy`` reference.
    """
    regions = pd.read_csv("testFiles/testAssignRegions.csv")
    clr = cooler.Cooler("testFiles/test3_realdata.mcool::resolutions/50000")
    pileup = HT.do_pileup_iccf(clr, regions, proc=1, collapse=True)
    reference = np.load("testFiles/real_data_iccf_pileup_collapsed.npy")
    matches = np.allclose(pileup, reference)
    self.assertTrue(matches)
def test_even_diamond_xnorm(self):
    """Test an even-sized sliding diamond with x normalization.

    Calls ``HT.sliding_diamond`` on ``self.test_matrix`` with
    ``side_len=2`` and ``center_x=True``. The expected x values are
    centered on 0; both returned arrays are checked against hard-coded
    values.
    """
    positions, scores = HT.sliding_diamond(
        self.test_matrix, side_len=2, center_x=True
    )
    wanted_positions = np.array([-2, -1, 0, 1, 2])
    wanted_scores = np.array([3.5, 4.75, 4.75, 3.25, 2.0])
    self.assertTrue(np.allclose(positions, wanted_positions))
    self.assertTrue(np.allclose(scores, wanted_scores))
def test_expected_real_data(self):
    """Test expected counts on real Hi-C data with multiple arms.

    Computes expected values with ``HT.get_expected`` for the hg19 arms,
    drops the ``count.sum`` column and compares against the stored
    reference table. Both frames are sorted by ``region`` and ``diag``
    and re-indexed before the comparison.
    """
    hg19_arms = HT.get_arms_hg19()
    clr = cooler.Cooler("testFiles/test3_realdata.mcool::/resolutions/50000")
    computed = HT.get_expected(clr, hg19_arms, proc=1, ignore_diagonals=0)
    computed = computed.drop(columns=["count.sum"])
    reference = pd.read_csv("testFiles/test_expected_realdata.csv")

    def region_label(row):
        # merge regions for new expected format
        return f"{row['chrom']}:{row['start']}-{row['end']}"

    reference.loc[:, "region"] = reference.apply(region_label, axis=1)
    # Bring the reference into the column layout of the computed frame.
    reference = reference.drop(columns=["chrom", "start", "end"])
    reference = reference[computed.columns]
    sort_keys = ["region", "diag"]
    reference = reference.sort_values(by=sort_keys).reset_index(drop=True)
    computed = computed.sort_values(by=sort_keys).reset_index(drop=True)
    assert_frame_equal(computed, reference)
def test_case1(self):
    """Test region assignment for a small, synthetic set of positions.

    Reads the tab-separated ``testSmall.bed``, passes its ``chrom`` and
    ``pos`` columns to ``HT.assign_regions`` together with ``self.arms``
    (a fixture defined outside this method) and compares the result to
    the stored reference table.
    """
    positions = pd.read_csv("testFiles/testSmall.bed", sep="\t")
    assigned = HT.assign_regions(
        window=500_000,
        binsize=50_000,
        chroms=positions["chrom"],
        positions=positions["pos"],
        arms=self.arms,
    )
    reference = pd.read_csv("testFiles/testAssignRegions.csv")
    assert_frame_equal(assigned, reference)
def test_region_spans_multiple_supports(self):
    """Test region assignment when a region spans multiple supports.

    Only the ``region`` column of the ``HT.assign_regions`` output is
    compared against the stored reference.
    """
    positions = pd.read_csv("testFiles/multiple_support_regions.csv")
    assigned = HT.assign_regions(
        window=1_000_000,
        binsize=20_000,
        chroms=positions["chrom"],
        positions=positions["pos"],
        arms=self.arms,
    )
    reference = pd.read_csv("testFiles/multiple_support_regions_result.csv")
    assert_series_equal(assigned["region"], reference["region"])
def test_equal_sized_windows(self):
    """Test flexible pileup with equally sized windows (obs/exp).

    Computes expected values on the synthetic cooler, extracts the
    windows listed in ``posPileupSymmetric.csv`` with
    ``HT.extract_windows_different_sizes_obs_exp`` and compares every
    window to the pickled reference.
    """
    position_frame = pd.read_csv("testFiles/posPileupSymmetric.csv")
    arms = pd.DataFrame(
        {"chrom": "chrSyn", "start": 0, "end": 4990000}, index=[0]
    )
    cooler_file = cooler.Cooler("testFiles/test2.mcool::/resolutions/10000")
    expected_counts = HT.get_expected(
        cooler_file, arms, proc=2, ignore_diagonals=0
    )
    # Materialize so that the number of windows can be checked below.
    result = list(
        HT.extract_windows_different_sizes_obs_exp(
            position_frame, arms, cooler_file, expected_counts
        )
    )
    # load expected extracted windows
    # NOTE: pickle is only acceptable here because the fixture is a trusted
    # file from the repository; never unpickle untrusted data.
    with open("testFiles/test_pilesup_symmetric_obs_exp.pickle",
              "rb") as file_pointer:
        expected_windows = list(pickle.load(file_pointer))
    # zip() stops at the shorter input, so without this check a missing,
    # extra or empty set of windows would pass unnoticed.
    self.assertEqual(len(result), len(expected_windows))
    self.assertTrue(
        all(np.allclose(i, j) for i, j in zip(result, expected_windows))
    )
def test_specific_regions(self):
    """Test Obs/Exp pairing scores at specific regions (synthetic data).

    Uses the positions from ``posPileups.csv`` (with a ``mid`` column
    copied from ``pos``) as ``regions`` for
    ``HT.get_pairing_score_obs_exp`` with ``norm=False`` and compares
    the result to the stored reference table.
    """
    regions = pd.read_csv("testFiles/posPileups.csv")
    regions.loc[:, "mid"] = regions["pos"]
    arm_spec = {"chrom": "chrSyn", "start": 0, "end": 4990000}
    arms = pd.DataFrame(arm_spec, index=[0])
    clr = cooler.Cooler("testFiles/test2.mcool::/resolutions/10000")
    expected_counts = HT.get_expected(clr, arms, ignore_diagonals=0)
    scores = HT.get_pairing_score_obs_exp(
        clr, expected_counts, 50000, regions=regions, arms=arms, norm=False
    )
    reference = pd.read_csv(
        "testFiles/test_pairingScore_obsExp_specificRegions.csv"
    )
    assert_frame_equal(scores, reference)
def test_synthetic_data(self):
    """Test expected counts for synthetic Hi-C data.

    Runs ``HT.get_expected`` on ``self.cooler`` and ``self.arms``
    (fixtures defined outside this method) and compares the result to
    the stored reference, after converting the reference to the
    ``region``-based column layout of the result.
    """
    computed = HT.get_expected(
        self.cooler, self.arms, proc=1, ignore_diagonals=0
    )
    reference = pd.read_csv("testFiles/test_expected_chrSyn.csv")

    def region_label(row):
        # merge regions for new expected format
        return f"{row['chrom']}:{row['start']}-{row['end']}"

    reference.loc[:, "region"] = reference.apply(region_label, axis=1)
    reference = reference.drop(columns=["chrom", "start", "end"])
    # Same columns, same order as the computed frame.
    reference = reference[computed.columns]
    assert_frame_equal(computed, reference)
def test_regions_are_close_to_supports(self):
    """Test region assignment for regions close to support boundaries.

    Regression test: a bug was previously found for this situation. Only
    the ``region`` column of the ``HT.assign_regions`` output is compared
    against the stored reference.
    """
    bed_file = pd.read_csv(
        "testFiles/regions_close_to_support_boundary.tsv", sep="\t")
    result = HT.assign_regions(
        window=1000000,
        binsize=20000,
        chroms=bed_file["chrom"],
        positions=bed_file["pos"],
        arms=self.arms,
    )
    # ``delim_whitespace=True`` is deprecated in pandas; ``sep=r"\s+"`` is
    # the documented equivalent for whitespace-separated files.
    expected = pd.read_csv(
        "testFiles/regions_close_to_support_boundary_result.tsv",
        sep=r"\s+",
    )
    assert_series_equal(result["region"], expected["region"])
def test_synthetic_data_mult_chroms(self):
    """Test expected counts for synthetic Hi-C data split into two arms.

    ``chrSyn`` is divided at 2000000 into two arms. The output of
    ``HT.get_expected`` is compared to the stored reference, after
    converting the reference to the ``region``-based column layout of
    the result.
    """
    arms = pd.DataFrame(
        {
            "chrom": ["chrSyn", "chrSyn"],
            "start": [0, 2000000],
            "end": [2000000, 4990000],
        }
    )
    computed = HT.get_expected(self.cooler, arms, proc=1, ignore_diagonals=0)
    reference = pd.read_csv("testFiles/test_expected_multiple_chroms.csv")

    def region_label(row):
        # merge regions for new expected format
        return f"{row['chrom']}:{row['start']}-{row['end']}"

    reference.loc[:, "region"] = reference.apply(region_label, axis=1)
    reference = reference.drop(columns=["chrom", "start", "end"])
    # Same columns, same order as the computed frame.
    reference = reference[computed.columns]
    assert_frame_equal(computed, reference)
def test_differently_sized_windows(self):
    """Test flexible pileup with differently sized windows (ICCF).

    Extracts the windows listed in ``posPileupAsymmetric.csv`` with
    ``HT.extract_windows_different_sizes_iccf`` on a single ``chrSyn``
    arm and compares every window to the pickled reference.
    """
    position_frame = pd.read_csv("testFiles/posPileupAsymmetric.csv")
    arms = pd.DataFrame(
        {"chrom": "chrSyn", "start": 0, "end": 4990000}, index=[0]
    )
    cooler_file = cooler.Cooler("testFiles/test3.mcool::/resolutions/10000")
    # Materialize so that the number of windows can be checked below.
    result = list(
        HT.extract_windows_different_sizes_iccf(
            position_frame, arms, cooler_file
        )
    )
    # load expected extracted windows
    # NOTE: pickle is only acceptable here because the fixture is a trusted
    # file from the repository; never unpickle untrusted data.
    with open("testFiles/test_pilesup_asymmetric.pickle",
              "rb") as file_pointer:
        expected = list(pickle.load(file_pointer))
    # zip() stops at the shorter input, so without this check a missing,
    # extra or empty set of windows would pass unnoticed.
    self.assertEqual(len(result), len(expected))
    self.assertTrue(
        all(np.allclose(i, j) for i, j in zip(result, expected))
    )
def test_differently_sized_windows_different_arms(self):
    """Test pileup of differently sized windows on different arms.

    Same input positions and pickled reference as the single-arm test,
    but ``chrSyn`` is split into two arms at 250000 before calling
    ``HT.extract_windows_different_sizes_iccf``.
    """
    position_frame = pd.read_csv("testFiles/posPileupAsymmetric.csv")
    arms = pd.DataFrame({
        "chrom": ["chrSyn", "chrSyn"],
        "start": [0, 250000],
        "end": [250000, 4990000],
    })
    cooler_file = cooler.Cooler("testFiles/test3.mcool::/resolutions/10000")
    # Materialize so that the number of windows can be checked below.
    result = list(
        HT.extract_windows_different_sizes_iccf(
            position_frame, arms, cooler_file
        )
    )
    # load expected extracted windows
    # NOTE: pickle is only acceptable here because the fixture is a trusted
    # file from the repository; never unpickle untrusted data.
    with open("testFiles/test_pilesup_asymmetric.pickle",
              "rb") as file_pointer:
        expected = list(pickle.load(file_pointer))
    # zip() stops at the shorter input, so without this check a missing,
    # extra or empty set of windows would pass unnoticed.
    self.assertEqual(len(result), len(expected))
    self.assertTrue(
        all(np.allclose(i, j) for i, j in zip(result, expected))
    )
def test_wrong_parameters(self):
    """Test that region-specific scoring with ``norm=True`` raises.

    Calling ``HT.get_pairing_score_obs_exp`` with both ``regions`` and
    ``norm=True`` is expected to raise ``ValueError``.
    """
    regions = pd.read_csv("testFiles/posPileups.csv")
    regions.loc[:, "mid"] = regions["pos"]
    arm_spec = {"chrom": "chrSyn", "start": 0, "end": 4990000}
    arms = pd.DataFrame(arm_spec, index=[0])
    clr = cooler.Cooler("testFiles/test2.mcool::/resolutions/10000")
    expected_counts = HT.get_expected(clr, arms, ignore_diagonals=0)
    # Only the scoring call itself sits inside the assertRaises context.
    with self.assertRaises(ValueError):
        HT.get_pairing_score_obs_exp(
            clr,
            expected_counts,
            50000,
            regions=regions,
            arms=arms,
            norm=True,
        )
def test_arms(self):
    """Test fetching and generating the hg19 chromosomal-arm supports.

    Compares the frame returned by ``HT.get_arms_hg19`` with the stored
    reference table ``arms.csv``.
    """
    reference = pd.read_csv("./testFiles/arms.csv")
    fetched = HT.get_arms_hg19()
    assert_frame_equal(reference, fetched)