def test_genome_wide_norm(self):
    """Tests functionality to extract Obs/Exp values genome-wide
    with median normalization from synthetic Hi-C data.

    The pairing score is computed without a ``regions`` argument and
    with ``norm=True``, then compared against a stored CSV result.
    """
    # a single chromosomal arm, as a one-row frame
    arms = pd.DataFrame(
        {"chrom": "chrSyn", "start": 0, "end": 4990000},
        index=[0],
    )
    cooler_file = cooler.Cooler("testFiles/test2.mcool::/resolutions/10000")
    exp_f = HT.get_expected(cooler_file, arms, ignore_diagonals=0)
    pairing_score = HT.get_pairing_score_obs_exp(
        cooler_file, exp_f, 50000, arms=arms, norm=True
    )
    # read the reference with the dtypes of the computed frame so that
    # assert_frame_equal does not fail on CSV dtype inference alone
    expected = pd.read_csv(
        "testFiles/test_pairingScore_obsExp_genomeWide_Norm.csv",
        dtype=pairing_score.dtypes.to_dict(),
    )
    assert_frame_equal(pairing_score, expected)
def test_no_collapse_real_data(self):
    """Tests obs/exp pileup with collapse=False against a stored array.

    Positions are read from a CSV file, the arms come from
    ``HT.get_arms_hg19`` and the result is compared (NaNs treated as
    equal) with a saved ``.npy`` reference.
    """
    regions = pd.read_csv("testFiles/testAssignRegions.csv")
    hg19_arms = HT.get_arms_hg19()
    clr = cooler.Cooler("testFiles/test3_realdata.mcool::resolutions/50000")
    expected_counts = HT.get_expected(clr, hg19_arms)
    pileup = HT.do_pileup_obs_exp(
        clr,
        expected_counts,
        regions,
        proc=1,
        collapse=False,
    )
    reference = np.load("testFiles/real_data_obsexp_pileup_not_collapsed.npy")
    matches = np.allclose(pileup, reference, equal_nan=True)
    self.assertTrue(matches)
def test_synthetic_data(self):
    """Tests expected counts for synthetic Hi-C data.

    Known values were provided and expected counts for each diagonal
    calculated. Uses ``self.cooler`` and ``self.arms``, which are
    presumably prepared by the test fixture (not visible here).
    """

    def _region_label(x):
        # build the "chrom:start-end" string used by the new expected format
        return f"{x['chrom']}:{x['start']}-{x['end']}"

    computed = HT.get_expected(
        self.cooler, self.arms, proc=1, ignore_diagonals=0
    )
    reference = pd.read_csv("testFiles/test_expected_chrSyn.csv")
    # merge regions for new expected format
    reference.loc[:, "region"] = reference.apply(_region_label, axis=1)
    without_coords = reference.drop(columns=["chrom", "start", "end"])
    # align the column order with the computed frame before comparing
    reference_final = without_coords[computed.columns]
    assert_frame_equal(computed, reference_final)
def test_synthetic_data_mult_chroms(self):
    """Tests expected counts for synthetic Hi-C data with multiple
    chromosomal arms.

    Known values were provided and expected counts for each diagonal
    calculated. The two arms split chrSyn at position 2000000.
    """

    def _region_label(x):
        # build the "chrom:start-end" string used by the new expected format
        return f"{x['chrom']}:{x['start']}-{x['end']}"

    two_arms = pd.DataFrame(
        {
            "chrom": ["chrSyn", "chrSyn"],
            "start": [0, 2000000],
            "end": [2000000, 4990000],
        }
    )
    computed = HT.get_expected(
        self.cooler, two_arms, proc=1, ignore_diagonals=0
    )
    reference = pd.read_csv("testFiles/test_expected_multiple_chroms.csv")
    # merge regions for new expected format
    reference.loc[:, "region"] = reference.apply(_region_label, axis=1)
    without_coords = reference.drop(columns=["chrom", "start", "end"])
    # align the column order with the computed frame before comparing
    reference_final = without_coords[computed.columns]
    assert_frame_equal(computed, reference_final)
def test_collapse(self):
    """Tests pileup of synthetic Hi-C data, with collapsing results.

    Positions are assigned to regions via ``HT.assign_regions`` and the
    collapsed obs/exp pileup is compared with a stored ``.npy`` array.
    """
    positions = pd.read_csv("testFiles/posPileups.csv")
    single_arm = pd.DataFrame(
        {"chrom": "chrSyn", "start": 0, "end": 4990000},
        index=[0],
    )
    regions = HT.assign_regions(
        50000,
        10000,
        positions["chrom"],
        positions["pos"],
        single_arm,
    )
    clr = cooler.Cooler("testFiles/test2.mcool::/resolutions/10000")
    expected_counts = HT.get_expected(clr, single_arm, ignore_diagonals=0)
    pileup = HT.do_pileup_obs_exp(
        clr, expected_counts, regions, proc=1, collapse=True
    )
    reference = np.load("testFiles/test_pileups_obsExp_collapse.npy")
    matches = np.allclose(pileup, reference)
    self.assertTrue(matches)
def test_expected_real_data(self):
    """Tests expected counts for real Hi-C data with multiple
    chromosomal arms.

    Known values were provided and expected counts for each diagonal
    calculated. The ``count.sum`` column is dropped from the computed
    frame, and both frames are sorted by region and diagonal before
    comparison.
    """

    def _region_label(x):
        # build the "chrom:start-end" string used by the new expected format
        return f"{x['chrom']}:{x['start']}-{x['end']}"

    sort_keys = ["region", "diag"]
    hg19_arms = HT.get_arms_hg19()
    clr = cooler.Cooler("testFiles/test3_realdata.mcool::/resolutions/50000")
    raw_expected = HT.get_expected(clr, hg19_arms, proc=1, ignore_diagonals=0)
    computed = raw_expected.drop(columns=["count.sum"])
    reference = pd.read_csv("testFiles/test_expected_realdata.csv")
    # merge regions for new expected format
    reference.loc[:, "region"] = reference.apply(_region_label, axis=1)
    reference = reference.drop(columns=["chrom", "start", "end"])
    reference = reference[computed.columns]
    # row order is normalised on both sides so only the content is compared
    reference_sorted = reference.sort_values(by=sort_keys).reset_index(drop=True)
    computed_sorted = computed.sort_values(by=sort_keys).reset_index(drop=True)
    assert_frame_equal(computed_sorted, reference_sorted)
def test_specific_regions(self):
    """Tests functionality to extract Obs/Exp values at specific
    regions from synthetic Hi-C data.

    The regions frame is the positions CSV with a ``mid`` column copied
    from ``pos``; the score is computed with ``norm=False``.
    """
    regions = pd.read_csv("testFiles/posPileups.csv")
    regions.loc[:, "mid"] = regions["pos"]
    single_arm = pd.DataFrame(
        {"chrom": "chrSyn", "start": 0, "end": 4990000},
        index=[0],
    )
    clr = cooler.Cooler("testFiles/test2.mcool::/resolutions/10000")
    expected_counts = HT.get_expected(clr, single_arm, ignore_diagonals=0)
    score_kwargs = {"regions": regions, "arms": single_arm, "norm": False}
    computed = HT.get_pairing_score_obs_exp(
        clr, expected_counts, 50000, **score_kwargs
    )
    reference = pd.read_csv(
        "testFiles/test_pairingScore_obsExp_specificRegions.csv"
    )
    assert_frame_equal(computed, reference)
def test_equal_sized_windows(self):
    """Test flexible pileup with equally sized windows (obs/exp).

    Extracted windows are compared element-wise with a pickled
    reference. The number of windows is checked explicitly because
    ``zip`` stops at the shorter input, so a missing or empty result
    would otherwise pass unnoticed.
    """
    position_frame = pd.read_csv("testFiles/posPileupSymmetric.csv")
    arms = pd.DataFrame(
        {"chrom": "chrSyn", "start": 0, "end": 4990000},
        index=[0],
    )
    cooler_file = cooler.Cooler("testFiles/test2.mcool::/resolutions/10000")
    expected_counts = HT.get_expected(
        cooler_file, arms, proc=2, ignore_diagonals=0
    )
    # materialise so the windows can be both counted and iterated
    windows = list(
        HT.extract_windows_different_sizes_obs_exp(
            position_frame, arms, cooler_file, expected_counts
        )
    )
    # load expected extracted windows
    # NOTE: pickle is only acceptable here because the file is a trusted
    # repository fixture; never unpickle external data.
    with open("testFiles/test_pilesup_symmetric_obs_exp.pickle",
              "rb") as file_pointer:
        expected_windows = pickle.load(file_pointer)
    self.assertEqual(len(windows), len(expected_windows))
    self.assertTrue(
        all(np.allclose(i, j) for i, j in zip(windows, expected_windows))
    )
def test_wrong_parameters(self):
    """Tests raising of error when specific region pileup
    is done with the norm parameter set to True.

    Passing both ``regions`` and ``norm=True`` is expected to raise
    ``ValueError``.
    """
    regions = pd.read_csv("testFiles/posPileups.csv")
    regions.loc[:, "mid"] = regions["pos"]
    single_arm = pd.DataFrame(
        {"chrom": "chrSyn", "start": 0, "end": 4990000},
        index=[0],
    )
    clr = cooler.Cooler("testFiles/test2.mcool::/resolutions/10000")
    expected_counts = HT.get_expected(clr, single_arm, ignore_diagonals=0)
    # only the call itself is expected to raise; setup stays outside
    with self.assertRaises(ValueError):
        HT.get_pairing_score_obs_exp(
            clr,
            expected_counts,
            50000,
            regions=regions,
            arms=single_arm,
            norm=True,
        )