Пример #1
0
 def test_genome_wide_norm(self):
     """Tests functionality to extract Obs/Exp values
     genome-wide with median normalization from synthetic Hi-C data.

     Expected values are computed for a single region covering chrSyn
     (0-4990000) from the ``/resolutions/10000`` group of the test cooler.
     The pairing score is then requested without a ``regions`` argument
     and with ``norm=True``; the resulting frame must equal the stored
     reference CSV.
     """
     arms = pd.DataFrame(
         {"chrom": "chrSyn", "start": 0, "end": 4990000},
         index=[0],
     )
     cooler_file = cooler.Cooler(
         "testFiles/test2.mcool::/resolutions/10000")
     exp_f = HT.get_expected(cooler_file, arms, ignore_diagonals=0)
     # No position table is loaded here: the genome-wide call takes no
     # ``regions`` argument, so such a frame would never be used.
     # NOTE(review): 50000 is presumably the window size -- confirm
     # against the signature of HT.get_pairing_score_obs_exp.
     pairing_score = HT.get_pairing_score_obs_exp(
         cooler_file,
         exp_f,
         50000,
         arms=arms,
         norm=True,
     )
     # Read the reference with the dtypes of the computed frame so the
     # comparison is not affected by read_csv's own type inference.
     expected = pd.read_csv(
         "testFiles/test_pairingScore_obsExp_genomeWide_Norm.csv",
         dtype=pairing_score.dtypes.to_dict(),
     )
     assert_frame_equal(pairing_score, expected)
Пример #2
0
 def test_no_collapse_real_data(self):
     """Compare an uncollapsed Obs/Exp pileup of real data to a stored array.

     Regions come from ``testAssignRegions.csv``, the arms from
     ``HT.get_arms_hg19()`` and the contacts from the 50000 resolution
     group of the real-data test cooler. The pileup is run on one process
     with ``collapse=False`` and checked against a saved ``.npy`` file.
     """
     regions = pd.read_csv("testFiles/testAssignRegions.csv")
     hg19_arms = HT.get_arms_hg19()
     clr = cooler.Cooler(
         "testFiles/test3_realdata.mcool::resolutions/50000")
     expected_counts = HT.get_expected(clr, hg19_arms)
     pileup = HT.do_pileup_obs_exp(
         clr,
         expected_counts,
         regions,
         proc=1,
         collapse=False,
     )
     reference = np.load(
         "testFiles/real_data_obsexp_pileup_not_collapsed.npy")
     # equal_nan=True makes NaNs at matching positions compare as equal.
     is_close = np.allclose(pileup, reference, equal_nan=True)
     self.assertTrue(is_close)
Пример #3
0
 def test_synthetic_data(self):
     """Tests expected counts for synthetic Hi-C data.

     The result of ``HT.get_expected`` for ``self.cooler`` and
     ``self.arms`` is compared with precomputed per-diagonal values
     stored in ``test_expected_chrSyn.csv``.
     """

     def as_region(row):
         # Build the "chrom:start-end" label used as the region column.
         return f"{row['chrom']}:{row['start']}-{row['end']}"

     computed = HT.get_expected(
         self.cooler,
         self.arms,
         proc=1,
         ignore_diagonals=0,
     )
     reference = pd.read_csv("testFiles/test_expected_chrSyn.csv")
     # The stored table keeps chrom/start/end separately; fold them into
     # a single "region" column to match the new expected format.
     reference.loc[:, "region"] = reference.apply(as_region, axis=1)
     without_coords = reference.drop(columns=["chrom", "start", "end"])
     # Select the reference columns in the order of the computed frame.
     assert_frame_equal(computed, without_coords[computed.columns])
Пример #4
0
 def test_synthetic_data_mult_chroms(self):
     """Tests expected counts for synthetic Hi-C data
     with multiple chromosomal arms.

     chrSyn is split into two regions (0-2000000 and 2000000-4990000)
     and the result of ``HT.get_expected`` is compared with precomputed
     per-diagonal values from ``test_expected_multiple_chroms.csv``.
     """

     def as_region(row):
         # Build the "chrom:start-end" label used as the region column.
         return f"{row['chrom']}:{row['start']}-{row['end']}"

     split_arms = pd.DataFrame(
         {
             "chrom": ["chrSyn", "chrSyn"],
             "start": [0, 2000000],
             "end": [2000000, 4990000],
         }
     )
     computed = HT.get_expected(
         self.cooler, split_arms, proc=1, ignore_diagonals=0)
     reference = pd.read_csv("testFiles/test_expected_multiple_chroms.csv")
     # The stored table keeps chrom/start/end separately; fold them into
     # a single "region" column to match the new expected format.
     reference.loc[:, "region"] = reference.apply(as_region, axis=1)
     without_coords = reference.drop(columns=["chrom", "start", "end"])
     # Select the reference columns in the order of the computed frame.
     assert_frame_equal(computed, without_coords[computed.columns])
Пример #5
0
 def test_collapse(self):
     """Tests pileup of synthetic Hi-C data, with collapsing results.

     Positions from ``posPileups.csv`` are turned into regions via
     ``HT.assign_regions`` and piled up with ``collapse=True`` on one
     process; the outcome is compared with a stored ``.npy`` array.
     """
     positions = pd.read_csv("testFiles/posPileups.csv")
     arm_spec = {"chrom": "chrSyn", "start": 0, "end": 4990000}
     chrom_arms = pd.DataFrame(arm_spec, index=[0])
     # NOTE(review): 50000 and 10000 are presumably window size and bin
     # size -- confirm against the signature of HT.assign_regions.
     regions = HT.assign_regions(
         50000,
         10000,
         positions["chrom"],
         positions["pos"],
         chrom_arms,
     )
     clr = cooler.Cooler("testFiles/test2.mcool::/resolutions/10000")
     expected_counts = HT.get_expected(clr, chrom_arms, ignore_diagonals=0)
     pileup = HT.do_pileup_obs_exp(
         clr,
         expected_counts,
         regions,
         proc=1,
         collapse=True,
     )
     reference = np.load("testFiles/test_pileups_obsExp_collapse.npy")
     is_close = np.allclose(pileup, reference)
     self.assertTrue(is_close)
Пример #6
0
 def test_expected_real_data(self):
     """Tests expected counts for real Hi-C data
     with multiple chromosomal arms.

     ``HT.get_expected`` is run on the 50000 resolution group of the
     real-data cooler with the arms from ``HT.get_arms_hg19()``. The
     ``count.sum`` column is dropped, and both the result and the stored
     reference are sorted by region and diagonal before comparison.
     """

     def as_region(row):
         # Build the "chrom:start-end" label used as the region column.
         return f"{row['chrom']}:{row['start']}-{row['end']}"

     sort_keys = ["region", "diag"]
     hg19_arms = HT.get_arms_hg19()
     clr = cooler.Cooler(
         "testFiles/test3_realdata.mcool::/resolutions/50000")
     computed = HT.get_expected(clr, hg19_arms, proc=1, ignore_diagonals=0)
     computed = computed.drop(columns=["count.sum"])
     reference = pd.read_csv("testFiles/test_expected_realdata.csv")
     # The stored table keeps chrom/start/end separately; fold them into
     # a single "region" column to match the new expected format.
     reference.loc[:, "region"] = reference.apply(as_region, axis=1)
     reference = reference.drop(columns=["chrom", "start", "end"])
     reference = reference[computed.columns]
     # Sort both frames identically and reset the index so that row order
     # and index labels do not influence the comparison.
     reference = reference.sort_values(by=sort_keys).reset_index(drop=True)
     computed = computed.sort_values(by=sort_keys).reset_index(drop=True)
     assert_frame_equal(computed, reference)
Пример #7
0
 def test_specific_regions(self):
     """Tests functionality to extract Obs/Exp values
     at specific regions from synthetic Hi-C data.

     The positions from ``posPileups.csv`` get a ``mid`` column copied
     from ``pos`` and are passed as ``regions``; the pairing score is
     computed with ``norm=False`` and compared with a stored CSV.
     """
     regions = pd.read_csv("testFiles/posPileups.csv")
     regions.loc[:, "mid"] = regions["pos"]
     arm_spec = {"chrom": "chrSyn", "start": 0, "end": 4990000}
     chrom_arms = pd.DataFrame(arm_spec, index=[0])
     clr = cooler.Cooler("testFiles/test2.mcool::/resolutions/10000")
     expected_counts = HT.get_expected(clr, chrom_arms, ignore_diagonals=0)
     # NOTE(review): 50000 is presumably the window size -- confirm
     # against the signature of HT.get_pairing_score_obs_exp.
     scores = HT.get_pairing_score_obs_exp(
         clr,
         expected_counts,
         50000,
         regions=regions,
         arms=chrom_arms,
         norm=False,
     )
     reference = pd.read_csv(
         "testFiles/test_pairingScore_obsExp_specificRegions.csv")
     assert_frame_equal(scores, reference)
Пример #8
0
 def test_equal_sized_windows(self):
     """Test flexible pileup with equally sized windows (obs/exp).

     Windows are extracted for the positions in
     ``posPileupSymmetric.csv`` and compared element by element with a
     pickled reference. The number of extracted windows is checked as
     well, so a truncated or empty result cannot pass unnoticed.
     """
     position_frame = pd.read_csv("testFiles/posPileupSymmetric.csv")
     arms = pd.DataFrame(
         {"chrom": "chrSyn", "start": 0, "end": 4990000},
         index=[0],
     )
     cooler_file = cooler.Cooler(
         "testFiles/test2.mcool::/resolutions/10000")
     expected_counts = HT.get_expected(cooler_file,
                                       arms,
                                       proc=2,
                                       ignore_diagonals=0)
     # Materialize the result so that its length can be checked below.
     result = list(
         HT.extract_windows_different_sizes_obs_exp(
             position_frame, arms, cooler_file, expected_counts))
     # load expected extracted windows
     # NOTE: pickle is only acceptable here because the fixture is a
     # trusted file inside the test directory; never unpickle
     # untrusted data.
     with open("testFiles/test_pilesup_symmetric_obs_exp.pickle",
               "rb") as file_pointer:
         expected_windows = list(pickle.load(file_pointer))
     # zip() stops at the shorter input, so without this check a result
     # with missing windows would still compare as equal.
     self.assertEqual(len(result), len(expected_windows))
     self.assertTrue(
         all(np.allclose(i, j) for i, j in zip(result, expected_windows)))
Пример #9
0
 def test_wrong_parameters(self):
     """Tests raising of error when specific
     region pileup is done with the norm parameter set to True.

     The call combines an explicit ``regions`` frame with ``norm=True``
     and is required to raise ``ValueError``.
     """
     regions = pd.read_csv("testFiles/posPileups.csv")
     regions.loc[:, "mid"] = regions["pos"]
     arm_spec = {"chrom": "chrSyn", "start": 0, "end": 4990000}
     chrom_arms = pd.DataFrame(arm_spec, index=[0])
     clr = cooler.Cooler("testFiles/test2.mcool::/resolutions/10000")
     expected_counts = HT.get_expected(clr, chrom_arms, ignore_diagonals=0)
     # Only the pairing-score call itself is inside the assertion
     # context; the setup above runs outside of it.
     with self.assertRaises(ValueError):
         HT.get_pairing_score_obs_exp(
             clr,
             expected_counts,
             50000,
             regions=regions,
             arms=chrom_arms,
             norm=True,
         )