def test_volcano_plot(self):
    """Check the volcano QC plot of a treated-vs-untreated t-test comparison.

    Loads the pasilla count table shipped with mbf_sampledata, compares the
    ``treated*`` columns against the ``untreated*`` columns with ``TTest``,
    and calls ``prune_qc`` with a predicate on "volcano" in the job id.
    After the pipegraph has run, exactly one unpruned QC job must be left;
    its first output file is checked with ``assert_image_equal``.
    """
    ppg.util.global_pipegraph.quiet = False
    import mbf_sampledata

    counts_path = mbf_sampledata.get_sample_path(
        "mbf_comparisons/pasillaCount_deseq2.tsv.gz"
    )
    counts = pd.read_csv(counts_path, sep=" ")
    # Make every column label a str so the startswith() filters below
    # operate on strings.
    counts.columns = list(map(str, counts.columns))
    groups = {
        "treated": [c for c in counts.columns if c.startswith("treated")],
        "untreated": [c for c in counts.columns if c.startswith("untreated")],
    }
    ddf = DelayedDataFrame("pasilla", counts)

    comparison = Comparisons(ddf, groups).a_vs_b("treated", "untreated", TTest())
    comparison.filter([("log2FC", "|>=", 2.0), ("FDR", "<=", 0.05)])

    prune_qc(lambda job: "volcano" in job.job_id)
    run_pipegraph()

    remaining = [job for job in get_qc_jobs() if not job._pruned]
    print(remaining)
    assert len(remaining) == 1
    assert_image_equal(remaining[0].filenames[0])
def test_subtraction_by_read(self):
    """Exercise ``SubtractOtherLane`` post-processing on three lane pairings.

    * ``lane`` minus ``lane2`` (same BAM file)        -> no mapped reads left
    * ``lane`` minus ``lane3`` (a different BAM file) -> all reads kept
    * ``lane3`` minus ``lane3_subset``                -> the difference of both

    The QC jobs of the ``lane_some`` and ``lane_full`` results are the ones
    handed to ``prune_qc``; both of their plots are compared against
    baseline images at the end.
    """
    from mbf_sampledata import get_human_22_fake_genome

    genome = get_human_22_fake_genome()
    rnaseq_bam = Path("mbf_align/rnaseq_spliced_chr22.bam")

    # index creation is automatic for all AlignedSample instances below
    lane = mbf_align.AlignedSample(
        "test_lane", get_sample_data(rnaseq_bam), genome, False, "AA123"
    )
    lane2 = mbf_align.AlignedSample(
        "test_lane2", get_sample_data(rnaseq_bam), genome, False, "AA124"
    )
    lane3 = mbf_align.AlignedSample(
        "test_lane3",
        get_sample_data(Path("mbf_align/chipseq_chr22.bam")),
        genome,
        False,
        "AA123",
    )
    lane3_subset = mbf_align.AlignedSample(
        "test_lane3_subset",
        get_sample_data(Path("mbf_align/chipseq_chr22_subset.bam")),
        genome,
        False,
        "AA123",
    )

    lane_empty = lane.post_process(
        mbf_align.post_process.SubtractOtherLane(lane2), new_name="empty"
    )
    lane_full = lane.post_process(
        mbf_align.post_process.SubtractOtherLane(lane3), new_name="full"
    )
    lane_some = lane3.post_process(
        mbf_align.post_process.SubtractOtherLane(lane3_subset),
        result_dir="results/aligned/shu",
    )

    qc_jobs = [lane_some.post_processor_qc_jobs, lane_full.post_processor_qc_jobs]
    prune_qc(lambda job: job in qc_jobs)
    ppg.run_pipegraph()

    assert Path(lane_empty.get_bam_names()[1]).exists()
    assert Path(lane_full.get_bam_names()[1]).exists()
    assert lane_empty.mapped_reads() == 0
    assert lane_full.mapped_reads() == lane.mapped_reads()
    assert lane.mapped_reads() != 0
    assert (
        lane_some.mapped_reads()
        == lane3.mapped_reads() - lane3_subset.mapped_reads()
    )
    assert lane3_subset.mapped_reads()  # make sure there was something to subtract
    assert "shu" in lane_some.get_bam_names()[0]

    # One image check per retained QC job. Previously both checks indexed
    # qc_jobs[0], so the lane_full plot was never verified.
    # NOTE(review): the un-suffixed baseline image must be the lane_full
    # plot; regenerate it if it was created from the lane_some plot.
    assert_image_equal(qc_jobs[0].filenames[0], "_result_dir")
    assert_image_equal(qc_jobs[1].filenames[0])
def test_assert_images_equal_inside_class(self):
    """Call ``assert_image_equal`` from within a test method.

    The plain call must succeed; the call with suffix "_b" must raise
    ``ValueError`` (presumably because no baseline exists for that suffix
    -- verify against assert_image_equal).
    """
    image = (
        Path(__file__).parent
        / "base_images"
        / "test_qc"
        / "_"
        / "test_assert_images_equal.png"
    )
    assert_image_equal(image)
    with pytest.raises(ValueError):
        assert_image_equal(image, "_b")
def _test_qc_plots(self, filename, remaining_job_count, chdir="."):
    """Shared driver for the per-plot QC tests.

    Parameters
    ----------
    filename:
        Substring that a QC job id must contain to match the ``prune_qc``
        predicate; also the (suffix of the) expected output file name.
    remaining_job_count:
        Number of QC jobs expected to be unpruned after ``prune_qc``.
    chdir:
        Sub-path below ``lane.result_dir`` that holds the plot. With "..",
        the file is expected to be named ``filename`` alone; otherwise it
        is prefixed with ``"<lane.name>_"``.
    """
    lane = self.prep_lane()
    prune_qc(lambda job: filename in job.job_id)

    surviving = len([job for job in get_qc_jobs() if not job._pruned])
    assert surviving == remaining_job_count  # plot cache, plot_table, plot

    ppg.run_pipegraph()

    plot_dir = lane.result_dir / chdir
    plot_name = filename if chdir == ".." else f"{lane.name}_{filename}"
    assert_image_equal(plot_dir / plot_name, suffix="_" + filename)
def test_very_simple(self):
    """Plot a 3x4 frame as a heatmap with ``Unchanged`` norm and order.

    The resulting file is compared against its baseline image.
    """
    frame = pd.DataFrame(
        {
            "a1": [0, 1, 2],
            "a2": [0.5, 1.5, 2.5],
            "b1": [2, 1, 0],
            "b2": [2.5, 0.5, 1],
        }
    )
    ddf = DelayedDataFrame("test", frame)
    heatmap = HeatmapPlot(
        ddf,
        frame.columns,
        "test.png",
        heatmap_norm.Unchanged(),
        heatmap_order.Unchanged(),
    )
    run_pipegraph()
    assert_image_equal(heatmap.output_filename)
def test_smooth(self, new_pipegraph_no_qc):
    """ChIP-seq heatmap with ``RegionFromCenter(1000)`` and ``SmoothExtendedReads``.

    Three chr22 regions are plotted for two lanes that read the same BAM
    file; the output is ordered with ``FirstLaneSum`` and compared to the
    baseline image.
    """
    genome = get_human_22_fake_genome()
    region_df = pd.DataFrame(
        {
            "chr": ["chr22", "chr22", "chr22"],
            "start": [
                36925 * 1000 - 1000,
                31485 * 1000 - 2000,
                41842 * 1000,
            ],
            "stop": [
                36925 * 1000 + 1000,
                31485 * 1000 + 2000,
                (41842 * 1000) + 1,
            ],
        }
    )
    plot_regions = mbf_genomics.regions.GenomicRegions(
        "testregions", lambda: region_df, [], genome
    )
    lanes = [
        mbf_align.lanes.AlignedSample(
            lane_name,
            mbf_sampledata.get_sample_path("mbf_align/chipseq_chr22.bam"),
            genome,
            False,
            None,
        )
        for lane_name in ("one", "two")
    ]
    heatmap = mbf_heatmap.chipseq.Heatmap(
        plot_regions,
        lanes,
        region_strategy=regions.RegionFromCenter(1000),
        smoothing_strategy=smooth.SmoothExtendedReads(),
    )
    output = "test.png"
    heatmap.plot(output, norm.AsIs(), order.FirstLaneSum())
    ppg.run_pipegraph()
    assert_image_equal(output)
def test_venn_from_logfcs(self):
    """Venn plots built from log2FC-filtered ``all_vs_b`` comparisons.

    Every group is compared against "a" with ``Log2FC``, each comparison is
    filtered on ``|log2FC| >= 1``, and the two files of the venn plot job
    are checked against the "_down" and "_up" baselines respectively.
    """
    ppg.util.global_pipegraph.quiet = False
    values = pd.DataFrame(
        {
            "gene_stable_id": ["A", "B", "C", "D", "E"],
            "a": [1, 1, 1, 1, 1],
            "b": [1, 2, 3, 4, 5],
            "c": [1, 1, 3, 0.5, 0.75],
        }
    )
    ddf = DelayedDataFrame("ex1", values)
    comparisons = Comparisons(ddf, {"a": ["a"], "b": ["b"], "c": ["c"]})
    versus_a = comparisons.all_vs_b("a", Log2FC())

    selected = {}
    for name, comparison in versus_a.items():
        selected[name] = comparison.filter([("log2FC", "|>=", 1)])

    plot_job = venn.plot_venn("test", selected)
    ppg.run_pipegraph()
    down_file, up_file = plot_job.filenames[0], plot_job.filenames[1]
    assert_image_equal(down_file, "_down")
    assert_image_equal(up_file, "_up")
def test_hierarchical_pearson(self):
    """Heatmap of a noisy, resampled frame ordered by ``HierarchicalPearson``.

    The input is made reproducible by a fixed ``random_state`` for the row
    sampling and a fixed numpy seed for the added normal noise.
    """
    template = pd.DataFrame(
        {
            "a1": [0, 1, 2],
            "a2": [0.5, 1.5, 2.5],
            "b1": [2, 1, 0],
            "b2": [0.5, 0.5, 1],
        }
    )
    # Keep this order: sample first, seed afterwards, then draw the noise.
    noisy = template.sample(200, replace=True, random_state=500)
    np.random.seed(500)
    noisy += np.random.normal(0, 1, noisy.shape)

    ddf = DelayedDataFrame("test", noisy)
    heatmap = HeatmapPlot(
        ddf,
        noisy.columns,
        "test.png",
        heatmap_norm.Unchanged(),
        heatmap_order.HierarchicalPearson(),
    )
    run_pipegraph()
    assert_image_equal(heatmap.output_filename)
def test_ma_plot(self):
    """Check the MA QC plot of a treated-vs-untreated t-test comparison.

    Uses the pasilla subset, a fixed numpy seed and ``laplace_offset=1``.
    ``prune_qc`` is called with a predicate on "ma_plot" in the job id;
    exactly one unpruned QC job must remain, and its first output file is
    compared to the baseline image.
    """
    ppg.util.global_pipegraph.quiet = False
    pasilla_data, treated, untreated = get_pasilla_data_subset()
    import numpy

    numpy.random.seed(500)
    groups = {"treated": treated, "untreated": untreated}
    comparison = Comparisons(pasilla_data, groups).a_vs_b(
        "treated", "untreated", TTest(), laplace_offset=1
    )
    # Only the fold-change criterion is applied here (no FDR cut-off).
    comparison.filter([("log2FC", "|>=", 2.0)])

    prune_qc(lambda job: "ma_plot" in job.job_id)
    run_pipegraph()

    remaining = [job for job in get_qc_jobs() if not job._pruned]
    assert len(remaining) == 1
    assert_image_equal(remaining[0].filenames[0])
def test_simple(self, new_pipegraph_no_qc):
    """ChIP-seq heatmap with ``RegionAsIs``, ``SmoothRaw`` and no reordering.

    Three 1000 bp chr22 regions (at offsets 0, 20000 and 30000 from a fixed
    start) are plotted for two lanes that read the same BAM file, and the
    output is compared to the baseline image.
    """
    genome = get_human_22_fake_genome()
    start = 17750239
    region_df = pd.DataFrame(
        [
            {"chr": "chr22", "start": start + offset, "stop": start + offset + 1000}
            for offset in (0, 20000, 30000)
        ]
    )
    plot_regions = mbf_genomics.regions.GenomicRegions(
        "testregions", lambda: region_df, [], genome
    )
    lanes = [
        mbf_align.lanes.AlignedSample(
            lane_name,
            mbf_sampledata.get_sample_path("mbf_align/chipseq_chr22.bam"),
            genome,
            False,
            None,
        )
        for lane_name in ("one", "two")
    ]
    heatmap = mbf_heatmap.chipseq.Heatmap(
        plot_regions,
        lanes,
        region_strategy=regions.RegionAsIs(),
        smoothing_strategy=smooth.SmoothRaw(),
    )
    output = "test.png"
    heatmap.plot(output, norm.AsIs(), order.AsIs())
    ppg.run_pipegraph()
    assert_image_equal(output)
def test_correlation(self):
    """Check the correlation QC plot created for a ``Comparisons`` object.

    Only the ``Comparisons`` instance is constructed (no a_vs_b call).
    ``prune_qc`` is called with a predicate on "correlation" in the job id;
    exactly one unpruned QC job must remain, and its first output file is
    compared to the baseline image.
    """
    ppg.util.global_pipegraph.quiet = False
    import mbf_sampledata

    counts_path = mbf_sampledata.get_sample_path(
        "mbf_comparisons/pasillaCount_deseq2.tsv.gz"
    )
    counts = pd.read_csv(counts_path, sep=" ")
    # Make every column label a str so the startswith() filters below
    # operate on strings.
    counts.columns = list(map(str, counts.columns))
    groups = {
        "treated": [c for c in counts.columns if c.startswith("treated")],
        "untreated": [c for c in counts.columns if c.startswith("untreated")],
    }
    ddf = DelayedDataFrame("pasilla", counts)
    # Constructed for its side effects only; the instance itself is unused.
    Comparisons(ddf, groups)

    prune_qc(lambda job: "correlation" in job.job_id)
    run_pipegraph()

    remaining = [job for job in get_qc_jobs() if not job._pruned]
    print(remaining)
    assert len(remaining) == 1
    assert_image_equal(remaining[0].filenames[0])
def test_assert_images_equal():
    """Walk through the success and failure modes of ``assert_image_equal``.

    Covered cases, in order: matching baseline, missing baseline (suffix
    "_b"), explicit ``should_path`` taking precedence over the suffix,
    differing image content ("_c"), a generated file that does not exist,
    and an image-size mismatch ("_d").
    """
    image = (
        Path(__file__).parent
        / "base_images"
        / "test_qc"
        / "_"
        / "test_assert_images_equal.png"
    )

    assert_image_equal(image)

    # here the baseline image does not exist
    with pytest.raises(ValueError) as e:
        assert_image_equal(image, "_b")
    # should_path overwrites suffix
    assert_image_equal(image, "_b", should_path=image)
    assert "Base_line image not found" in str(e.value)

    # here it is different
    with pytest.raises(ValueError) as e:
        assert_image_equal(image, suffix="_c")
    assert "Image files did not match" in str(e.value)

    with pytest.raises(IOError) as e:
        assert_image_equal("does not exist")
    assert "not created" in str(e.value)

    # here the image size differs
    with pytest.raises(ValueError) as e:
        assert_image_equal(image, suffix="_d")
    assert "do not match expected size" in str(e.value)