def register_qc_biotypes(self):
    output_filename = self.result_dir / f"{self.name}_reads_per_biotype.png"

    from mbf_genomics.genes import Genes
    from mbf_genomics.genes.anno_tag_counts import GeneUnstranded

    genes = Genes(self.genome)
    anno = GeneUnstranded(self)

    def plot(output_filename):
        print(genes.df.columns)
        return (
            dp(genes.df)
            .groupby("biotype")
            .summarize((anno.columns[0], lambda x: x.sum(), "read count"))
            .mutate(sample=self.name)
            .p9()
            .theme_bw()
            .annotation_stripes()
            .add_bar("biotype", "read count", stat="identity")
            .scale_y_continuous(labels=lambda xs: ["%.2g" % x for x in xs])
            # .turn_x_axis_labels()
            .coord_flip()
            .title(self.name)
            .render(
                output_filename,
                width=6,
                height=2 + len(genes.df.biotype.unique()) * 0.25,
            )
        )

    return register_qc(
        ppg.FileGeneratingJob(output_filename, plot).depends_on(
            genes.add_annotator(anno)
        )
    )
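# Hedged sketch (plain pandas, invented column names, not part of the pipeline):
# the dppd groupby/summarize chain above boils down to summing the annotator's
# count column per biotype. "tag_count" stands in for anno.columns[0].
def example_reads_per_biotype():
    import pandas as pd

    genes_df = pd.DataFrame(
        {
            "biotype": ["protein_coding", "protein_coding", "lincRNA"],
            "tag_count": [100, 50, 7],
        }
    )
    return (
        genes_df.groupby("biotype")["tag_count"]
        .sum()
        .reset_index(name="read count")  # mirrors the "read count" column in plot()
    )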
def test_pruning_plotjob(self, new_pipegraph):
    jobA = register_qc(ppg.PlotJob("c.png", lambda: None, lambda: None))
    assert not jobA._pruned
    prune_qc()
    assert jobA._pruned
    assert jobA.cache_job._pruned
    assert jobA.table_job._pruned
def test_registration_and_pruning(self, new_pipegraph):
    with pytest.raises(TypeError):
        register_qc("shu")
    jobA = ppg.FileGeneratingJob("a", lambda: Path("a").write_text("hello"))
    register_qc(jobA)
    print(list(get_qc_jobs()))
    assert jobA in list(get_qc_jobs())
    assert not jobA._pruned
    jobc = register_qc(
        ppg.FileGeneratingJob("c", lambda: Path("b").write_text("hello"))
    )

    def check_prune(job):
        return job.job_id.lower()[-1] == "c"

    prune_qc(check_prune)
    assert jobc in list(get_qc_jobs())
    assert not jobc._pruned
    jobB = register_qc(
        ppg.FileGeneratingJob("b", lambda: Path("b").write_text("hello"))
    )
    assert jobB in list(get_qc_jobs())
    assert jobB._pruned
    jobC = register_qc(
        ppg.FileGeneratingJob("C", lambda: Path("b").write_text("hello"))
    )
    assert not jobC._pruned
    assert len(list(get_qc_jobs())) == 4
    prune_qc()
    assert jobA._pruned
    assert jobB._pruned
    assert jobc._pruned
    assert jobC._pruned
    for j in get_qc_jobs():
        assert j._pruned
def register_qc_complexity(self):
    output_filename = self.result_dir / f"{self.name}_complexity.png"

    def calc():
        import mbf_bam

        counts = mbf_bam.calculate_duplicate_distribution(
            str(self.bam_filename), str(self.index_filename)
        )
        return pd.DataFrame(
            {
                "source": self.name,
                "Repetition count": list(counts.keys()),
                "Count": list(counts.values()),
            }
        )

    def plot(df):
        import numpy as np

        unique_count = df["Count"].sum()
        total_count = (df["Count"] * df["Repetition count"]).sum()
        pcb = float(unique_count) / total_count
        if pcb >= 0.9:  # pragma: no cover
            severity = "none"
        elif pcb >= 0.8:  # pragma: no cover
            severity = "mild"
        elif pcb >= 0.5:  # pragma: no cover
            severity = "moderate"
        else:
            severity = "severe"
        title = (
            "Genomic positions with repetition count reads\nTotal read count: %i\nPCR Bottleneck coefficient: %.2f (%s)"
            % (total_count, pcb, severity)
        )
        return (
            dp(df)
            .p9()
            .theme_bw()
            .add_point("Repetition count", "Count")
            .add_line("Repetition count", "Count")
            .scale_y_continuous(
                trans="log2",
                breaks=[2 ** x for x in range(1, 24)],
                labels=lambda xs: ["2^%.0f" % np.log2(x) for x in xs],
            )
            .title(title)
            .pd
        )

    return register_qc(
        ppg.PlotJob(output_filename, calc, plot)
        .depends_on(self.load())
        .use_cores(-1)
    )
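# Hedged sketch (invented numbers, not part of the pipeline): the PCR bottleneck
# coefficient computed in plot() above is "genomic positions hit at least once"
# divided by "total reads". example_counts mimics the repetition-count ->
# position-count mapping that calc() turns into its DataFrame.
def example_pcr_bottleneck_coefficient():
    example_counts = {1: 900, 2: 40, 3: 10}  # hypothetical duplicate distribution
    unique_positions = sum(example_counts.values())  # 950 distinct positions
    total_reads = sum(k * v for k, v in example_counts.items())  # 1010 reads
    pcb = unique_positions / total_reads  # ~0.94 -> severity "none" above
    return pcb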
def register_qc_splicing(self):
    """How many reads were spliced? How many of those splices were known
    splice sites, how many were novel?"""
    output_filename = self.result_dir / f"{self.name}_splice_sites.png"

    def calc():
        from mbf_bam import count_introns

        bam_filename, bam_index_name = self.get_bam_names()
        counts_per_chromosome = count_introns(bam_filename, bam_index_name)
        known_splice_sites_by_chr = {
            chr: set() for chr in self.genome.get_chromosome_lengths()
        }
        for gene in self.genome.genes.values():
            for start, stop in zip(*gene.introns_all):
                known_splice_sites_by_chr[gene.chr].add((start, stop))
        total_counts = collections.Counter()
        known_count = 0
        unknown_count = 0
        for chr, counts in counts_per_chromosome.items():
            for k, v in counts.items():
                if k[0] == 0xFFFFFFFF:
                    intron_counts = 0xFFFFFFFF - k[1]
                    total_counts[intron_counts] += v
                else:
                    if k in known_splice_sites_by_chr[chr]:
                        known_count += v
                    else:
                        unknown_count += v
        result = {"side": [], "x": [], "count": []}
        result["side"].append("splice sites")
        result["x"].append("unknown")
        result["count"].append(unknown_count)
        result["side"].append("splice sites")
        result["x"].append("known")
        result["count"].append(known_count)
        for x, count in total_counts.items():
            result["side"].append("reads with x splices")
            result["x"].append(x)
            result["count"].append(count)
        return pd.DataFrame(result)

    def plot(df):
        return (
            dp(df)
            .p9()
            .theme_bw()
            .add_bar("x", "count", stat="identity")
            .facet_wrap("side", scales="free", ncol=1)
            .scale_y_continuous(labels=lambda xs: ["%.2g" % x for x in xs])
            .title(self.name)
            .theme(panel_spacing_y=0.2)
            .render(output_filename)
        )

    return register_qc(
        ppg.PlotJob(output_filename, calc, plot)
        .depends_on(self.load())
        .use_cores(-1)
    )
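# Hedged sketch (invented data, illustration only): calc() above treats keys whose
# first entry is the sentinel 0xFFFFFFFF as "reads with N splices" counters, with
# N encoded as 0xFFFFFFFF - key[1]; every other key is an (intron start, stop)
# pair checked against the known splice sites. The dict below just has that shape.
def example_split_intron_counts():
    known = {(100, 200)}
    counts = {
        (100, 200): 50,  # a known splice site, 50 reads
        (300, 400): 5,  # a novel splice site, 5 reads
        (0xFFFFFFFF, 0xFFFFFFFF - 2): 30,  # 30 reads carrying exactly 2 splices
    }
    known_count = unknown_count = 0
    reads_by_splice_count = {}
    for k, v in counts.items():
        if k[0] == 0xFFFFFFFF:
            reads_by_splice_count[0xFFFFFFFF - k[1]] = v
        elif k in known:
            known_count += v
        else:
            unknown_count += v
    return known_count, unknown_count, reads_by_splice_count  # (50, 5, {2: 30})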
def register_qc_pca(self):
    output_filename = self.result_dir / "pca.png"

    def plot():
        import sklearn.decomposition as decom

        pca = decom.PCA(n_components=2, whiten=False)
        data = self.get_df()
        # min max scaling 0..1 per gene
        data = data.sub(data.min(axis=1), axis=0)
        data = data.div(data.max(axis=1), axis=0)
        data = data[~pd.isnull(data).any(axis=1)]  # can't do PCA on NaN values
        pca.fit(data.T)
        xy = pca.transform(data.T)
        title = "PCA %s (%s)\nExplained variance: x %.2f%%, y %.2f%%" % (
            self.ddf.name,
            self.find_variable_name(),
            pca.explained_variance_ratio_[0] * 100,
            pca.explained_variance_ratio_[1] * 100,
        )
        plot_df = pd.DataFrame(
            {
                "x": xy[:, 0],
                "y": xy[:, 1],
                "label": [self.get_plot_name(c) for (a, c) in self.samples],
                "group": [
                    self.sample_column_to_group[c] for (a, c) in self.samples
                ],
            }
        )
        p = dp(plot_df).p9().theme_bw().add_scatter("x", "y", color="group")
        if data.shape[1] < 15:
            p = p.add_text(
                "x",
                "y",
                "label",
                _alpha=0.5,
                # _adjust_text={
                #     "expand_points": (2, 2),
                #     "arrowprops": {"arrowstyle": "->", "color": "darkgrey"},
                # },
            )
        p = (
            p.scale_color_many_categories()
            .title(title)
            .render(output_filename, width=8, height=6, dpi=72)
        )
        plot_df.to_csv(output_filename.with_suffix(".tsv"), sep="\t")

    return register_qc(
        ppg.MultiFileGeneratingJob(
            [output_filename, output_filename.with_suffix(".tsv")], plot
        ).depends_on(self.deps())
    )
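# Hedged sketch (toy data, not part of the pipeline): the scaling in plot() above
# min-max-normalises each gene (row) to 0..1 before running PCA on the transposed
# matrix, so samples - not genes - become the observations.
def example_per_gene_scaling_then_pca():
    import pandas as pd
    import sklearn.decomposition as decom

    data = pd.DataFrame(
        {"s1": [0.0, 5.0, 1.0], "s2": [10.0, 5.0, 3.0], "s3": [2.0, 7.0, 2.0]},
        index=["geneA", "geneB", "geneC"],
    )
    data = data.sub(data.min(axis=1), axis=0)  # shift each gene so its minimum is 0
    data = data.div(data.max(axis=1), axis=0)  # scale each gene so its maximum is 1
    data = data[~pd.isnull(data).any(axis=1)]  # drop NaN rows (e.g. constant genes -> 0/0)
    pca = decom.PCA(n_components=2, whiten=False)
    xy = pca.fit_transform(data.T)  # one (x, y) coordinate per sample
    return xy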
def register_qc_correlation(self):
    output_filename = self.result_dir / "pearson_correlation.png"

    def plot(output_filename):
        data = self.get_df()
        # min max scaling 0..1 per gene
        data = data.sub(data.min(axis=1), axis=0)
        data = data.div(data.max(axis=1), axis=0)
        data = data[
            ~pd.isnull(data).any(axis=1)
        ]  # can't do correlation on NaN values
        sample_names = [self.get_plot_name(x) for x in data.columns]
        sample_groups = [self.sample_column_to_group[x] for x in data.columns]
        data.columns = sample_names
        order_pdf = pd.DataFrame(
            {"sample": sample_names, "group": sample_groups}
        ).sort_values(["group", "sample"])
        ordered_names = ["group"] + list(order_pdf["sample"])
        sample_count = data.shape[1]
        pdf = (
            data.corr().transpose().assign(group=0).transpose()
        )  # the value doesn't matter, this just reserves space on the plot
        pdf = pd.melt(pdf.reset_index(), "index")
        (
            dp(pdf)
            .categorize("index", ordered_names)
            .categorize("variable", ordered_names)
            .p9()
            .add_tile("index", "variable", fill="value")
            .scale_fill_gradient2(
                "blue", "white", "red", limits=[-1, 1], midpoint=0
            )
            .add_scatter(
                _x=1, y="sample", color="group", _shape="s", data=order_pdf, _size=3
            )
            .scale_color_many_categories()
            .hide_x_axis_title()
            .hide_y_axis_title()
            .turn_x_axis_labels()
            .render(
                output_filename,
                width=1 + 0.15 * sample_count,
                height=0.15 * sample_count,
            )
        )

    return register_qc(
        ppg.FileGeneratingJob(output_filename, plot).depends_on(self.deps())
    )
def register_qc_distribution(self, genes):
    output_filename = genes.result_dir / self.qc_folder / "read_distribution.png"
    output_filename.parent.mkdir(exist_ok=True)

    def plot(output_filename, elements):
        df = genes.df
        df = dp(df).select({x.aligned_lane.name: x.columns[0] for x in elements}).pd
        if len(df) == 0:
            df = pd.DataFrame({"x": [0], "y": [0], "text": "no data"})
            dp(df).p9().add_text("x", "y", "text").render(output_filename).pd
        else:
            plot_df = dp(df).melt(var_name="sample", value_name="count").pd
            plot = dp(plot_df).p9().theme_bw()
            print(df)
            # df.to_pickle(output_filename + '.pickle')
            if ((df > 0).sum(axis=0) > 1).any() and len(df) > 1:
                # plot = plot.geom_violin(
                #     dp.aes(x="sample", y="count"), width=0.5, bw=0.1
                # )
                pass  # oh so slow as of 20201019
            if len(plot_df["sample"].unique()) > 1:
                plot = plot.annotation_stripes(fill_range=True)
            if (plot_df["count"] > 0).any():
                # can't have a log boxplot with all nans (log(0))
                plot = plot.scale_y_continuous(
                    trans="log10",
                    name=self.qc_distribution_scale_y_name,
                    breaks=[1, 10, 100, 1000, 10000, 100_000, 1e6, 1e7],
                )
            return (
                plot.add_boxplot(
                    x="sample", y="count", _width=0.1, _fill=None, _color="blue"
                )
                .turn_x_axis_labels()
                .title("Raw read distribution")
                .hide_x_axis_title()
                .render_args(limitsize=False)
                .render(output_filename, width=0.2 * len(elements) + 1, height=4)
            )

    return register_qc(
        QCCollectingJob(output_filename, plot)
        .depends_on(genes.add_annotator(self))
        .add(self)
    )
def register_qc_pca(self, genes):
    output_filename = genes.result_dir / self.qc_folder / "pca.png"

    def plot(output_filename, elements):
        import sklearn.decomposition as decom

        if len(elements) == 1:
            xy = np.array([[0], [0]]).transpose()
            title = "PCA %s - fake / single sample" % genes.name
        else:
            pca = decom.PCA(n_components=2, whiten=False)
            data = genes.df[[x.columns[0] for x in elements]]
            # min max scaling 0..1
            data -= data.min()
            data /= data.max()
            data = data[~pd.isnull(data).any(axis=1)]  # can't do PCA on NaN values
            if len(data):
                pca.fit(data.T)
                xy = pca.transform(data.T)
                title = "PCA %s\nExplained variance: x %.2f%%, y %.2f%%" % (
                    genes.name,
                    pca.explained_variance_ratio_[0] * 100,
                    pca.explained_variance_ratio_[1] * 100,
                )
            else:
                xy = np.array(
                    [[0] * len(elements), [0] * len(elements)]
                ).transpose()
                title = "PCA %s - fake / no rows" % genes.name
        plot_df = pd.DataFrame(
            {
                "x": xy[:, 0],
                "y": xy[:, 1],
                "label": [x.plot_name for x in elements],
            }
        )
        print(plot_df)
        (
            dp(plot_df)
            .p9()
            .theme_bw()
            .add_scatter("x", "y")
            .add_text(
                "x",
                "y",
                "label",
                # cool, this can go into an endless loop...
                # _adjust_text={
                #     "expand_points": (2, 2),
                #     "arrowprops": {"arrowstyle": "->", "color": "red"},
                # },
            )
            .scale_color_many_categories()
            .title(title)
            .render(output_filename, width=8, height=6)
        )

    return register_qc(
        QCCollectingJob(output_filename, plot)
        .depends_on(genes.add_annotator(self))
        .add(self)
    )
def register_qc_volcano(self, genes, filtered=None, filter_func=None):
    """Perform a volcano plot."""
    if filtered is None:
        output_filename = genes.result_dir / "volcano.png"
    else:
        output_filename = filtered.result_dir / "volcano.png"

    def plot(output_filename):
        df = (
            dp(genes.df)
            .mutate(
                significant=filter_func(genes.df)
                if filter_func is not None
                else "tbd."
            )
            .pd
        )
        no_sig_lower = (df["significant"] & (df[self["log2FC"]] < 0)).sum()
        no_sig_higher = (df["significant"] & (df[self["log2FC"]] > 0)).sum()
        (
            dp(df)
            .p9()
            .scale_color_many_categories(name="regulated", shift=3)
            .scale_y_continuous(
                name="p",
                trans=dp.reverse_transform("log10"),
                labels=lambda xs: ["%.2g" % x for x in xs],
            )
            .add_vline(xintercept=1, _color="blue")
            .add_vline(xintercept=-1, _color="blue")
            .add_hline(yintercept=0.05, _color="blue")
            .add_rect(  # shade 'simply' significant regions
                xmin="xmin",
                xmax="xmax",
                ymin="ymin",
                ymax="ymax",
                _fill="lightgrey",
                data=pd.DataFrame(
                    {
                        "xmin": [-np.inf, 1],
                        "xmax": [-1, np.inf],
                        "ymin": [0, 0],
                        "ymax": [0.05, 0.05],
                    }
                ),
                _alpha=0.8,
            )
            .add_scatter(self["log2FC"], self["p"], color="significant")
            .title(f"# regulated down/ up: {no_sig_lower} / {no_sig_higher}")
            # .coord_trans(x="reverse", y="reverse")  # broken as of 2019-01-31
            .render(output_filename, width=8, height=6, dpi=300)
        )

    return register_qc(
        ppg.FileGeneratingJob(output_filename, plot).depends_on(
            genes.add_annotator(self),
            ppg.FunctionInvariant(str(output_filename) + "_filter_func", filter_func),
        )
    )
def register_qc_fastqc(self):
    from mbf_externals import FASTQC
    from mbf_qualitycontrol import register_qc

    a = FASTQC()
    output_dir = self.result_dir / "FASTQC"
    temp_job = self.prepare_input()
    if hasattr(temp_job, "filenames"):
        filenames = temp_job.filenames
    else:
        filenames = []
        for j in temp_job:  # is actually a job list
            filenames.extend(j.filenames)
    job = a.run(output_dir, filenames)
    return register_qc(job.depends_on(temp_job))
def register_qc(self, new_lane):
    """Plot to see how much you lost."""
    output_filename = (
        new_lane.result_dir / ".." / "alignment_substract.png"
    ).resolve()
    print(output_filename)

    def calc_and_plot(output_filename, lanes):
        parts = []
        for lane in lanes:
            was = lane.parent.mapped_reads()
            now = lane.mapped_reads()
            lost = was - now
            parts.append(
                pd.DataFrame(
                    {
                        "what": ["kept", "lost"],
                        "count": [now, lost],
                        "sample": lane.name,
                    }
                )
            )
        df = pd.concat(parts)
        return (
            dp(df)
            .categorize("what", ["lost", "kept"])
            .p9()
            .theme_bw()
            .annotation_stripes()
            .add_bar(
                "sample", "count", fill="what", position="stack", stat="identity"
            )
            .title(lanes[0].genome.name + " subtraction")
            .turn_x_axis_labels()
            .scale_y_continuous(labels=lambda xs: ["%.2g" % x for x in xs])
            .render_args(width=len(parts) * 0.2 + 1, height=5)
            .render(output_filename)
        )

    return register_qc(
        QCCollectingJob(output_filename, calc_and_plot)
        .depends_on(new_lane.load())
        .add(new_lane)
    )  # since everybody says self.load, we get them all
def register_qc_alignment_stats(self):
    output_filename = self.result_dir / ".." / "alignment_statistics.png"

    def calc_and_plot(output_filename, lanes):
        parts = []
        for lane in lanes:
            p = lane.get_alignment_stats()
            parts.append(
                pd.DataFrame(
                    {
                        "what": list(p.keys()),
                        "count": list(p.values()),
                        "sample": lane.name,
                    }
                )
            )
        df = pd.concat(parts)
        order = sorted(df["what"].unique())
        umrn = "Uniquely mapped reads number"
        if umrn in order:
            order = [x for x in order if x != umrn] + [umrn]
        return (
            dp(df)
            .categorize("what", order)
            .p9()
            .theme_bw()
            .annotation_stripes()
            .add_bar(
                "sample", "count", fill="what", position="stack", stat="identity"
            )
            .title(lanes[0].genome.name)
            .turn_x_axis_labels()
            .scale_y_continuous(labels=lambda xs: ["%.2g" % x for x in xs])
            .render_args(width=len(parts) * 0.2 + 1, height=5, limitsize=False)
            .render(output_filename)
        )

    return register_qc(
        QCCollectingJob(output_filename, calc_and_plot)
        .depends_on(self.load())
        .add(self)
    )  # since everybody says self.load, we get them all
def register_qc_distribution(self):
    output_filename = self.result_dir / "distribution.png"

    def plot(output_filename):
        df = self.get_df()
        sample_count = df.shape[1]
        sample_names = [self.get_plot_name(x) for x in df.columns]
        sample_groups = [self.sample_column_to_group[x] for x in df.columns]
        df.columns = pd.MultiIndex.from_tuples(
            zip(sample_names, sample_groups), names=("sample", "group")
        )
        order = [
            x[0]
            for x in sorted(zip(sample_names, sample_groups), key=lambda v: v[1])
        ]
        return (
            dp(df)
            .melt(value_name="y")
            .categorize("sample", order)
            .p9()
            .theme_bw()
            .annotation_stripes()
            .geom_violin(dp.aes("sample", "y"), width=0.5)
            .add_boxplot(x="sample", y="y", _width=0.1, _fill=None, color="group")
            .scale_color_many_categories()
            .scale_y_continuous(trans="log10", name=self.find_variable_name())
            .turn_x_axis_labels()
            .hide_x_axis_title()
            .render(
                output_filename,
                height=5,
                width=1 + 0.25 * sample_count,
                limitsize=False,
            )
        )

    return register_qc(
        ppg.FileGeneratingJob(output_filename, plot).depends_on(self.deps())
    )
def register_qc_subchromosomal(self):
    """Subchromosomal distribution plot - good to detect amplified regions
    or ancient virus awakening"""
    import mbf_genomics

    output_filename = (
        self.result_dir / f"{self.name}_subchromosomal_distribution.png"
    )

    class IntervalStrategyWindows(
        mbf_genomics.genes.anno_tag_counts._IntervalStrategy
    ):
        """For QC purposes, spawn all chromosomes with windows of the defined size.

        See mbf_align.lanes.AlignedLane.register_qc_subchromosomal
        """

        def __init__(self, window_size):
            self.window_size = window_size

        def _get_interval_tuples_by_chr(self, genome):
            result = {}
            for chr, length in genome.get_chromosome_lengths().items():
                result[chr] = []
                for ii in range(0, length, self.window_size):
                    result[chr].append(
                        ("%s_%i" % (chr, ii), 0, [ii], [ii + self.window_size])
                    )
            return result

    def calc():
        from mbf_bam import count_reads_unstranded

        interval_strategy = IntervalStrategyWindows(250_000)
        intervals = interval_strategy._get_interval_tuples_by_chr(self.genome)
        bam_filename, bam_index_name = self.get_bam_names()
        counts = count_reads_unstranded(
            bam_filename,
            bam_index_name,
            intervals,
            intervals,
            each_read_counts_once=True,
        )
        true_chromosomes = set(self.genome.get_true_chromosomes())
        result = {"chr": [], "window": [], "count": []}
        for key, count in counts.items():
            if not key.startswith("_"):
                # must handle both 2R_1234
                # and Unmapped_scaffold_29_D1705_1234
                *c, window = key.split("_")
                chr = "_".join(c)
                if chr in true_chromosomes:  # pragma: no branch
                    window = int(window)
                    result["chr"].append(chr)
                    result["window"].append(window)
                    result["count"].append(count)
        return pd.DataFrame(result)

    def plot(df):
        import natsort

        df["count"] += 1  # so we don't crash in the log scale if all values are 0 for a chr
        return (
            dp(df)
            .categorize("chr", natsort.natsorted(df["chr"].unique()))
            .p9()
            .theme_bw()
            .add_line("window", "count", _alpha=0.3)
            .scale_y_log10()
            .facet_wrap("chr", scales="free", ncol=1)
            .hide_x_axis_labels()
            .title(self.name)
            .render_args(
                width=6,
                height=2 + len(df["chr"].unique()) * 1,
                limitsize=False,
            )
            .pd
        )

    return register_qc(
        ppg.PlotJob(output_filename, calc, plot)
        .depends_on(self.load())
        .use_cores(-1)
    )
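# Hedged sketch (invented keys, illustration only): IntervalStrategyWindows above
# names each window "<chromosome>_<start>", so calc() has to split on the *last*
# underscore only - chromosome names like "Unmapped_scaffold_29_D1705" contain
# underscores themselves.
def example_parse_window_key():
    keys = ["2R_1234", "Unmapped_scaffold_29_D1705_1234"]
    parsed = []
    for key in keys:
        *c, window = key.split("_")  # everything but the final piece is the chromosome
        parsed.append(("_".join(c), int(window)))
    return parsed  # [("2R", 1234), ("Unmapped_scaffold_29_D1705", 1234)]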
def register_qc_gene_strandedness(self):  # noqa: C901
    from mbf_genomics.genes.anno_tag_counts import _IntervalStrategy

    class IntervalStrategyExonIntronClassification(_IntervalStrategy):
        """For QC purposes, defines all intron/exon intervals tagged with nothing
        but intron/exon.

        See mbf_align.lanes.AlignedLane.register_qc_gene_strandedness
        """

        def _get_interval_tuples_by_chr(self, genome):
            from mbf_nested_intervals import IntervalSet

            coll = {chr: [] for chr in genome.get_chromosome_lengths()}
            for g in genome.genes.values():
                exons = g.exons_overlapping
                if len(exons[0]) == 0:  # pragma: no cover
                    exons = g.exons_merged
                for start, stop in zip(*exons):
                    coll[g.chr].append(
                        (start, stop, 0b0101 if g.strand == 1 else 0b0110)
                    )
                for start, stop in zip(*g.introns_strict):
                    coll[g.chr].append(
                        (start, stop, 0b1001 if g.strand == 1 else 0b1010)
                    )
            result = {}
            for chr, tups in coll.items():
                iset = IntervalSet.from_tuples_with_id(tups)
                # iset = iset.merge_split()
                iset = iset.merge_hull()
                if iset.any_overlapping():
                    raise NotImplementedError("Should not be reached")
                result[chr] = []
                for start, stop, ids in iset.to_tuples_with_id():
                    ids = set(ids)
                    if len(ids) == 1:
                        id = list(ids)[0]
                        if id == 0b0101:
                            tag = "exon"
                            strand = +1
                        elif id == 0b0110:
                            tag = "exon"
                            strand = -1
                        elif id == 0b1001:
                            tag = "intron"
                            strand = +1
                        elif id == 0b1010:
                            tag = "intron"
                            strand = -1
                        else:  # pragma: no cover
                            raise NotImplementedError("Should not be reached")
                    else:
                        down = 0
                        for i in ids:
                            down |= i
                        if down & 0b1100 == 0b1100:
                            tag = "both"
                        elif down & 0b0100 == 0b0100:
                            tag = "exon"
                        else:  # pragma: no cover - haven't observed this case in the wild yet.
                            tag = "intron"
                        if down & 0b11 == 0b11:
                            tag += "_undecidable"
                            strand = 1  # doesn't matter, but must be one or the other
                        elif down & 0b01:
                            strand = 1
                        else:
                            strand = -1
                    result[chr].append((tag, strand, [start], [stop]))
            return result

    output_filename = self.result_dir / f"{self.name}_strandedness.png"

    def calc():
        from mbf_genomics.genes.anno_tag_counts import IntervalStrategyGene
        from mbf_bam import count_reads_stranded

        interval_strategy = IntervalStrategyExonIntronClassification()
        intervals = interval_strategy._get_interval_tuples_by_chr(self.genome)
        bam_filename, bam_index_name = self.get_bam_names()
        forward, reverse = count_reads_stranded(
            bam_filename,
            bam_index_name,
            intervals,
            IntervalStrategyGene()._get_interval_tuples_by_chr(self.genome),
            each_read_counts_once=True,
        )
        result = {"what": [], "count": [], "sample": self.name}
        for k in forward.keys() | reverse.keys():
            if k.endswith("_undecidable"):
                result["what"].append(k)
                result["count"].append(forward.get(k, 0) + reverse.get(k, 0))
            elif not k.startswith("_"):
                result["what"].append(k + "_correct")
                result["count"].append(forward.get(k, 0))
                result["what"].append(k + "_reversed")
                result["count"].append(reverse.get(k, 0))
            elif k == "_outside":
                result["what"].append("outside")
                result["count"].append(forward.get(k, 0))
        return pd.DataFrame(result)

    def plot(df):
        return (
            dp(df)
            .mutate(
                what=pd.Categorical(
                    df["what"],
                    [
                        "exon_correct",
                        "exon_reversed",
                        "exon_undecidable",
                        "intron_correct",
                        "intron_reversed",
                        "intron_undecidable",
                        "both_correct",
                        "both_reversed",
                        "both_undecidable",
                        "outside",
                    ],
                )
            )
            .p9()
            .add_bar("sample", "count", fill="what", position="dodge")
            .scale_y_continuous(labels=lambda xs: ["%.2g" % x for x in xs])
            .turn_x_axis_labels()
            .pd
        )

    return register_qc(
        ppg.PlotJob(output_filename, calc, plot)
        .depends_on(self.load())
        .use_cores(-1)
    )
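# Hedged sketch (illustration only): the classification above packs two facts into
# one bit flag per interval - the high bits 0b01xx / 0b10xx mark exon / intron, the
# low bits 0bxx01 / 0bxx10 mark forward / reverse strand. OR-ing the flags of
# merged intervals then reveals mixtures, exactly as the `down` logic does.
def example_decode_strandedness_flags():
    EXON_FWD, EXON_REV = 0b0101, 0b0110
    INTRON_FWD, INTRON_REV = 0b1001, 0b1010
    # an exon (forward) merged with an intron (forward):
    down = EXON_FWD | INTRON_FWD  # 0b1101
    is_both = down & 0b1100 == 0b1100  # True: both exon and intron bits set
    # an exon annotated on both strands:
    down2 = EXON_FWD | EXON_REV  # 0b0111
    undecidable = down2 & 0b0011 == 0b0011  # True: forward and reverse bits set
    return is_both, undecidable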
def register_qc_ma_plot(self, genes, filtered, filter_func):
    """Perform an MA plot - not a straight annotator.register_qc function,
    but called by .filter
    """
    output_filename = filtered.result_dir / "ma_plot.png"

    def plot(output_filename):
        from statsmodels.nonparametric.smoothers_lowess import lowess

        print(genes.df.columns)
        print(list(self.sample_columns(self.comp[0])))
        print(list(self.sample_columns(self.comp[1])))
        df = genes.df[
            list(self.sample_columns(self.comp[0]))
            + list(self.sample_columns(self.comp[1]))
        ]
        df = df.assign(significant=filter_func(genes.df))
        pdf = []
        loes_pdfs = []
        # Todo: how many times can you overplot this?
        for a, b in itertools.combinations(
            [x for x in df.columns if not "significant" == x], 2
        ):
            np_a = np.log2(df[a] + self.laplace_offset)
            np_b = np.log2(df[b] + self.laplace_offset)
            A = (np_a + np_b) / 2
            M = np_a - np_b
            local_pdf = pd.DataFrame(
                {
                    "A": A,
                    "M": M,
                    "a": self.comparisons.get_plot_name(a),
                    "b": self.comparisons.get_plot_name(b),
                    "significant": df["significant"],
                }
            ).sort_values("M")
            chosen = np.zeros(len(local_pdf), bool)
            chosen[:500] = True
            chosen[-500:] = True
            chosen[np.random.randint(0, len(chosen), 1000)] = True
            pdf.append(local_pdf)
            fitted = lowess(M, A, is_sorted=False)
            loes_pdfs.append(
                pd.DataFrame(
                    {
                        "a": self.comparisons.get_plot_name(a),
                        "b": self.comparisons.get_plot_name(b),
                        "A": fitted[:, 0],
                        "M": fitted[:, 1],
                    }
                )
            )
        pdf = pd.concat(pdf)
        pdf = pdf.assign(ab=[a + ":" + b for (a, b) in zip(pdf["a"], pdf["b"])])
        loes_pdf = pd.concat(loes_pdfs)
        loes_pdf = loes_pdf.assign(
            ab=[a + ":" + b for (a, b) in zip(loes_pdf["a"], loes_pdf["b"])]
        )
        (
            dp(pdf)
            .p9()
            .theme_bw(10)
            .add_hline(yintercept=0, _color="lightblue")
            .add_hline(yintercept=1, _color="lightblue")
            .add_hline(yintercept=-1, _color="lightblue")
            .scale_color_many_categories(name="significant", shift=3)
            .add_point("A", "M", color="significant", _size=1, _alpha=0.3)
            .add_line("A", "M", _color="blue", data=loes_pdf)
            .facet_wrap(["ab"])
            .title(
                f"MA {filtered.name}\n{self.comparisons.find_variable_name()}"
            )
            .render(output_filename, width=8, height=6)
        )

    return register_qc(
        ppg.FileGeneratingJob(output_filename, plot)
        .depends_on(genes.add_annotator(self))
        .depends_on(self.comparisons.deps)
    )
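# Hedged sketch (toy numbers, not part of the pipeline): the MA transform used
# above is A = (log2(a) + log2(b)) / 2 (mean expression) and M = log2(a) - log2(b)
# (log fold change), with a small Laplace offset keeping log2 defined for zero counts.
def example_ma_transform():
    import numpy as np

    a, b, laplace_offset = 100.0, 25.0, 1.0
    np_a = np.log2(a + laplace_offset)
    np_b = np.log2(b + laplace_offset)
    A = (np_a + np_b) / 2  # ~5.68
    M = np_a - np_b  # ~1.96 -> roughly a four-fold difference
    return A, M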