def test_to_plot_name(self):
    """Check parse_a_or_c_to_plot_name for accepted and rejected inputs.

    Covers plain strings, single annotators, multi-column annotators and
    (annotator, column) tuples addressed by position or by name, plus the
    error raised for each kind of bad input.
    """
    to_plot_name = parse_a_or_c_to_plot_name

    def poly(*extra):
        # Builds a fresh two-column PolyConstant on every call; any extra
        # positional argument is forwarded as the third constructor argument.
        return PolyConstant(["shu", "sha"], [5, 10], *extra)

    # Without a third PolyConstant argument the column name is returned.
    assert to_plot_name("hello") == "hello"
    assert to_plot_name(Constant("shu", 5)) == "shu"
    assert to_plot_name(poly()) == "shu"
    assert to_plot_name((poly(), 1)) == "sha"
    assert to_plot_name((poly(), "sha")) == "sha"

    # The argument is built inside the raises block, as each case is meant
    # to cover construction plus parsing.
    rejected = [
        (KeyError, lambda: (poly(), "shi")),  # unknown column name
        (IndexError, lambda: (poly(), 5)),  # column position out of range
        (ValueError, lambda: 5),  # neither string, annotator nor tuple
        (ValueError, lambda: (Constant("shu", 5), "shu", 3)),  # 3-tuple
    ]
    for expected_error, make_argument in rejected:
        with pytest.raises(expected_error):
            to_plot_name(make_argument())

    # With "hello" as third PolyConstant argument, that value is returned
    # no matter how the column is addressed.
    assert to_plot_name(poly("hello")) == "hello"
    assert to_plot_name((poly("hello"), "sha")) == "hello"
    assert to_plot_name((poly("hello"), 1)) == "hello"
def test_multi_plus_filter(self, clear_annotators):
    """Run many filter specifications against one Log2FC comparison.

    Each entry of ``to_test`` pairs a filter specification with either the
    log2FC values expected to survive the filter or the exception type the
    filter call must raise.
    """
    frame = pd.DataFrame(
        {
            "a1": [1 / 0.99, 2 / 0.99, 3 / 0.99],
            "a2": [1 * 0.99, 2 * 0.99, 3 * 0.99],
            "b1": [2 * 0.99, 8 * 0.99, (16 * 3) * 0.99],
            "b2": [2 / 0.99, 8 / 0.99, (16 * 3) / 0.99],
            "delta": [10, 20, 30],
        }
    )
    ddf = DelayedDataFrame("ex1", frame)
    comparisons = Comparisons(ddf, {"a": ["a1", "a2"], "b": ["b1", "b2"]})
    comparison = comparisons.a_vs_b("a", "b", Log2FC(), laplace_offset=0)
    anno1 = Constant("shu1", 5)
    # "shu2" is only referenced by column name below and "shu3" not at all;
    # the bindings are kept so both annotators exist for the whole test.
    anno2 = Constant("shu2", 5)  # noqa: F841
    anno3 = Constant("shu3", 5)  # noqa: F841
    to_test = [
        (("log2FC", "==", -1.0), [-1.0]),
        (("log2FC", ">", -2.0), [-1.0]),
        (("log2FC", "<", -2.0), [-4.0]),
        (("log2FC", ">=", -2.0), [-1.0, -2.0]),
        (("log2FC", "<=", -2.0), [-2.0, -4.0]),
        (("log2FC", "|>", 2.0), [-4.0]),
        (("log2FC", "|<", 2.0), [-1.0]),
        (("log2FC", "|>=", 2.0), [-2.0, -4.0]),
        (("log2FC", "|<=", 2.0), [-1.0, -2.0]),
        ((comparison["log2FC"], "<", -2.0), [-4.0]),
        (("log2FC", "|", -2.0), ValueError),
        ([("log2FC", "|>=", 2.0), ("log2FC", "<=", 0)], [-2.0, -4.0]),
        ((anno1, ">=", 5), [-1, -2.0, -4.0]),
        (((anno1, 0), ">=", 5), [-1, -2.0, -4.0]),
        (("shu2", ">=", 5), [-1, -2.0, -4.0]),
        (("delta", ">", 10), [-2.0, -4.0]),
    ]
    if not ppg.inside_ppg():  # can't test for missing columns in ppg.
        to_test.extend([(("log2FC_no_such_column", "<", -2.0), KeyError)])

    error_types = (ValueError, KeyError)
    filtered = {}
    for index, (spec, expected) in enumerate(to_test):
        new_name = "new%i" % index
        if expected in error_types:
            with pytest.raises(expected):
                comparison.filter([spec], new_name)
            continue
        # A single tuple is one criterion and gets wrapped; a list already
        # is a sequence of criteria.
        criteria = [spec] if isinstance(spec, tuple) else spec
        result = comparison.filter(criteria, new_name)
        filtered[tuple(spec)] = result
        assert result.name == new_name
        force_load(result.annotate(), result.name)

    force_load(ddf.add_annotator(comparison), "somethingsomethingjob")
    run_pipegraph()

    fc_column = comparison["log2FC"]
    assert (ddf.df[fc_column] == [-1.0, -2.0, -4.0]).all()
    for spec, expected in to_test:
        if expected in error_types:
            continue
        try:
            assert filtered[tuple(spec)].df[fc_column].values == approx(expected)
        except AssertionError:
            # Show which filter specification failed before re-raising.
            print(spec)
            raise
def test_annos_same_column_different_anno(self):
    """Adding a second "hello2" Constant with a different value must fail.

    Constant("hello", "c") and Constant("hello2", "c") are both accepted;
    Constant("hello2", "d") reuses the column name with another value and
    is expected to raise ValueError when added.
    """
    ddf = DelayedDataFrame(
        "shu", lambda: pd.DataFrame({"A": [1, 2], "B": ["c", "d"]})
    )
    ddf += Constant("hello", "c")
    ddf += Constant("hello2", "c")

    # Constructed outside the raises block: only the addition may raise.
    conflicting = Constant("hello2", "d")
    with pytest.raises(ValueError):
        ddf += conflicting
def test_simple_from_anno_plus_column_pos(self):
    """Compare two groups given as (annotator, column position) tuples.

    Group "a" is the constant 5 and group "b" the constant 10, so every row
    is expected to have a log2FC of -1.
    """
    frame = pd.DataFrame({"a": [1, 2, 3], "b": [2, 8, 16 * 3]})
    ddf = DelayedDataFrame("ex1", frame)
    five = Constant("five", 5)
    ten = Constant("ten", 10)
    comparisons = Comparisons(ddf, {"a": [(five, 0)], "b": [(ten, 0)]})
    comparison = comparisons.a_vs_b("a", "b", Log2FC(), laplace_offset=0)
    force_load(ddf.add_annotator(comparison), "fl1")
    run_pipegraph()
    assert (ddf.df[comparison["log2FC"]] == [-1, -1, -1]).all()
def test_annotators_are_kept_on_filtering(self):
    """An annotator added to Genes is still reported by a filtered child.

    The filter keeps only chromosome "4", which none of the three mock
    genes is on.
    """
    fields = ("stable_id", "chr", "strand", "tss", "tes", "description")
    rows = [
        ("fake1", "1", 1, 5000, 5500, "bla"),
        ("fake2", "1", -1, 5400, 4900, "bla"),
        ("fake3", "2", -1, 5400, 4900, "bla"),
    ]
    # One dict per gene, keys in the order given by `fields`.
    gene_records = [dict(zip(fields, row)) for row in rows]
    genome = MockGenome(pd.DataFrame(gene_records))
    all_genes = genes.Genes(genome)
    constant = Constant("shu", 5)
    all_genes.add_annotator(constant)
    no_genes = all_genes.filter("nogenes", lambda df: df["chr"] == "4")
    assert no_genes.has_annotator(constant)
def test_random_same_number(self):
    """Convert a GenomicRegions and compare the result with its source.

    The conversion keeps only chr/start/stop and shifts start by one, so the
    converted regions must have the same length, a start that is one larger,
    and no "column_that_will_disappear".
    """

    def sample_data():
        return pd.DataFrame(
            {
                "chr": ["1", "2", "1"],
                "start": [10, 100, 1000],
                "stop": [12, 110, 1110],
                "column_that_will_disappear": ["A", "b", "c"],
            }
        )

    def convert(df):
        coordinates = df[["chr", "start", "stop"]]
        return coordinates.assign(start=coordinates["start"] + 1)

    # Extra dependencies only exist when running inside a pipegraph.
    deps = (
        [ppg.ParameterInvariant("shuParam", "hello")] if ppg.inside_ppg() else []
    )
    source = regions.GenomicRegions(
        "sharum", sample_data, [], get_genome_chr_length()
    )
    source.add_annotator(Constant("Constant", 5))
    source.annotate()
    converted = source.convert("a+1", convert, dependencies=deps)
    force_load(converted.load())
    for dep in deps:
        assert dep in converted.load().lfg.prerequisites
    run_pipegraph()

    assert len(source.df) == len(converted.df)
    assert (source.df["start"] == converted.df["start"] - 1).all()
    assert "column_that_will_disappear" in source.df.columns
    assert "column_that_will_disappear" not in converted.df.columns
def test_forbidden_cache_names(self):
    """Constants with unusable first arguments are rejected on addition.

    The rejected first arguments contain "*", "/" or "?", or are 200
    characters long ("c4" repeated 100 times). Adding any of them to the
    DelayedDataFrame must raise ValueError.
    """
    ddf = DelayedDataFrame(
        "shu", lambda: pd.DataFrame({"A": [1, 2], "B": ["c", "d"]})
    )
    # All four are constructed up front; only the addition is expected to
    # raise.
    rejected = [
        Constant("c1*", "*"),
        Constant("c2/", "*"),
        Constant("c3?", "*"),
        Constant("c4" * 100, "*"),
    ]
    for annotator in rejected:
        with pytest.raises(ValueError):
            ddf += annotator
def test_filtering(self):
    """Annotators flow from a parent frame to its filtered child, not back.

    Annotators added to the parent, before or after the filter is created,
    show up in the child's columns. An annotator added to the child does
    not show up in the parent.
    """

    class A(Annotator):
        cache_name = "A"
        columns = ["aa"]

        def calc(self, df):
            return pd.DataFrame({self.columns[0]: "a"}, index=df.index)

    class B(Annotator):
        cache_name = "B"
        columns = ["ab"]

        def calc(self, df):
            # Reads the "aa" column that A provides.
            return df["aa"] + "b"

        def dep_annos(self):
            return [A()]

    parent = DelayedDataFrame(
        "shu", lambda: pd.DataFrame({"A": [1, 2], "B": ["c", "d"]})
    )
    parent += Constant("C", "c")
    assert "C" in parent.df.columns

    child = parent.filter("sha", lambda df: df["A"] == 1)
    assert "C" in child.df.columns

    parent += A()
    assert "aa" in parent.df.columns
    assert "aa" in child.df.columns

    child += B()
    assert "ab" in child.df.columns
    assert "ab" not in parent.df.columns
def test_annotator(self):
    """A Constant annotator adds its column, filled with its value."""
    ddf = DelayedDataFrame(
        "shu", lambda: pd.DataFrame({"A": [1, 2], "B": ["c", "d"]})
    )
    ddf += Constant("column", "value")
    ddf.annotate()

    annotated = ddf.df
    assert "column" in annotated.columns
    assert (ddf.df["column"] == "value").all()
def test_annotator_basic(self):
    """After a pipegraph run, the Constant column holds its value in every row."""
    ddf = DelayedDataFrame(
        "shu", lambda: pd.DataFrame({"A": [1, 2], "B": ["c", "d"]})
    )
    ddf += Constant("aa", "aa")
    annotate_jobs = ddf.annotate()
    force_load(annotate_jobs)
    ppg.run_pipegraph()
    assert (ddf.df["aa"] == "aa").all()
def test_to_column(self):
    """Check parse_a_or_c_to_column for accepted and rejected inputs.

    Covers plain strings, single annotators, multi-column annotators,
    (annotator, column) tuples addressed by position or by name, and a
    (None, column) tuple, plus the error raised for each kind of bad input.
    """
    to_column = parse_a_or_c_to_column

    def poly():
        # Builds a fresh two-column PolyConstant on every call.
        return PolyConstant(["shu", "sha"], [5, 10])

    assert to_column("hello") == "hello"
    assert to_column(Constant("shu", 5)) == "shu"
    assert to_column(poly()) == "shu"
    assert to_column((poly(), 1)) == "sha"
    assert to_column((poly(), "sha")) == "sha"
    # With None in the annotator slot the column name is passed through.
    assert to_column((None, "shi")) == "shi"

    # The argument is built inside the raises block, as each case is meant
    # to cover construction plus parsing.
    rejected = [
        (KeyError, lambda: (poly(), "shi")),  # unknown column name
        (IndexError, lambda: (poly(), 5)),  # column position out of range
        (ValueError, lambda: 5),  # neither string, annotator nor tuple
        (ValueError, lambda: (Constant("shu", 5), "shu", 3)),  # 3-tuple
    ]
    for expected_error, make_argument in rejected:
        with pytest.raises(expected_error):
            to_column(make_argument())
def test_annotator_coliding_with_non_anno_column(self):
    """An annotator named like an existing column fails the pipegraph run.

    The frame already has a column "A"; a Constant also named "A" is added.
    The run must raise ppg.RuntimeError, and the exception stored on the
    annotator's job must mention "were already present".
    """
    ddf = DelayedDataFrame(
        "shu",
        lambda: pd.DataFrame(
            {"A": [1, 2, 3], "B": ["a", "b", "c"], "idx": ["x", "y", "z"]}
        ).set_index("idx"),
    )
    ddf += Constant("A", "aa")
    job_getter = ddf.anno_jobs["A"]

    generating_job = ppg.JobGeneratingJob("shu", lambda: 55)
    generating_job.depends_on(ddf.annotate())
    with pytest.raises(ppg.RuntimeError):
        ppg.run_pipegraph()

    # anno_jobs["A"] is called here to obtain the job carrying the exception.
    failure = job_getter().exception
    assert "were already present" in str(failure)
def test_to_anno(self):
    """Check parse_a_or_c_to_anno for accepted and rejected inputs.

    A plain string yields None. An annotator, or an (annotator, column)
    tuple, yields something equal to a freshly built annotator with the
    same arguments.
    """
    to_anno = parse_a_or_c_to_anno

    def poly():
        # Builds a fresh two-column PolyConstant on every call.
        return PolyConstant(["shu", "sha"], [5, 10])

    assert to_anno("hello") is None
    assert to_anno(Constant("shu", 5)) == Constant("shu", 5)
    assert to_anno(poly()) == poly()
    assert to_anno((poly(), 1)) == poly()
    assert to_anno((poly(), "sha")) == poly()

    # The argument is built inside the raises block, as each case is meant
    # to cover construction plus parsing.
    rejected = [
        (KeyError, lambda: (poly(), "shi")),  # unknown column name
        (IndexError, lambda: (poly(), 5)),  # column position out of range
        (ValueError, lambda: 5),  # neither string, annotator nor tuple
        (ValueError, lambda: (Constant("shu", 5), "shu", 3)),  # 3-tuple
    ]
    for expected_error, make_argument in rejected:
        with pytest.raises(expected_error):
            to_anno(make_argument())
def test_find_annos_from_column(self, both_ppg_and_no_ppg_no_qc, clear_annotators):
    """Look up annotators by column name.

    find_annos_from_column must return the very annotator objects created
    here, in creation order, and raise KeyError for an unknown column.
    """
    first = Constant("shu", 5)
    found = find_annos_from_column("shu")
    assert found == [first]
    # Identity, not just equality: the lookup hands back the same object.
    assert find_annos_from_column("shu")[0] is first

    with pytest.raises(KeyError):
        find_annos_from_column("nosuchcolumn")

    # A second annotator providing the same column is listed after the first.
    second = PolyConstant(["shu"], [10])
    assert find_annos_from_column("shu") == [first, second]

    if not ppg.inside_ppg():
        return
    # Inside a pipegraph: once a new pipegraph is started, the earlier
    # annotators can no longer be found.
    both_ppg_and_no_ppg_no_qc.new_pipegraph()
    with pytest.raises(KeyError):
        find_annos_from_column("shu")
def test_multi_level(self):
    """Annotate across two nested filter levels and check the innermost frame.

    The first filter uses a Constant "C" passed along with it, the second
    keeps rows with A >= 2. Each level gets its own LenAnno, and both
    annotation columns must be present in the innermost frame.
    """
    root = DelayedDataFrame(
        "shu",
        lambda: pd.DataFrame(
            {"A": [1, 2, 3], "B": ["a", "b", "c"], "idx": ["x", "y", "z"]}
        ).set_index("idx"),
    )
    first_level = root.filter("sha", lambda df: df["C"] == 4, Constant("C", 4))
    first_level += LenAnno("count")

    second_level = first_level.filter("shc", lambda df: df["A"] >= 2)
    second_level += LenAnno("count2")
    second_level.write()
    ppg.run_pipegraph()

    assert len(second_level.df) == 2
    assert (second_level.df["A"] == [2, 3]).all()
    assert (second_level.df["count"] == "count3").all()
    assert (second_level.df["count2"] == "count22").all()
def test_filtering_on_annotator(self):
    """Filtering on an annotator column needs that annotator to be passed in.

    Filtering on "aa" without supplying annotator A raises KeyError; with A
    supplied, the filter works and keeps only the row where "aa" is "a".
    """

    class A(Annotator):
        cache_name = "A"
        columns = ["aa"]

        def calc(self, df):
            row_count = len(df)
            # Alternating "a", "b", ... repeated often enough, then cut to
            # exactly one value per row.
            repeated = ["a", "b"] * int(row_count / 2 + 1)
            return pd.DataFrame(
                {self.columns[0]: repeated[:row_count]},
                index=df.index,
            )

    parent = DelayedDataFrame(
        "shu", lambda: pd.DataFrame({"A": [1, 2], "B": ["c", "d"]})
    )
    with pytest.raises(KeyError):
        parent.filter("sha", lambda df: df["aa"] == "a")

    child = parent.filter("sha", lambda df: df["aa"] == "a", [A()])
    shared_constant = Constant("C", "c")
    parent += shared_constant
    child += shared_constant
    assert (child.df["A"] == [1]).all()
def gen():
    """Add a Constant("shu", 5) annotator to ``a``.

    ``a`` is not defined here; it is resolved from the surrounding scope.
    """
    add_annotator = a.add_annotator
    add_annotator(Constant("shu", 5))
def dep_annos(self):
    """Return a Constant("Nestingconst", 5) followed by a Nested instance.

    The method name suggests these are the annotators this one depends on.
    """
    nesting_constant = Constant("Nestingconst", 5)
    nested = Nested()
    return [nesting_constant, nested]
def dep_annos(self):
    """Return a one-element list holding Constant("Nestedconst", 5).

    The method name suggests this is the annotator this one depends on.
    """
    dependencies = [Constant("Nestedconst", 5)]
    return dependencies