Exemplo n.º 1
0
def test_inv():
    """Check the `~` operator on a column reference in two contexts.

    Inside `select` it leaves only the other column; inside `mutate` it
    produces the logical negation of a boolean column.
    """
    two_cols = tibble(x=1, y=2)
    remaining = two_cols >> select(~f.x)
    assert list(remaining.columns) == ["y"]

    flags = tibble(x=True)
    negated = flags >> mutate(y=~f.x)
    assert negated.y.tolist() == [False]
Exemplo n.º 2
0
def gene_name_conversion(
    genes,
    species,
    infmt,
    outfmt,
    notfound,
):
    """Translate gene names from one format to another via MyGeneInfo.

    Args:
        genes: A sequence of gene names to look up
        species: Species used to restrict the query
            Supported: human, mouse, rat, fruitfly, nematode, zebrafish,
            thale-cress, frog and pig

        infmt: Format (query scope) of the incoming gene names
            Available fields
            https://docs.mygene.info/en/latest/doc/query_service.html#available-fields
        outfmt: Target gene name format(s). A string is split on commas
            into a list of field names after the query has been sent.
        notfound: Policy for genes that could not be converted.
            use-query: Fill the missing value with the original name
            skip: Drop the rows whose conversion is missing
            error: Raise `QueryGenesNotFound`
            For "error" and "skip" only the first output field is checked.
            Any other value returns the joined frame untouched.

    Returns:
        A dataframe with the `query` column and the `outfmt` column(s),
        obtained by left-joining the input genes with the query hits.

    Raises:
        QueryGenesNotFound: If `notfound` is "error" and the first output
            field is missing for at least one gene.
    """
    hits = mygene.querymany(
        genes,
        scopes=infmt,
        fields=outfmt,
        as_dataframe=True,
        df_index=False,
        species=species,
    )
    # Sort by descending score, keep the first row of every query group,
    # then drop the bookkeeping columns.
    best_hits = (
        hits
        >> group_by(f.query)
        >> arrange(desc(f._score))
        >> slice_head(1)
        >> select(~c(f._id, f._score, f.notfound))
    )

    if isinstance(outfmt, str):
        outfmt = [field.strip() for field in outfmt.split(",")]

    # Left join so that every input gene keeps a row, converted or not.
    out = tibble(query=genes) >> left_join(best_hits, by=f.query)

    if notfound == "use-query":
        out = out >> mutate(
            across(
                outfmt,
                lambda col, query: if_else(is_na(col), query, col),
                query=f.query,
            )
        )
        return out

    if notfound == "error":
        if any(is_na(out[outfmt[0]])):
            missing = out >> filter(is_na(f[outfmt[0]])) >> pull(f.query)
            raise QueryGenesNotFound(missing)
        return out

    if notfound == "skip":
        out = out >> filter(~is_na(f[outfmt[0]]))

    return out
Exemplo n.º 3
0
class TruvariBench(TruvariBench):
    # This process depends on DownloadList.
    requires = DownloadList
    # Input frame built from the upstream channel `ch`:
    #   compvcf - result of expand_dir() for the "input*.vcf.gz" pattern
    #   basevcf - top-left cell of expand_dir() for "multi*.vcf.gz"
    # NOTE(review): expand_dir is defined elsewhere; presumably it lists the
    # files of a directory matching the pattern - confirm.
    input_data = lambda ch: tibble(
        compvcf=expand_dir(ch, pattern="input*.vcf.gz"),
        basevcf=expand_dir(ch, pattern="multi*.vcf.gz").iloc[0, 0],
    )
    # Reference fasta, located three directory levels above this file.
    envs = {
        "ref": str(
            Path(__file__).parent.parent.parent.joinpath(
                "data", "reference", "hg19", "chrs.fa"
            )
        ),
    }
Exemplo n.º 4
0
def test_and_or():
    """Check `&` and `|` on column collections and on column expressions."""
    frame = tibble(x=1, y=2, z=3, w=4)

    # `&` between two collections keeps only the shared column.
    shared = frame >> select(c(f.x, f.y) & c(f.y, f.z))
    assert list(shared.columns) == ["y"]

    # `&` between two columns inside mutate is evaluated element-wise.
    col_and_col = frame >> mutate(a=f.x & f.y)
    assert col_and_col.a.tolist() == [True]

    # A plain scalar on the left-hand side works as well.
    scalar_and_col = frame >> mutate(a=True & f.y)
    assert scalar_and_col.a.tolist() == [True]

    # `|` between two collections keeps the columns found in either one.
    either = frame >> select(c(f.x, f.y) | c(f.y, f.z))
    assert list(either.columns) == ["x", "y", "z"]
Exemplo n.º 5
0
def test_glimpse_html_df():
    """The HTML representation of a glimpse contains a `<table>` element."""
    labels = [str(i) for i in range(20)]
    frame = tibble(x=f[:20], y=labels)
    view = glimpse(frame, 100)

    html = view._repr_html_()
    assert "<table>" in html
Exemplo n.º 6
0
def test_glimpse_str_gf():
    """The repr of a glimpse over a grouped frame reports its groups."""
    grouped = (
        tibble(x=f[:10], y=[str(i) for i in range(10)]) >> group_by(f.y)
    )
    text = repr(glimpse(grouped))
    assert "Groups: y [10]" in text
Exemplo n.º 7
0
def test_glimpse_str_nest_df():
    """The str of a glimpse over a nested frame shows its shape and cells.

    The output must report 10 rows and 2 columns, and render the nested
    cells as `<DF 1x1>` entries.
    """
    nested = tibble(x=f[:10], y=f[10:20]) >> nest(data=~f.x)
    text = str(glimpse(nested))
    for fragment in ("Rows: 10", "Columns: 2", "<DF 1x1>, <DF 1x1>"):
        assert fragment in text
Exemplo n.º 8
0
def test_glimpse_str_df():
    """The str of a glimpse over a plain frame shows its shape and values."""
    frame = tibble(x=f[:10], y=[str(i) for i in range(10)])
    text = str(glimpse(frame))
    for fragment in ("Rows: 10", "Columns: 2", "0, 1, 2"):
        assert fragment in text
Exemplo n.º 9
0
from datar.all import tibble, flatten
from pipen import Proc
from biopipen.ns.web import Download
from biopipen.ns.plot import Heatmap
from biopipen.core.testing import get_pipeline

# Derive a new process from the imported Heatmap proc and rebind the name
# `Heatmap` to it; the derived process depends on Download.
Heatmap = Proc.from_proc(
    Heatmap,
    requires=Download,
    # Build the input frame from the upstream channel `ch`: column `infile`
    # is `ch` itself and `annofiles` is a single-element list holding
    # `flatten(ch)`.
    input_data=lambda ch: tibble(infile=ch, annofiles=[flatten(ch)]),
    envs={
        # R snippet that trims `data` to its first 100 rows.
        # NOTE(review): presumably evaluated by the Heatmap script before
        # plotting - confirm against the process definition.
        "globals": "data = head(data, 100)",
        "args": {
            # NOTE(review): the "r:" prefix presumably marks the value as a
            # raw R expression rather than a string - confirm.
            "right_annotation":
            """r:rowAnnotation(
                Boxplot = anno_boxplot(as.matrix(head(annos, 100)), outline = F)
            )"""
        }
    })


def pipeline():
    """Build the pipeline for this file, fed with one GEO download URL.

    The pipeline starts at `Download`, whose input data is a single-item
    list containing the URL of the GSE179367 gene count file.
    """
    count_file_url = (
        "https://www.ncbi.nlm.nih.gov/geo/download/"
        "?acc=GSE179367"
        "&format=file"
        "&file=GSE179367%5Fgene%5Fcount%2Ereal%2Etxt%2Egz"
    )
    pipe = get_pipeline(__file__)
    return pipe.set_starts(Download).set_data([count_file_url])


def testing(pipen):
Exemplo n.º 10
0
def test_or_():
    """`|` between a column collection and a plain list selects both sides."""
    frame = tibble(x=1, y=2, z=3)
    selected = frame >> select(c(f.x, f.y) | [f.y, f.z])
    expected = tibble(x=1, y=2, z=3)
    assert_frame_equal(selected, expected)
Exemplo n.º 11
0
def test_right_recycle_to_left():
    """A scalar right operand of `|` is applied to every row of the column."""
    flags = tibble(x=[True, False])
    result = mutate(flags, y=f.x | True)
    expected = tibble(x=[True, False], y=[True, True])
    assert_frame_equal(result, expected)
Exemplo n.º 12
0
def test_rowwise_gets_rowwise():
    """Operator results computed on a rowwise frame remain rowwise.

    Three columns are created (binary `+`, unary `-`, unary `+`); the
    rowwise flag is checked on the first two.
    """
    rowwise_df = tibble(x=[1, 2, 3], y=[4, 5, 6]).rowwise()
    result = mutate(rowwise_df, z=1 + f.y, w=-f.x, t=+f.y)
    assert result.z.is_rowwise
    assert result.w.is_rowwise
Exemplo n.º 13
0
def test_op_getattr():
    """A method can be called on the result of an operator expression."""
    frame = tibble(x=[1, 2], y=[1, -3])
    result = mutate(frame, z=(f.x * f.y).abs())
    expected = tibble(x=[1, 2], y=[1, -3], z=[1, 6])
    assert_frame_equal(result, expected)
Exemplo n.º 14
0
def test_undefined_op():
    """Multiplying two columns inside `mutate` gives the row-wise product."""
    frame = tibble(x=[1, 2], y=[1, 3])
    result = mutate(frame, z=f.x * f.y)
    expected = tibble(x=[1, 2], y=[1, 3], z=[1, 6])
    assert_frame_equal(result, expected)
Exemplo n.º 15
0
def test_ne():
    """Check `!=` between columns and between attributes of columns."""
    frame = tibble(x=[1, 2], y=[1, 3])

    # Element-wise comparison of the two columns.
    by_value = mutate(frame, z=f.x != f.y)
    assert_frame_equal(
        by_value,
        tibble(x=[1, 2], y=[1, 3], z=[False, True]),
    )

    # Comparing the column sizes gives one result that fills every row.
    by_size = mutate(frame, z=f.x.size != f.y.size)
    assert_frame_equal(
        by_size,
        tibble(x=[1, 2], y=[1, 3], z=[False, False]),
    )
Exemplo n.º 16
0
def test_apply_dtypes():
    """`apply_dtypes(frame, True)` turns a whole-number float column into ints."""
    frame = tibble(x=[1.0, 2.0])
    # The frame is modified in place; the return value is not used.
    apply_dtypes(frame, True)
    assert is_integer(frame.x)
Exemplo n.º 17
0
def test_neg():
    """Negating the slice `f[:1]` inside `select` leaves only column `y`."""
    frame = tibble(x=1, y=2)
    remaining = frame >> select(-f[:1])
    assert list(remaining.columns) == ["y"]
Exemplo n.º 18
0
from pipen import Proc
from biopipen.ns.web import Download
from biopipen.ns.misc import Str2File
from biopipen.ns.gsea import FGSEA
from datar.all import flatten, tibble, select
from biopipen.core.testing import get_pipeline


# Derive a new process from the imported FGSEA proc and rebind the name
# `FGSEA` to it; the derived process depends on both Download and Str2File.
FGSEA = Proc.from_proc(
    FGSEA,
    requires=[Download, Str2File],
    # Build the input frame from the two upstream channels: the items of
    # `flatten(ch1)` are spread as leading positional columns, followed by
    # `ch2`; the columns are then reordered by position to (0, 2, 1).
    # NOTE(review): this presumably assumes flatten(ch1) yields exactly two
    # items, so that the `ch2` column ends up in the middle - confirm.
    input_data=lambda ch1, ch2: tibble(
        *flatten(ch1),
        ch2,
        _name_repair="minimal",
    ) >> select(0, 2, 1),
    # NOTE(review): presumably "clscol" names the class column and "classes"
    # the two groups being compared - confirm against the FGSEA process docs.
    envs={"clscol": "Group", "classes": ["MMP9", "CTRL"]}
)


def pipeline():
    return get_pipeline(__file__).set_starts(Download, Str2File).set_data(
        [
            "https://www.ncbi.nlm.nih.gov/geo/download/"
            "?acc=GSE179367"
            "&format=file"
            "&file=GSE179367%5Fgene%5Fcount%2Ereal%2Etxt%2Egz",
            "https://www.genepattern.org/tutorial/linkedFiles/"
            "export_gnf.GENE_SYMBOL.gmt",
        ],
        [(
Exemplo n.º 19
0
def test_neg():
    """Unary minus on a column inside `mutate` negates every value."""
    frame = tibble(x=[1, 2])
    result = frame >> mutate(y=-f.x)
    expected = tibble(x=[1, 2], y=[-1, -2])
    assert_frame_equal(result, expected)