Пример #1
0
    def test_nested_anno_dependencies(self):
        """Check that annotators declared through ``dep_annos`` reach the frame.

        ``Nesting`` lists a ``Constant`` and a ``Nested`` annotator as its
        dependencies, and ``Nested`` lists a further ``Constant``. After the
        pipegraph has run, the columns of all four are asserted on ``df``.
        """

        class Nested(Annotator):
            columns = ["b"]

            def calc(self, df):
                row_count = len(df)
                return pd.Series([10] * row_count)

            def dep_annos(self):
                return [Constant("Nestedconst", 5)]

        class Nesting(Annotator):
            columns = ["a"]

            def calc(self, df):
                row_count = len(df)
                return pd.Series([15] * row_count)

            def dep_annos(self):
                return [Constant("Nestingconst", 5), Nested()]

        outer_annotator = Nesting()
        ddf = DelayedDataFrame(
            "shu", lambda: pd.DataFrame({"A": [1, 2, 3], "B": ["a", "b", "c"]})
        )
        ddf += outer_annotator
        ddf.write()
        ppg.run_pipegraph()

        # Column name -> value every row must carry, checked in this order.
        expected_values = {
            "a": 15,
            "b": 10,
            "Nestedconst": 5,
            "Nestingconst": 5,
        }
        for column, value in expected_values.items():
            assert (ddf.df[column] == value).all()
Пример #2
0
    def test_write_mixed_manglers(self):
        """A second ``write`` with a different mangler must raise.

        The first ``write`` is registered with an identity mangler; calling
        ``write`` again with another mangler function is expected to raise
        ``ppg.JobContractError``.
        """
        frame = pd.DataFrame({"A": [1, 2]})

        def load():
            return frame

        def head_mangler(df):
            return df.head()

        ddf = DelayedDataFrame("shu", load)
        ddf.write(mangler_function=lambda df: df)

        with pytest.raises(ppg.JobContractError):
            ddf.write(mangler_function=head_mangler)
Пример #3
0
    def test_write(self):
        """Write the frame through the pipegraph and read it back as TSV.

        The first element returned by ``write()`` exposes ``filenames``; after
        the pipegraph has run, the first of those files must exist and must
        round-trip to the source frame.
        """
        expected = pd.DataFrame({"A": [1, 2]})

        def load():
            return expected

        ddf = DelayedDataFrame("shu", load)
        write_job = ddf.write()[0]
        ppg.run_pipegraph()

        output_name = write_job.filenames[0]
        assert Path(output_name).exists()
        written = pd.read_csv(output_name, sep="\t")
        assert_frame_equal(written, expected)
Пример #4
0
    def test_filtering_on_annotator_missing(self):
        """Filtering on a column nobody provides must fail the load job.

        The filter callback reads ``df["aaA"]``, a column that is not part of
        the source frame (``A``, ``B``). Running the pipegraph is expected to
        raise ``ppg.RuntimeError`` and the load job's exception repr must
        mention ``KeyError``.
        """

        # NOTE(review): this annotator is defined but never added to either
        # frame in this test - confirm whether that is intentional.
        class A(Annotator):
            cache_name = "A"
            columns = ["aa"]

            def calc(self, df):
                row_count = len(df)
                # Alternating "a"/"b" labels, trimmed to exactly one per row.
                alternating = ["a", "b"] * int(row_count / 2 + 1)
                return pd.DataFrame(
                    {self.columns[0]: alternating[:row_count]},
                    index=df.index,
                )

        parent = DelayedDataFrame(
            "shu", lambda: pd.DataFrame({"A": [1, 2], "B": ["c", "d"]})
        )
        child = parent.filter("sha", lambda df: df["aaA"] == "a")
        load_job = child.load()
        parent.write()
        print("run now")
        with pytest.raises(ppg.RuntimeError):
            ppg.run_pipegraph()

        failure_repr = repr(load_job.lfg.exception)
        assert "KeyError" in failure_repr
Пример #5
0
    def test_annotator_depending_on_actual_jobs(self):
        """An annotator may depend on a regular pipegraph job via ``deps``.

        ``TestAnno.deps`` returns a ``FileGeneratingJob`` that writes
        ``fileA``; ``calc`` reads that file and uses its content as the value
        of column ``C``.
        """

        def write_file_a():
            Path("fileA").write_text("hello")

        class TestAnno(Annotator):
            columns = ["C"]

            def calc(self, df):
                file_content = Path("fileA").read_text()
                row_count = len(df)
                return pd.Series([file_content] * row_count)

            def deps(self, ddf):
                return [ppg.FileGeneratingJob("fileA", write_file_a)]

        frame = DelayedDataFrame(
            "shu", lambda: pd.DataFrame({"A": [1, 2, 3], "B": ["a", "b", "c"]})
        )
        frame.add_annotator(TestAnno())
        frame.write()
        ppg.run_pipegraph()

        column_c = frame.df["C"]
        assert (column_c == "hello").all()
Пример #6
0
    def test_write_excel(self):
        """Write the frame to ``sha.xls`` and read it back with ``read_excel``.

        Also checks that constructing the frame with ``result_dir="sha"``
        creates that directory, that ``df`` is available right away, and that
        the only non-annotator column is ``A``.
        """
        test_df = pd.DataFrame({"A": [1, 2]})

        def load():
            return test_df

        a = DelayedDataFrame("shu", load, result_dir="sha")
        assert Path("sha").exists()
        assert_frame_equal(a.df, test_df)
        # Reduce the elementwise comparison explicitly instead of relying on
        # the implicit truthiness of a one-element comparison result.
        assert (a.non_annotator_columns == ["A"]).all()
        # The second element returned by write() is used as a path here.
        fn = a.write("sha.xls")[1]
        assert fn.exists()
        assert_frame_equal(pd.read_excel(fn), test_df)
Пример #7
0
    def test_write(self):
        """Write the frame with the default file name and read it back as TSV.

        Also checks that constructing the frame with ``result_dir="sha"``
        creates that directory, that ``df`` is available right away, that the
        only non-annotator column is ``A``, and that the output lands below a
        ``sha`` directory.
        """
        test_df = pd.DataFrame({"A": [1, 2]})

        def load():
            return test_df

        a = DelayedDataFrame("shu", load, result_dir="sha")
        assert Path("sha").exists()
        assert_frame_equal(a.df, test_df)
        # Reduce the elementwise comparison explicitly instead of relying on
        # the implicit truthiness of a one-element comparison result.
        assert (a.non_annotator_columns == ["A"]).all()
        # The second element returned by write() is used as a path here.
        fn = a.write()[1]
        # NOTE(review): the "/sha" check assumes POSIX path separators.
        assert "/sha" in str(fn.parent)
        assert fn.exists()
        assert_frame_equal(pd.read_csv(fn, sep="\t"), test_df)
Пример #8
0
    def test_write_excel2(self):
        """Write a 257-column frame to ``sha.xls`` and read it back as TSV.

        Although the requested name ends in ``.xls``, the result is parsed
        with ``read_csv(sep="\\t")``.
        """
        # NOTE(review): presumably 257 columns is chosen to exceed the xls
        # column limit so the writer falls back to tab-separated output -
        # confirm against DelayedDataFrame.write.
        column_data = {"A%i" % idx: [1, 1] for idx in range(257)}
        wide_df = pd.DataFrame(column_data)

        def load():
            return wide_df

        ddf = DelayedDataFrame("shu", load, result_dir="sha")
        out_path = ddf.write("sha.xls")[1]
        assert out_path.exists()
        read_back = pd.read_csv(out_path, sep="\t")
        assert_frame_equal(read_back, wide_df)
Пример #9
0
    def test_write_mangle(self):
        """Write the frame through a mangler and compare with the mangled input.

        The mangler drops column ``A`` and keeps only rows where ``B`` is
        ``"c"``; the file on disk must equal the mangler applied to the source
        frame.
        """
        source_df = pd.DataFrame({"A": [1, 2], "B": ["c", "d"]})

        def load():
            return source_df

        def mangle(df):
            without_a = df.drop("A", axis=1)
            return without_a[without_a.B == "c"]

        ddf = DelayedDataFrame("shu", load)
        assert_frame_equal(ddf.df, source_df)
        columns_match = ddf.non_annotator_columns == ["A", "B"]
        assert columns_match.all()

        out_path = ddf.write("test.csv", mangle)[1]
        assert out_path.exists()
        written = pd.read_csv(out_path, sep="\t")
        assert_frame_equal(written, mangle(source_df))