def test_func(self):
    """Run a three-step pipeline twice and compare both runs to the expected frame.

    The first pipeline multiplies the input by 4, 2 and 3 in turn, so the
    expected result is the input scaled by 24.  ``fit_gene`` is called
    twice on the same input and both results must match.  A second
    pipeline built from ``f1`` (first step explicitly named ``unit_1``)
    is then executed only to check that it runs without raising.
    """
    cache_dir = "./test_cache"
    params_dir = "./test_params"
    for directory in (cache_dir, params_dir):
        if not os.path.exists(directory):
            os.mkdir(directory)

    df = pd.DataFrame([[1, 2, 3], [4, 5, 6]], columns=["a", "b", "c"])
    expected = pd.DataFrame(
        [[24, 48, 72], [96, 120, 144]],
        columns=["a", "b", "c"],
    )

    # Anonymous steps: x * 4 * 2 * 3 == x * 24.
    prep = preprep.Preprep(cache_dir, params_dir)
    prep = prep.add(lambda x: x * 4)
    prep = prep.add(lambda x: x * 2)
    prep = prep.add(lambda x: x * 3)
    first_run = prep.fit_gene(df)
    second_run = prep.fit_gene(df)
    assert_frame_equal(first_run, second_run)
    assert_frame_equal(first_run, expected)

    # Same callable added three times; only the first step gets a name.
    prep = preprep.Preprep(cache_dir, params_dir)
    prep = prep.add(f1, name="unit_1")
    prep = prep.add(f1)
    prep = prep.add(f1)
    prep.fit_gene(df)

    for directory in (cache_dir, params_dir):
        shutil.rmtree(directory)
def test_connect(self):
    """Join pipeline branches with ``Connect`` and check the summed output.

    Case 1 joins two branches that each multiply the input by 4, so the
    sum is the input times 8.  Case 2 joins the untouched root with a
    branch that multiplies by 4, so the sum is the input times 5.
    """
    cache_dir = "./test_cache"
    params_dir = "./test_params"
    for directory in (cache_dir, params_dir):
        if not os.path.exists(directory):
            os.mkdir(directory)

    df = pd.DataFrame([[1, 2, 3], [4, 5, 6]], columns=["a", "b", "c"])

    # Case 1: 4x + 4x == 8x.
    root = preprep.Preprep(cache_dir, params_dir)
    prep1 = root.add(lambda x: x * 4)
    prep2 = root.add(lambda x: x * 4)
    joined = preprep.Connect(lambda x, y: x + y, [prep2, prep1],
                             cache_format="csv")
    ret = joined.fit_gene(df)
    expected = pd.DataFrame([[1, 2, 3], [4, 5, 6]],
                            columns=["a", "b", "c"]) * 8
    assert_frame_equal(ret, expected)

    # Case 2: x + 4x == 5x (the root itself is one of the inputs).
    root = preprep.Preprep(cache_dir, params_dir)
    prep1 = root.add(lambda x: x * 4)
    joined = preprep.Connect(lambda x, y: x + y, [root, prep1],
                             cache_format="csv")
    ret = joined.fit_gene(df)
    expected = pd.DataFrame([[1, 2, 3], [4, 5, 6]],
                            columns=["a", "b", "c"]) * 5
    assert_frame_equal(ret, expected)

    for directory in (cache_dir, params_dir):
        shutil.rmtree(directory)
def test_op(self):
    """Run operator-object pipelines and compare results and verbose logs.

    Checks performed:

    * two consecutive ``fit_gene`` calls return equal frames but print
      different verbose output;
    * ``gene`` returns a frame that differs from the ``fit_gene`` result;
    * a pipeline built from ``TestOp2`` prints different verbose output
      than the ``TestOp1`` pipeline;
    * a ``TestOp2`` instance with attribute ``a`` set to 100 prints
      different verbose output than a default ``TestOp2`` instance.
    """
    if not os.path.exists("./test_cache"):
        os.mkdir("./test_cache")
    if not os.path.exists("./test_params"):
        # Fixed: this branch used to call os.mkdir("./test_cache"), which
        # raises FileExistsError because that directory was created above.
        os.mkdir("./test_params")

    df = pd.DataFrame([[1, 2, 3], [4, 5, 6]], columns=["a", "b", "c"])

    try:
        op = TestOp1()
        # Swallow anything printed while the pipeline is being assembled.
        io = StringIO()
        sys.stdout = io
        prep = preprep.Preprep("./test_cache", "./test_params")
        prep = prep.add(op, name="1")
        prep = prep.add(op, name="2")
        prep = prep.add(op, name="3")

        # First run: capture only the output of fit_gene itself.
        io = StringIO()
        sys.stdout = io
        ret1 = prep.fit_gene(df, verbose=True)
        sys.stdout = sys.__stdout__
        stdout_log1 = io.getvalue()

        # Second run on the same input.
        io = StringIO()
        sys.stdout = io
        ret2 = prep.fit_gene(df, verbose=True)
        sys.stdout = sys.__stdout__
        stdout_log2 = io.getvalue()

        ret3 = prep.gene(df)
        assert_frame_equal(ret1, ret2)
        assert_frame_not_equal(ret1, ret3)
        self.assertNotEqual(stdout_log1, stdout_log2)

        # Different operator class under the same step names.
        op2 = TestOp2()
        io = StringIO()
        sys.stdout = io
        prep = preprep.Preprep("./test_cache", "./test_params")
        prep = prep.add(op2, name="1")
        prep = prep.add(op2, name="2")
        prep = prep.add(op2, name="3")
        prep.fit_gene(df, verbose=True)
        sys.stdout = sys.__stdout__
        stdout_log3 = io.getvalue()
        self.assertNotEqual(stdout_log1, stdout_log3)

        # Same operator class, but with a changed attribute value.
        op2 = TestOp2()
        op2.a = 100
        io = StringIO()
        sys.stdout = io
        prep = preprep.Preprep("./test_cache", "./test_params")
        prep = prep.add(op2, name="1")
        prep = prep.add(op2, name="2")
        prep = prep.add(op2, name="3")
        prep.fit_gene(df, verbose=True)
        sys.stdout = sys.__stdout__
        stdout_log4 = io.getvalue()
        self.assertNotEqual(stdout_log3, stdout_log4)
    finally:
        # Never leave stdout pointing at a StringIO if something above
        # raised while output was being captured.
        sys.stdout = sys.__stdout__

    shutil.rmtree("./test_cache")
    shutil.rmtree("./test_params")
def test_multiinput(self):
    """Run a ``Connect`` pipeline fed by two separately named inputs.

    Two roots are created with the names ``input_1`` and ``input_2``,
    each followed by a multiply-by-4 step, and joined by addition.
    ``fit_gene`` receives a dict keyed by those names.  The result is
    not inspected; the test only checks that the call does not raise.
    """
    if not os.path.exists("./test_cache"):
        os.mkdir("./test_cache")
    if not os.path.exists("./test_params"):
        os.mkdir("./test_params")

    df1 = pd.DataFrame([[2, 2, 3], [4, 5, 5]])
    df2 = pd.DataFrame([[1, 2, 3], [4, 5, 5]])

    prep1 = preprep.Preprep("./test_cache", "./test_params", "input_1")
    prep1 = prep1.add(lambda x: x * 4)
    prep2 = preprep.Preprep("./test_cache", "./test_params", "input_2")
    prep2 = prep2.add(lambda x: x * 4)
    prep = preprep.Connect(lambda x, y: x + y, [prep1, prep2],
                           cache_format="csv")
    prep.fit_gene({"input_1": df1, "input_2": df2})

    shutil.rmtree("./test_cache")
    # Fixed: the params directory is created above but was never removed,
    # so its contents leaked into whichever test ran next.
    shutil.rmtree("./test_params")
def test_multivalue_output(self):
    """Check a step that returns two values, and a pipeline on plain strings.

    ``f2`` is expected to yield two frames from ``fit_gene``.  The two
    frames of the first run must be equal to each other, and the first
    frame must be equal across two consecutive runs.  A second pipeline
    whose step returns the string ``"b"`` is then run on the string
    ``"a"`` only to check that it does not raise.
    """
    if not os.path.exists("./test_cache"):
        os.mkdir("./test_cache")
    if not os.path.exists("./test_params"):
        # Fixed: this used to create "./test_parms" (typo), leaving a
        # stray directory behind that nothing ever removed.
        os.mkdir("./test_params")

    df = pd.DataFrame([[1, 2, 3], [4, 5, 6]], columns=["a", "b", "c"])

    prep = preprep.Preprep("./test_cache", "./test_params")
    prep = prep.add(f2)
    ret11, ret12 = prep.fit_gene(df)
    # The second value of the repeat run is unpacked but not compared.
    ret21, _ = prep.fit_gene(df)
    assert_frame_equal(ret11, ret12)
    assert_frame_equal(ret11, ret21)

    # Non-DataFrame input and output.
    prep = preprep.Preprep("./test_cache", "./test_params")
    prep = prep.add(lambda df: "b")
    prep.fit_gene("a")

    shutil.rmtree("./test_cache")
    # Fixed: remove the params directory too, as the other tests do.
    shutil.rmtree("./test_params")
def test_train_pred(self):
    """Check that ``gene`` after fitting matches the ``fit_gene`` output.

    Calling ``gene`` before any ``fit_gene`` must raise ``KeyError``.
    After fitting (once on ``df`` and once more on an identical copy),
    ``gene`` on ``df`` must return a frame equal to the first
    ``fit_gene`` result.
    """
    if not os.path.exists("./test_cache"):
        os.mkdir("./test_cache")
    if not os.path.exists("./test_params"):
        # Fixed: this used to create "./test_parms" (typo) instead of the
        # directory that is passed to Preprep and removed at the end.
        os.mkdir("./test_params")

    df = pd.DataFrame([[1, 2, 3], [4, 5, 6]], columns=["a", "b", "c"])
    raw_df = df.copy()

    prep = preprep.Preprep("./test_cache", "./test_params")
    prep = prep.add(TestOp3(), name="test1")
    prep = prep.add(TestOp3(), name="test2")

    # gene() before the pipeline has been fitted must fail.
    with self.assertRaises(KeyError):
        prep.gene(df)

    # In case cache file doesn't exist.
    ret1 = prep.fit_gene(df)
    # In case cache file exists.
    prep.fit_gene(raw_df)
    ret2 = prep.gene(df)
    assert_frame_equal(ret1, ret2)

    shutil.rmtree("./test_cache")
    shutil.rmtree("./test_params")
def test_feather_support(self):
    """Run a single step configured with ``cache_format="feather"``.

    The step multiplies the input by 4.  ``fit_gene`` is called twice on
    the same input; both results must be equal to each other and to the
    input scaled by 4.
    """
    cache_dir = "./test_cache"
    params_dir = "./test_params"
    for directory in (cache_dir, params_dir):
        if not os.path.exists(directory):
            os.mkdir(directory)

    df = pd.DataFrame([[1, 2, 3], [4, 5, 6]], columns=["a", "b", "c"])
    expected = pd.DataFrame([[1, 2, 3], [4, 5, 6]],
                            columns=["a", "b", "c"]) * 4

    prep = preprep.Preprep(cache_dir, params_dir)
    prep = prep.add(lambda x: x * 4, name="feather_test",
                    cache_format="feather")
    first_run = prep.fit_gene(df)
    second_run = prep.fit_gene(df)

    assert_frame_equal(first_run, second_run)
    assert_frame_equal(first_run, expected)

    for directory in (cache_dir, params_dir):
        shutil.rmtree(directory)