def test_project_kwargs2(self):
    """Project one plain column plus one keyword-named expression.

    The projected frame must contain exactly ``A`` and ``Z_col``, while the
    wrapper that was projected from still exposes more than two columns.
    """
    source = Wrap(create_df())
    projected = source.project("A", Z_col="B * 2")

    projected_columns = list(projected.df.columns)
    self.assertListEqual(["A", "Z_col"], projected_columns)

    z_values = list(projected.df["Z_col"])
    self.assertListEqual([0, 2, 4, 6, 8], z_values)

    # The source wrapper must not have been narrowed by the projection.
    self.assertGreater(len(source.df.columns), 2)
def test_project_away_badcolumn():
    """project_away on a column absent from the frame raises KeyError."""
    frame = pd.DataFrame()
    frame["A"] = [1, 2]
    wrapped = Wrap(frame)

    # Only column "A" exists, so removing "B" is expected to fail.
    with pytest.raises(KeyError):
        wrapped.project_away("B")
def test_count():
    """count() yields a frame with a single ``Count`` column holding 5."""
    counted = Wrap(create_df()).count()

    column_names = list(counted.df.columns)
    assert ["Count"] == column_names
    assert [5] == list(counted.df["Count"])
def test_project_away_wildcard_nomatch():
    """project_away raises KeyError when a wildcard pattern matches nothing."""
    frame = pd.DataFrame()
    frame["A"] = [1, 2]
    wrapped = Wrap(frame)

    # "A" exists, but no column matches the "B*" pattern.
    with pytest.raises(KeyError):
        wrapped.project_away("A, B*")
def test_toint_str(self):
    """toint applied to the string literal '5' gives 5 in each of 5 rows."""
    extended = Wrap(create_df()).extend("D = toint('5')")

    expected = [5] * 5
    self.assertListEqual(expected, list(extended.df["D"]))
def test_where_notstartswith_cs(self):
    """Case-sensitive !startswith_cs drops only the lower-case "hi" row."""
    frame = create_df()
    frame["C"] = ["hi", "HI there", "today", "what", "this"]

    filtered = Wrap(frame).where("C !startswith_cs \"hi\"")

    # "HI there" survives because the comparison is case-sensitive.
    remaining = list(filtered.df["C"])
    self.assertListEqual(["HI there", "today", "what", "this"], remaining)
def test_union_execute_multiple_tables():
    """Outer union of three tables through execute() keeps every column.

    Rows coming from a table that lacks a column are expected to hold NaN in
    that column; NaN is mapped to -1 via ``replace_nan`` before comparing.
    """
    first = pd.DataFrame()
    first["A"] = [1, 2]
    first["B"] = [10, 20]

    second = pd.DataFrame()
    second["A"] = [3, 4]
    second["C"] = [30, 40]

    third = pd.DataFrame()
    third["A"] = [5, 6]
    third["D"] = [50, 60]

    # The query refers to the extra tables by the names bound here.
    wrapped = Wrap(first).let(df2=second, df3=third)
    combined = wrapped.execute(""" self | union kind=outer df2, df3 """)
    result = combined.df

    assert ["A", "B", "C", "D"] == list(result.columns)
    assert [1, 2, 3, 4, 5, 6] == list(result["A"])
    assert [10, 20, -1, -1, -1, -1] == replace_nan(result["B"], -1)
    assert [-1, -1, 30, 40, -1, -1] == replace_nan(result["C"], -1)
    assert [-1, -1, -1, -1, 50, 60] == replace_nan(result["D"], -1)
def test_extend_noname(self):
    """Unnamed extend expressions get the names Column1, Column2."""
    only_b = create_df()[["B"]]
    extended = Wrap(only_b).extend("B*2, B+1")

    column_names = list(extended.df.columns)
    assert ["B", "Column1", "Column2"] == column_names
def test_sort_1(self):
    """Sort by an expression and verify the source wrapper is unchanged."""
    frame = create_df()
    frame["U"] = [9, 8, 7, 1, 2]
    original = Wrap(frame)

    ordered = original.sort("U + 1")

    # Expected row order after sorting on "U + 1".
    self.assertListEqual([0, 1, 2, 4, 3], list(ordered.df["B"]))
    # The wrapper that was sorted from must keep its original order.
    untouched = list(original.df["B"])
    self.assertListEqual(list(range(5)), untouched)
def test_exp10():
    """exp10(A) must match numpy's 10 ** x for x in 0..4."""
    extended = Wrap(create_df()).extend("F = exp10(A)")

    expected = list(np.power(10, [0, 1, 2, 3, 4]))
    assert expected == list(extended.df["F"])
def test_log2():
    """log2(A) must match numpy's log2 for the inputs 0..4."""
    extended = Wrap(create_df()).extend("F = log2(A)")

    expected = list(np.log2([0, 1, 2, 3, 4]))
    assert expected == list(extended.df["F"])
def test_sqrt():
    """sqrt(A) must match numpy's sqrt for the inputs 0..4."""
    extended = Wrap(create_df()).extend("F = sqrt(A)")

    expected = list(np.sqrt([0, 1, 2, 3, 4]))
    assert expected == list(extended.df["F"])
def test_not():
    """not(A) inverts each boolean of column A into a new column B."""
    frame = pd.DataFrame()
    frame["A"] = [True, False, False]

    negated = Wrap(frame).extend("B = not(A)")

    assert ["A", "B"] == list(negated.df.columns)
    assert [False, True, True] == list(negated.df["B"])
def test_execute_let_semicolon():
    """A let statement ending in a semicolon stores its value in the var map."""
    wrapped = Wrap(create_df())
    after_let = wrapped.execute("let a = 'G2';")

    variables = after_let._get_var_map()
    assert "G2" == variables["a"]
def test_summarize_min_noby(self):
    """summarize min(F) without a by-clause gives one ``min_F`` value of 1."""
    frame = create_df()
    frame["F"] = [1, 8, 2, 3, 3]

    summary = Wrap(frame).summarize("min(F)")

    self.assertListEqual(["min_F"], list(summary.df.columns))
    self.assertListEqual([1], list(summary.df["min_F"]))
def test_isinf():
    """isinf flags only +/-inf; finite values, None and NaN are False."""
    frame = create_df()
    frame["D"] = [1.0, -np.inf, np.inf, None, np.nan]

    flagged = Wrap(frame).extend("F = isinf(D)")

    expected = [False, True, True, False, False]
    assert expected == list(flagged.df["F"])
def test_tostring(self):
    """tostring renders values as text; NaN, "" and None become ""."""
    frame = create_df()
    frame["A"] = [1, "HI", np.nan, "", None]

    converted = Wrap(frame).extend("D = tostring(A)")

    expected = ["1", "HI", "", "", ""]
    assert expected == list(converted.df["D"])
def test_todynamic(self):
    """todynamic turns a JSON string into an object indexable by key."""
    frame = pd.DataFrame()
    frame["A"] = ['{"k1":"v1"}', '{"k1" : "v2"}']

    parsed = Wrap(frame).extend("D = todynamic(A)")

    first_row = parsed.df["D"][0]
    self.assertEqual("v1", first_row["k1"])
def test_base64_encode_tostring():
    """base64_encode_tostring of a let-bound scalar is compared per row."""
    frame = pd.DataFrame()
    frame["A"] = [_encode_base64("hello"), _encode_base64("There ")]
    plain = "hello"

    # The query refers to the scalar by the name "h" bound in let().
    bound = Wrap(frame).let(h=plain)
    compared = bound.extend("B = A == base64_encode_tostring(h)")

    assert [True, False] == list(compared.df["B"])
def test_summarize_avg_noby(self):
    """summarize avg(F) without a by-clause gives one ``avg_F`` value of 2."""
    frame = create_df()
    frame["F"] = [1, 1, 2, 3, 3]

    summary = Wrap(frame).summarize("avg(F)")

    self.assertListEqual(["avg_F"], list(summary.df.columns))
    self.assertListEqual([2], list(summary.df["avg_F"]))
def test_summarize_countif_noby(self):
    """summarize countif(F > 2) into a named column ``C`` counts two rows."""
    frame = create_df()
    frame["F"] = [1, 1, 2, 4, 3]

    summary = Wrap(frame).summarize("C=countif(F > 2)")

    self.assertListEqual(["C"], list(summary.df.columns))
    # Only the values 4 and 3 exceed 2.
    self.assertListEqual([2], list(summary.df["C"]))
def test_dynamic_bag_squarebrackets():
    """Square-bracket indexing reaches top-level and nested dynamic keys."""
    frame = pd.DataFrame()
    frame["A"] = [
        '{ "k": "v0", "k2": { "k3": 3 } }',
        '{ "k": "v", "k2": { "k3": 13 } }',
    ]

    parsed = Wrap(frame).extend("d = todynamic(A)")
    indexed = parsed.extend("f = d['k'], f1 = d['k2']['k3']")

    assert ["v0", "v"] == list(indexed.df["f"])
    assert [3, 13] == list(indexed.df["f1"])
def test_parse_json():
    """parse_json must produce the same values as todynamic."""
    frame = pd.DataFrame()
    frame["A"] = ["[1, 2]", '{ "a" : 7 }', '{ "b" : "val" }']

    parsed = Wrap(frame).extend("d = todynamic(A), p = parse_json(A)")

    via_todynamic = list(parsed.df["d"])
    via_parse_json = list(parsed.df["p"])
    assert via_todynamic == via_parse_json
def test_dynamic_dot():
    """Dot access on dynamic values; a JSON array row yields None/NaN."""
    frame = pd.DataFrame()
    frame["A"] = [
        '{ "k": "v0", "k2": { "k3": 3 } }',
        '{ "k": "v", "k2": { "k3": 13 } }',
        "[1, 2]",
    ]

    parsed = Wrap(frame).extend("d = todynamic(A)")
    accessed = parsed.extend("f = d.k, f1 = d.k2.k3")

    assert ["v0", "v", None] == list(accessed.df["f"])
    # NaN in the nested lookup is normalised to None before comparing.
    assert [3, 13, None] == replace_nan(accessed.df["f1"], None)
def test_base64_encode_tostring_series():
    """base64_encode_tostring on a column matches _encode_base64 per value."""
    frame = pd.DataFrame()
    frame["A"] = ["hello", "There "]

    encoded = Wrap(frame).extend("B = base64_encode_tostring(A)")

    expected = [_encode_base64("hello"), _encode_base64("There ")]
    assert expected == list(encoded.df["B"])
def test_isnotempty(self):
    """isnotempty is False for None, NaN and "", and True otherwise."""
    frame = create_df()
    frame["A"] = [1, "hi", None, np.nan, ""]

    flagged = Wrap(frame).extend("D = isnotempty(A)")

    expected = [True, True, False, False, False]
    self.assertListEqual(expected, list(flagged.df["D"]))
def test_extract2(self):
    """extract with capture group 2 returns the text after the semicolon."""
    frame = pd.DataFrame()
    frame["A"] = ["Duration = 1;A, Duration=2;B", "Duration=3;C"]

    projected = Wrap(frame).project(
        "D=extract('uration *= *([0-9]+);([^,]*)', 2, A)"
    )

    self.assertListEqual(["D"], list(projected.df.columns))
    # For the first row the expected value is that of the first match ("A").
    extracted = list(projected.df["D"])
    self.assertListEqual(extracted, ["A", "C"])
def test_toupper(self):
    """toupper upper-cases strings, keeping whitespace, "" and None as is."""
    frame = create_df()
    frame["A"] = ["hi", "HI", " Hi", "", None]

    uppered = Wrap(frame).extend("D = toupper(A)")

    expected = ["HI", "HI", " HI", "", None]
    self.assertListEqual(expected, list(uppered.df["D"]))
def test_summarize_var_noby(self):
    """summarize variance(F) without a by-clause gives ``variance_F`` of 1."""
    frame = create_df()
    frame["F"] = [1, 1, 2, 3, 3]

    summary = Wrap(frame).summarize("variance(F)")

    self.assertListEqual(["variance_F"], list(summary.df.columns))
    self.assertListEqual([1], list(summary.df["variance_F"]))
def test_execute():
    """A piped where/where/project query via execute() returns only C."""
    wrapped = Wrap(create_df())
    query = "self | where G == 'G1' | where A >= 1 | project C"

    result = wrapped.execute(query)

    assert ["C"] == list(result.df.columns)
    assert ["foo2", "foo4"] == list(result.df["C"])