def test_evalenv_equality():
    """Environments captured from the same frame are equal; others are not."""
    # These locals are never read directly; they only populate the frame
    # that ``Environment.capture()`` snapshots below.
    a = 1
    b = "hello friend"
    c = [1, 2, 3]
    d = {"a": 1, "b": 2}

    # The comparison is made in terms of the ids of the namespaces
    env_first = Environment.capture()
    env_second = Environment.capture()
    assert env_first == env_second

    # Capturing with depth 1 must yield an environment that differs from
    # the one captured with the default arguments.
    env_outer = Environment.capture(1)
    assert env_first != env_outer
def test_evalenv_capture():
    """Exercise the accepted and rejected argument forms of ``capture``."""
    # Two-integer forms: these only need to succeed without raising.
    Environment.capture(0, 0)
    Environment.capture(1, 0)
    env = Environment.capture(0, 1)

    # An existing Environment instance is accepted as the first argument.
    Environment.capture(env)

    # A string is rejected with TypeError.
    with pytest.raises(TypeError):
        Environment.capture("blah")

    # A depth of 100 is rejected with ValueError
    # (presumably because the call stack is not that deep -- confirm).
    with pytest.raises(ValueError):
        Environment.capture(100)
def test_call_unrecognized_type():
    """``set_type`` must raise ValueError for int, list and set values."""
    # Must stay bound to the name ``f``: the expression "f(x)" below refers
    # to it by name (presumably resolved through ``env`` -- confirm).
    f = lambda x: x
    env = Environment.capture()

    # Call expression evaluating to a plain int.
    with pytest.raises(ValueError):
        call("f(x)").set_type({"x": 1}, env)

    # Variable holding a list.
    with pytest.raises(ValueError):
        call("x").set_type({"x": [1, 2]}, env)

    # Variable holding a set.
    with pytest.raises(ValueError):
        call("x").set_type({"x": {5, 6}}, env)
def test_call_new_data_numeric_stateful_transform():
    """``center(x)`` must re-use the mean of the original data on new data."""
    # The center() transformation remembers the value of the mean
    # of the first dataset passed, which is 10.
    env = Environment.capture(0)
    data = pd.DataFrame({"x": [10, 10, 10]})

    call_term = Call(LazyCall("center", [LazyVariable("x")], {}))
    call_term.set_type(data, env)
    call_term.set_data()

    # Every original value equals the mean, so the centered column is zero.
    assert (call_term.value == [0, 0, 0]).all()

    # New values are shifted by the remembered mean (10), not by their own.
    data = pd.DataFrame({"x": [1, 2, 3]})
    assert (call_term.eval_new_data(data) == [-9.0, -8.0, -7.0]).all()
def testevalenv_namespace():
    """A captured environment exposes the caller's locals via ``namespace``."""
    # These names are looked up by string key below; do not rename them.
    a = 1
    b = "hello friend"
    c = [1, 2, 3]
    d = {"a": 1, "b": 2}

    env = Environment.capture()

    expected = {"a": a, "b": b, "c": c, "d": d}
    for key, value in expected.items():
        assert env.namespace[key] == value
def test_get_function_from_module():
    """Dotted and plain names must resolve to the matching callables."""
    # ``np`` and ``function`` are deliberately bound as locals of this test:
    # the names passed to ``get_function_from_module`` refer to them.
    import numpy as np

    def function(x):
        return x

    env = Environment.capture()

    # (name to resolve, callable it must resolve to)
    cases = [
        ("np.random.normal", np.random.normal),
        ("np.exp", np.exp),
        ("function", function),
    ]
    for dotted_name, expected in cases:
        resolved = get_function_from_module(dotted_name, env)
        assert resolved == expected
def test_evalenv_with_outer_namespace():
    """``with_outer_namespace`` must add names missing from the capture."""
    # Looked up by string key below; do not rename.
    a = 1
    b = "hello friend"

    env = Environment.capture()

    # Only the locals defined above are visible at first.
    assert env.namespace["a"] == a
    assert env.namespace["b"] == b
    assert "c" not in env.namespace
    assert "d" not in env.namespace

    # After attaching an outer namespace, its entries become reachable.
    extra = {"c": [1, 2, 3], "d": {"a": 1, "b": 2}}
    env = env.with_outer_namespace(extra)
    assert env.namespace["c"] == [1, 2, 3]
    assert env.namespace["d"] == {"a": 1, "b": 2}
def test_call_new_data_categoric_stateful_transform():
    """``C(x)`` must re-use the levels of the original data on new data.

    Two encodings are exercised: the full-rank one (``spans_intercept=True``)
    and the reduced one (default ``set_data()``). In both cases new data with
    known levels is encoded against the original levels 1, 2 and 3, and new
    data containing the unseen level 4 must raise ``ValueError``.
    """
    env = Environment.capture(0)

    # Same message is expected for both encodings.
    unseen_level_msg = re.escape(
        "The levels 4 in 'C(x)' are not present in the original data set"
    )

    # Full rank encoding
    data = pd.DataFrame({"x": [1, 2, 3]})
    call_term = Call(LazyCall("C", [LazyVariable("x")], {}))
    call_term.set_type(data, env)
    call_term.set_data(spans_intercept=True)
    assert (np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]]) == call_term.value).all()

    data = pd.DataFrame({"x": [2, 3]})
    assert (call_term.eval_new_data(data) == np.array([[0, 1, 0], [0, 0, 1]])).all()

    # Build the frame outside ``pytest.raises`` so that only the call under
    # test can satisfy the expected exception.
    data = pd.DataFrame({"x": [2, 3, 4]})
    with pytest.raises(ValueError, match=unseen_level_msg):
        call_term.eval_new_data(data)

    # The same with reduced encoding
    data = pd.DataFrame({"x": [1, 2, 3]})
    call_term = Call(LazyCall("C", [LazyVariable("x")], {}))
    call_term.set_type(data, env)
    call_term.set_data()
    assert (np.array([[0, 0], [1, 0], [0, 1]]) == call_term.value).all()

    data = pd.DataFrame({"x": [1, 3]})
    assert (call_term.eval_new_data(data) == np.array([[0, 0], [0, 1]])).all()

    # It remembers it saw the levels 1, 2 and 3, but not 4.
    # So when you pass a new level, it raises a ValueError.
    data = pd.DataFrame({"x": [2, 3, 4]})
    with pytest.raises(ValueError, match=unseen_level_msg):
        call_term.eval_new_data(data)