def test_variable_unrecognized_type():
    """``Variable.set_type`` raises ``ValueError`` for each of these inputs.

    The value stored under ``"x"`` is, in turn, a plain int, a list and a dict.
    """
    rejected_inputs = (
        {"x": 1},
        {"x": [1, 2]},
        {"x": {"a": 1}},
    )
    for data in rejected_inputs:
        # A fresh Variable is used for every attempt, as nothing is shared
        # between the individual checks.
        with pytest.raises(ValueError):
            Variable("x").set_type(data)
def test_subset_index():
    """A response written as ``threecats['b']`` becomes a Variable with level ``"b"``."""
    parsed = model_description("threecats['b'] ~ continuous + dummy")

    # Build the expected description by hand: implicit intercept, the two
    # predictors, and the response restricted to level "b".
    expected = Model(
        Intercept(),
        Term(Variable("continuous")),
        Term(Variable("dummy")),
    )
    response = Response(Term(Variable("threecats", level="b")))
    expected.add_response(response)

    assert parsed == expected
def test_term():
    """A single-name formula yields an intercept plus one single-variable term."""
    # (formula, expected variable name); the backtick-quoted formula is
    # expected to produce a variable named without the backticks.
    cases = (
        ("x", "x"),
        ("term_name_abc", "term_name_abc"),
        ("`$%!N4m3##!! NNN`", "$%!N4m3##!! NNN"),
    )
    for formula, variable_name in cases:
        parsed = model_description(formula)
        expected = Model(Intercept(), Term(Variable(variable_name)))
        assert parsed == expected
def test_variable_set_data_errors():
    """``Variable.set_data`` raises in three situations on the same instance.

    The checks run in sequence on one ``Variable`` whose ``kind`` attribute is
    reassigned between them, so their order matters.
    """
    variable = Variable("x")

    # 1. set_data is called on a Variable that never went through set_type.
    with pytest.raises(ValueError):
        variable.set_data(True)

    # 2. `kind` holds a value other than the ones used elsewhere in this suite.
    with pytest.raises(ValueError):
        variable.kind = "hello"
        variable.set_data(True)

    # 3. `kind` is "categoric"; only a generic Exception is asserted here.
    #    NOTE(review): presumably fails because no data was attached - confirm.
    with pytest.raises(Exception):
        variable.kind = "categoric"
        variable.set_data(True)
def visitLiteralExpr(self, expr):
    """Translate a literal expression node.

    A value equal to 0 gives ``NegatedIntercept()``, a value equal to 1 gives
    ``Intercept()``, and any other value is wrapped as ``Term(Variable(value))``.
    """
    literal = expr.value
    if literal == 0:
        return NegatedIntercept()
    if literal == 1:
        return Intercept()
    return Term(Variable(literal))
def test_term_new_data_numeric():
    """A numeric Variable evaluates the original data and, later, new data."""
    training = pd.DataFrame({"x": [10, 10, 10]})

    # Parse the bare name "x" and build the Variable from the parsed node.
    tokens = Scanner("x").scan(False)
    node = Parser(tokens).parse()
    variable = Variable(node.name.lexeme, node.level)

    variable.set_type(training)
    variable.set_data()
    assert (variable.value == [10, 10, 10]).all()

    # Evaluating on a new frame must return the new values.
    fresh = pd.DataFrame({"x": [1, 2, 3]})
    assert (variable.eval_new_data(fresh).T == [1, 2, 3]).all()
def test_variable_eval_numeric():
    """``eval_numeric`` accepts an ndarray or a Series and rejects a plain list."""
    variable = Variable("x")
    expected = np.array([1, 2, 3, 4])

    # Both containers hold the same numbers, so `value` must equal `expected`
    # after each call.
    for container in (expected, pd.Series([1, 2, 3, 4])):
        variable.eval_numeric(container)
        assert np.array_equal(variable.value, expected)

    with pytest.raises(ValueError):
        variable.eval_numeric([1, 2, 3])
def test_variable_str():
    """``str`` of a Variable has the expected text and ``repr`` matches ``str``."""
    # (constructor arguments, expected string form)
    cases = (
        (("a",), "Variable(a)"),
        (("a", "hi"), "Variable(a, reference='hi')"),
    )

    for args, text in cases:
        assert str(Variable(*args)) == text

    for args, _ in cases:
        assert repr(Variable(*args)) == str(Variable(*args))
def test_term_remove():
    """Every ``x - <something>`` formula below parses to the intercept plus ``x``."""
    formulas = (
        "x - y",
        "x - 5",
        "x - f(x)",
        "x - y:z",
        "x - (1|g)",
        "x - (z + y)",
    )
    for formula in formulas:
        parsed = model_description(formula)
        expected = Model(Intercept(), Term(Variable("x")))
        assert parsed == expected
def test_term_interaction():
    """The ``:`` operator builds interaction terms and rejects some operands.

    ``x:5`` and ``x:(1|g)`` are expected to raise ``TypeError``.
    """

    def check(formula, *terms):
        # Every expected model here starts with the implicit intercept.
        parsed = model_description(formula)
        assert parsed == Model(Intercept(), *terms)

    check("x:y", Term(Variable("x"), Variable("y")))

    with pytest.raises(TypeError):
        model_description("x:5")

    check(
        "x:f(x)",
        Term(Variable("x"), Call(LazyCall("f", [LazyVariable("x")], {}))),
    )

    check("x:y:z", Term(Variable("x"), Variable("y"), Variable("z")))

    check(
        "x:y*z",
        Term(Variable("x"), Variable("y")),
        Term(Variable("z")),
        Term(Variable("x"), Variable("y"), Variable("z")),
    )

    # Note the parenthesis, here `*` resolves earlier than `:`
    check(
        "x:(y*z)",
        Term(Variable("x"), Variable("y")),
        Term(Variable("x"), Variable("z")),
        Term(Variable("x"), Variable("y"), Variable("z")),
    )

    with pytest.raises(TypeError):
        model_description("x:(1|g)")

    check(
        "x:(z + y)",
        Term(Variable("x"), Variable("z")),
        Term(Variable("x"), Variable("y")),
    )
def test_term_add():
    """The ``+`` operator appends the right-hand operand's terms after ``x``."""

    def check(formula, *added_terms):
        # Each expected model is the intercept, then `x`, then whatever the
        # right-hand operand contributes.
        parsed = model_description(formula)
        assert parsed == Model(Intercept(), Term(Variable("x")), *added_terms)

    check("x + y", Term(Variable("y")))

    # A numeric literal other than 0/1 is expected as a Variable holding 5.
    check("x + 5", Term(Variable(5)))

    check("x + f(x)", Term(Call(LazyCall("f", [LazyVariable("x")], {}))))

    check("x + y:z", Term(Variable("y"), Variable("z")))

    check("x + (1|g)", GroupSpecificTerm(Intercept(), Term(Variable("g"))))

    check("x + (z + y)", Term(Variable("z")), Term(Variable("y")))
def test_group_specific_interactions():
    """``(a*b|h+g)`` expands to one group-specific term per expression and factor.

    The expected terms are listed expression by expression (``a``, ``b``,
    ``a:b``), each paired with factor ``h`` and then ``g``. The first formula
    additionally expects group-specific intercepts, placed first.
    """
    factors = ("h", "g")
    slope_names = (("a",), ("b",), ("a", "b"))

    def intercept_terms():
        return [
            GroupSpecificTerm(expr=Intercept(), factor=Term(Variable(factor)))
            for factor in factors
        ]

    def slope_terms():
        return [
            GroupSpecificTerm(
                expr=Term(*[Variable(name) for name in names]),
                factor=Term(Variable(factor)),
            )
            for names in slope_names
            for factor in factors
        ]

    # Group-specific intercept kept inside the parentheses.
    parsed = model_description("0 + (a*b|h+g)")
    expected = Model(*intercept_terms(), *slope_terms())
    assert parsed == expected

    # Group-specific intercept removed with the inner `0 +`.
    parsed = model_description("0 + (0 + a*b|h+g)")
    expected = Model(*slope_terms())
    assert parsed == expected
def test_term_slash():
    """The ``/`` operator keeps ``x`` and adds ``x`` interacted with the right side.

    ``x / 5`` and ``x / (1|g)`` are expected to raise ``TypeError``.
    """

    def check(formula, *nested_terms):
        # Each expected model is the intercept, then `x`, then the terms
        # produced for the right-hand operand.
        parsed = model_description(formula)
        assert parsed == Model(Intercept(), Term(Variable("x")), *nested_terms)

    check("x / y", Term(Variable("x"), Variable("y")))

    with pytest.raises(TypeError):
        model_description("x / 5")

    check(
        "x / f(x)",
        Term(Variable("x"), Call(LazyCall("f", [LazyVariable("x")], {}))),
    )

    check("x / y:z", Term(Variable("x"), Variable("y"), Variable("z")))

    with pytest.raises(TypeError):
        model_description("x / (1|g)")

    check(
        "x / (z + y)",
        Term(Variable("x"), Variable("z")),
        Term(Variable("x"), Variable("y")),
    )
def test_term_new_data_categoric():
    """A categoric Variable encodes new data using the levels it saw originally.

    The scenario runs twice: once with ``set_data(spans_intercept=True)`` and
    once with ``set_data()``. In both, a level absent from the original data
    must raise ``ValueError``.
    """
    unseen_level_message = "The levels D in 'x' are not present in the original data set."

    def build_variable(frame, **set_data_kwargs):
        # Parse the bare name "x", then type and evaluate it against `frame`.
        node = Parser(Scanner("x").scan(False)).parse()
        variable = Variable(node.name.lexeme, node.level)
        variable.set_type(frame)
        variable.set_data(**set_data_kwargs)
        return variable

    def assert_unseen_level_rejected(variable):
        # It remembers it saw "A", "B", and "C", but not "D".
        # So when you pass a new level, it raises a ValueError.
        with pytest.raises(ValueError, match=unseen_level_message):
            frame = pd.DataFrame({"x": ["B", "C", "D"]})
            variable.eval_new_data(frame)

    # Full rank encoding
    full_rank = build_variable(
        pd.DataFrame({"x": ["A", "B", "C"]}), spans_intercept=True
    )
    assert (np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]]) == full_rank.value).all()

    subset = pd.DataFrame({"x": ["B", "C"]})
    assert (full_rank.eval_new_data(subset) == np.array([[0, 1, 0], [0, 0, 1]])).all()

    assert_unseen_level_rejected(full_rank)

    # The same with reduced encoding
    reduced = build_variable(pd.DataFrame({"x": ["A", "B", "C"]}))
    assert (np.array([[0, 0], [1, 0], [0, 1]]) == reduced.value).all()

    subset = pd.DataFrame({"x": ["A", "C"]})
    assert (reduced.eval_new_data(subset) == np.array([[0, 0], [0, 1]])).all()

    assert_unseen_level_rejected(reduced)
def visitQuotedNameExpr(self, expr):
    """Translate a quoted-name expression node into a single-variable ``Term``.

    The variable name is the node's lexeme without its first and last
    character (presumably the enclosing quote characters).
    """
    # Quoted names don't accept levels yet.
    quoted = expr.expression.lexeme
    name = quoted[1:-1]
    return Term(Variable(name))
def visitVariableExpr(self, expr):
    """Translate a variable expression node into ``Term(Variable(name, level))``.

    ``level`` is ``expr.level.value`` when ``expr.level`` is truthy, otherwise
    ``None``.
    """
    level = expr.level.value if expr.level else None
    return Term(Variable(expr.name.lexeme, level))