def test_compose_kwargs_sql_executor_time_filter(dummy_contessa, ctx):
    """Check the WHERE clause ``SqlExecutor.compose_where_time_filter`` builds.

    Three ways of declaring ``time_filter`` on a rule are exercised:

    * a bare column name,
    * a one-item list that names only ``column``,
    * a two-item list with an explicit ``days`` value per column.
    """
    ts_format = "%Y-%m-%d %H:%M:%S"
    executor = SqlExecutor(
        Table(schema_name="tmp", table_name="hello_world"),
        dummy_contessa.conn,
        ctx,
    )

    # When no ``days`` is given, the expected lower bound is 30 days before
    # ``task_ts``; the first two cases share this exact expectation.
    computed_datetime = (ctx["task_ts"] - timedelta(days=30)).strftime(ts_format)
    default_window = f"(created_at >= '{computed_datetime} UTC'::timestamptz AND created_at < '{ctx['task_ts']} UTC'::timestamptz)"

    string_rule = NotNullRule(
        "not_null_name", "not_null", "src", time_filter="created_at"
    )
    string_clause = executor.compose_where_time_filter(string_rule)
    assert string_clause == default_window, "time_filter is string"

    column_only_rule = NotNullRule(
        "not_null_name",
        "not_null",
        "src",
        time_filter=[{"column": "created_at"}],
    )
    column_only_clause = executor.compose_where_time_filter(column_only_rule)
    assert column_only_clause == default_window, "time_filter has only column"

    # Two columns, each with its own look-back; the expected clauses are
    # joined with OR.
    per_column_filters = [
        {"column": "created_at", "days": 10},
        {"column": "updated_at", "days": 1},
    ]
    two_column_rule = NotNullRule(
        "not_null_name", "not_null", "src", time_filter=per_column_filters
    )
    two_column_clause = executor.compose_where_time_filter(two_column_rule)
    computed_created = (ctx["task_ts"] - timedelta(days=10)).strftime(ts_format)
    computed_updated = (ctx["task_ts"] - timedelta(days=1)).strftime(ts_format)
    two_column_expected = (
        f"(created_at >= '{computed_created} UTC'::timestamptz AND created_at < '{ctx['task_ts']} UTC'::timestamptz) OR "
        f"(updated_at >= '{computed_updated} UTC'::timestamptz AND updated_at < '{ctx['task_ts']} UTC'::timestamptz)"
    )
    assert two_column_clause == two_column_expected, "time_filter has 2 members"
def test_direct_time_filter_usage(dummy_contessa, ctx):
    """Check that a ready-made ``TimeFilter`` can be passed to a rule as-is.

    The filter combines two columns with ``TimeFilterConjunction.AND``:
    ``created_at`` carries both ``since`` and ``until="now"``, while
    ``updated_at`` carries only ``since`` and is therefore expected to have
    no upper bound in the resulting clause.
    """
    ts_format = "%Y-%m-%d %H:%M:%S"
    executor = SqlExecutor(
        Table(schema_name="tmp", table_name="hello_world"),
        dummy_contessa.conn,
        ctx,
    )

    created_column = TimeFilterColumn(
        "created_at", since=timedelta(days=10), until="now"
    )
    updated_column = TimeFilterColumn("updated_at", since=timedelta(days=1))
    direct_filter = TimeFilter(
        columns=[created_column, updated_column],
        conjunction=TimeFilterConjunction.AND,
    )
    not_null = NotNullRule(
        "not_null_name", "not_null", "src", time_filter=direct_filter
    )

    where_clause = executor.compose_where_time_filter(not_null)

    computed_created = (ctx["task_ts"] - timedelta(days=10)).strftime(ts_format)
    computed_updated = (ctx["task_ts"] - timedelta(days=1)).strftime(ts_format)
    wanted = (
        f"(created_at >= '{computed_created} UTC'::timestamptz AND created_at < '{ctx['task_ts']} UTC'::timestamptz) AND "
        f"(updated_at >= '{computed_updated} UTC'::timestamptz)"
    )
    assert where_clause == wanted, "TimeFilter type can be used directly"
def test_compose_kwargs_sql_executor(dummy_contessa, ctx):
    """``SqlExecutor.compose_kwargs`` is expected to yield only ``conn``."""
    table = Table(schema_name="tmp", table_name="hello_world")
    executor = SqlExecutor(table, dummy_contessa.conn, ctx)
    not_null = NotNullRule("not_null", "src", time_filter="created_at")

    # The whole mapping is compared, so any extra key fails the test.
    assert executor.compose_kwargs(not_null) == {"conn": dummy_contessa.conn}
def test_build_rules(dummy_contessa):
    """A definition listing three columns should expand to one rule per column.

    The built rules are compared with hand-constructed ``NotNullRule``
    instances attribute by attribute (via their instance dictionaries).
    """
    # NOTE(review): another ``test_build_rules`` is defined further down in
    # this file. Within one module the later definition rebinds the name, so
    # pytest would collect only that one - confirm whether this variant
    # should be renamed or removed.
    definition = {
        "name": "not_null",
        "columns": ["a", "b", "c"],
        "time_filter": "created_at",
    }

    normalized = dummy_contessa.normalize_rules([definition])
    built = dummy_contessa.build_rules(normalized)

    wanted = [
        NotNullRule("not_null", column, time_filter="created_at")
        for column in ("a", "b", "c")
    ]
    wanted_attrs = [vars(rule) for rule in wanted]
    built_attrs = [vars(rule) for rule in built]
    assert wanted_attrs == built_attrs
def test_compose_kwargs_pd_executor(dummy_contessa, ctx):
    """``PandasExecutor.compose_kwargs`` should produce a ``df`` entry."""
    frame = pd.DataFrame([{"created_at": datetime(2017, 10, 10)}])
    executor = PandasExecutor(
        Table(schema_name="tmp", table_name="hello_world"),
        dummy_contessa.conn,
        ctx,
    )
    not_null = NotNullRule("not_null", "src", time_filter="created_at")

    # Replace the connection's ``get_pandas_df`` with a stub that ignores its
    # argument and returns the frame built above.
    def fake_get_pandas_df(x):
        return frame

    executor.conn.get_pandas_df = fake_get_pandas_df

    produced = executor.compose_kwargs(not_null)

    # Only the key sets are compared; the frame contents are not checked.
    assert produced.keys() == {"df": frame}.keys()
def test_executor_filter_df(e, monkeypatch):
    """``filter_df`` should return exactly the single expected row.

    ``contessa.executor.datetime`` is swapped for ``FakedDatetime`` before
    the call, and the result is compared with ``DataFrame.equals``.
    """
    not_null = NotNullRule("not_null", "src", time_filter="created_at")
    monkeypatch.setattr("contessa.executor.datetime", FakedDatetime)

    filtered = e.filter_df(not_null)

    surviving_rows = [("a", "b", 3.0, datetime(2018, 9, 12, 13))]
    column_names = ["src", "dst", "value", "created_at"]
    wanted = pd.DataFrame(surviving_rows, columns=column_names)
    assert filtered.equals(wanted)
def test_build_rules(dummy_contessa):
    """A definition listing three columns should yield rules for those columns.

    Only the set of ``column`` attributes is compared, so ordering,
    duplicates and the other rule attributes are not checked here.
    """
    # NOTE(review): an earlier function in this file has the same name; this
    # later definition is the one left bound to ``test_build_rules`` - confirm
    # that is intended.
    definition = {
        "name": "not_null_name",
        "type": "not_null",
        "columns": ["a", "b", "c"],
        "time_filter": "created_at",
    }

    normalized = dummy_contessa.normalize_rules([definition])
    built = dummy_contessa.build_rules(normalized)

    wanted = [
        NotNullRule("not_null_name", "not_null", column, time_filter="created_at")
        for column in ("a", "b", "c")
    ]
    wanted_columns = {rule.column for rule in wanted}
    built_columns = {rule.column for rule in built}
    assert wanted_columns == built_columns
LtRule, LteRule, EqRule, ) from contessa.utils import AggregatedResult @pytest.mark.parametrize( "rule, expected", [ ( GtRule("gt_name", "gt", "value", "value2"), AggregatedResult(total_records=5, failed=3, passed=1), ), # test another col ( NotNullRule("not_null_name", "not_null", "value"), AggregatedResult(total_records=5, failed=1, passed=4), ), ( GteRule("gte_name", "gte", "value", 4), AggregatedResult(total_records=5, failed=1, passed=3), ), ( NotRule("not_name", "not", "value", 4), AggregatedResult(total_records=5, failed=2, passed=3), ), ( LtRule("lt_name", "lt", "value", 4), AggregatedResult(total_records=5, failed=3, passed=1), ), (
def rule():
    """Return a ``NotNullRule`` built from "not_null_name", "not_null" and "src"."""
    not_null = NotNullRule("not_null_name", "not_null", "src")
    return not_null
{ "src": "aa", "dst": "aa", "value": 66 }, ]) @pytest.mark.parametrize( "rule, expected", [ ( GtRule("gt", "value", "value2"), [False, False, True, False, False], ), # test another col (NotNullRule("not_null", "value"), [True, True, True, False, True]), (GteRule("gte", "value", 4), [False, True, True, False, True]), (NotRule("not", "value", 4), [True, False, True, True, False]), (LtRule("lt", "value", 4), [True, False, False, False, False]), (LteRule("lte", "value", 4), [True, True, False, False, True]), (EqRule("eq", "value", 4), [False, True, False, False, True]), ], ) def test_one_column_rule_sql(rule, expected, conn, ctx): conn.execute(""" drop table if exists public.tmp_table; create table public.tmp_table( value int, value2 int );