Exemplo n.º 1
0
def test_sql_apply_only_failures(conn, ctx):
    conn.execute("""
        drop table if exists public.tmp_table;

        create table public.tmp_table(
          src text,
          dst text
        );

        insert into public.tmp_table(src, dst)
        values ('bts', 'abc'), ('aaa', NULL)
    """)
    refresh_executors(Table(schema_name="public", table_name="tmp_table"),
                      conn,
                      context=ctx)

    sql = """
        select src
        from {{ table_fullname }}
        where src != 'aaa'
    """
    rule = CustomSqlRule(
        "sql_test_name",
        "sql_test",
        "src",
        sql,
        "example description",
        only_failures_mode=True,
    )
    results = rule.apply(conn)
    assert results.total_records == 0
    assert results.failed == 1
    assert results.passed == 0
    conn.execute("""DROP TABLE tmp_table;""")
Exemplo n.º 2
0
def test_sql_standard_formatting(conn, ctx):
    ctx["dst_table"] = "public.dst_table"
    conn.execute("""
        drop table if exists public.dst_table;

        create table public.dst_table(
          src text,
          dst text,
          created timestamptz
        );

        insert into public.dst_table(src, dst, created)
        values ('bts', 'abc', '2018-09-12T12:00:00'), ('aaa', NULL, '2018-09-12T12:00:00'), ('aaa', NULL, '2019-07-31T12:00:00')
    """)
    refresh_executors(Table("public", "dst_table"), conn, context=ctx)

    sql = """
        select dst
        from {{ dst_table }}
        where dst LIKE 'a%'
    """
    rule = CustomSqlRule(
        "sql_test_name",
        "sql_test",
        None,
        sql,
        "example description",
        only_failures_mode=True,
    )
    results = rule.apply(conn)
    assert results.total_records == 0  # unknown because of only_failures_mode
    assert results.failed == 1
    assert results.passed == 0
Exemplo n.º 3
0
def test_sql_apply_extra_ctx(conn, ctx):
    ctx["dst_table"] = "public.dst_table"
    conn.execute("""
        drop table if exists public.dst_table;

        create table public.dst_table(
          src text,
          dst text,
          created timestamptz
        );

        insert into public.dst_table(src, dst, created)
        values ('bts', 'abc', '2018-09-12T12:00:00'), ('aaa', NULL, '2018-09-12T12:00:00'), ('aaa', NULL, '2019-07-31T12:00:00')
    """)
    refresh_executors(Table("public", "dst_table"), conn, context=ctx)

    sql = """
        select
        src = 'aaa'
        from {{ dst_table }}
        where created between timestamptz '{{task_ts}}' and timestamptz '{{task_ts}}' + interval '60 seconds'
    """
    rule = CustomSqlRule("sql_test_name", "sql_test", "col1", sql,
                         "example description")
    results = rule.apply(conn)
    assert results.total_records == 2
    assert results.failed == 1
    assert results.passed == 1
    conn.execute("""DROP TABLE public.dst_table;""")
Exemplo n.º 4
0
def test_sql_apply(conn, ctx):
    conn.execute("""
        drop table if exists public.tmp_table;

        create table public.tmp_table(
          src text,
          dst text
        );

        insert into public.tmp_table(src, dst)
        values ('bts', 'abc'), ('aaa', NULL)
    """)
    refresh_executors(Table(schema_name="public", table_name="tmp_table"),
                      conn,
                      context=ctx)

    sql = """
        select
        src = 'aaa'
        from {{ table_fullname }}
    """
    rule = CustomSqlRule("sql_test", sql, "example description")
    results = rule.apply(conn)
    expected = pd.Series([False, True])
    assert list(expected) == list(results)
    conn.execute("""DROP TABLE tmp_table;""")
Exemplo n.º 5
0
def test_rule_context_formatted_in_where():
    class TestRule(SqlRule):
        @property
        def sql(self):
            return "select a, b, c from {{table_fullname}}_{{ts_nodash}}"

    r = TestRule(
        name="test_rule_name",
        type="test_rule_type",
        condition="created_at >= '{{ts_nodash}}'::timestamptz - interval '10 minutes'",
        description="Greater than 0 when bags <> 0",
    )
    check_table = Table("raw", "table")
    context = ContessaRunner.get_context(check_table, {"ts_nodash": "20190101T000000"})

    # executor holds context of run, so set it
    refresh_executors(check_table, "", context)

    result = r.sql_with_where
    expected = """
		select a, b, c
		from raw.table_20190101T000000
		where created_at >= '20190101T000000'::timestamptz - interval '10 minutes'
	"""
    assert normalize_str(result) == normalize_str(expected)
Exemplo n.º 6
0
    def run(
        self,
        raw_rules: List[Dict[str, str]],
        check_table: Dict,
        result_table: Optional[
            Dict] = None,  # todo - docs for quality name, maybe defaults..
        context: Optional[Dict] = None,
    ) -> Union[CheckResult, QualityCheck]:
        check_table = Table(**check_table)
        context = self.get_context(check_table, context)

        normalized_rules = self.normalize_rules(raw_rules)
        refresh_executors(check_table, self.conn, context)

        if result_table:
            result_table = ResultTable(**result_table,
                                       model_cls=self.model_cls)
            quality_check_class = self.get_quality_check_class(result_table)
            self.conn.ensure_table(quality_check_class.__table__)
        else:
            quality_check_class = CheckResult

        rules = self.build_rules(normalized_rules)
        objs = self.do_quality_checks(quality_check_class, rules, context)

        if result_table:
            self.conn.upsert(objs)
        return objs
Exemplo n.º 7
0
def test_new_rule(conn, ctx):
    class CountSqlRule(SqlRule):
        executor_cls = SqlExecutor

        def __init__(self, name, type, count, description=None, **kwargs):
            super().__init__(name, type, description=description, **kwargs)
            self.count = count

        def get_sql_parameters(self):
            context = super().get_sql_parameters()
            context.update({"target_count": self.count})
            return context

        @property
        def sql(self):
            return """
                SELECT COUNT(*) = {{target_count}} FROM {{table_fullname}}
            """

    conn.execute("""
        drop table if exists public.tmp_table;
    
        create table public.tmp_table(
          a text,
          b text
        );
    
        insert into public.tmp_table(a, b)
        values ('bts', 'abc'), ('aaa', NULL)
    """)

    refresh_executors(Table(schema_name="public", table_name="tmp_table"),
                      conn,
                      context=ctx)
    rule = CountSqlRule("count_name", "count", 2)
    results = rule.apply(conn)
    expected = AggregatedResult(total_records=1, failed=0, passed=1)
    assert (expected.failed, expected.passed) == (results.failed,
                                                  results.passed)

    rule = CountSqlRule("count_name", "count", 2, condition="a = 'bts'")
    results = rule.apply(conn)
    expected = AggregatedResult(total_records=1, failed=1, passed=0)
    assert (expected.failed, expected.passed) == (results.failed,
                                                  results.passed)
    conn.execute("""DROP TABLE tmp_table;""")
Exemplo n.º 8
0
def test_new_rule(conn, ctx):
    class CountSqlRule(SqlRule):
        executor_cls = SqlExecutor

        def __init__(self, name, count, **kwargs):
            super().__init__(name, **kwargs)
            self.count = count

        def get_sql_parameters(self):
            context = super().get_sql_parameters()
            context.update({"target_count": self.count})
            return context

        @property
        def sql(self):
            return """
                SELECT COUNT(*) = {{target_count}} FROM {{table_fullname}}
            """

    conn.execute("""
        drop table if exists public.tmp_table;
    
        create table public.tmp_table(
          a text,
          b text
        );
    
        insert into public.tmp_table(a, b)
        values ('bts', 'abc'), ('aaa', NULL)
    """)

    refresh_executors(Table(schema_name="public", table_name="tmp_table"),
                      conn,
                      context=ctx)
    rule = CountSqlRule("count", 2)
    results = rule.apply(conn)
    expected = pd.Series([True])
    assert list(expected) == list(results)

    rule = CountSqlRule("count", 2, condition="a = 'bts'")
    results = rule.apply(conn)
    expected = pd.Series([False])
    assert list(expected) == list(results)
    conn.execute("""DROP TABLE tmp_table;""")
Exemplo n.º 9
0
def test_one_column_rule_sql(rule, expected, conn, ctx):
    conn.execute("""
            drop table if exists public.tmp_table;

            create table public.tmp_table(
              value int,
              value2 int
            );

            insert into public.tmp_table(value, value2)
            values (1, 2), (4, 5), (5, 3), (NULL, NULL), (4, 11)
        """)
    refresh_executors(Table(schema_name="public", table_name="tmp_table"),
                      conn,
                      context=ctx)

    results = rule.apply(conn)
    expected = pd.Series(expected, name=rule.column)
    assert list(expected) == list(results)
Exemplo n.º 10
0
def test_sql_missing_jinja_param(conn, ctx):
    ctx["dst_table"] = "public.dst_table"
    refresh_executors(Table("public", "dst_table"), conn, context=ctx)

    sql = """
        select dst
        from {{ dst_table }}
        where dst LIKE '{{ missing_pattern }}'
    """
    rule = CustomSqlRule(
        "sql_test_name",
        "sql_test",
        None,
        sql,
        "example description",
        only_failures_mode=True,
    )

    with pytest.raises(jinja2.exceptions.UndefinedError):
        rule.apply(conn)
Exemplo n.º 11
0
    def run(
        self,
        raw_rules: List[Dict[str, str]],
        check_table: Dict,
        result_table: Dict,  # todo - docs for quality name, maybe defaults..
        context: Optional[Dict] = None,
    ):
        check_table = Table(**check_table)
        result_table = ResultTable(**result_table)
        context = self.get_context(check_table, context)

        normalized_rules = self.normalize_rules(raw_rules)
        refresh_executors(check_table, self.conn, context)
        quality_check_class = self.get_quality_check_class(result_table)
        self.ensure_table(quality_check_class)

        rules = self.build_rules(normalized_rules)
        objs = self.do_quality_checks(quality_check_class, rules, context)

        self.insert(objs)
Exemplo n.º 12
0
def test_one_column_rule_sql_condition(rule, expected, conn, ctx):
    conn.execute("""
            drop table if exists public.tmp_table;

            create table public.tmp_table(
              value int,
              conditional boolean,
              date timestamptz
            );

            insert into public.tmp_table(value, conditional, date)
            values (1, TRUE, NULL), (4, TRUE, NULL), (5, FALSE, NULL), (NULL, FALSE, NULL), (4, FALSE, '2019-10-02T13:30:00+0020')
        """)
    refresh_executors(Table(schema_name="public", table_name="tmp_table"),
                      conn,
                      context=ctx)

    results = rule.apply(conn)
    expected = pd.Series(expected, name=rule.column)
    assert list(expected) == list(results)
Exemplo n.º 13
0
def test_cmp_with_other_col_rules(rule, expected, conn, ctx):
    conn.execute("""
        drop table if exists public.tmp_table;

        create table public.tmp_table(
          value1 int,
          value2 int,
          value3 int,
          value4 int
        );

        insert into public.tmp_table(value1, value2, value3, value4)
        values (1, 2, 1, 1), (1, 1, 1, NULL), (1, 1, 1, 1)
    """)
    refresh_executors(Table(schema_name="public", table_name="tmp_table"),
                      conn,
                      context=ctx)

    results = rule.apply(conn)
    expected = pd.Series(expected, name=rule.column)
    assert list(expected) == list(results)
Exemplo n.º 14
0
def test_one_column_rule_sql(rule, expected, conn, ctx):
    conn.execute("""
            drop table if exists public.tmp_table;

            create table public.tmp_table(
              value int,
              value2 int
            );

            insert into public.tmp_table(value, value2)
            values (1, 2), (4, 5), (5, 3), (NULL, NULL), (4, 11)
        """)
    refresh_executors(Table(schema_name="public", table_name="tmp_table"),
                      conn,
                      context=ctx)

    results = rule.apply(conn)
    assert (expected.total_records, expected.failed, expected.passed) == (
        results.total_records,
        results.failed,
        results.passed,
    )
Exemplo n.º 15
0
def test_cmp_with_other_col_rules(rule, expected, conn, ctx):
    conn.execute("""
        drop table if exists public.tmp_table;

        create table public.tmp_table(
          value1 int,
          value2 int,
          value3 int,
          value4 int
        );

        insert into public.tmp_table(value1, value2, value3, value4)
        values (1, 2, 1, 1), (1, 1, 1, NULL), (1, 1, 1, 1)
    """)
    refresh_executors(Table(schema_name="public", table_name="tmp_table"),
                      conn,
                      context=ctx)

    results = rule.apply(conn)
    assert (expected.total_records, expected.failed, expected.passed) == (
        results.total_records,
        results.failed,
        results.passed,
    )