def qa_checks(**context):
    """Run the QA SQL file against Snowflake and file a GitHub issue per result row.

    Nothing is done unless ``qa_file`` exists, both ``GIT_USER`` and
    ``GIT_TOKEN`` are set, and ``ENVIRONMENT`` is not ``'CI'``.  The file
    contents are split on ``;``; each fragment longer than five characters
    (after stripping whitespace) is executed through a ``SnowflakeHook``, and
    every row of every non-empty result is reported via ``make_github_issue``
    with the labels ``bug`` and ``qa``.

    Result rows are read through the columns ``TABLE_NAME``, ``ERROR_DESC``,
    ``ERROR_COUNT`` and ``ERROR_CONDITION``.

    Args:
        **context: Keyword arguments supplied by the caller; not used here.
    """
    if not os.path.exists(qa_file) or GIT_USER is None or GIT_TOKEN is None or ENVIRONMENT == 'CI':
        return

    with open(qa_file, 'r') as handle:
        statements = handle.read().split(";")
        conn_id = Variable.get("SNOWFLAKE_CONNECTION", default_var="SNOWFLAKE")
        hook = SnowflakeHook(snowflake_conn_id=conn_id)

        for statement in statements:
            # Fragments of five or fewer characters after stripping are skipped.
            if len(statement.strip()) <= 5:
                continue

            findings = hook.get_pandas_df(statement)
            if len(findings.index) == 0:
                continue

            for _, finding in findings.iterrows():
                title = 'QA Failed for ' + finding['TABLE_NAME']
                body = (
                    "Error: " + finding['ERROR_DESC'] + "\n"
                    + "Error Count: " + str(finding['ERROR_COUNT']) + "\n"
                    + finding['ERROR_CONDITION']
                )
                make_github_issue(title, body, ['bug', 'qa'])
# --- Example 2 ---
def snowflake_db_monitor(**op_kwarg):
    """Log column-count and row-count metrics for the tables of ``SCHEMA``.

    The ``GET_COLUMNS`` query result is filtered to rows whose
    ``schema_name`` equals ``SCHEMA`` and reduced to the number of distinct
    ``column_name`` values per ``table_name``.  ``GET_DB_ROW_INFO`` is read as
    ``(table name, row count)`` pairs.  The function then logs, through
    ``log_metric``:

    * max / min / mean / median table row count (skipped when the row-count
      query returns no rows),
    * a ``"<table> shape"`` metric holding ``(column_count, row_count)`` for
      each table, with a row count of 0 for tables missing from the
      row-count result,
    * max / min / mean / median table column count.

    Args:
        **op_kwarg: Keyword arguments supplied by the caller; not used here.
    """
    snowflake_hook = SnowflakeHook(snowflake_conn_id="test_snowflake_conn")

    with snowflake_query_tracker(database=DATABASE, schema=SCHEMA):
        snowflake_tables = snowflake_hook.get_pandas_df(GET_COLUMNS)
        snowflake_tables = snowflake_tables[
            snowflake_tables["schema_name"] == "{}".format(SCHEMA)
        ]

    # Build table_name and column_count from the same groupby result so each
    # count stays on the row of its own table.  Filling table_name separately
    # from unique() (order of appearance) next to a groupby result (sorted by
    # key) pairs counts with the wrong tables when the input is not sorted.
    snowflake_shapes = (
        snowflake_tables.groupby("table_name")["column_name"]
        .nunique()
        .rename("column_count")
        .reset_index()
    )

    snowflake_rows = snowflake_hook.get_records(GET_DB_ROW_INFO)
    table_row_info = {
        tablename: row_count for tablename, row_count in snowflake_rows
    }

    row_counts = list(table_row_info.values())
    # max() and min() raise ValueError on an empty sequence, so the aggregate
    # row metrics are only logged when the query returned something.
    if row_counts:
        log_metric("Max table row count", max(row_counts))
        log_metric("Min table row count", min(row_counts))
        log_metric("Mean table row count", round(mean(row_counts), 2))
        log_metric("Median table row count", median(row_counts))

    # Tables absent from the row-count result get a row count of 0.
    snowflake_shapes["row_count"] = (
        snowflake_shapes["table_name"].map(table_row_info).fillna(0).astype(int)
    )

    for _, row in snowflake_shapes.iterrows():
        log_metric(
            "{} shape".format(row["table_name"]),
            (row["column_count"], row["row_count"]),
        )

    column_counts = snowflake_shapes["column_count"]
    log_metric("Max table column count", column_counts.max())
    # Previously this metric reported .max() under the "Min" label.
    log_metric("Min table column count", column_counts.min())
    log_metric("Mean table column count", round(column_counts.mean(), 2))
    log_metric("Median table column count", column_counts.median())