def rows_not_exists(df: DataFrame, topic, rule: MonitorRule):
    """Report whether the topic has no rows available.

    Args:
        df: Frame inspected by ``table_not_exist`` and ``data_is_empty``.
        topic: Topic the rule is evaluated against.
        rule: Monitor rule being executed.

    Returns:
        The object built by ``init_topic_rule_result`` whose
        ``topicResult.result`` is ``True`` when the table does not exist or
        the data is empty, and ``False`` otherwise.
    """
    outcome = init_topic_rule_result(rule, topic)
    # The rule fires exactly when there is nothing to read; ``bool`` keeps the
    # stored value a plain True/False whatever the helpers return.
    outcome.topicResult.result = bool(table_not_exist(df) or data_is_empty(df))
    return outcome
def rows_no_change(df: DataFrame, topic, rule: MonitorRule):
    """Compare the current row count with the count of an earlier date range.

    Args:
        df: Frame holding the rows of the current range.
        topic: Topic under inspection; its ``dataSourceId`` selects the data
            source used for the earlier-range count query.
        rule: Monitor rule; ``params.statisticalInterval`` and
            ``params.coverageRate`` are read.

    Returns:
        ``None`` when the table does not exist or the data is empty.
        Otherwise the object built by ``init_topic_rule_result`` with
        ``topicResult.result`` set to ``True`` when
        ``len(df.index) <= prior_count * coverageRate / 100`` and ``False``
        otherwise.

    Raises:
        ValueError: If ``rule.params.coverageRate`` is ``None``.
    """
    if table_not_exist(df) or data_is_empty(df):
        return None

    outcome = init_topic_rule_result(rule, topic)
    interval = rule.params.statisticalInterval  # TODO date range
    # Only the start of the current range is used: it is handed to
    # get_date_range_with_end_date to obtain the earlier range
    # (presumably the range ending at that date -- confirm in the helper).
    range_start, _ = get_date_range(interval)
    earlier_start, earlier_end = get_date_range_with_end_date(
        interval, range_start)

    rate = rule.params.coverageRate
    if rate is None:
        raise ValueError("coverage rate is None")

    rows_now = len(df.index)
    source: DataSource = get_datasource_by_id(topic.dataSourceId)
    rows_before = query_topic_data_count_by_datetime(
        topic, earlier_start, earlier_end, source)

    # coverageRate is divided by 100 before scaling the earlier count.
    threshold = rows_before * (rate / 100)
    outcome.topicResult.result = True if rows_now <= threshold else False
    return outcome
def factor_use_cast(df: DataFrame, topic, rule: MonitorRule):
    """Run the "use cast" check on every factor selected for the rule.

    Args:
        df: Frame whose columns are looked up by lower-cased factor name.
        topic: Topic the factors belong to.
        rule: Monitor rule being executed.

    Returns:
        ``None`` when the table does not exist or the data is empty.
        Otherwise a ``RuleExecuteResult`` with ``ruleType`` set to
        ``FACTOR_RULE`` and ``factorResult`` holding one entry per factor
        whose column is present in ``df``; each entry's ``result`` is the
        value returned by ``check_use_cast``.
    """
    if table_not_exist(df) or data_is_empty(df):
        return None

    factor_results = []
    execute_result = RuleExecuteResult()
    execute_result.ruleType = FACTOR_RULE
    for factor in get_execute_factor_list(rule, topic):
        factor_rule_result = init_factor_rule_result(rule, topic, factor)
        column_name = factor["name"].lower()
        if column_name not in df.columns:
            # A factor without a matching column cannot be checked. Skip it
            # instead of letting the lookup raise KeyError and abort the whole
            # rule, the same membership guard factor_mismatch_type applies.
            continue
        factor_rule_result.result = check_use_cast(
            df[column_name], rule, factor)
        factor_results.append(factor_rule_result)
    execute_result.factorResult = factor_results
    return execute_result
def rows_count_mismatch_and_another(df: DataFrame, topic, rule: MonitorRule):
    """Check whether this topic's row count differs from another topic's.

    Args:
        df: Frame holding the rows of the topic under inspection.
        topic: Topic the rule is evaluated against.
        rule: Monitor rule; ``params.statisticalInterval`` gives the date
            range and ``params.topicId`` identifies the topic to compare with.

    Returns:
        ``None`` when the table does not exist or the data is empty.
        Otherwise the object built by ``init_topic_rule_result`` with
        ``topicResult.result`` set to ``True`` when ``len(df.index)`` differs
        from the count queried for the other topic, and ``False`` when the
        two counts are equal.
    """
    # Built before the emptiness guard, as in the original call order.
    outcome = init_topic_rule_result(rule, topic)
    if table_not_exist(df) or data_is_empty(df):
        return None

    range_start, range_end = get_date_range(rule.params.statisticalInterval)
    other_topic = get_topic_by_id(rule.params.topicId)
    other_source: DataSource = get_datasource_by_id(other_topic.dataSourceId)

    own_rows = len(df.index)
    other_rows = query_topic_data_count_by_datetime(
        other_topic, range_start, range_end, other_source)

    outcome.topicResult.result = bool(own_rows != other_rows)
    return outcome
def factor_mismatch_type(df: DataFrame, topic: dict, rule: MonitorRule):
    """Flag factors whose column values do not match the declared factor type.

    Args:
        df: Frame whose columns are looked up by lower-cased factor name.
        topic: Topic the factors belong to.
        rule: Monitor rule being executed.

    Returns:
        ``None`` when the table does not exist or the data is empty.
        Otherwise a ``RuleExecuteResult`` with ``ruleType`` set to
        ``FACTOR_RULE`` and ``factorResult`` holding one entry per factor
        whose column is present in ``df``. An entry's ``result`` is ``True``
        when ``check_value_match_type`` reports no match, and its ``params``
        maps the factor name to ``"mismatch type"``.
    """
    if table_not_exist(df) or data_is_empty(df):
        return None

    checked = []
    factors = get_execute_factor_list(rule, topic)
    report = RuleExecuteResult()
    report.ruleType = FACTOR_RULE
    for factor in factors:
        entry = init_factor_rule_result(rule, topic, factor)
        expected_type = factor["type"]
        column = factor["name"].lower()
        if column not in df.columns:
            continue
        # NOTE(review): the params label and the append are treated as part of
        # the column-present branch -- confirm this matches the intended nesting.
        entry.result = not check_value_match_type(df[column], expected_type)
        entry.params[factor["name"]] = "mismatch type"
        checked.append(entry)
    report.factorResult = checked
    return report