def match_day_of_month(self) -> bool: sql = self.match_day_of_month_sql() conn = get_connection() cur = conn.cursor() cur.execute(sql) row = cur.fetchone() result = self.get_count_result(row) return result
def match_minute(self) -> bool: sql = self.match_minute_sql(0, 59) conn = get_connection() cur = conn.cursor() cur.execute(sql) row = cur.fetchone() result = self.get_count_result(row) return result
def match_factor_is_blank(self) -> Dict: sql = self.get_factor_is_blank_sql() log.info("factor is blank: {sql}".format(sql=sql)) conn = get_connection() cursor = conn.cursor() cursor.execute(sql) row = cursor.fetchone() result = self.get_count_result(row) return result
def match_string_length_not_in_range(self, min_, max_) -> Dict: sql = self.get_string_length_not_in_range_sql(min_, max_) log.info("string length not in range: {sql}".format(sql=sql)) conn = get_connection() cursor = conn.cursor() cursor.execute(sql) row = cursor.fetchone() result = self.get_count_result(row) return result
def match_string_length_mismatch(self, length) -> Dict: sql = self.get_string_length_mismatch_sql(length) log.info("string length mismatch: {sql}".format(sql=sql)) conn = get_connection() cursor = conn.cursor() cursor.execute(sql) row = cursor.fetchone() result = self.get_count_result(row) return result
def get_mismatch_enum(self): sql = self.get_mismatch_enum_sql() log.info(sql) conn = get_connection() cur = conn.cursor() cur.execute(sql) row = cur.fetchone() result = self.get_count_result(row) return result
def match_factor_not_in_range(self, min_, max_) -> Dict: query_sql = self.get_factor_not_in_range_sql(min_, max_) log.info("factor not in range: {sql}".format(sql=query_sql)) conn = get_connection() cursor = conn.cursor() cursor.execute(query_sql) row = cursor.fetchone() result = self.get_count_result(row) return result
def match_quantile_not_in_range(self) -> bool: query_sql = self.get_factor_data_sql() log.info("factor quantile not in range: {sql}".format(sql=query_sql)) conn = get_connection() cursor = conn.cursor() cursor.execute(query_sql) rows = cursor.fetchall() data_frame = self.get_data_frame(cursor, rows) quantile = data_frame[self.factor.name.lower()].quantile() return self.check_value_not_in_range(quantile)
def query_topic_data_count_by_datetime(topic, from_datetime, to_datetime, data_source): # topic_sql = "select count(*) from {0} ".format(__build_topic_name(topic["name"])) topic_sql = "select count(*) from {0} where update_time_ between timestamp '{1}' and timestamp '{2}'".format( __build_topic_name(topic.name, data_source), from_datetime.format('YYYY-MM-DD'), to_datetime.format('YYYY-MM-DD')) conn = get_connection() cur = conn.cursor() cur.execute(topic_sql) row = cur.fetchone() return row[0]
def match_date(self) -> bool: sql = self.match_date_sql() log.info(sql) conn = get_connection() cur = conn.cursor() cur.execute(sql) try: cur.fetchall() except TrinoUserError: return False else: return True
def match_rows_not_exists(self) -> Dict: sql = self.get_rows_not_exists_sql() log.info(sql) conn = get_connection() cur = conn.cursor() cur.execute(sql) row = cur.fetchone() result = {} if row: for index, value in enumerate(row): if index == 0: result["count"] = value return result
def get_count_value(self): sql = self.get_count_sql() log.info(sql) conn = get_connection() cur = conn.cursor() cur.execute(sql) row = cur.fetchone() result = {} if row: for index, value in enumerate(row): if index == 0: result["count"] = value return result
def get_match_count_and_total_count(self, pattern) -> Tuple: match_regexp_sql = self.get_factor_match_regexp_sql(pattern) log.info("factor match regexp: {sql}".format(sql=match_regexp_sql)) conn = get_connection() cursor = conn.cursor() cursor.execute(match_regexp_sql) match_count_row = cursor.fetchone() match_count_result = self.get_count_result(match_count_row) total_sql = self.get_count_sql() log.info("total sql: {sql}".format(sql=total_sql)) cursor.execute(total_sql) total_count_row = cursor.fetchone() total_count_result = self.get_count_result(total_count_row) return match_count_result["count"], total_count_result["count"]
def match_factor_match_another(self, filed1: str, field2: str) -> bool: match_another_sql = self.get_factor_and_another_sql(filed1, field2) log.info("factor and another: {sql}".format(sql=match_another_sql)) conn = get_connection() cursor = conn.cursor() cursor.execute(match_another_sql) match_count_row = cursor.fetchone() match_count_result = self.get_count_result(match_count_row) total_sql = self.get_count_sql() log.info("total sql: {sql}".format(sql=total_sql)) cursor.execute(total_sql) total_count_row = cursor.fetchone() total_count_result = self.get_count_result(total_count_row) return match_count_result["count"] == total_count_result["count"]
def match_rows_count_mismatch_and_another(self) -> bool: topic_count_sql = self.get_topic_count_sql() log.info("topic count sql: {sql}".format(sql=topic_count_sql)) conn = get_connection() cur = conn.cursor() cur.execute(topic_count_sql) topic_count_row = cur.fetchone() topic_count_result = self.get_count_result(topic_count_row) another_topic_count_sql = self.get_another_topic_count_sql() log.info("another topic count sql: {sql}".format(sql=another_topic_count_sql)) cur.execute(another_topic_count_sql) another_topic_count_row = cur.fetchone() another_topic_count_result = self.get_count_result(another_topic_count_row) return topic_count_result == another_topic_count_result
def match_rows_no_change(self): change_sql = self.get_rows_change_sql() log.info("rows change sql: {sql}".format(sql=change_sql)) conn = get_connection() cur = conn.cursor() cur.execute(change_sql) change_count_row = cur.fetchone() change_count_result = self.get_count_result(change_count_row) total_sql = self.get_total_count_sql() log.info("total sql: {sql}".format(sql=total_sql)) cur.execute(total_sql) total_count_row = cur.fetchone() total_count_result = self.get_count_result(total_count_row) return (change_count_result["count"] / total_count_result["count"]) * 100
def match_empty_over_coverage(self): change_sql = self.get_empty_sql() log.info("empty sql: {sql}".format(sql=change_sql)) conn = get_connection() cur = conn.cursor() cur.execute(change_sql) empty_count_row = cur.fetchone() empty_count_result = self.get_count_result(empty_count_row) total_sql = self.get_count_sql() log.info("total sql: {sql}".format(sql=total_sql)) cur.execute(total_sql) total_count_row = cur.fetchone() total_count_result = self.get_count_result(total_count_row) return (empty_count_result["count"] / total_count_result["count"]) * 100
def query_topic_data_by_datetime(topic_name, from_datetime, to_datetime, topic: Topic = None, data_source: DataSource = None): topic_sql = "select * from {0} where update_time_ between timestamp '{1}' and timestamp '{2}'".format( __build_topic_name(topic_name, data_source), from_datetime.format('YYYY-MM-DD'), to_datetime.format('YYYY-MM-DD')) conn = get_connection() cur = conn.cursor() cur.execute(topic_sql) rows = cur.fetchall() columns = list([desc[0] for desc in cur.description]) df = __build_data_frame(rows, columns) if topic is None: return df else: return convert_df_dtype(df, topic)
def match_use_cast(self) -> bool: sql = self.get_factor_use_cast_sql() log.info("factor use cast: {sql}".format(sql=sql)) conn = get_connection() cur = conn.cursor() cur.execute(sql) rows = cur.fetchall() if rows: columns = list([desc[0] for desc in cur.description]) data_type = self.get_factor_data_type() data_frame = build_data_frame(rows, columns, data_type) df_series = data_frame[self.factor.name.lower()] if df_series_is_str(df_series): result = [ df_series.str.isnumeric().all(), df_series.str.isdecimal().all() ] return True in result else: return False else: return False
def query_rule_results_by_datetime(criteria, data_source, tenant_id): topic_sql = generate_monitor_log_query(criteria, data_source, tenant_id) conn = get_connection() cur = conn.cursor() cur.execute(topic_sql) rows = cur.fetchall() columns = list([desc[0] for desc in cur.description]) df = __build_data_frame(rows, columns) rule_results = [] for row in df.itertuples(index=True, name='Pandas'): rule_log = MonitorRuleLog() rule_log.count = float(row.count) if criteria.ruleCode: rule_log.topicId = row.topicid if criteria.topicId: rule_log.factorId = row.factorid rule_log.ruleCode = row.rulecode rule_results.append(rule_log) return rule_results