def find_task_rows(process_date: date, scheduler: TopicSnapshotScheduler, source_topic_schema: TopicSchema, source_topic_service: TopicDataService, principal_service: PrincipalService) -> List[int]: if scheduler.filter is None or scheduler.filter.filters is None or len( scheduler.filter.filters) == 0: rows = source_topic_service.find_distinct_values( None, [TopicDataColumnNames.ID.value], False) else: parsed_criteria = parse_condition_for_storage(scheduler.filter, [source_topic_schema], principal_service, True) variables = build_variables(process_date, scheduler.frequency) rows = source_topic_service.find_distinct_values( [parsed_criteria.run(variables, principal_service)], [TopicDataColumnNames.ID.value], False) return ArrayHelper(rows).map( lambda x: x.get(TopicDataColumnNames.ID.value)).to_list()
def run_retrieve_all_data_rules( data_service: TopicDataService, rules: List[MonitorRule], date_range: Tuple[datetime, datetime], changed_rows_count_in_range: int, total_rows_count: int) -> List[Tuple[MonitorRule, RuleResult]]: """ run rules which should retrieve all data, make sure pass-in rules are qualified, will not check them inside """ rules_by_factor = group_rules_by_factor(rules) factors = find_factors_and_log_missed(data_service, rules_by_factor) data_entity_helper = data_service.get_data_entity_helper() column_names = ArrayHelper(factors).map( lambda x: data_entity_helper.get_column_name(x.name)).to_list() rows = data_service.find_distinct_values( criteria=build_date_range_criteria(date_range), column_names=column_names, distinct_value_on_single_column=True) # deal with data # cast values to decimal since all rules are deal with numbers # value cannot be cast, will be treated as 0 def translate_to_array(data_rows: List[Dict[str, Any]], factor: Factor) -> List[List[Any]]: return ArrayHelper(data_rows) \ .map(lambda x: x.get(factor.name)) \ .map(lambda value: is_decimal(value)) \ .filter(lambda x: x[1] if x[0] else 0) \ .map(lambda x: [x]) \ .to_list() def run_rules(factor: Factor, data: List[Any]) -> List[Tuple[MonitorRule, RuleResult]]: concerned_rules = rules_by_factor.get(factor.factorId) if concerned_rules is None or len(concerned_rules) == 0: return [] def run_rule(rule: MonitorRule) -> Tuple[MonitorRule, RuleResult]: result = retrieve_all_data_rules_map[rule.code]( data_service, factor, data, rule, date_range, changed_rows_count_in_range, total_rows_count) return rule, result return ArrayHelper(concerned_rules).map(run_rule).to_list() return ArrayHelper(factors) \ .map(lambda x: (x, translate_to_array(rows, x))) \ .map(lambda x: run_rules(x[0], x[1])) \ .reduce(lambda all_results, x: [*all_results, *x], [])