def merge_or_insert_topic(instance, context):
    """Update matching rows in the target topic, inserting when no match exists.

    Returns the (possibly unchanged) context and a UnitStatus describing the run.
    NOTE(review): relies on module-level `action`, `pipeline_topic`, `PIPELINE_UID`
    and helper functions defined elsewhere in this file — confirm against callers.
    """
    raw_data, old_value = instance[pipeline_constants.NEW], instance[pipeline_constants.OLD]
    unit_action_status = UnitStatus(type=action.type)
    start = time.time()
    pipeline_uid = context[PIPELINE_UID]
    unit_action_status.uid = pipeline_uid
    if action.topicId is None:
        raise ValueError("action.topicId is empty {0}".format(action.name))
    target_topic = get_topic_by_id(action.topicId)
    mapping_results, mapping_logs = run_mapping_rules(
        action.mapping, target_topic, raw_data, pipeline_topic)
    joint_type, where_condition = build_query_conditions(
        action.by, pipeline_topic, raw_data, target_topic, context)
    # The query is deliberately re-run for every mapping result: an insert made
    # in an earlier iteration may change what the next lookup finds.
    for mapping_result in mapping_results:
        mongo_query = __build_mongo_query(joint_type, where_condition)
        target_data = query_topic_data(mongo_query, target_topic.name)
        if target_data is None:
            insert_topic_data(target_topic.name, mapping_result, pipeline_uid)
            unit_action_status.insertCount += 1
        else:
            update_topic_data(target_topic.name, mapping_result, target_data, pipeline_uid)
            unit_action_status.updateCount += 1
    unit_action_status.mapping = mapping_logs
    unit_action_status.complete_time = time.time() - start
    return context, unit_action_status
def read_factor(instance, context):
    """Read a single factor value from a topic row into the pipeline context.

    Falls back to the factor's default value when no row matches the conditions.
    Returns (context, status, []) — the empty list mirrors sibling actions that
    may emit trigger data.
    """
    raw_data, old_value = instance[pipeline_constants.NEW], instance[pipeline_constants.OLD]
    unit_action_status = ReadFactorAction(type=action.type)
    start = time.time()
    variable_type, context_target_name = process_variable(action.variableName)
    topic = get_topic_by_id(action.topicId)
    factor = get_factor(action.factorId, topic)
    joint_type, where_condition = build_query_conditions(
        action.by, pipeline_topic, raw_data, topic, context)
    mongo_query = __build_mongo_query(joint_type, where_condition)
    target_data = query_topic_data(mongo_query, topic.name)
    if target_data is not None:
        if factor.name in target_data:
            read_value = target_data[factor.name]
            if factor.name in context:
                # Overwriting an existing context entry is allowed but logged.
                log.warn("factor name {0} is already in context".format(factor.name))
            context[context_target_name] = target_data[factor.name]
            unit_action_status.value = read_value
    else:
        # No matching record: use the factor's declared default value instead.
        context[context_target_name] = convert_factor_type(factor.defaultValue, factor.type)
        log.warn("target_data is empty ,conditions {0}".format(mongo_query))
    unit_action_status.complete_time = time.time() - start
    return context, unit_action_status, []
def merge_topic():
    """Merge mapped values into an existing row of the target topic.

    Aggregate topics are updated under a retry template (optimistic locking);
    other topics are updated directly. Raises when no matching row exists.
    Returns (status, trigger_pipeline_data_list).
    """
    start = time.time()
    # Action-level monitor record.
    status = ActionStatus()
    status.type = "merge-row"
    status.uid = action_context.get_pipeline_id()
    previous_data = action_context.previousOfTriggerData
    current_data = action_context.currentOfTriggerData
    action = action_context.action
    if action.topicId is None:
        raise ValueError("action.topicId is empty {0}".format(action.topicId))
    pipeline_topic = action_context.unitContext.stageContext.pipelineContext.pipelineTopic
    target_topic = get_topic_by_id(action.topicId)
    variables = get_variables(action_context)
    # If there are aggregate functions, the record must be locked for update.
    mappings_results, having_aggregate_functions = parse_mappings(
        action.mapping, target_topic, previous_data, current_data, variables)
    status.value = mappings_results
    where_ = parse_parameter_joint(action.by, current_data, variables, pipeline_topic, target_topic)
    status.by = where_
    trigger_pipeline_data_list = []
    target_data = query_topic_data(where_, target_topic, action_context.get_current_user())
    if target_data is None:
        raise Exception("can't insert data in merge row action ")
    if target_topic.type == "aggregate":
        args = [mappings_results, where_, target_topic, action_context.get_current_user()]
        execute_ = retry_template((update_retry_callback, args),
                                  (update_recovery_callback, args),
                                  RetryPolicy())
        trigger_pipeline_data_list.append(execute_())
    else:
        trigger_pipeline_data_list.append(
            update_topic_data_one(mappings_results, target_data,
                                  action_context.get_pipeline_id(),
                                  target_data[get_id_name()], target_topic,
                                  action_context.get_current_user()))
    status.updateCount = status.updateCount + 1
    status.completeTime = time.time() - start
    return status, trigger_pipeline_data_list
def run_pipeline(pipeline: Pipeline, data):
    """Execute every stage/unit/action of an enabled pipeline against `data`.

    Collects per-stage and per-unit run statuses; on failure the traceback is
    recorded on the pipeline status. Monitor data is synced unless the source
    topic is a SYSTEM topic (avoids monitor pipelines monitoring themselves).
    """
    pipeline_status = PipelineRunStatus(pipelineId=pipeline.pipelineId,
                                        uid=get_surrogate_key(),
                                        start_time=datetime.now())
    if pipeline.enabled:
        pipeline_topic = get_topic_by_id(pipeline.topicId)
        # TODO pipeline when condition
        log.info("start run pipeline {0}".format(pipeline.name))
        context = {PIPELINE_UID: pipeline_status.uid}
        try:
            start = time.time()
            for stage in pipeline.stages:
                stage_run_status = StageRunStatus()
                stage_run_status.name = stage.name
                log.info("stage name {0}".format(stage.name))
                for unit in stage.units:
                    # TODO __check_when_condition
                    if unit.do is None:
                        log.info("action stage unit {0} do is None".format(stage.name))
                        continue
                    unit_run_status = UnitRunStatus()
                    for action in unit.do:
                        # Dispatch to the dynamic action implementation.
                        # TODO [future] custom folder
                        func = find_action_type_func(
                            convert_action_type(action.type), action, pipeline_topic)
                        out_result, unit_action_status = func(data, context)
                        log.debug("out_result :{0}".format(out_result))
                        context = {**context, **out_result}
                        unit_run_status.actions.append(unit_action_status)
                    stage_run_status.units.append(unit_run_status)
                pipeline_status.stages.append(stage_run_status)
            elapsed_time = time.time() - start
            pipeline_status.complete_time = elapsed_time
            pipeline_status.status = FINISHED
            log.info("pipeline_status {0} time :{1}".format(pipeline.name, elapsed_time))
        except Exception as e:
            log.exception(e)
            pipeline_status.error = traceback.format_exc()
            pipeline_status.status = ERROR
            log.error(pipeline_status)
        finally:
            if pipeline_topic.kind is not None and pipeline_topic.kind == pipeline_constants.SYSTEM:
                log.info("pipeline_status is {0}".format(pipeline_status))
            else:
                sync_pipeline_monitor_data(pipeline_status)
def insert_or_update(action, already_see, pipeline_graph, pipeline_node):
    """Add the action's target topic and its mapped factors to the lineage graph.

    `already_see` collects node *ids* already present in the graph so each node
    and edge is added at most once. "changeId" factors are skipped.
    """
    topic = get_topic_by_id(action.topicId)
    topic_node = buildTopicNode(topic)
    if topic_node.id not in already_see:
        pipeline_graph.nodes.append(topic_node)
        already_see.append(topic_node.id)
        relationship_properties = buildRelationShipProperties({"type": "pipeline_to_topic"})
        relationship = buildRelationShip(pipeline_node, topic_node, relationship_properties)
        pipeline_graph = add_edge(pipeline_graph, relationship)
    for map_ in action.mapping:
        factor = getFactorFromTopic(topic, map_.factorId)
        if factor is None:
            continue
        factor_node = buildFactorNode(factor)
        if factor_node.name == "changeId":
            continue
        if factor_node.id not in already_see:
            pipeline_graph.nodes.append(factor_node)
            # BUG FIX: record the node *id* — the membership test above checks ids,
            # but the original appended the node object itself, so factor nodes
            # were never deduplicated across actions.
            already_see.append(factor_node.id)
            relationship_properties = buildRelationShipProperties({"type": "pipeline_to_factor"})
            relationship = buildRelationShip(pipeline_node, factor_node, relationship_properties)
            pipeline_graph = add_edge(pipeline_graph, relationship)
async def import_topic(topic: Topic):
    """Create the topic's schema when it is new, otherwise update the existing one."""
    existing = get_topic_by_id(topic.topicId)
    if existing is None:
        create_topic_schema(topic)
    else:
        update_topic_schema(topic.topicId, topic)
def insert_topic(instance, context):
    """Insert every mapped record into the target topic.

    Returns (context, status, trigger_pipeline_data_list) where the list holds
    the inserted rows for downstream pipeline triggering.
    """
    raw_data, old_value = instance[pipeline_constants.NEW], instance[pipeline_constants.OLD]
    unit_action_status = InsertAction(type=action.type)
    start = time.time()
    pipeline_uid = context[PIPELINE_UID]
    unit_action_status.uid = pipeline_uid
    if action.topicId is None:
        raise ValueError("action.topicId is empty {0}".format(action.name))
    target_topic = get_topic_by_id(action.topicId)
    log.info("run target_topic {0}".format(target_topic.name))
    mapping_results = run_mapping_rules(action.mapping, target_topic, raw_data,
                                        pipeline_topic, context)
    log.info("mapping_results:{0}".format(mapping_results))
    unit_action_status.mapping = mapping_results
    trigger_pipeline_data_list = []
    for item in mapping_results:
        trigger_pipeline_data_list.append(
            insert_topic_data(target_topic.name, item, pipeline_uid))
        unit_action_status.insertCount += 1
    unit_action_status.complete_time = time.time() - start
    return context, unit_action_status, trigger_pipeline_data_list
def merge_topic(instance, context):
    """Update an existing target-topic row per mapping result; never inserts.

    Raises when a mapping result matches no row. Returns (context, status,
    trigger_pipeline_data_list) with the updated rows.
    """
    raw_data, old_value = instance[pipeline_constants.NEW], instance[pipeline_constants.OLD]
    unit_action_status = MergeRowAction(type=action.type)
    start = time.time()
    pipeline_uid = context[PIPELINE_UID]
    unit_action_status.uid = pipeline_uid
    if action.topicId is None:
        raise ValueError("action.topicId is empty {0}".format(action.name))
    target_topic = get_topic_by_id(action.topicId)
    mapping_results = run_mapping_rules(action.mapping, target_topic, raw_data,
                                        pipeline_topic, context)
    joint_type, where_condition = build_query_conditions(
        action.by, pipeline_topic, raw_data, target_topic, context)
    unit_action_status.whereConditions = where_condition
    unit_action_status.mapping = mapping_results
    trigger_pipeline_data_list = []
    for index, mapping_result in enumerate(mapping_results):
        # Conditions are indexed per mapping result.
        mongo_query = __build_mongo_query(joint_type, index_conditions(where_condition, index))
        target_data = query_topic_data(mongo_query, target_topic.name)
        if target_data is None:
            raise Exception("can't insert data in merge row action ")
        trigger_pipeline_data_list.append(
            update_topic_data(target_topic.name, mapping_result, target_data,
                              pipeline_uid, mongo_query))
        unit_action_status.updateCount += 1
    unit_action_status.complete_time = time.time() - start
    return context, unit_action_status, trigger_pipeline_data_list
def exists():
    """Set a pipeline variable to the string 'true'/'false' depending on whether
    a row matching the action's conditions exists in the target topic."""
    start = time.time()
    status = ActionStatus()
    status.type = "exists"
    status.uid = action_context.get_pipeline_id()
    previous_data = action_context.previousOfTriggerData
    current_data = action_context.currentOfTriggerData
    action = action_context.action
    pipeline_topic = action_context.get_pipeline_context().pipelineTopic
    target_topic = get_topic_by_id(action.topicId)
    variables = get_variables(action_context)
    where_ = parse_parameter_joint(action.by, current_data, variables, pipeline_topic, target_topic)
    status.by = where_
    target_data = query_topic_data(where_, target_topic, action_context.get_current_user())
    # String flags, not booleans — downstream conditions compare against 'true'/'false'.
    set_variable(action_context, action.variableName,
                 'true' if target_data is not None else 'false')
    status.completeTime = time.time() - start
    return status, []
def get_topic_sub_query_with_space_filter(console_subject, current_user):
    """Build a per-topic sub-query for each enabled space filter of the subject's space.

    Returns a lookup function topic_id -> {"alias": topic_name, "query": sub_query}
    or None when the topic has no filter.
    """
    console_space = load_console_space_by_subject_id(console_subject.subjectId, current_user)
    filters: List[SpaceFilter] = get_filters_by_id(console_space.spaceId, current_user)
    if filters is None:
        filters = []
    topic_sub_query = {}
    for space_filter in filters:
        if not space_filter.enabled:
            continue
        topic = get_topic_by_id(space_filter.topicId)
        table = build_table_by_topic_id(space_filter.topicId)
        sub_query = (PrestoQuery
                     .from_(table)
                     .select('*')
                     .where(build_space_filter_where(space_filter.joint)))
        topic_sub_query[space_filter.topicId] = {"alias": topic.name, "query": sub_query}

    def lookup(topic_id):
        return topic_sub_query.get(topic_id, None)

    return lookup
def insert_topic(instance, context):
    """Insert all mapped records into the target topic, logging the mapping on
    an InsertAction attached to the returned UnitStatus."""
    raw_data, old_value = instance[pipeline_constants.NEW], instance[pipeline_constants.OLD]
    unit_action_status = UnitStatus(type=action.type)
    start = time.time()
    pipeline_uid = context[PIPELINE_UID]
    unit_action_status.uid = pipeline_uid
    if action.topicId is None:
        raise ValueError("action.topicId is empty {0}".format(action.name))
    insert_action = InsertAction()
    target_topic = get_topic_by_id(action.topicId)
    mapping_results, mapping_logs = run_mapping_rules(
        action.mapping, target_topic, raw_data, pipeline_topic)
    for item in mapping_results:
        insert_topic_data(target_topic.name, item, pipeline_uid)
        unit_action_status.insertCount += 1
    insert_action.mapping = mapping_logs
    unit_action_status.action = insert_action
    unit_action_status.complete_time = time.time() - start
    return context, unit_action_status
def parse_parameter(parameter_: Parameter): if parameter_.kind == "topic": topic = get_topic_by_id(parameter_.topicId) topic_name = build_collection_name(topic.name) factor = get_factor(parameter_.factorId, topic) factor_name = factor.name return f'{factor_name.upper()}' elif parameter_.kind == 'constant': return parameter_.value elif parameter_.kind == 'computed': if parameter_.type == Operator.add: result = None for item in parameter_.parameters: if result: next_ = parse_parameter(item) result = f'{result}+{next_}' else: result = parse_parameter(item) return result elif parameter_.type == Operator.subtract: result = None for item in parameter_.parameters: if result: next_ = parse_parameter(item) result = f'{result}-{next_}' else: result = parse_parameter(item) return result elif parameter_.type == Operator.multiply: result = None for item in parameter_.parameters: if result: next_ = parse_parameter(item) result = f'{result}*{next_}' else: result = parse_parameter(item) return result elif parameter_.type == Operator.divide: result = None for item in parameter_.parameters: if result: next_ = parse_parameter(item) result = f'{result}/{next_}' else: result = parse_parameter(item) return result elif parameter_.type == Operator.modulus: result = None for item in parameter_.parameters: if result: next_ = parse_parameter(item) result = f'{result}%{next_}' else: result = parse_parameter(item) return result elif parameter_.type == "case-then": return parse_oracle_case_then(parameter_.parameters) else: raise Exception("operator is not supported")
def build_table_by_topic_id(topic_id) -> Table:
    """Resolve a topic id to a pypika Table qualified by its data source.

    The schema is named after the data source; the catalog comes from the
    data source code (rendered literally).
    """
    topic = get_topic_by_id(topic_id)
    collection_name = build_collection_name(topic.name)
    datasource: DataSource = load_data_source_by_id(topic.dataSourceId)
    schema = Schema(datasource.name, LiteralValue(datasource.dataSourceCode))
    return Table(collection_name, schema)
def read_factor():
    """Read one factor value (raw or aggregated) into a pipeline variable.

    With no arithmetic, exactly one matching row is required; with sum/count/avg
    an aggregate query is issued instead. Raises ValueError when the match
    contract is violated.
    """
    start = time.time()
    status = ActionStatus()
    status.type = "read-factor"
    status.uid = action_context.unitContext.stageContext.pipelineContext.pipeline.pipelineId
    previous_data = action_context.previousOfTriggerData
    current_data = action_context.currentOfTriggerData
    action = action_context.action
    pipeline_topic = action_context.unitContext.stageContext.pipelineContext.pipelineTopic
    target_topic = get_topic_by_id(action.topicId)
    variables = get_variables(action_context)
    where_ = parse_parameter_joint(action.by, current_data, variables, pipeline_topic, target_topic)
    status.by = where_
    target_factor = get_factor(action.factorId, target_topic)
    if action.arithmetic == "none" or action.arithmetic is None:
        target_data = query_topic_data(where_, target_topic, action_context.get_current_user())
        if target_data is None:
            raise ValueError("read factor action must match one factor record")
        if isinstance(target_data, list):
            raise ValueError("read factor action should just get one factor record")
        read_value = target_data[target_factor.name]
        set_variable(action_context, action.variableName, read_value)
        status.value = read_value
    else:
        read_value = None
        # The aggregate name passed to the query matches the arithmetic keyword.
        if action.arithmetic in ("sum", "count", "avg"):
            read_value = query_topic_data_aggregate(
                where_, {target_factor.name: action.arithmetic},
                target_topic, action_context.get_current_user())
        if read_value is None:
            raise ValueError("read factor action must match one factor record at least")
        set_variable(action_context, action.variableName, read_value)
        status.value = read_value
    status.completeTime = time.time() - start
    return status, []
def __get_factor_name_by_alias(column_name_list, console_subject):
    """Map dataset column aliases to the names of their underlying factors."""
    columns = console_subject.dataset.columns
    factor_names = []
    for alias in column_name_list:
        column = __find_column_by_alias(alias, columns)
        topic = get_topic_by_id(column.parameter.topicId)
        factor_names.append(get_factor(column.parameter.factorId, topic).name)
    return factor_names
def write_factor(instance, context):
    """Write a computed factor value into matching rows of the target topic.

    Applies the action's arithmetic to the source values and issues a
    find-and-modify carrying both the update and the condition factors.
    Returns (context, status, trigger_pipeline_data_list).
    """
    raw_data, old_value = instance[pipeline_constants.NEW], instance[pipeline_constants.OLD]
    unit_action_status = WriteFactorAction(type=action.type)
    start = time.time()
    # BUG FIX: hoisted out of the `if` below — the original defined this list
    # only when action.topicId was set, yet returned it unconditionally,
    # raising NameError for actions without a topicId.
    trigger_pipeline_data_list = []
    if action.topicId is not None:
        target_topic = get_topic_by_id(action.topicId)
        # todo for find factor
        conditions = action.by
        joint_type, where_condition = build_query_conditions(
            conditions, pipeline_topic, raw_data, target_topic, context)
        target_factor = get_factor(action.factorId, target_topic)
        source_value_list = __run_arithmetic(
            action.arithmetic,
            get_source_value_list(pipeline_topic, raw_data, action.source,
                                  target_factor, context))
        update_data = {target_factor.name: source_value_list}
        mongo_query = __build_mongo_query(joint_type, where_condition)
        condition_factors = {
            "$set": get_condition_factor_value(raw_data, where_condition, joint_type)
        }
        target_data = query_topic_data(mongo_query, target_topic.name)
        # When an old value exists the update must subtract it first; the two
        # original branches differed only in that extra argument.
        if old_value is not None:
            old_value_list = get_source_value_list(pipeline_topic, old_value,
                                                   action.source, target_factor, context)
            mongo_update = __build_mongo_update(update_data, action.arithmetic,
                                                target_factor, old_value_list)
        else:
            mongo_update = __build_mongo_update(update_data, action.arithmetic,
                                                target_factor)
        trigger_pipeline_data_list.append(
            find_and_modify_topic_data(
                target_topic.name, mongo_query,
                __merge_condition_factor(mongo_update, condition_factors),
                target_data))
    unit_action_status.complete_time = time.time() - start
    return context, unit_action_status, trigger_pipeline_data_list
def write_factor(instance, context):
    """Write a source-derived factor value into the target topic.

    Inserts a new row (value + condition factors) when no row matches;
    otherwise applies a mongo update, subtracting the old value when present.
    """
    raw_data, old_value = instance[pipeline_constants.NEW], instance[pipeline_constants.OLD]
    unit_action_status = UnitStatus(type=action.type)
    start = time.time()
    pipeline_uid = context[PIPELINE_UID]
    # TODO action_log
    if action.topicId is not None:
        target_topic = get_topic_by_id(action.topicId)
        # todo for find factor
        factor_dict = build_factor_dict(target_topic)
        joint_type, where_condition = build_query_conditions(
            action.by, pipeline_topic, raw_data, target_topic, context)
        source_value_list = get_source_value_list(pipeline_topic, raw_data, action.source)
        target_factor = get_factor(action.factorId, target_topic)
        update_data = {target_factor.name: source_value_list}
        mongo_query = __build_mongo_query(joint_type, where_condition)
        target_data = query_topic_data(mongo_query, target_topic.name)
        if target_data is None:
            # No match: insert value plus the condition factors as a new row.
            condition_factors = get_condition_factor_value(raw_data, where_condition, joint_type)
            insert_data = {target_factor.name: source_value_list, **condition_factors}
            log.info("Insert data : {0}".format(insert_data))
            insert_topic_data(target_topic.name, insert_data, pipeline_uid)
        else:
            # Both original branches called the same update; they differed only
            # in whether an old-value list was supplied (None otherwise).
            if old_value is not None:
                old_value_list = get_source_value_list(pipeline_topic, old_value, action.source)
            else:
                old_value_list = None
            find_and_modify_topic_data(
                target_topic.name, mongo_query,
                __build_mongo_update(update_data, action.arithmetic,
                                     target_factor, old_value_list),
                target_data)
    unit_action_status.complete_time = time.time() - start
    return context, unit_action_status
def parse_parameter(parameter: Parameter, factor=None): if parameter.kind == "topic": topic = get_topic_by_id(parameter.topicId) topic_col_name = build_collection_name(topic.name) factor = get_factor(parameter.factorId, topic) return Table(topic_col_name)[factor.name] elif parameter.kind == 'constant': # if factor.type =="text": # return "\'"+parameter.value+"\'" # else: return parameter.value elif parameter.kind == 'computed': if parameter.type == Operator.add: result = None for item in parameter.parameters: if result: result = operator.add(result, parse_parameter(item)) else: result = parse_parameter(item) return result elif parameter.type == Operator.subtract: result = None for item in parameter.parameters: if result: result = operator.sub(result, parse_parameter(item)) else: result = parse_parameter(item) return result elif parameter.type == Operator.multiply: result = None for item in parameter.parameters: if result: result = operator.mul(result, parse_parameter(item)) else: result = parse_parameter(item) return result elif parameter.type == Operator.divide: result = None for item in parameter.parameters: if result: result = operator.truediv(result, parse_parameter(item)) else: result = parse_parameter(item) return result elif parameter.type == Operator.modulus: result = None for item in parameter.parameters: if result: result = operator.mod(result, parse_parameter(item)) else: result = parse_parameter(item) return result else: # TODO more operator support raise Exception("operator is not supported")
def buildPipelineGraph(pipeline, already_see, pipeline_graph):
    """Build lineage graph nodes/edges for a pipeline, its source topic, and
    every topic/factor touched by its actions.

    Write-style actions route through insert_or_update, read-style through
    read_; copy-to-memory, exist and alarm actions contribute nothing.
    """
    pipeline_node = buildPipelineNode(pipeline)
    pipeline_graph.nodes.append(pipeline_node)
    if get_topic_by_id(pipeline.topicId) is None:
        return
    source_topic_node = buildTopicNode(get_topic_by_id(pipeline.topicId))
    if source_topic_node.id not in already_see:
        pipeline_graph.nodes.append(source_topic_node)
        already_see.append(source_topic_node.id)
        relationship_properties = buildRelationShipProperties({"type": "topic_to_pipeline"})
        relationship = buildRelationShip(source_topic_node, pipeline_node, relationship_properties)
        pipeline_graph = add_edge(pipeline_graph, relationship)
    # Set-based dispatch replaces the original twelve-branch elif chain.
    write_actions = {
        pipeline_constants.INSERT_OR_MERGE_ROW, pipeline_constants.INSERT_ROW,
        pipeline_constants.WRITE_FACTOR, pipeline_constants.MERGE_ROW,
    }
    read_actions = {
        pipeline_constants.READ_ROW, pipeline_constants.READ_ROWS,
        pipeline_constants.READ_FACTORS, pipeline_constants.READ_FACTOR,
    }
    ignored_actions = {
        pipeline_constants.COPY_TO_MEMORY, pipeline_constants.EXIST,
        pipeline_constants.ALARM,
    }
    for stage in pipeline.stages:
        for unit in stage.units:
            for action in unit.do:
                if action.type in write_actions:
                    insert_or_update(action, already_see, pipeline_graph, pipeline_node)
                elif action.type in read_actions:
                    read_(action, already_see, pipeline_graph, pipeline_node)
                elif action.type in ignored_actions:
                    pass
                else:
                    # BUG FIX: the original did `raise ("..." + action.type)`,
                    # which raises TypeError("exceptions must derive from
                    # BaseException") instead of reporting the bad action type.
                    raise ValueError("action not support:" + action.type)
def __to_typed_values(csv_text):
    """Split a comma-separated string into values, converting all-digit tokens
    to int (shared by the in/not-in branches below)."""
    return [int(v) if v.isdigit() else v for v in csv_text.split(',')]


def _filter_criterion(filter: Filter) -> any:
    """Translate a Filter into a pypika criterion expression.

    Comparison operators coerce the right side to int where the original did;
    unsupported operators raise.
    """
    left = parse_parameter(filter.left)
    topic = get_topic_by_id(filter.left.topicId)
    factor = get_factor(filter.left.factorId, topic)
    right = parse_parameter(filter.right, factor)
    if filter.operator == "equals":
        return operator.eq(left, right)
    elif filter.operator == "not-equals":
        if right.isdigit():
            return left.__ne__(int(right))
        return left.__ne__(right)
    elif filter.operator == 'empty':
        return left.isnull()
    elif filter.operator == 'not-empty':
        return left.notnull()
    elif filter.operator == "more":
        return operator.gt(left, int(right))
    elif filter.operator == "more-equals":
        return operator.ge(left, int(right))
    elif filter.operator == "less":
        return operator.lt(left, int(right))
    elif filter.operator == "less-equals":
        return operator.le(left, int(right))
    elif filter.operator == 'in':
        # Duplicated coercion loops of the original extracted into a helper.
        return left.isin(__to_typed_values(right))
    elif filter.operator == 'not-in':
        return left.notin(__to_typed_values(right))
    else:
        # TODO more operator support
        raise Exception("operator is not supported")
def _parse_parameter(parameter_: Parameter): if parameter_.kind == "topic": topic = get_topic_by_id(parameter_.topicId) # topic_name = build_collection_name(topic.name) factor = get_factor(parameter_.factorId, topic) return f'${factor.name}' elif parameter_.kind == 'constant': return parameter_.value elif parameter_.kind == 'computed': if parameter_.type == Operator.add: result = None for item in parameter_.parameters: if result: result = {"$add": [result, _parse_parameter(item)]} else: result = _parse_parameter(item) return result elif parameter_.type == Operator.subtract: result = None for item in parameter_.parameters: if result: result = {"$subtract": [result, _parse_parameter(item)]} else: result = _parse_parameter(item) return result elif parameter_.type == Operator.multiply: result = None for item in parameter_.parameters: if result: result = {"$multiply": [result, _parse_parameter(item)]} else: result = _parse_parameter(item) return result elif parameter_.type == Operator.divide: result = None for item in parameter_.parameters: if result: result = {"$divide": [result, _parse_parameter(item)]} else: result = _parse_parameter(item) return result elif parameter_.type == Operator.modulus: result = None for item in parameter_.parameters: if result: result = {"$mod": [result, _parse_parameter(item)]} else: result = _parse_parameter(item) return result elif parameter_.type == "case-then": return parse_mongo_case_then(parameter_.parameters) else: raise Exception("operator is not supported")
async def build_pipeline_index_list(pipeline: Pipeline, pipeline_index_dict, current_user):
    """Collect the index entries produced by every action in the pipeline."""
    source_topic = get_topic_by_id(pipeline.topicId, current_user)
    pipeline_index_list = []
    temporary_context_dict = {}
    for stage in pipeline.stages:
        for unit in stage.units:
            for action in unit.do:
                result_dict = __process_by_action_type(
                    action, pipeline, stage, unit,
                    pipeline_index_dict, temporary_context_dict, current_user)
                if result_dict:
                    pipeline_index_list.extend(result_dict.values())
    return pipeline_index_list
def run_pipeline(pipeline: Pipeline, data):
    """Execute an enabled pipeline against trigger data, guarding every level
    (pipeline/stage/unit) with its `when` condition.

    Records old/new trigger values and per-stage statuses on the run status;
    monitor data is synced unless the source topic is a SYSTEM topic.
    """
    pipeline_status = PipelineRunStatus(pipelineId=pipeline.pipelineId,
                                        uid=get_surrogate_key(),
                                        startTime=datetime.now(),
                                        topicId=pipeline.pipelineId)
    pipeline_status.oldValue = data[pipeline_constants.OLD]
    pipeline_status.newValue = data[pipeline_constants.NEW]
    if pipeline.enabled:
        pipeline_topic = get_topic_by_id(pipeline.topicId)
        log.info("start run pipeline {0}".format(pipeline.name))
        context = {PIPELINE_UID: pipeline_status.uid}
        if __check_condition(pipeline, pipeline_topic, data):
            try:
                start = time.time()
                for stage in pipeline.stages:
                    if __check_condition(stage, pipeline_topic, data):
                        stage_run_status = StageRunStatus()
                        stage_run_status.name = stage.name
                        log.info("stage name {0}".format(stage.name))
                        for unit in stage.units:
                            if unit.do is not None and __check_condition(unit, pipeline_topic, data):
                                unit_run_status = UnitRunStatus()
                                for action in unit.do:
                                    # Dispatch to the dynamic action implementation.
                                    # TODO [future] custom folder
                                    func = find_action_type_func(
                                        convert_action_type(action.type), action, pipeline_topic)
                                    out_result, unit_action_status = func(data, context)
                                    log.debug("out_result :{0}".format(out_result))
                                    context = {**context, **out_result}
                                    unit_run_status.actions.append(unit_action_status)
                                stage_run_status.units.append(unit_run_status)
                            else:
                                log.info("action stage unit {0} do is None".format(stage.name))
                        pipeline_status.stages.append(stage_run_status)
                elapsed_time = time.time() - start
                pipeline_status.completeTime = elapsed_time
                pipeline_status.status = FINISHED
                log.info("pipeline_status {0} time :{1}".format(pipeline.name, elapsed_time))
            except Exception as e:
                log.exception(e)
                pipeline_status.error = traceback.format_exc()
                pipeline_status.status = ERROR
                log.error(pipeline_status)
            finally:
                if pipeline_topic.kind is not None and pipeline_topic.kind == pipeline_constants.SYSTEM:
                    log.debug("pipeline_status is {0}".format(pipeline_status))
                else:
                    print("sync pipeline monitor")
                    # FIX: this call was stranded at module level after the
                    # function body; it belongs here, on the non-SYSTEM path.
                    watchmen.monitor.services.pipeline_monitor_service.sync_pipeline_monitor_data(
                        pipeline_status)
def _join(q: QueryBuilder, join: Join) -> QueryBuilder:
    """Append an inner/left/right join between the join's two topics, matched
    on the configured factors. Unknown join types fall through (returns None,
    as in the original)."""
    # left side
    left_topic = get_topic_by_id(join.topicId)
    left_table = Table(build_collection_name(left_topic.name)).as_(left_topic.name)
    left_factor = get_factor(join.factorId, left_topic)
    # right side
    right_topic = get_topic_by_id(join.secondaryTopicId)
    right_table = Table(build_collection_name(right_topic.name)).as_(right_topic.name)
    right_factor = get_factor(join.secondaryFactorId, right_topic)
    if join.type in (JoinType.inner, JoinType.left, JoinType.right):
        on_condition = operator.eq(left_table[left_factor.name],
                                   right_table[right_factor.name])
        return q.join(right_table, join.type).on(on_condition)
def __add_mapping_to_pipeline_index(mapping_factor, pipeline_index, pipeline_index_dict):
    """Register a MAPPING_TO index entry for the mapping's source topic/factor.

    Raw topics are skipped. The new entry inherits the positional metadata of
    the triggering index and points back at its topic/factor.
    """
    source_topic: Topic = get_topic_by_id(mapping_factor.source.topicId)
    if source_topic is None or source_topic.type == RAW:
        return
    new_index = __get_pipeline_index_in_dict(mapping_factor.source.topicId,
                                             mapping_factor.source.factorId,
                                             pipeline_index.pipelineId,
                                             pipeline_index_dict)
    # Copy positional metadata field-by-field from the triggering entry.
    for attr in ("tenantId", "pipelineName", "stageId", "actionId",
                 "stageName", "unitName", "unitId"):
        setattr(new_index, attr, getattr(pipeline_index, attr))
    new_index.refType = MAPPING_TO
    new_index.mappingToTopicId = pipeline_index.topicId
    new_index.mappingToFactorId = pipeline_index.factorId
def read_row(instance, context):
    """Read a single matching row from a topic into the pipeline context.

    Leaves the context untouched when no row matches.
    """
    raw_data, old_value = instance[pipeline_constants.NEW], instance[pipeline_constants.OLD]
    unit_action_status = UnitActionStatus(type=action.type)
    start = time.time()
    variable_type, context_target_name = process_variable(action.variableName)
    topic = get_topic_by_id(action.topicId)
    joint_type, where_condition = build_query_conditions(
        action.by, pipeline_topic, raw_data, topic, context)
    row = query_topic_data(__build_mongo_query(joint_type, where_condition), topic.name)
    if row is not None:
        context[context_target_name] = row
    unit_action_status.complete_time = time.time() - start
    return context, unit_action_status, []
def topic_handler_in_dataset(
        self) -> Tuple[Union[Field, CustomFunction], ParameterValueType]:
    """Resolve the parameter's topic/factor into a query Field and its value type.

    When the topic participates in the dataset's space filter, the field is
    built against the filter's aliased sub-query; otherwise it is built
    against the topic's own table.

    Returns a (Field, factor value type) tuple.
    """
    param = self.param
    topic = get_topic_by_id(param.topicId)
    table = None
    if self.topic_space_filter:
        # call the filter once and reuse the result
        # (the original invoked it twice for the same topicId)
        space = self.topic_space_filter(param.topicId)
        if space:
            table = AliasedQuery(space["alias"])
    if table is None:
        table = build_table_by_topic_id(param.topicId)
    factor = get_factor(param.factorId, topic)
    return Field(factor.name, None, table), factor.type
def run_pipeline(pipeline: Pipeline, data):
    """Execute *pipeline* against the triggering *data* (NEW/OLD value pair).

    Runs every stage whose condition matches, accumulating a
    PipelineRunStatus.  On success, downstream pipelines are triggered for
    non-system topics; on failure the status records the traceback.  Monitor
    data is synced for non-system topics in all cases.
    """
    pipeline_status = PipelineRunStatus(
        pipelineId=pipeline.pipelineId,
        uid=get_surrogate_key(),
        startTime=datetime.now().replace(tzinfo=None),
        # BUG FIX: topicId was copied from pipeline.pipelineId; the run
        # status should reference the trigger topic (pipeline.topicId is
        # what get_topic_by_id uses below) -- confirm against consumers
        topicId=pipeline.topicId)
    pipeline_status.oldValue = data[pipeline_constants.OLD]
    pipeline_status.newValue = data[pipeline_constants.NEW]
    if not pipeline.enabled:
        return
    pipeline_topic = get_topic_by_id(pipeline.topicId)
    log.info("start run pipeline {0}".format(pipeline.name))
    context = {PIPELINE_UID: pipeline_status.uid}
    start = time.time()
    if __check_condition(pipeline, pipeline_topic, data, context):
        try:
            pipeline_trigger_merge_list = []
            for stage in pipeline.stages:
                if __check_condition(stage, pipeline_topic, data, context):
                    stage_run_status = StageRunStatus(name=stage.name)
                    log.info("stage name {0}".format(stage.name))
                    context, pipeline_trigger_merge_list = run_unit(
                        context, data, pipeline_status, pipeline_topic,
                        pipeline_trigger_merge_list, stage, stage_run_status)
            elapsed_time = time.time() - start
            pipeline_status.completeTime = elapsed_time
            pipeline_status.status = FINISHED
            log.debug("pipeline_status {0} time :{1}".format(
                pipeline.name, elapsed_time))
            # system topics never cascade into downstream pipelines
            if pipeline_topic.kind is None or pipeline_topic.kind != pipeline_constants.SYSTEM:
                __trigger_all_pipeline(pipeline_trigger_merge_list)
        except Exception as e:
            log.exception(e)
            pipeline_status.error = traceback.format_exc()
            pipeline_status.status = ERROR
            log.error(pipeline_status)
        finally:
            # monitor data is only synced for non-system topics
            if pipeline_topic.kind is not None and pipeline_topic.kind == pipeline_constants.SYSTEM:
                log.debug("pipeline_status is {0}".format(pipeline_status))
            else:
                watchmen.monitor.services.pipeline_monitor_service.sync_pipeline_monitor_data(
                    pipeline_status)
def parse_parameter(parameter: Parameter, factor=None):
    """Translate a report/dataset Parameter into a {'type', 'value'} dict.

    - kind == "topic":    value is the aliased table column for the factor.
    - kind == "constant": '{&monthDiff(..)}', '{&dayDiff(..)}' and
      '{&yearDiff(..)}' markers become date-diff expressions; any other
      constant is passed through as text.
    - kind == "computed": the first two sub-parameters are combined with
      _arithmetic_process.
    Returns None for unknown kinds, or for a computed parameter with fewer
    than two sub-parameters (original behavior).
    """
    if parameter.kind == "topic":
        topic = get_topic_by_id(parameter.topicId)
        topic_col_name = build_collection_name(topic.name)
        factor = get_factor(parameter.factorId, topic)
        return {
            'type': factor.type,
            'value': Table(topic_col_name).as_(topic.name)[factor.name]
        }
    elif parameter.kind == 'constant':
        value_ = parameter.value.strip()
        # the three {&xxxDiff(a,b)} markers differ only in the unit name;
        # handle them in one loop instead of three copy-pasted branches
        for unit in ("month", "day", "year"):
            marker = "{&" + unit + "Diff"
            if value_.startswith(marker):
                args_str = value_.replace(marker + "(", "").replace(")}", "")
                return {"type": "number", "value": _date_diff(unit, args_str)}
        # any other constant is passed through as plain text
        return {'type': "text", 'value': parameter.value}
    elif parameter.kind == 'computed':
        # NOTE(review): only the first two sub-parameters are combined; the
        # original returned after the first pair (behavior preserved).
        left = None
        for item in parameter.parameters:
            if left:
                right = parse_parameter(item)
                return _arithmetic_process(parameter.type, left, right)
            left = parse_parameter(item)
        return None
def read_factor(instance, context):
    """Read a single factor value from the target topic into the context.

    Queries the topic referenced by the action with the action's ``by``
    conditions; when the matching row contains the factor, the value is
    stored in the context under the action's variable name and logged via
    build_action_log.  Returns (context, unit action status).
    """
    raw_data, old_value = instance[pipeline_constants.NEW], instance[pipeline_constants.OLD]
    unit_action_status = UnitActionStatus(type=action.type)
    start = time.time()
    variable_type, context_target_name = process_variable(action.variableName)
    topic = get_topic_by_id(action.topicId)
    factor = get_factor(action.factorId, topic)
    joint_type, where_condition = build_query_conditions(
        action.by, pipeline_topic, raw_data, topic, context)
    mongo_query = __build_mongo_query(joint_type, where_condition)
    target_data = query_topic_data(mongo_query, topic.name)
    # BUG FIX: query_topic_data may return None when no row matches (the
    # sibling read_row guards for this); the original indexed target_data
    # unconditionally and raised TypeError on a miss.
    if target_data is None:
        log.warn("target_data is empty ,conditions {0}".format(mongo_query))
    elif factor.name in target_data:
        read_value = target_data[factor.name]
        context[context_target_name] = read_value
        build_action_log(factor, read_value, topic, unit_action_status)
    elapsed_time = time.time() - start
    unit_action_status.complete_time = elapsed_time
    return context, unit_action_status