def run_pipeline(pipeline: Pipeline, data):
    # Run a single pipeline against the triggering topic data (old/new values),
    # recording a PipelineRunStatus and forwarding it to the monitor unless the
    # pipeline's topic is a SYSTEM topic.
    # NOTE(review): SOURCE formatting was lost; indentation below is reconstructed
    # from statement order — confirm against version control.
    pipeline_status = PipelineRunStatus(pipelineId=pipeline.pipelineId, uid=get_surrogate_key(),
                                        startTime=datetime.now())
    pipeline_status.oldValue = data[pipeline_constants.OLD]
    pipeline_status.newValue = data[pipeline_constants.NEW]
    if pipeline.enabled:
        pipeline_topic = get_topic_by_id(pipeline.topicId)
        # TODO pipeline when condition
        log.info("start run pipeline {0}".format(pipeline.name))
        # Shared mutable context passed through every action; seeded with the run uid.
        context = {PIPELINE_UID: pipeline_status.uid}
        if __check_pipeline_condition(pipeline, pipeline_topic, data):
            try:
                start = time.time()
                for stage in pipeline.stages:
                    stage_run_status = StageRunStatus()
                    stage_run_status.name = stage.name
                    log.info("stage name {0}".format(stage.name))
                    for unit in stage.units:
                        # TODO __check_when_condition
                        # if unit.on is not None:
                        #     result = __check_when_condition(unit.on.children, data)
                        #     if result:
                        #         continue
                        if unit.do is not None:
                            unit_run_status = UnitRunStatus()
                            for action in unit.do:
                                # Resolve the action implementation dynamically by action type.
                                func = find_action_type_func(convert_action_type(action.type), action, pipeline_topic)
                                # call dynamic action in action folder
                                # TODO [future] custom folder
                                out_result, unit_action_status = func(data, context)
                                log.debug("out_result :{0}".format(out_result))
                                # Merge the action's output into the shared context for later actions.
                                context = {**context, **out_result}
                                unit_run_status.actions.append(unit_action_status)
                            stage_run_status.units.append(unit_run_status)
                        else:
                            log.info("action stage unit {0} do is None".format(stage.name))
                # NOTE(review): as reconstructed, only the LAST stage_run_status is
                # appended here — later versions append inside the stage loop; this
                # looks like a defect in this early version. Confirm original indent.
                elapsed_time = time.time() - start
                pipeline_status.stages.append(stage_run_status)
                pipeline_status.completeTime = elapsed_time
                pipeline_status.status = FINISHED
                log.info("pipeline_status {0} time :{1}".format(pipeline.name, elapsed_time))
            except Exception as e:
                # Any action failure marks the whole run as ERROR and captures the traceback.
                log.exception(e)
                pipeline_status.error = traceback.format_exc()
                pipeline_status.status = ERROR
                log.error(pipeline_status)
            finally:
                # log.info("insert_pipeline_monitor")
                # SYSTEM topics are only logged, never fed back into the monitor
                # pipeline (that would loop monitoring onto itself).
                if pipeline_topic.kind is not None and pipeline_topic.kind == pipeline_constants.SYSTEM:
                    log.info("pipeline_status is {0}".format(pipeline_status))
                else:
                    # if pipeline_status.oldValue is not None:
                    #     print(pipeline_status.json())
                    sync_pipeline_monitor_data(pipeline_status)
def sync_pipeline_monitor_data(pipeline_monitor: PipelineRunStatus):
    """Persist one pipeline run status into the raw monitor topic and re-trigger pipelines.

    The run status is wrapped in a TopicEvent for the ``raw_pipeline_monitor``
    topic, audited, saved, and then fed back through the pipeline trigger as a
    NEW-only insert event.

    :param pipeline_monitor: finished run status to record
    :raises Exception: if the monitor topic is not defined
    """
    topic_event = TopicEvent(code="raw_pipeline_monitor", data=pipeline_monitor.dict())
    topic = get_topic(topic_event.code)
    if topic is None:
        # Fix: include the topic code in the message (consistent with the
        # storage-template variant of this function elsewhere in the file).
        raise Exception(topic_event.code + " topic name does not exist")
    add_audit_columns(topic_event.data, INSERT)
    save_topic_instance(topic_event.code, topic_event.data)
    # OLD is None because a monitor record is always a fresh insert.
    watchmen.pipeline.index.trigger_pipeline(topic_event.code,
                                             {pipeline_constants.NEW: topic_event.data,
                                              pipeline_constants.OLD: None},
                                             TriggerType.insert)
def run_pipeline(pipeline: Pipeline, data):
    """Run a single pipeline over the triggering data and record its run status.

    Stages are executed through ``run_unit``; trigger data produced by actions is
    collected and, for non-SYSTEM topics, fanned out to downstream pipelines.
    The run status is always pushed to the pipeline monitor (non-SYSTEM topics).

    :param pipeline: pipeline definition to execute (skipped when disabled)
    :param data: dict holding the OLD and NEW values of the triggering record
    """
    pipeline_status = PipelineRunStatus(
        pipelineId=pipeline.pipelineId,
        uid=get_surrogate_key(),
        startTime=datetime.now().replace(tzinfo=None),
        # Fix: was pipeline.pipelineId — the status must reference the source
        # topic (the later version of this function uses pipeline.topicId).
        topicId=pipeline.topicId)
    pipeline_status.oldValue = data[pipeline_constants.OLD]
    pipeline_status.newValue = data[pipeline_constants.NEW]
    if pipeline.enabled:
        pipeline_topic = get_topic_by_id(pipeline.topicId)
        log.info("start run pipeline {0}".format(pipeline.name))
        # Shared context threaded through all units; seeded with the run uid.
        context = {PIPELINE_UID: pipeline_status.uid}
        start = time.time()
        if __check_condition(pipeline, pipeline_topic, data, context):
            try:
                pipeline_trigger_merge_list = []
                for stage in pipeline.stages:
                    if __check_condition(stage, pipeline_topic, data, context):
                        stage_run_status = StageRunStatus(name=stage.name)
                        log.info("stage name {0}".format(stage.name))
                        # run_unit returns the updated context and the accumulated
                        # downstream trigger data list.
                        context, pipeline_trigger_merge_list = run_unit(
                            context, data, pipeline_status, pipeline_topic,
                            pipeline_trigger_merge_list, stage, stage_run_status)
                elapsed_time = time.time() - start
                pipeline_status.completeTime = elapsed_time
                pipeline_status.status = FINISHED
                log.debug("pipeline_status {0} time :{1}".format(pipeline.name, elapsed_time))
                # SYSTEM topics must not cascade, otherwise monitoring would
                # trigger itself in a loop.
                if pipeline_topic.kind is None or pipeline_topic.kind != pipeline_constants.SYSTEM:
                    __trigger_all_pipeline(pipeline_trigger_merge_list)
            except Exception as e:
                log.exception(e)
                pipeline_status.error = traceback.format_exc()
                pipeline_status.status = ERROR
                log.error(pipeline_status)
            finally:
                if pipeline_topic.kind is not None and pipeline_topic.kind == pipeline_constants.SYSTEM:
                    log.debug("pipeline_status is {0}".format(pipeline_status))
                else:
                    watchmen.monitor.services.pipeline_monitor_service.sync_pipeline_monitor_data(
                        pipeline_status)
def create_raw_pipeline_monitor():
    """Create the ``topic_raw_pipeline_monitor`` table from the PipelineRunStatus schema.

    Fixed columns (id_, data_, audit timestamps) are added first; then one column
    per property of the pydantic JSON schema, mapped by JSON type:
    no type -> CLOB (unless format ``date-time`` -> Date), boolean -> String(5),
    string -> String(50) (``error`` -> CLOB, ``uid`` -> quoted upper-case UID),
    integer -> Integer, array -> CLOB.

    :raises Exception: for a schema property with an unsupported JSON type
    """
    table = Table('topic_raw_pipeline_monitor', metadata)
    table.append_column(Column(name='id_', type_=String(60), primary_key=True))
    table.append_column(Column(name='data_', type_=CLOB, nullable=True))
    table.append_column(Column(name='sys_inserttime', type_=Date, nullable=True))
    table.append_column(Column(name='sys_updatetime', type_=Date, nullable=True))
    schema = json.loads(PipelineRunStatus.schema_json(indent=1))
    for key, value in schema.get("properties").items():
        column_name = key.lower()
        column_type = value.get("type", None)
        if column_type is None:
            column_format = value.get("format", None)
            if column_format is None:
                # Untyped property (e.g. a $ref to a nested model): store as CLOB.
                table.append_column(Column(name=column_name, type_=CLOB, nullable=True))
            else:
                # NOTE(review): formats other than "date-time" silently add no
                # column — confirm that is intentional.
                if column_format == "date-time":
                    table.append_column(Column(name=column_name, type_=Date, nullable=True))
        elif column_type == "boolean":
            table.append_column(Column(name=column_name, type_=String(5), nullable=True))
        elif column_type == "string":
            if column_name == "error":
                # Tracebacks can be arbitrarily long.
                table.append_column(Column(name=column_name, type_=CLOB, nullable=True))
            elif column_name == "uid":
                # UID is quoted/upper-cased to avoid clashing with reserved words.
                table.append_column(
                    Column(name=column_name.upper(), type_=String(50), quote=True, nullable=True))
            else:
                table.append_column(Column(name=column_name, type_=String(50), nullable=True))
        elif column_type == "integer":
            table.append_column(Column(name=column_name, type_=Integer, nullable=True))
        elif column_type == "array":
            table.append_column(Column(name=column_name, type_=CLOB, nullable=True))
        else:
            # Fix: original message was missing the separating space
            # ("<name>not support type").
            raise Exception(column_name + " not support type")
    table.create(engine)
def sync_pipeline_monitor_data(pipeline_monitor: PipelineRunStatus):
    """Save a pipeline run status into the raw monitor topic and re-trigger pipelines.

    Builds a raw record (payload + tenant/trace columns), flattens the monitored
    fields onto it, inserts it via the topic's storage template, and finally
    feeds the payload back through the pipeline trigger as an insert event.

    :param pipeline_monitor: finished run status to record
    :raises Exception: if the monitor topic is not defined
    """
    topic_code = "raw_pipeline_monitor"
    monitor_dict = pipeline_monitor.dict()
    record = {
        "data_": monitor_dict,
        "tenant_id_": pipeline_monitor.tenantId,
        "traceid": pipeline_monitor.traceId
    }
    monitor_topic = find_monitor_topic(topic_code, pipeline_monitor.currentUser)
    new_trace_id = get_surrogate_key()
    if monitor_topic is None:
        raise Exception(f"{topic_code} topic name does not exist")
    add_audit_columns(record, INSERT)
    # Promote selected monitor fields to top-level columns for querying.
    record.update(get_flatten_field(monitor_dict, monitor_topic.factors))
    get_template_by_datasource_id(monitor_topic.dataSourceId).topic_data_insert_one(
        record, topic_code)
    # A monitor record is always a fresh insert, hence OLD is None.
    trigger_payload = {
        pipeline_constants.NEW: monitor_dict,
        pipeline_constants.OLD: None
    }
    watchmen.pipeline.index.trigger_pipeline(topic_code, trigger_payload,
                                             TriggerType.insert,
                                             pipeline_monitor.currentUser,
                                             new_trace_id)
def run_pipeline(pipeline_context: PipelineContext, current_user):
    # Execute one pipeline described by the incoming context: build a fresh
    # PipelineContext, run every stage via run_stage, then (optionally) fan the
    # collected trigger data out to downstream pipelines and report the run
    # status to the async monitor.
    # NOTE(review): `current_user` parameter is unused here; the user is taken
    # from pipeline_context.currentUser — confirm the parameter is kept only
    # for interface compatibility.
    pipeline = pipeline_context.pipeline
    data = pipeline_context.data
    pipeline_status = PipelineRunStatus(pipelineId=pipeline.pipelineId, uid=get_surrogate_key(),
                                        startTime=datetime.now().replace(tzinfo=None),
                                        topicId=pipeline.topicId,
                                        tenantId=pipeline_context.currentUser.tenantId,
                                        traceId=pipeline_context.traceId,
                                        pipelineName=pipeline.name)
    pipeline_status.oldValue = data.get(pipeline_constants.OLD)
    pipeline_status.newValue = data.get(pipeline_constants.NEW)
    pipeline_status.currentUser = pipeline_context.currentUser
    # NOTE(review): currentUser is dereferenced above (tenantId) BEFORE this
    # None-check, so a None user raises AttributeError first — check ordering.
    if pipeline_context.currentUser is None:
        raise Exception("pipeline_context currentUser is None")
    if pipeline.enabled:
        pipeline_topic = get_topic_by_id(pipeline.topicId)
        pipeline_status.pipelineTopicName = pipeline_topic.name
        # Rebuild the context so the run starts from a clean variable scope.
        pipeline_context = PipelineContext(pipeline, data, pipeline_context.currentUser,
                                           pipeline_context.traceId)
        pipeline_context.variables[PIPELINE_UID] = pipeline_status.uid
        pipeline_context.pipelineTopic = pipeline_topic
        pipeline_context.pipelineStatus = pipeline_status
        start = time.time()
        if should_run(pipeline_context):
            # noinspection PyBroadException
            try:
                for stage in pipeline.stages:
                    stage_run_status = StageRunStatus(name=stage.name)
                    stage_context = StageContext(pipeline_context, stage, stage_run_status)
                    stage_run_status.name = stage.name
                    run_stage(stage_context, stage_run_status)
                    pipeline_status.stages.append(stage_context.stageStatus)
                elapsed_time = time.time() - start
                pipeline_status.completeTime = elapsed_time
                pipeline_status.status = FINISHED
                log.info("run pipeline \"{0}\" spend time \"{1}\" ".format(
                    pipeline.name, elapsed_time))
                # SYSTEM topics never cascade, otherwise monitoring would
                # re-trigger itself.
                if pipeline_topic.kind is None or pipeline_topic.kind != pipeline_constants.SYSTEM:
                    __trigger_all_pipeline(pipeline_context.pipeline_trigger_merge_list,
                                           pipeline_context.currentUser,
                                           pipeline_context.traceId)
            except Exception as e:
                # Any stage failure marks the whole run as ERROR with its traceback.
                trace = traceback.format_exc()
                log.error(trace)
                pipeline_status.error = trace
                pipeline_status.status = ERROR
            finally:
                # Monitoring can be switched off globally via settings.
                if settings.PIPELINE_MONITOR_ON:
                    if pipeline_topic.kind is not None and pipeline_topic.kind == pipeline_constants.SYSTEM:
                        log.debug("pipeline_status is {0}".format(pipeline_status))
                    else:
                        # Fire-and-forget: do not block the run on monitor persistence.
                        asyncio.ensure_future(sync_pipeline_monitor_log(pipeline_status))
                else:
                    log.info("pipeline {0} status is {1}".format(
                        pipeline.name, pipeline_status.status))
def run_pipeline(pipeline: Pipeline, data):
    """Run a single pipeline over the triggering data and persist monitor records.

    Executes every unit action of every stage, collecting per-unit statuses; on
    completion (or failure) inserts unit and pipeline monitor documents unless
    the source topic is a SYSTEM topic.

    :param pipeline: pipeline definition to execute (skipped when disabled)
    :param data: dict holding the OLD and NEW values of the triggering record
    """
    pipeline_status = PipelineRunStatus()
    pipeline_status.topicId = pipeline.topicId
    pipeline_status.pipelineId = pipeline.pipelineId
    pipeline_status.uid = get_surrogate_key()
    pipeline_status.rawId = data[pipeline_constants.NEW][pipeline_constants.ID]
    if pipeline.enabled:
        pipeline_topic = get_topic_by_id(pipeline.topicId)
        # TODO pipeline when condition
        # Shared context threaded through all actions; seeded with the run uid.
        context = {PIPELINE_UID: pipeline_status.uid}
        unit_status_list = []
        try:
            start = time.time()
            for stage in pipeline.stages:
                log.info("stage name {0}".format(stage.name))
                for unit in stage.units:
                    # TODO unit "when" condition (unit.on) not evaluated yet
                    if unit.do is not None:
                        for action in unit.do:
                            # Resolve the action implementation dynamically by type.
                            func = find_action_type_func(
                                convert_action_type(action.type), action, pipeline_topic)
                            # call dynamic action in action folder
                            out_result, unit_status = func(data, context)
                            unit_status.stageName = stage.name
                            unit_status_list.append(unit_status.dict())
                            log.debug("out_result :{0}".format(out_result))
                            context = {**context, **out_result}
                    else:
                        log.info("action stage unit {0} do is None".format(stage.name))
            elapsed_time = time.time() - start
            # Fix: was `complete_time`; the model field is camelCase
            # (every other version in this file writes `completeTime`).
            pipeline_status.completeTime = elapsed_time
            pipeline_status.status = FINISHED
            log.info("pipeline_status {0} time :{1}".format(pipeline.name, elapsed_time))
        except Exception as e:
            log.exception(e)
            pipeline_status.error = traceback.format_exc()
            pipeline_status.status = ERROR
            log.error(pipeline_status)
        finally:
            log.info("insert_pipeline_monitor")
            # SYSTEM topics are only logged, never written back to the monitor
            # store (that would loop monitoring onto itself).
            if pipeline_topic.type == pipeline_constants.SYSTEM:
                log.info("pipeline_status is {0}".format(pipeline_status))
                log.info("unit status is {0}".format(unit_status_list))
            else:
                if unit_status_list:
                    insert_units_monitor(unit_status_list)
                insert_pipeline_monitor(pipeline_status)
def insert_pipeline_monitor(pipeline_status: PipelineRunStatus):
    """Persist one pipeline run status as a document in the monitor collection."""
    document = pipeline_status.dict()
    monitor_pipeline_collection.insert_one(document)
def run_pipeline(pipeline: Pipeline, data):
    """Run a single pipeline over the triggering data, cascading downstream triggers.

    Executes each stage/unit whose condition matches, merges action outputs into
    a shared context, collects downstream trigger data from actions, and fans it
    out via __trigger_all_pipeline for non-SYSTEM topics. The run status is sent
    to the pipeline monitor (non-SYSTEM topics only).

    :param pipeline: pipeline definition to execute (skipped when disabled)
    :param data: dict holding the OLD and NEW values of the triggering record
    """
    pipeline_status = PipelineRunStatus(pipelineId=pipeline.pipelineId, uid=get_surrogate_key(),
                                        startTime=datetime.now(),
                                        # Fix: was pipeline.pipelineId — the status must
                                        # reference the source topic id.
                                        topicId=pipeline.topicId)
    pipeline_status.oldValue = data[pipeline_constants.OLD]
    pipeline_status.newValue = data[pipeline_constants.NEW]
    if pipeline.enabled:
        pipeline_topic = get_topic_by_id(pipeline.topicId)
        log.debug("start run pipeline {0}".format(pipeline.name))
        # Shared context threaded through all actions; seeded with the run uid.
        context = {PIPELINE_UID: pipeline_status.uid}
        if __check_condition(pipeline, pipeline_topic, data):
            # Fix: initialize ONCE before the stage loop. The original re-created
            # this list inside each matching stage (losing earlier stages' trigger
            # data) and raised NameError after the loop when no stage matched.
            pipeline_trigger_merge_list = []
            try:
                start = time.time()
                for stage in pipeline.stages:
                    if __check_condition(stage, pipeline_topic, data):
                        stage_run_status = StageRunStatus()
                        stage_run_status.name = stage.name
                        log.debug("stage name {0}".format(stage.name))
                        for unit in stage.units:
                            if unit.do is not None:
                                match_result = __check_condition(unit, pipeline_topic, data)
                                if match_result:
                                    unit_run_status = UnitRunStatus()
                                    for action in unit.do:
                                        # Resolve the action implementation dynamically.
                                        func = find_action_type_func(
                                            convert_action_type(action.type), action,
                                            pipeline_topic)
                                        # call dynamic action in action folder
                                        # TODO [future] custom folder
                                        out_result, unit_action_status, trigger_pipeline_data_list = func(
                                            data, context)
                                        if trigger_pipeline_data_list:
                                            pipeline_trigger_merge_list += trigger_pipeline_data_list
                                        log.debug("out_result :{0}".format(out_result))
                                        context = {**context, **out_result}
                                        unit_run_status.actions.append(unit_action_status)
                                    stage_run_status.units.append(unit_run_status)
                            else:
                                log.debug("action stage unit {0} do is None".format(stage.name))
                        pipeline_status.stages.append(stage_run_status)
                elapsed_time = time.time() - start
                pipeline_status.completeTime = elapsed_time
                pipeline_status.status = FINISHED
                log.debug("pipeline_status {0} time :{1}".format(pipeline.name, elapsed_time))
                # SYSTEM topics never cascade, otherwise monitoring would
                # re-trigger itself.
                if pipeline_topic.kind is None or pipeline_topic.kind != pipeline_constants.SYSTEM:
                    __trigger_all_pipeline(pipeline_trigger_merge_list)
            except Exception as e:
                log.exception(e)
                pipeline_status.error = traceback.format_exc()
                pipeline_status.status = ERROR
                log.error(pipeline_status)
            finally:
                if pipeline_topic.kind is not None and pipeline_topic.kind == pipeline_constants.SYSTEM:
                    pass
                else:
                    # Fix: use the module logger instead of a bare print().
                    log.debug("sync pipeline monitor")
                    watchmen.monitor.services.pipeline_monitor_service.sync_pipeline_monitor_data(
                        pipeline_status)