Exemplo n.º 1
0
 def child_process_ready(self, child_id):
     """
     Send the signal announcing that a child process is ready to be scheduled.

     :param child_id: ID of the child process
     :return: None
     """
     signal_kwargs = {
         'sender': PipelineProcess,
         'child_id': child_id,
     }
     valve.send(signals, 'child_process_ready', **signal_kwargs)
Exemplo n.º 2
0
    def test_send_on_valve_is_none(self):
        """
        After the valve function is unloaded, every payload sent through the
        valve must show up in ``signal_1.history`` in sending order.
        """
        payloads = [{'1': 1}, {'2': 2}]

        valve.unload_valve_function()
        for payload in payloads:
            valve.send(mock_signal, 'signal_1', **payload)

        for position, expected in enumerate(payloads):
            self.assertEqual(mock_signal.signal_1.history[position], expected)

        # reset the mock's state
        mock_signal.clear()
Exemplo n.º 3
0
 def batch_process_ready(self, process_id_list, pipeline_id):
     """
     Send the signal that wakes up a batch of processes.

     :param process_id_list: list of IDs of the processes to wake up
     :param pipeline_id: the root pipeline these processes relate to
     :return: None
     """
     signal_kwargs = {
         'sender': PipelineProcess,
         'process_id_list': process_id_list,
         'pipeline_id': pipeline_id,
     }
     valve.send(signals, 'batch_process_ready', **signal_kwargs)
Exemplo n.º 4
0
 def unfreeze(self):
     """
     Unfreeze the current process.

     Within one transaction this fetches the process row with
     ``select_for_update``, clears ``is_frozen``, saves without a snapshot
     and sends the ``process_unfreeze`` signal through the valve.

     :return: None
     """
     model = self.__class__
     with transaction.atomic():
         # The fetched object is discarded; the query is issued with
         # select_for_update() before this instance is modified and saved.
         model.objects.select_for_update().get(id=self.id)

         self.is_frozen = False
         self.save(save_snapshot=False)

         signal_kwargs = {
             'sender': PipelineProcess,
             'process_id': self.id,
         }
         valve.send(signals, 'process_unfreeze', **signal_kwargs)
Exemplo n.º 5
0
    def callback(self, callback_data, process_id):
        """
        Store callback data on this schedule and signal that it is ready.

        :param callback_data: value saved to ``self.callback_data``
        :param process_id: process ID forwarded with the ``schedule_ready`` signal
        :raises exceptions.InvalidOperationException: when ``wait_callback``
            is falsy
        """
        if not self.wait_callback:
            message = 'can\'t callback a poll schedule.'
            raise exceptions.InvalidOperationException(message)

        self.callback_data = callback_data
        self.save()

        signal_kwargs = {
            'sender': ScheduleService,
            'process_id': process_id,
            'schedule_id': self.id,
            'countdown': 0,
        }
        valve.send(signals, 'schedule_ready', **signal_kwargs)
Exemplo n.º 6
0
    def set_next_schedule(self):
        """
        Arrange the next run of this schedule.

        Takes the next countdown from the service interval, clears
        ``is_scheduling``, saves, unbinds the schedule's celery task and
        sends ``schedule_ready`` with that countdown.

        :raises exceptions.InvalidOperationException: when ``wait_callback``
            is truthy
        """
        if self.wait_callback:
            message = 'can\'t set next schedule on callback schedule.'
            raise exceptions.InvalidOperationException(message)

        interval = self.service_act.service.interval
        count_down = interval.next()

        self.is_scheduling = False
        self.save()
        ScheduleCeleryTask.objects.unbind(self.id)

        signal_kwargs = {
            'sender': ScheduleService,
            'process_id': self.process_id,
            'schedule_id': self.id,
            'countdown': count_down,
        }
        valve.send(signals, 'schedule_ready', **signal_kwargs)
Exemplo n.º 7
0
    def test_send_on_closed(self):
        """
        With a valve function that returns True, payloads sent through the
        valve must not appear in ``signal_1.history``.
        """
        payloads = [{'1': 1}, {'2': 2}]

        def is_valve_closed():
            return True

        valve.unload_valve_function()
        valve.set_valve_function(is_valve_closed)
        for payload in payloads:
            valve.send(mock_signal, 'signal_1', **payload)

        recorded = mock_signal.signal_1.history
        self.assertEqual(len(recorded), 0)

        # clean up the mock and the Signal rows
        mock_signal.clear()
        Signal.objects.all().delete()
Exemplo n.º 8
0
    def test_send_on_valve_opened(self):
        """
        With a valve function that returns False, payloads sent through the
        valve must appear in ``signal_1.history`` in sending order.
        """
        payloads = [{'1': 1}, {'2': 2}]

        def is_valve_closed():
            return False

        valve.unload_valve_function()
        valve.set_valve_function(is_valve_closed)
        for payload in payloads:
            valve.send(mock_signal, 'signal_1', **payload)

        for position, expected in enumerate(payloads):
            self.assertEqual(mock_signal.signal_1.history[position], expected)

        # reset the mock's state
        mock_signal.clear()
Exemplo n.º 9
0
 def process_ready(self,
                   process_id,
                   current_node_id=None,
                   call_from_child=False):
     """
     Send the signal announcing that a process is ready to be scheduled.

     :param process_id: ID of the process that is ready
     :param current_node_id: ID of the next node to execute (can be used
         to skip a failed node)
     :param call_from_child: whether the signal is emitted by a child process
     :return: None
     """
     signal_kwargs = {
         'sender': PipelineProcess,
         'process_id': process_id,
         'current_node_id': current_node_id,
         'call_from_child': call_from_child,
     }
     valve.send(signals, 'process_ready', **signal_kwargs)
Exemplo n.º 10
0
    def test_open_valve(self):
        """
        Payloads sent while the valve function returns True must be absent
        from ``signal_1.history`` until ``valve.open_valve`` is called, and
        present in sending order afterwards.
        """
        payloads = [{'1': 1}, {'2': 2}]

        def valve_closed():
            return True

        valve.unload_valve_function()
        valve.set_valve_function(valve_closed)
        for payload in payloads:
            valve.send(mock_signal, 'signal_1', **payload)

        # nothing recorded before the valve is opened
        self.assertEqual(len(mock_signal.signal_1.history), 0)

        valve.open_valve(mock_signal)
        for position, expected in enumerate(payloads):
            self.assertEqual(mock_signal.signal_1.history[position], expected)

        mock_signal.clear()
Exemplo n.º 11
0
    def set_schedule(self, activity_id, service_act, process_id, version,
                     parent_data):
        """
        Create a schedule for an activity and store its parent data.

        The schedule waits for a callback when the service has no interval;
        otherwise a ``schedule_ready`` signal is sent with the interval's
        next countdown.

        :param activity_id: activity ID; concatenated with ``version`` to
            form the schedule ID
        :param service_act: service activity whose ``service.interval`` is read
        :param process_id: process ID stored on the schedule and sent with
            the signal
        :param version: version stored on the schedule
        :param parent_data: data stored via ``data_service.set_schedule_data``
        :return: the created schedule
        """
        wait_callback = service_act.service.interval is None
        schedule_fields = {
            'id': "%s%s" % (activity_id, version),
            'activity_id': activity_id,
            'service_act': service_act,
            'process_id': process_id,
            'wait_callback': wait_callback,
            'version': version,
        }
        schedule = self.create(**schedule_fields)
        data_service.set_schedule_data(schedule.id, parent_data)

        # callback-driven schedules are not polled, so no signal is sent here
        if wait_callback:
            return schedule

        count_down = service_act.service.interval.next()
        signal_kwargs = {
            'sender': ScheduleService,
            'process_id': process_id,
            'schedule_id': schedule.id,
            'countdown': count_down,
        }
        valve.send(signals, 'schedule_ready', **signal_kwargs)

        return schedule
Exemplo n.º 12
0
 def pipeline_ready(self, process_id):
     """
     Send the ``pipeline_ready`` signal through the valve.

     :param process_id: process ID forwarded with the signal
     :return: None
     """
     signal_kwargs = {
         'sender': Pipeline,
         'process_id': process_id,
     }
     valve.send(signals, 'pipeline_ready', **signal_kwargs)
Exemplo n.º 13
0
    def handle(self, process, element, status):
        """
        Execute a service activity node and report how the process proceeds.

        The node's data is prepared (rerun/retry handling, loop counter,
        output extraction, input hydration), a timeout monitor is started
        when the node has a timeout, and ``element.execute`` is called.
        Depending on the outcome the node is marked failed, handed over to
        the schedule service, or finished.

        :param process: process running the node; its ``root_pipeline``,
            ``top_pipeline``, ``id`` and ``subprocess_stack`` are read
        :param element: the node to execute
        :param status: status record of the node; ``version`` and ``loop``
            are read
        :return: a ``self.HandleResult`` carrying ``next_node``,
            ``should_return`` and ``should_sleep``
        """
        success = False
        exception_occurred = False
        monitoring = False
        version = status.version
        root_pipeline = process.root_pipeline

        # rerun mode
        if status.loop > 1 and not element.on_retry():
            element.prepare_rerun_data()
            process.top_pipeline.context.recover_variable()

        elif element.on_retry():
            element.retry_at_current_exec()

        # set loop to data
        element.data.inputs._loop = status.loop - 1
        element.data.outputs._loop = status.loop - 1

        # pre output extract
        process.top_pipeline.context.extract_output(element)

        # hydrate inputs
        hydrate_node_data(element)

        if element.timeout:
            logger.info('node %s %s start timeout monitor, timeout: %s' %
                        (element.id, version, element.timeout))
            signals.service_activity_timeout_monitor_start.send(
                sender=element.__class__,
                node_id=element.id,
                version=version,
                root_pipeline_id=root_pipeline.id,
                countdown=element.timeout)
            monitoring = True

        # execute service
        try:
            success = element.execute(root_pipeline.data)
        except Exception:
            if element.error_ignorable:
                # ignore exception
                success = True
                exception_occurred = True
                element.ignore_error()
            # format_exc() formats the exception currently being handled; its
            # first positional parameter is ``limit``, so the exception
            # object must not be passed to it.
            ex_data = traceback.format_exc()
            element.data.outputs.ex_data = ex_data
            logger.error(ex_data)

        # process result
        if success is False:
            ex_data = element.data.get_one_of_outputs('ex_data')
            Status.objects.fail(element, ex_data)
            try:
                element.failure_handler(root_pipeline.data)
            except Exception:
                # a failing failure_handler is logged and does not abort handling
                logger.error('failure_handler(%s) failed: %s' %
                             (element.id, traceback.format_exc()))

            if monitoring:
                signals.service_activity_timeout_monitor_end.send(
                    sender=element.__class__,
                    node_id=element.id,
                    version=version)
                logger.info('node %s %s timeout monitor revoke' %
                            (element.id, version))

            # send activity error signal
            valve.send(signals,
                       'activity_failed',
                       sender=root_pipeline,
                       pipeline_id=root_pipeline.id,
                       pipeline_activity_id=element.id,
                       subprocess_id_stack=process.subprocess_stack)

            return self.HandleResult(next_node=None,
                                     should_return=False,
                                     should_sleep=True)
        else:
            is_error_ignored = (element.error_ignorable
                                and not element.get_result_bit())
            if (element.need_schedule()
                    and not exception_occurred
                    and not is_error_ignored):
                # write data before schedule
                Data.objects.write_node_data(element)
                # set schedule
                ScheduleService.objects.set_schedule(
                    element.id,
                    service_act=element.shell(),
                    process_id=process.id,
                    version=version,
                    parent_data=process.top_pipeline.data)
                return self.HandleResult(next_node=None,
                                         should_return=True,
                                         should_sleep=True)

            process.top_pipeline.context.extract_output(element)
            error_ignorable = not element.get_result_bit()

            if monitoring:
                signals.service_activity_timeout_monitor_end.send(
                    sender=element.__class__,
                    node_id=element.id,
                    version=version)
                logger.info('node %s %s timeout monitor revoke' %
                            (element.id, version))

            if not Status.objects.finish(element, error_ignorable):
                # has been forced failed
                return self.HandleResult(next_node=None,
                                         should_return=False,
                                         should_sleep=True)
            return self.HandleResult(next_node=element.next(),
                                     should_return=False,
                                     should_sleep=False)
Exemplo n.º 14
0
def schedule(process_id, schedule_id):
    """
    Main entry point of the schedule service.

    Loads the schedule, runs ``service_act.schedule`` with the stored parent
    data, then either marks the activity failed, finishes the schedule and
    wakes the process, or arranges the next schedule run.

    :param process_id: the PipelineProcess the scheduled node belongs to
    :param schedule_id: schedule ID
    :return: None
    :raises exceptions.DataRetrieveError: when the stored parent data cannot
        be retrieved (raised inside ``schedule_exception_handler``)
    """
    with schedule_exception_handler(process_id, schedule_id):
        ScheduleService.objects.filter(id=schedule_id).update(
            is_scheduling=True)
        sched_service = ScheduleService.objects.get(id=schedule_id)
        service_act = sched_service.service_act
        act_id = sched_service.activity_id
        version = sched_service.version

        if not Status.objects.filter(id=act_id, version=version).exists():
            # forced failed
            logger.warning(
                'schedule service failed, schedule(%s - %s) had been forced exit.'
                % (act_id, version))
            sched_service.destroy()
            return

        # get data
        parent_data = get_schedule_parent_data(sched_service.id)
        if parent_data is None:
            raise exceptions.DataRetrieveError(
                'child process(%s) retrieve parent_data error, sched_id: %s' %
                (process_id, schedule_id))

        # schedule
        ex_data = None
        success = False
        try:
            success = service_act.schedule(parent_data,
                                           sched_service.callback_data)
            # a schedule() that returns nothing counts as success
            if success is None:
                success = True
        except Exception:
            if service_act.error_ignorable:
                success = True
                service_act.ignore_error()
                service_act.finish_schedule()

            # format_exc() formats the exception currently being handled; its
            # first positional parameter is ``limit``, so the exception
            # object must not be passed to it.
            ex_data = traceback.format_exc()
            logger.error(ex_data)

        sched_service.schedule_times += 1
        set_schedule_data(sched_service.id, parent_data)

        # schedule failed
        if not success:
            if not Status.objects.transit(
                    id=act_id, version=version, to_state=states.FAILED).result:
                # forced failed
                logger.warning(
                    'FAILED transit failed, schedule(%s - %s) had been forced exit.'
                    % (act_id, version))
                sched_service.destroy()
                return

            if service_act.timeout:
                signals.service_activity_timeout_monitor_end.send(
                    sender=service_act.__class__,
                    node_id=service_act.id,
                    version=version)
                logger.info('node %s %s timeout monitor revoke' %
                            (service_act.id, version))

            Data.objects.write_node_data(service_act, ex_data=ex_data)

            with transaction.atomic():
                process = PipelineProcess.objects.select_for_update().get(
                    id=sched_service.process_id)
                if not process.is_alive:
                    logger.info(
                        'pipeline %s has been revoked, status adjust failed.' %
                        process.root_pipeline_id)
                    return

                process.adjust_status()

            try:
                service_act.schedule_fail()
            except Exception:
                # a failing schedule_fail handler is logged and does not stop
                # the failure signals below
                logger.error('schedule_fail handler fail: %s' %
                             traceback.format_exc())

            signals.service_schedule_fail.send(sender=ScheduleService,
                                               activity_shell=service_act,
                                               schedule_service=sched_service,
                                               ex_data=ex_data)

            # send activity error signal
            valve.send(signals,
                       'activity_failed',
                       sender=process.root_pipeline,
                       pipeline_id=process.root_pipeline_id,
                       pipeline_activity_id=service_act.id,
                       subprocess_id_stack=process.subprocess_stack)
            return

        # schedule execute finished or callback finished
        if service_act.is_schedule_done() or sched_service.wait_callback:
            error_ignorable = not service_act.get_result_bit()
            if not Status.objects.transit(id=act_id,
                                          version=version,
                                          to_state=states.FINISHED).result:
                # forced failed
                logger.warning(
                    'FINISHED transit failed, schedule(%s - %s) had been forced exit.'
                    % (act_id, version))
                sched_service.destroy()
                return

            if service_act.timeout:
                signals.service_activity_timeout_monitor_end.send(
                    sender=service_act.__class__,
                    node_id=service_act.id,
                    version=version)
                logger.info('node %s %s timeout monitor revoke' %
                            (service_act.id, version))

            Data.objects.write_node_data(service_act)
            if error_ignorable:
                s = Status.objects.get(id=act_id)
                s.error_ignorable = True
                s.save()

            # sync parent data
            with transaction.atomic():
                process = PipelineProcess.objects.select_for_update().get(
                    id=sched_service.process_id)
                if not process.is_alive:
                    logger.warning('schedule(%s - %s) revoked.' %
                                   (act_id, version))
                    sched_service.destroy()
                    return

                process.top_pipeline.data.update_outputs(
                    parent_data.get_outputs())
                # extract outputs
                process.top_pipeline.context.extract_output(service_act)
                process.save(save_snapshot=True)

            # clear temp data
            delete_parent_data(sched_service.id)
            # save schedule service
            sched_service.finish()

            signals.service_schedule_success.send(
                sender=ScheduleService,
                activity_shell=service_act,
                schedule_service=sched_service)

            valve.send(signals,
                       'wake_from_schedule',
                       sender=ScheduleService,
                       process_id=sched_service.process_id,
                       activity_id=sched_service.activity_id)
        else:
            # not done yet and not callback-driven: arrange the next run
            sched_service.set_next_schedule()