Example #1
@classmethod
def query_task(cls, only_latest=True, reverse=None, order_by=None, **kwargs):
    # Build filters from keyword arguments that map to Task model fields
    # (model attributes follow the "f_<name>" naming convention).
    filters = []
    for f_n, f_v in kwargs.items():
        attr_name = 'f_%s' % f_n
        if hasattr(Task, attr_name):
            filters.append(getattr(Task, attr_name) == f_v)
    if filters:
        tasks = Task.select().where(*filters)
    else:
        tasks = Task.select()
    # Optional ordering: fall back to create_time when the requested
    # order_by column does not exist on the model.
    if reverse is not None:
        if not order_by or not hasattr(Task, f"f_{order_by}"):
            order_by = "create_time"
        if reverse is True:
            tasks = tasks.order_by(getattr(Task, f"f_{order_by}").desc())
        elif reverse is False:
            tasks = tasks.order_by(getattr(Task, f"f_{order_by}").asc())
    if only_latest:
        # Keep only the latest version of each task.
        tasks_group = cls.get_latest_tasks(tasks=tasks)
        return list(tasks_group.values())
    else:
        return list(tasks)
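For context, a minimal call sketch; the enclosing class name (JobSaver here) and the filter field names are assumptions, not shown in the excerpt.

# Hypothetical caller; keyword filters map to Task fields f_job_id / f_status.
latest = JobSaver.query_task(job_id="202201011200001234",
                             status="failed",
                             reverse=True,
                             order_by="create_time")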
Example #2
def query_task(**kwargs):
    # Open a database connection for the duration of the query.
    with DB.connection_context():
        # Build filters from keyword arguments that map to Task fields.
        filters = []
        for f_n, f_v in kwargs.items():
            attr_name = 'f_%s' % f_n
            if hasattr(Task, attr_name):
                filters.append(getattr(Task, attr_name) == f_v)
        if filters:
            tasks = Task.select().where(*filters)
        else:
            tasks = Task.select()
        return list(tasks)
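A usage sketch for this module-level variant; the field names are assumptions.

# Returns plain Task records matching every given field value.
running = query_task(job_id="202201011200001234", status="running")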
Example #3
@classmethod
def report_task_to_initiator(cls, task: Task):
    """
    Report the task's state to the initiator party, retrying up to
    DEFAULT_FEDERATED_COMMAND_TRYS times.

    :param task: the Task record to report
    :return: True if the initiator acknowledged the report, False otherwise
    """
    # A party is the initiator only when both its role and its party id
    # match the initiator's, so report whenever either one differs.
    if task.f_role != task.f_initiator_role or task.f_party_id != task.f_initiator_party_id:
        exception = None
        for t in range(DEFAULT_FEDERATED_COMMAND_TRYS):
            try:
                response = federated_api(
                    job_id=task.f_job_id,
                    method='POST',
                    endpoint='/initiator/{}/{}/{}/{}/{}/{}/report'.format(
                        task.f_job_id, task.f_component_name,
                        task.f_task_id, task.f_task_version, task.f_role,
                        task.f_party_id),
                    src_party_id=task.f_party_id,
                    dest_party_id=task.f_initiator_party_id,
                    src_role=task.f_role,
                    json_body=task.to_human_model_dict(
                        only_primary_with=cls.REPORT_TO_INITIATOR_FIELDS),
                    federated_mode=task.f_federated_mode)
            except Exception as e:
                exception = e
                continue
            if response["retcode"] != RetCode.SUCCESS:
                exception = Exception(response["retmsg"])
            else:
                return True
        else:
            # The for/else branch runs only when every retry failed.
            schedule_logger(job_id=task.f_job_id).error(
                f"report task to initiator error: {exception}")
            return False
    else:
        return False
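The for/else retry construct above is easy to misread, so here is the same pattern in isolation; do_call is a hypothetical stand-in for federated_api.

# The else block of a for loop runs only when the loop finishes without
# break (or, as above, without returning early): i.e. when every try failed.
for attempt in range(3):
    if do_call():  # hypothetical call returning True on success
        break
else:
    print("all attempts failed")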
Example #4
@classmethod
def query_task(cls, only_latest=True, reverse=None, order_by=None, **kwargs) -> typing.List[Task]:
    # Delegate filtering and ordering to the Task model's query helper.
    tasks = Task.query(reverse=reverse, order_by=order_by, **kwargs)
    if only_latest:
        # Keep only the latest version of each task.
        tasks_group = cls.get_latest_tasks(tasks=tasks)
        return list(tasks_group.values())
    else:
        return tasks
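Unlike Example #1, this variant hands filter construction to a Task.query helper on the model itself; calling it is unchanged (JobSaver is again an assumed class name).

# With only_latest=False, every version of each task is returned.
all_versions = JobSaver.query_task(job_id="202201011200001234", only_latest=False)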
Example #5
@classmethod
def check_task(cls, job_id, role, party_id, components: list):
    # "<<" is peewee's "IN" operator: match any of the given component names.
    filters = [
        Task.f_job_id == job_id,
        Task.f_role == role,
        Task.f_party_id == party_id,
        Task.f_component_name << components
    ]
    tasks = Task.select().where(*filters)
    # True only when every requested component already has a task record.
    return bool(tasks) and len(tasks) == len(components)
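A call sketch with placeholder values; the class name and the component names are assumptions.

# True only if a task row exists for every listed component.
ready = JobSaver.check_task("202201011200001234", "guest", 9999,
                            ["reader_0", "data_transform_0"])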
Example #6
@classmethod
def save_worker_info(cls, task: Task, worker_name: WorkerName, worker_id, **kwargs):
    worker = WorkerInfo()
    # Copy matching fields from the task, skipping auto-managed timestamps.
    ignore_attr = auto_date_timestamp_db_field()
    for attr, value in task.to_dict().items():
        if hasattr(worker, attr) and attr not in ignore_attr and value is not None:
            setattr(worker, attr, value)
    worker.f_create_time = current_timestamp()
    worker.f_worker_name = worker_name.value
    worker.f_worker_id = worker_id
    # Extra keyword arguments override fields via the "f_<name>" convention.
    for k, v in kwargs.items():
        attr = f"f_{k}"
        if hasattr(worker, attr) and v is not None:
            setattr(worker, attr, v)
    rows = worker.save(force_insert=True)
    if rows != 1:
        raise Exception("save worker info failed")
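A call sketch; the class name, the WorkerName member, and the run_ip keyword are assumptions about the surrounding codebase.

# Hypothetical values throughout; run_ip maps to an f_run_ip field if present.
JobSaver.save_worker_info(task=task,
                          worker_name=WorkerName.TASK_EXECUTOR,
                          worker_id="worker-123",
                          run_ip="127.0.0.1")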
Example #7
    @staticmethod
    def run_task():
        task = Task()
        task.f_create_time = current_timestamp()
        try:
            parser = argparse.ArgumentParser()
            parser.add_argument('-j', '--job_id', required=True, type=str, help="job id")
            parser.add_argument('-n', '--component_name', required=True, type=str,
                                help="component name")
            parser.add_argument('-t', '--task_id', required=True, type=str, help="task id")
            parser.add_argument('-r', '--role', required=True, type=str, help="role")
            parser.add_argument('-p', '--party_id', required=True, type=str, help="party id")
            parser.add_argument('-c', '--config', required=True, type=str, help="task config")
            parser.add_argument('--processors_per_node', help="processors_per_node", type=int)
            parser.add_argument('--job_server', help="job server", type=str)
            args = parser.parse_args()
            schedule_logger(args.job_id).info('enter task process')
            schedule_logger(args.job_id).info(args)
            # init function args
            if args.job_server:
                RuntimeConfig.init_config(HTTP_PORT=args.job_server.split(':')[1])
                RuntimeConfig.set_process_role(ProcessRole.EXECUTOR)
            job_id = args.job_id
            component_name = args.component_name
            task_id = args.task_id
            role = args.role
            party_id = int(args.party_id)
            executor_pid = os.getpid()
            task_config = file_utils.load_json_conf(args.config)
            job_parameters = task_config['job_parameters']
            job_initiator = task_config['job_initiator']
            job_args = task_config['job_args']
            task_input_dsl = task_config['input']
            task_output_dsl = task_config['output']
            component_parameters = TaskExecutor.get_parameters(job_id, component_name, role, party_id)
            task_parameters = task_config['task_parameters']
            module_name = task_config['module_name']
            TaskExecutor.monkey_patch()
        except Exception as e:
            traceback.print_exc()
            schedule_logger().exception(e)
            task.f_status = TaskStatus.FAILED
            return
        try:
            job_log_dir = os.path.join(job_utils.get_job_log_directory(job_id=job_id), role, str(party_id))
            task_log_dir = os.path.join(job_log_dir, component_name)
            log_utils.LoggerFactory.set_directory(directory=task_log_dir, parent_log_dir=job_log_dir,
                                                  append_to_parent_log=True, force=True)

            task.f_job_id = job_id
            task.f_component_name = component_name
            task.f_task_id = task_id
            task.f_role = role
            task.f_party_id = party_id
            task.f_operator = 'python_operator'
            tracker = Tracking(job_id=job_id, role=role, party_id=party_id, component_name=component_name,
                               task_id=task_id,
                               model_id=job_parameters['model_id'],
                               model_version=job_parameters['model_version'],
                               component_module_name=module_name)
            task.f_start_time = current_timestamp()
            task.f_run_ip = get_lan_ip()
            task.f_run_pid = executor_pid
            run_class_paths = component_parameters.get('CodePath').split('/')
            run_class_package = '.'.join(run_class_paths[:-2]) + '.' + run_class_paths[-2].replace('.py', '')
            run_class_name = run_class_paths[-1]
            task.f_status = TaskStatus.RUNNING
            TaskExecutor.sync_task_status(job_id=job_id, component_name=component_name, task_id=task_id, role=role,
                                          party_id=party_id, initiator_party_id=job_initiator.get('party_id', None),
                                          initiator_role=job_initiator.get('role', None),
                                          task_info=task.to_json())

            # init environment, process is shared globally
            RuntimeConfig.init_config(WORK_MODE=job_parameters['work_mode'],
                                      BACKEND=job_parameters.get('backend', 0))
            if args.processors_per_node and args.processors_per_node > 0 and RuntimeConfig.BACKEND == Backend.EGGROLL:
                session_options = {"eggroll.session.processors.per.node": args.processors_per_node}
            else:
                session_options = {}
            session.init(job_id=job_utils.generate_session_id(task_id, role, party_id),
                         mode=RuntimeConfig.WORK_MODE,
                         backend=RuntimeConfig.BACKEND,
                         options=session_options)
            federation.init(job_id=task_id, runtime_conf=component_parameters)

            schedule_logger().info('run {} {} {} {} {} task'.format(job_id, component_name, task_id, role, party_id))
            schedule_logger().info(component_parameters)
            schedule_logger().info(task_input_dsl)
            task_run_args = TaskExecutor.get_task_run_args(job_id=job_id, role=role, party_id=party_id,
                                                           task_id=task_id,
                                                           job_args=job_args,
                                                           job_parameters=job_parameters,
                                                           task_parameters=task_parameters,
                                                           input_dsl=task_input_dsl,
                                                           if_save_as_task_input_data=job_parameters.get("save_as_task_input_data", SAVE_AS_TASK_INPUT_DATA_SWITCH)
                                                           )
            run_object = getattr(importlib.import_module(run_class_package), run_class_name)()
            run_object.set_tracker(tracker=tracker)
            run_object.set_taskid(taskid=task_id)
            run_object.run(component_parameters, task_run_args)
            output_data = run_object.save_data()
            tracker.save_output_data_table(output_data, task_output_dsl.get('data')[0] if task_output_dsl.get('data') else 'component')
            output_model = run_object.export_model()
            # There is only one model output at the current dsl version.
            tracker.save_output_model(output_model, task_output_dsl['model'][0] if task_output_dsl.get('model') else 'default')
            task.f_status = TaskStatus.COMPLETE
        except Exception as e:
            task.f_status = TaskStatus.FAILED
            schedule_logger().exception(e)
        finally:
            sync_success = False
            try:
                task.f_end_time = current_timestamp()
                task.f_elapsed = task.f_end_time - task.f_start_time
                task.f_update_time = current_timestamp()
                TaskExecutor.sync_task_status(job_id=job_id, component_name=component_name, task_id=task_id, role=role,
                                              party_id=party_id,
                                              initiator_party_id=job_initiator.get('party_id', None),
                                              initiator_role=job_initiator.get('role', None),
                                              task_info=task.to_json())
                sync_success = True
            except Exception as e:
                traceback.print_exc()
                schedule_logger().exception(e)
        schedule_logger().info('task {} {} {} start time: {}'.format(task_id, role, party_id, timestamp_to_date(task.f_start_time)))
        schedule_logger().info('task {} {} {} end time: {}'.format(task_id, role, party_id, timestamp_to_date(task.f_end_time)))
        schedule_logger().info('task {} {} {} takes {}s'.format(task_id, role, party_id, int(task.f_elapsed)/1000))
        schedule_logger().info(
            'finish {} {} {} {} {} {} task'.format(job_id, component_name, task_id, role, party_id, task.f_status if sync_success else TaskStatus.FAILED))

        print('finish {} {} {} {} {} {} task'.format(job_id, component_name, task_id, role, party_id, task.f_status if sync_success else TaskStatus.FAILED))
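This entry point is meant to be launched as a subprocess; a sketch of such a launch, where the script path and all argument values are placeholders.

# Hypothetical launch using only the flags defined by the argparse setup above.
import subprocess
subprocess.run([
    "python", "task_executor.py",
    "-j", "202201011200001234",
    "-n", "dataio_0",
    "-t", "202201011200001234_dataio_0",
    "-r", "guest",
    "-p", "9999",
    "-c", "/path/to/task_config.json",
])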
Example #8
@classmethod
def get_tasks_asc(cls, job_id, role, party_id):
    # Fetch all tasks for this (job, role, party), oldest first.
    tasks = Task.select().where(
        Task.f_job_id == job_id, Task.f_role == role,
        Task.f_party_id == party_id).order_by(Task.f_create_time.asc())
    # Reduce to the latest version of each task.
    tasks_group = cls.get_latest_tasks(tasks=tasks)
    return tasks_group
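A call sketch; JobSaver is an assumed class name. Judging from Example #1, get_latest_tasks returns a dict, so the caller receives a mapping rather than a list.

# Oldest-first tasks for one party, reduced to the latest version of each.
tasks_group = JobSaver.get_tasks_asc("202201011200001234", "guest", 9999)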
Example #9
    @staticmethod
    def run_task():
        task = Task()
        task.f_create_time = current_timestamp()
        try:
            parser = argparse.ArgumentParser()
            parser.add_argument('-j',
                                '--job_id',
                                required=True,
                                type=str,
                                help="job id")
            parser.add_argument('-n',
                                '--component_name',
                                required=True,
                                type=str,
                                help="component name")
            parser.add_argument('-t',
                                '--task_id',
                                required=True,
                                type=str,
                                help="task id")
            parser.add_argument('-r',
                                '--role',
                                required=True,
                                type=str,
                                help="role")
            parser.add_argument('-p',
                                '--party_id',
                                required=True,
                                type=str,
                                help="party id")
            parser.add_argument('-c',
                                '--config',
                                required=True,
                                type=str,
                                help="task config")
            parser.add_argument('--job_server', help="job server", type=str)
            args = parser.parse_args()
            schedule_logger.info('enter task process')
            schedule_logger.info(args)
            # init function args
            if args.job_server:
                RuntimeConfig.init_config(
                    HTTP_PORT=args.job_server.split(':')[1])
            job_id = args.job_id
            component_name = args.component_name
            task_id = args.task_id
            role = args.role
            party_id = int(args.party_id)
            task_config = file_utils.load_json_conf(args.config)
            job_parameters = task_config['job_parameters']
            job_initiator = task_config['job_initiator']
            job_args = task_config['job_args']
            task_input_dsl = task_config['input']
            task_output_dsl = task_config['output']
            parameters = task_config['parameters']
            module_name = task_config['module_name']
        except Exception as e:
            schedule_logger.exception(e)
            task.f_status = TaskStatus.FAILED
            return
        try:
            # init environment, process is shared globally
            RuntimeConfig.init_config(WORK_MODE=job_parameters['work_mode'])
            storage.init_storage(job_id=task_id,
                                 work_mode=RuntimeConfig.WORK_MODE)
            federation.init(job_id=task_id, runtime_conf=parameters)
            job_log_dir = os.path.join(
                job_utils.get_job_log_directory(job_id=job_id), role,
                str(party_id))
            task_log_dir = os.path.join(job_log_dir, component_name)
            log_utils.LoggerFactory.set_directory(directory=task_log_dir,
                                                  parent_log_dir=job_log_dir,
                                                  append_to_parent_log=True,
                                                  force=True)

            task.f_job_id = job_id
            task.f_component_name = component_name
            task.f_task_id = task_id
            task.f_role = role
            task.f_party_id = party_id
            task.f_operator = 'python_operator'
            tracker = Tracking(job_id=job_id,
                               role=role,
                               party_id=party_id,
                               component_name=component_name,
                               task_id=task_id,
                               model_id=job_parameters['model_id'],
                               model_version=job_parameters['model_version'],
                               module_name=module_name)
            task.f_start_time = current_timestamp()
            task.f_run_ip = get_lan_ip()
            task.f_run_pid = os.getpid()
            run_class_paths = parameters.get('CodePath').split('/')
            run_class_package = '.'.join(
                run_class_paths[:-2]) + '.' + run_class_paths[-2].replace(
                    '.py', '')
            run_class_name = run_class_paths[-1]
            task_run_args = TaskExecutor.get_task_run_args(
                job_id=job_id,
                role=role,
                party_id=party_id,
                job_parameters=job_parameters,
                job_args=job_args,
                input_dsl=task_input_dsl)
            run_object = getattr(importlib.import_module(run_class_package),
                                 run_class_name)()
            run_object.set_tracker(tracker=tracker)
            run_object.set_taskid(taskid=task_id)
            task.f_status = TaskStatus.RUNNING
            TaskExecutor.sync_task_status(job_id=job_id,
                                          component_name=component_name,
                                          task_id=task_id,
                                          role=role,
                                          party_id=party_id,
                                          initiator_party_id=job_initiator.get(
                                              'party_id', None),
                                          task_info=task.to_json())

            schedule_logger.info('run {} {} {} {} {} task'.format(
                job_id, component_name, task_id, role, party_id))
            schedule_logger.info(parameters)
            schedule_logger.info(task_input_dsl)
            run_object.run(parameters, task_run_args)
            if task_output_dsl:
                if task_output_dsl.get('data', []):
                    output_data = run_object.save_data()
                    tracker.save_output_data_table(
                        output_data,
                        task_output_dsl.get('data')[0])
                if task_output_dsl.get('model', []):
                    output_model = run_object.export_model()
                    # There is only one model output at the current dsl version.
                    tracker.save_output_model(output_model,
                                              task_output_dsl['model'][0])
            task.f_status = TaskStatus.SUCCESS
        except Exception as e:
            schedule_logger.exception(e)
            task.f_status = TaskStatus.FAILED
        finally:
            try:
                task.f_end_time = current_timestamp()
                task.f_elapsed = task.f_end_time - task.f_start_time
                task.f_update_time = current_timestamp()
                TaskExecutor.sync_task_status(
                    job_id=job_id,
                    component_name=component_name,
                    task_id=task_id,
                    role=role,
                    party_id=party_id,
                    initiator_party_id=job_initiator.get('party_id', None),
                    task_info=task.to_json())
            except Exception as e:
                schedule_logger.exception(e)
        schedule_logger.info('finish {} {} {} {} {} {} task'.format(
            job_id, component_name, task_id, role, party_id, task.f_status))
        print('finish {} {} {} {} {} {} task'.format(job_id, component_name,
                                                     task_id, role, party_id,
                                                     task.f_status))
Example #10
def save_task(self, role, party_id, task_info):
    with DB.connection_context():
        # Look for an existing record of this task on this (role, party).
        tasks = Task.select().where(
            Task.f_job_id == self.job_id,
            Task.f_component_name == self.component_name,
            Task.f_task_id == self.task_id, Task.f_role == role,
            Task.f_party_id == party_id)
        is_insert = True
        if tasks:
            task = tasks[0]
            is_insert = False
        else:
            task = Task()
            task.f_create_time = current_timestamp()
        task.f_job_id = self.job_id
        task.f_component_name = self.component_name
        task.f_task_id = self.task_id
        task.f_role = role
        task.f_party_id = party_id
        if 'f_status' in task_info:
            if task.f_status in [TaskStatus.COMPLETE, TaskStatus.FAILED]:
                # Termination status cannot be updated
                # TODO:
                pass
        for k, v in task_info.items():
            # Skip key fields and values still equal to the model default.
            try:
                if k in [
                        'f_job_id', 'f_component_name', 'f_task_id',
                        'f_role', 'f_party_id'
                ] or v == getattr(Task, k).default:
                    continue
            except AttributeError:
                # k is not a Task field; let setattr decide below.
                pass
            setattr(task, k, v)
        if is_insert:
            task.save(force_insert=True)
        else:
            task.save()
        return task
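A call sketch; this method behaves as an upsert keyed on (job_id, component_name, task_id, role, party_id). The tracker instance is hypothetical.

# Updates the existing row, or inserts one on the first report.
tracker.save_task(role="guest", party_id=9999,
                  task_info={"f_status": TaskStatus.RUNNING})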
Example #11
def list_task(limit):
    # Most recent tasks first; a non-positive limit returns everything.
    if limit > 0:
        tasks = Task.select().order_by(Task.f_create_time.desc()).limit(limit)
    else:
        tasks = Task.select().order_by(Task.f_create_time.desc())
    return list(tasks)
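A usage sketch:

# The ten most recently created tasks.
recent = list_task(limit=10)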
Example #12
@classmethod
def get_tasks_asc(cls, job_id, role, party_id):
    # Same result as Example #8, but delegating ordering to Task.query.
    tasks = Task.query(order_by="create_time", reverse=False, job_id=job_id, role=role, party_id=party_id)
    tasks_group = cls.get_latest_tasks(tasks=tasks)
    return tasks_group