示例#1
0
    def clean_queue():
        """Cancel all waiting jobs initiated by this party and drop them from the job queue.

        Queries jobs with ``is_initiator=1`` and status ``JobStatus.WAITING``.
        For each one, the ``JobStatus.CANCELED`` status is synchronised through
        ``TaskScheduler.sync_job_status`` and the matching event is deleted from
        ``RuntimeConfig.JOB_QUEUE``. A failure to delete an event is logged and
        does not stop the remaining jobs from being processed.

        Raises:
            Exception: if the query returns no waiting jobs.
        """
        schedule_logger().info('get clean queue command')
        waiting_jobs = job_utils.query_job(is_initiator=1, status=JobStatus.WAITING)
        if not waiting_jobs:
            raise Exception('There are no jobs in the queue')

        for waiting_job in waiting_jobs:
            schedule_logger(waiting_job.f_job_id).info(
                'start send {} job {} command success'.format(JobStatus.CANCELED, waiting_job.f_job_id))

            # Propagate the CANCELED status for this job before touching the queue.
            canceled_info = {'f_job_id': waiting_job.f_job_id, 'f_status': JobStatus.CANCELED}
            job_roles = json_loads(waiting_job.f_roles)
            TaskScheduler.sync_job_status(
                job_id=waiting_job.f_job_id,
                roles=job_roles,
                initiator_party_id=waiting_job.f_party_id,
                initiator_role=waiting_job.f_role,
                work_mode=waiting_job.f_work_mode,
                job_info=canceled_info,
            )

            # The queue event is rebuilt from the initiator recorded in the runtime conf.
            initiator_conf = json_loads(waiting_job.f_runtime_conf)['initiator']
            queue_event = job_utils.job_event(waiting_job.f_job_id,
                                              initiator_conf['role'],
                                              initiator_conf['party_id'])
            try:
                RuntimeConfig.JOB_QUEUE.del_event(queue_event)
                schedule_logger(waiting_job.f_job_id).info(
                    'send {} job {} command success'.format(JobStatus.CANCELED, waiting_job.f_job_id))
            except Exception as e:
                # Best effort: record the failure and move on to the next job.
                schedule_logger(waiting_job.f_job_id).error(e)
示例#2
0
 def cancel_job(job_id, role, party_id, job_initiator):
     """Remove a waiting job from the job queue on the initiator side.

     Args:
         job_id: id of the job to cancel.
         role: role of the calling party; used only in log and error messages.
         party_id: party id of the caller; used only in log and error messages.
         job_initiator: not read by this function; kept so existing callers
             keep working.

     Returns:
         True if the job's event was deleted from ``RuntimeConfig.JOB_QUEUE``,
         False if deleting the event raised an exception.

     Raises:
         Exception: if the job exists but this party is not its initiator,
             or if no job with ``job_id`` is found at all.
     """
     schedule_logger(job_id).info(
         '{} {} get cancel waiting job {} command'.format(
             role, party_id, job_id))
     jobs = job_utils.query_job(job_id=job_id, is_initiator=1)
     if jobs:
         job = jobs[0]
         job_runtime_conf = json_loads(job.f_runtime_conf)
         event = job_utils.job_event(
             job.f_job_id, job_runtime_conf['initiator']['role'],
             job_runtime_conf['initiator']['party_id'])
         try:
             RuntimeConfig.JOB_QUEUE.del_event(event)
         except Exception as e:
             # Catch Exception rather than using a bare except so that
             # KeyboardInterrupt/SystemExit still propagate, and record why
             # the cancel failed instead of silently returning False.
             schedule_logger(job_id).error(e)
             return False
         schedule_logger(job_id).info(
             'cancel waiting job successfully, job id is {}'.format(
                 job.f_job_id))
         return True

     # No initiator-side record: tell "job exists but we are not the
     # initiator" apart from "job does not exist".
     jobs = job_utils.query_job(job_id=job_id)
     if jobs:
         raise Exception(
             'role {} party id {} cancel waiting job {} failed, not is initiator'
             .format(role, party_id, job_id))
     raise Exception(
         'role {} party id {} cancel waiting job failed, no find jod {}'
         .format(role, party_id, job_id))
示例#3
0
    def submit_job(job_data):
        """Create a new job from the submitted DSL and runtime conf and enqueue it.

        Args:
            job_data: mapping read with ``.get``; ``'job_dsl'`` and
                ``'job_runtime_conf'`` are the entries used (each defaults to
                an empty dict when absent).

        Returns:
            A 6-tuple ``(job_id, job_dsl_path, job_runtime_conf_path,
            logs_directory, model_info, board_url)`` where ``model_info`` is a
            dict with the keys ``'model_id'`` and ``'model_version'``.

        Raises:
            Exception: if the initiator party id is not listed under the
                initiator role in the runtime conf, or if pushing the job
                event into the queue fails. Errors raised by the helper calls
                (conf checks, model loading, DSL parsing, distribution) are
                not caught here.
        """
        job_id = generate_job_id()
        schedule_logger(job_id).info('submit job, job_id {}, body {}'.format(job_id, job_data))
        job_dsl = job_data.get('job_dsl', {})
        job_runtime_conf = job_data.get('job_runtime_conf', {})
        job_utils.check_pipeline_job_runtime_conf(job_runtime_conf)
        job_parameters = job_runtime_conf['job_parameters']
        job_initiator = job_runtime_conf['initiator']
        job_type = job_parameters.get('job_type', '')
        if job_type != 'predict':
            # generate job model info: the job id doubles as the model version
            job_parameters['model_id'] = '#'.join([dtable_utils.all_party_key(job_runtime_conf['role']), 'model'])
            job_parameters['model_version'] = job_id
            train_runtime_conf = {}
        else:
            detect_utils.check_config(job_parameters, ['model_id', 'model_version'])
            # get inference dsl from pipeline model as job dsl
            job_tracker = Tracking(job_id=job_id, role=job_initiator['role'], party_id=job_initiator['party_id'],
                                   model_id=job_parameters['model_id'], model_version=job_parameters['model_version'])
            pipeline_model = job_tracker.get_output_model('pipeline')
            job_dsl = json_loads(pipeline_model['Pipeline'].inference_dsl)
            train_runtime_conf = json_loads(pipeline_model['Pipeline'].train_runtime_conf)
        path_dict = save_job_conf(job_id=job_id,
                                  job_dsl=job_dsl,
                                  job_runtime_conf=job_runtime_conf,
                                  train_runtime_conf=train_runtime_conf,
                                  pipeline_dsl=None)

        job = Job()
        job.f_job_id = job_id
        job.f_roles = json_dumps(job_runtime_conf['role'])
        job.f_work_mode = job_parameters['work_mode']
        job.f_initiator_party_id = job_initiator['party_id']
        job.f_dsl = json_dumps(job_dsl)
        job.f_runtime_conf = json_dumps(job_runtime_conf)
        job.f_train_runtime_conf = json_dumps(train_runtime_conf)
        job.f_run_ip = ''
        job.f_status = JobStatus.WAITING
        job.f_progress = 0
        job.f_create_time = current_timestamp()

        initiator_role = job_initiator['role']
        initiator_party_id = job_initiator['party_id']
        if initiator_party_id not in job_runtime_conf['role'][initiator_role]:
            # This aborts the submission, so record it at error level.
            schedule_logger(job_id).error("initiator party id error:{}".format(initiator_party_id))
            raise Exception("initiator party id error {}".format(initiator_party_id))

        # Return value is unused here; the call is kept as in the original
        # (presumably it validates the DSL/conf combination - TODO confirm).
        get_job_dsl_parser(dsl=job_dsl,
                           runtime_conf=job_runtime_conf,
                           train_runtime_conf=train_runtime_conf)

        TaskScheduler.distribute_job(job=job, roles=job_runtime_conf['role'], job_initiator=job_initiator)

        # push into queue
        job_event = job_utils.job_event(job_id, initiator_role, initiator_party_id)
        try:
            RuntimeConfig.JOB_QUEUE.put_event(job_event)
        except Exception as e:
            # Keep the underlying cause in the job log; callers still see the
            # same generic exception as before.
            schedule_logger(job_id).error(e)
            raise Exception('push job into queue failed')

        schedule_logger(job_id).info(
            'submit job successfully, job id is {}, model id is {}'.format(job.f_job_id, job_parameters['model_id']))
        board_url = BOARD_DASHBOARD_URL.format(job_id, job_initiator['role'], job_initiator['party_id'])
        logs_directory = get_job_log_directory(job_id)
        model_info = {'model_id': job_parameters['model_id'], 'model_version': job_parameters['model_version']}
        return (job_id, path_dict['job_dsl_path'], path_dict['job_runtime_conf_path'], logs_directory,
                model_info, board_url)